package org.apache.uima.ruta.textruler.core;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.ruta.ide.core.builder.RutaProjectUtils;
import org.apache.uima.ruta.textruler.TextRulerPlugin;
import org.apache.uima.ruta.textruler.core.TextRulerTarget;
import org.apache.uima.ruta.textruler.extension.TextRulerLearner;
import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
import org.apache.uima.ruta.textruler.learner.trabal.TrabalLearner;
import org.apache.uima.ruta.textruler.learner.whisk.generic.Whisk;
import org.apache.uima.ruta.textruler.preferences.TextRulerPreferences;
import org.apache.uima.util.FileUtils;
import org.eclipse.core.resources.ResourcesPlugin;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IPath;
import org.eclipse.core.runtime.Path;
import org.eclipse.jface.preference.IPreferenceStore;

/* loaded from: input_file:org/apache/uima/ruta/textruler/core/TextRulerBasicLearner.class */
public abstract class TextRulerBasicLearner implements TextRulerLearner, CasCacheLoader {
    /** Receiver of status updates and source of abort requests; may be {@code null} (all uses are null-checked). */
    protected TextRulerLearnerDelegate delegate;
    /** Analysis engine used to apply rule scripts; created on demand by {@code updateAE()}. */
    protected AnalysisEngine ae;
    /** Example documents of the current run; created in {@code run()} and cleared when it finishes. */
    protected TextRulerExampleDocumentSet exampleDocuments;
    /** Directory handed to {@link TextRulerExampleDocumentSet} as the source of example documents. */
    protected String inputDirectory;
    /** Directory in which the temporary rules files and {@code settings.txt} are written. */
    protected String tempDirectory;
    /** Path of the preprocessor file; used to resolve the AE descriptor, the package and the script import. */
    protected String preprocessorFile;
    /** Type names emitted in the generated FILTERTYPE command. */
    protected Set<String> filterSet;
    /** Copy of {@link #filterSet} extended in the constructor by the basic annotation type, the slot names and their boundary types. */
    protected Set<String> filterSetWithSlotNames;
    /** Type names of the slots to learn; checked against the type system before a run. */
    protected String[] slotNames;
    /** CAS cache created in the constructor; cleared and set to {@code null} at the end of {@code run()}. */
    protected CasCache casCache;
    /** Shared test CAS, allocated lazily by {@code getTestCAS()} and released at the end of {@code run()}. */
    protected CAS algTestCAS;
    /** When {@code false}, the generated script header also CALLs the preprocessor module. */
    private boolean skip;
    /** {@code true} when the filter set consists of exactly the four default Ruta types (space, break, nbsp, markup). */
    private boolean useDefaultFiltering;
    /** Threshold read from the preference store; rule testing on a document set stops once the n/p ratio exceeds it. */
    private double maxErrorRate;
    /** When set, dynamic anchoring is enabled on the AE and in the generated script header. */
    protected boolean useDynamicAnchoring = false;
    /** When set, the generated script header DECLAREs the boundary types of all slots. */
    protected boolean supportBoundaries = false;
    /** Statistics of already tested rules, keyed by rule string; cleared at the start of each run. */
    private Map<String, TextRulerStatisticsCollector> inducedRules = new TreeMap();

    /**
     * Creates a learner and derives its filtering configuration.
     *
     * <p>The extended filter set contains the given filter types, the basic annotation type, every
     * slot name and the left/right boundary type of every slot. Default filtering is assumed when
     * the filter set consists of exactly the four default Ruta types. The maximum error rate and the
     * CAS cache size are read from the plugin's preference store.
     *
     * @param str input directory containing the example documents
     * @param str2 path of the preprocessor file
     * @param str3 directory for temporary files
     * @param strArr type names of the slots to learn
     * @param set type names to filter; {@code null} is treated like an empty set
     * @param z if {@code true}, the generated script header does not CALL the preprocessor module
     * @param textRulerLearnerDelegate receiver of status updates, may be {@code null}
     */
    public TextRulerBasicLearner(String str, String str2, String str3, String[] strArr, Set<String> set, boolean z, TextRulerLearnerDelegate textRulerLearnerDelegate) {
        this.preprocessorFile = str2;
        this.tempDirectory = str3;
        this.slotNames = strArr;
        this.inputDirectory = str;
        this.skip = z;
        this.delegate = textRulerLearnerDelegate;
        this.filterSet = set;
        // getFilterCommandString() already copes with a null filter set, so do not fail on it here.
        this.filterSetWithSlotNames = set == null ? new HashSet<String>() : new HashSet<String>(set);
        this.filterSetWithSlotNames.add(TrabalLearner.ANNOTATION_TYPE_BASIC);
        for (String slotName : strArr) {
            this.filterSetWithSlotNames.add(slotName);
            this.filterSetWithSlotNames.add(TextRulerTarget.getSingleSlotTypeName(TextRulerTarget.MLTargetType.SINGLE_LEFT_BOUNDARY, slotName));
            this.filterSetWithSlotNames.add(TextRulerTarget.getSingleSlotTypeName(TextRulerTarget.MLTargetType.SINGLE_RIGHT_BOUNDARY, slotName));
        }
        // Default filtering means: exactly the four standard types, nothing more and nothing less.
        this.useDefaultFiltering = set != null
                && set.size() == 4
                && set.contains(TextRulerToolkit.RUTA_SPACE_TYPE_NAME)
                && set.contains(TextRulerToolkit.RUTA_BREAK_TYPE_NAME)
                && set.contains(TextRulerToolkit.RUTA_NBSP_TYPE_NAME)
                && set.contains(TextRulerToolkit.RUTA_MARKUP_TYPE_NAME);
        IPreferenceStore preferenceStore = TextRulerPlugin.getDefault().getPreferenceStore();
        this.maxErrorRate = preferenceStore.getInt(TextRulerPreferences.MAX_ERROR_RATE);
        this.casCache = new CasCache(preferenceStore.getInt(TextRulerPreferences.CAS_CACHE), this);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the configured temporary directory after passing it through
     * {@code TextRulerToolkit.addTrailingSlashToPath}.
     *
     * @return the temporary directory path as produced by the toolkit helper
     */
    public String tempDirectory() {
        String withTrailingSlash = TextRulerToolkit.addTrailingSlashToPath(this.tempDirectory);
        return withTrailingSlash;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Asks the delegate whether the current run should be aborted.
     *
     * @return the delegate's answer, or {@code false} when no delegate is set
     */
    public boolean shouldAbort() {
        return this.delegate != null && this.delegate.shouldAbort();
    }

    /**
     * Returns the analysis engine, building it via {@code updateAE()} on first use.
     *
     * @return the analysis engine; still {@code null} if {@code updateAE()} could not create one
     */
    @Override // org.apache.uima.ruta.textruler.extension.TextRulerLearner
    public AnalysisEngine getAnalysisEngine() {
        if (this.ae != null) {
            return this.ae;
        }
        updateAE();
        return this.ae;
    }

    /**
     * (Re)creates and configures the analysis engine for the preprocessor file.
     *
     * <p>The engine is pointed at the temporary rules file as its main script, script reloading is
     * switched on, and the dynamic-anchoring, low-memory and remove-basics options are applied.
     * Progress and failures are reported to the delegate. When the descriptor cannot be located or
     * loaded, the method returns early and leaves {@link #ae} untouched; when the engine itself
     * cannot be loaded, {@link #ae} is {@code null} afterwards.
     */
    private void updateAE() {
        IPath descriptorPath;
        try {
            descriptorPath = RutaProjectUtils.getAnalysisEngineDescriptorPath(this.preprocessorFile);
        } catch (CoreException e) {
            // Without a descriptor path there is nothing to load; continuing would only fail with a NullPointerException.
            TextRulerPlugin.error(e);
            sendStatusUpdateToDelegate("Failed to locate descriptor.", TextRulerLearner.TextRulerLearnerState.ML_INITIALIZING, false);
            return;
        }
        if (descriptorPath == null) {
            sendStatusUpdateToDelegate("Failed to locate descriptor.", TextRulerLearner.TextRulerLearnerState.ML_INITIALIZING, false);
            return;
        }
        String descriptorLocation = descriptorPath.toPortableString();
        sendStatusUpdateToDelegate("loading AE...", TextRulerLearner.TextRulerLearnerState.ML_INITIALIZING, false);
        AnalysisEngineDescription description = TextRulerToolkit.getAnalysisEngineDescription(descriptorLocation);
        if (description == null) {
            sendStatusUpdateToDelegate("Failed to load descriptor. Please rebuild the project.", TextRulerLearner.TextRulerLearnerState.ML_INITIALIZING, false);
            return;
        }
        TextRulerToolkit.addBoundaryTypes(description, this.slotNames);
        this.ae = TextRulerToolkit.loadAnalysisEngine(description);
        if (this.ae == null) {
            // Callers check this.ae for null; report the reason instead of failing on the first setter below.
            sendStatusUpdateToDelegate("Failed to load analysis engine.", TextRulerLearner.TextRulerLearnerState.ML_INITIALIZING, false);
            return;
        }
        // The engine always executes the temporary rules file written by the learner.
        Path rulesFile = new Path(getTempRulesFileName());
        this.ae.setConfigParameterValue("mainScript", rulesFile.removeFileExtension().lastSegment());
        this.ae.setConfigParameterValue("scriptPaths", new String[]{rulesFile.removeLastSegments(1).toPortableString()});
        this.ae.setConfigParameterValue("additionalScripts", new String[0]);
        this.ae.setConfigParameterValue("reloadScript", true);
        if (this.useDynamicAnchoring) {
            this.ae.setConfigParameterValue("dynamicAnchoring", true);
        }
        IPreferenceStore preferenceStore = TextRulerPlugin.getDefault().getPreferenceStore();
        boolean lowMemoryProfile = preferenceStore.getBoolean(TextRulerPreferences.LOW_MEMORY_PROFILE);
        boolean removeBasics = preferenceStore.getBoolean(TextRulerPreferences.REMOVE_BASICS);
        this.ae.setConfigParameterValue("lowMemoryProfile", Boolean.valueOf(lowMemoryProfile));
        this.ae.setConfigParameterValue("removeBasics", Boolean.valueOf(removeBasics));
        try {
            this.ae.reconfigure();
        } catch (ResourceConfigurationException e) {
            TextRulerPlugin.error(e);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Verifies that every slot type name is known to the type system of the test CAS.
     *
     * <p>When types are missing, an error status listing them (comma separated) is sent to the
     * delegate.
     *
     * @return {@code true} if all slot types exist; {@code false} if at least one is missing or no
     *         test CAS could be obtained
     */
    public boolean checkForMandatoryTypes() {
        CAS testCAS = getTestCAS();
        if (testCAS == null) {
            return false;
        }
        TypeSystem typeSystem = testCAS.getTypeSystem();
        List<String> missingTypes = new ArrayList<String>();
        for (String slotName : this.slotNames) {
            if (typeSystem.getType(slotName) == null) {
                missingTypes.add(slotName);
            }
        }
        if (missingTypes.isEmpty()) {
            return true;
        }
        // Join the names directly rather than seeding the text from an unrelated constant and
        // chopping off a trailing separator afterwards.
        String missingNames = StringUtils.join(missingTypes, ", ");
        sendStatusUpdateToDelegate("Error: Some Slot- or Helper-Types were not found in TypeSystem: " + missingNames, TextRulerLearner.TextRulerLearnerState.ML_ERROR, false);
        return false;
    }

    /**
     * Ensures that the temporary directory exists.
     *
     * <p>Missing parent directories are created as well, so a nested temporary path that does not
     * exist yet no longer makes the run fail.
     *
     * @return {@code true} if the directory exists afterwards; {@code false} if it could not be
     *         created (for example because a regular file occupies the path)
     */
    protected boolean createTempDirIfNeccessary() {
        File directory = new File(tempDirectory());
        if (directory.isDirectory()) {
            return true;
        }
        return directory.mkdirs();
    }

    /**
     * Executes one learning run.
     *
     * <p>Steps: create the temporary directory, (re)build the analysis engine, clear the rule
     * statistics cache, verify the slot types, load the example documents and invoke
     * {@link #doRun()} followed by {@link #cleanUp()}. Exceptions thrown by {@code doRun()} are
     * logged and reported to the delegate as an error status. The CAS cache, the example documents
     * and the test CAS are released in every case, including when an unexpected runtime exception
     * escapes. Finally an "Aborted!" status is sent if the delegate requested an abort.
     */
    @Override // org.apache.uima.ruta.textruler.extension.TextRulerLearner
    public void run() {
        if (!createTempDirIfNeccessary()) {
            sendStatusUpdateToDelegate("ERROR CREATING TEMPORARY DIRECTORY!", TextRulerLearner.TextRulerLearnerState.ML_ERROR, false);
            return;
        }
        updateAE();
        if (this.ae == null) {
            return;
        }
        this.inducedRules.clear();
        try {
            if (checkForMandatoryTypes()) {
                sendStatusUpdateToDelegate("Finding documents...", TextRulerLearner.TextRulerLearnerState.ML_INITIALIZING, false);
                this.exampleDocuments = new TextRulerExampleDocumentSet(this.inputDirectory, this.casCache);
                if (!shouldAbort()) {
                    sendStatusUpdateToDelegate("Starting...", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, true);
                    try {
                        doRun();
                    } catch (Exception e) {
                        TextRulerPlugin.error(e);
                        sendStatusUpdateToDelegate("Aborted due to exception!", TextRulerLearner.TextRulerLearnerState.ML_ERROR, true);
                    }
                    cleanUp();
                }
            }
        } finally {
            releaseRunResources();
        }
        if (shouldAbort()) {
            sendStatusUpdateToDelegate("Aborted!", TextRulerLearner.TextRulerLearnerState.ML_ABORTED, false);
        }
    }

    /** Releases the CAS cache, the example documents and the shared test CAS held for a run. */
    private void releaseRunResources() {
        // The cache is dropped after the first run, so guard against a repeated invocation.
        if (this.casCache != null) {
            this.casCache.clear();
            this.casCache = null;
        }
        this.exampleDocuments = null;
        if (this.algTestCAS != null) {
            this.algTestCAS.reset();
            GlobalCASSource.releaseCAS(this.algTestCAS);
            this.algTestCAS = null;
        }
    }

    /**
     * Loads a CAS for the cache by delegating to {@code TextRulerToolkit.readCASfromXMIFile} with
     * this learner's analysis engine.
     *
     * @param str the file argument forwarded to the toolkit
     * @param cas the CAS argument forwarded to the toolkit
     * @return whatever the toolkit call returns
     */
    @Override // org.apache.uima.ruta.textruler.core.CasCacheLoader
    public CAS loadCAS(String str, CAS cas) {
        CAS loaded = TextRulerToolkit.readCASfromXMIFile(str, this.ae, cas);
        return loaded;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Forwards a status update to the delegate; does nothing when no delegate is set.
     *
     * @param str status text
     * @param textRulerLearnerState learner state reported along with the text
     * @param z flag passed through unchanged as the last argument of
     *        {@code algorithmStatusUpdate}
     */
    public void sendStatusUpdateToDelegate(String str, TextRulerLearner.TextRulerLearnerState textRulerLearnerState, boolean z) {
        if (this.delegate == null) {
            return;
        }
        this.delegate.algorithmStatusUpdate(this, str, textRulerLearnerState, z);
    }

    /**
     * Algorithm-specific body of a run. Called by {@code run()} after the analysis engine is
     * available, the slot types were verified and the example document set was created; any
     * exception thrown here is caught, logged and reported by {@code run()}.
     */
    protected abstract void doRun();

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Hook called by {@code run()} right after {@code doRun()} returned or failed. The default
     * implementation does nothing.
     */
    public void cleanUp() {
    }

    /**
     * Returns the location of the temporary rules file, i.e. {@code rules.ruta} inside the
     * temporary directory.
     *
     * @return the temporary directory path followed by {@code rules.ruta}
     */
    public String getTempRulesFileName() {
        String directory = tempDirectory();
        return directory + "rules.ruta";
    }

    /**
     * Returns the location of the intermediate rules file, i.e. {@code intermediaterules.ruta}
     * inside the temporary directory.
     *
     * @return the temporary directory path followed by {@code intermediaterules.ruta}
     */
    public String getIntermediateRulesFileName() {
        String directory = tempDirectory();
        return directory + "intermediaterules.ruta";
    }

    /**
     * Compares the slot instances found in a test CAS with the positive examples of the original
     * document and records the outcome in the statistics collector.
     *
     * <p>An instance whose annotation matches a positive example is recorded as covered positive
     * (using the matching original example). Every other instance counts as covered negative:
     * either the instance itself is stored, marked as non-positive, or only the counter is
     * incremented.
     *
     * @param textRulerExampleDocument document providing the positive examples
     * @param cas test CAS to extract slot instances from
     * @param textRulerTarget target whose slot instances are extracted
     * @param textRulerStatisticsCollector collector receiving the results
     * @param z {@code true} to store negative instances, {@code false} to only count them
     */
    public void compareOriginalDocumentWithTestCAS(TextRulerExampleDocument textRulerExampleDocument, CAS cas, TextRulerTarget textRulerTarget, TextRulerStatisticsCollector textRulerStatisticsCollector, boolean z) {
        List<TextRulerExample> originalPositives = textRulerExampleDocument.getPositiveExamples();
        for (TextRulerExample candidate : textRulerExampleDocument.createSlotInstancesForCAS(cas, textRulerTarget, false)) {
            TextRulerExample matchingPositive = TextRulerToolkit.exampleListContainsAnnotation(originalPositives, candidate.getAnnotation());
            if (matchingPositive != null) {
                textRulerStatisticsCollector.addCoveredPositive(matchingPositive);
                continue;
            }
            if (!z) {
                textRulerStatisticsCollector.incCoveredNegatives(1);
                continue;
            }
            candidate.setPositive(false);
            textRulerStatisticsCollector.addCoveredNegative(candidate);
        }
    }

    /**
     * Tells rule testing how to record covered negatives: the result is passed to
     * {@code compareOriginalDocumentWithTestCAS}, where {@code true} stores each negative instance
     * and {@code false} only increments the negative counter.
     *
     * @return {@code true} to collect negative instances, {@code false} to only count them
     */
    public abstract boolean collectNegativeCoveredInstancesWhenTesting();

    /**
     * Tests a single rule on a single document using the shared test CAS.
     *
     * <p>The test CAS is filled from the document for the rule's target, the rule is applied, and
     * the CAS is reset afterwards.
     *
     * @param textRulerRule rule to test
     * @param textRulerExampleDocument document to test on
     * @param textRulerStatisticsCollector collector receiving the covering results
     */
    public void testRuleOnDocument(TextRulerRule textRulerRule, TextRulerExampleDocument textRulerExampleDocument, TextRulerStatisticsCollector textRulerStatisticsCollector) {
        CAS sharedTestCas = getTestCAS();
        textRulerExampleDocument.resetAndFillTestCAS(sharedTestCas, textRulerRule.getTarget());
        testRuleOnDocument(textRulerRule, textRulerExampleDocument, textRulerStatisticsCollector, sharedTestCas);
        sharedTestCas.reset();
    }

    /**
     * Applies a rule to an already prepared CAS and records the result.
     *
     * <p>The rule is written to the temporary rules file, the analysis engine processes the CAS,
     * and the outcome is compared with the original document. Any exception is logged and
     * otherwise ignored. The CAS is neither filled nor reset here.
     *
     * @param textRulerRule rule to test
     * @param textRulerExampleDocument document the CAS was filled from
     * @param textRulerStatisticsCollector collector receiving the covering results
     * @param cas prepared CAS to process
     */
    public void testRuleOnDocument(TextRulerRule textRulerRule, TextRulerExampleDocument textRulerExampleDocument, TextRulerStatisticsCollector textRulerStatisticsCollector, CAS cas) {
        try {
            String rulesFile = getTempRulesFileName();
            textRulerRule.saveToRulesFile(rulesFile);
            this.ae.process(cas);
            TextRulerTarget target = textRulerRule.getTarget();
            boolean collectNegatives = collectNegativeCoveredInstancesWhenTesting();
            compareOriginalDocumentWithTestCAS(textRulerExampleDocument, cas, target, textRulerStatisticsCollector, collectNegatives);
        } catch (Exception failure) {
            TextRulerPlugin.error(failure);
        }
    }

    /**
     * Tests one rule on the documents of a set and stores the accumulated statistics on the rule.
     *
     * <p>Documents are visited in the set's cache-optimized order. An abort request is checked
     * after each document; the statistics gathered up to that point are still assigned.
     *
     * @param textRulerRule rule to test
     * @param textRulerExampleDocumentSet documents to test on
     */
    public void testRuleOnDocumentSet(TextRulerRule textRulerRule, TextRulerExampleDocumentSet textRulerExampleDocumentSet) {
        TextRulerStatisticsCollector statistics = new TextRulerStatisticsCollector();
        TextRulerExampleDocument[] documents = textRulerExampleDocumentSet.getSortedDocumentsInCacheOptimizedOrder();
        for (int index = 0; index < documents.length; index++) {
            testRuleOnDocument(textRulerRule, documents[index], statistics);
            if (shouldAbort()) {
                break;
            }
        }
        textRulerRule.setCoveringStatistics(statistics);
    }

    /**
     * Writes a script to the temporary rules file and runs the analysis engine on the document.
     *
     * <p>Failures while writing the file or processing the CAS are logged; execution continues
     * and the test CAS is returned either way.
     *
     * @param str script text to write to the temporary rules file
     * @param textRulerExampleDocument document used to fill the test CAS
     * @param textRulerTarget target used when filling the test CAS
     * @return the shared test CAS after processing; the caller receives it without a reset
     */
    public CAS applyScriptOnDocument(String str, TextRulerExampleDocument textRulerExampleDocument, TextRulerTarget textRulerTarget) {
        try {
            File rulesFile = new File(getTempRulesFileName());
            FileUtils.saveString2File(str, rulesFile);
        } catch (IOException writeFailure) {
            TextRulerPlugin.error(writeFailure);
        }
        AnalysisEngine engine = getAnalysisEngine();
        CAS sharedTestCas = getTestCAS();
        textRulerExampleDocument.resetAndFillTestCAS(sharedTestCas, textRulerTarget);
        try {
            engine.process(sharedTestCas);
        } catch (AnalysisEngineProcessException processFailure) {
            TextRulerPlugin.error(processFailure);
        }
        return sharedTestCas;
    }

    /**
     * Tests several rules on a document set and assigns each rule its covering statistics.
     *
     * <p>Statistics are cached per rule string, so a rule string that was tested before during this
     * run reuses the stored result instead of being evaluated again. Testing of a rule stops early
     * once its negatives-per-positive ratio exceeds the configured maximum error rate. The target
     * of the first rule is used to fill the test CAS for all rules.
     *
     * <p>On an abort request the method returns immediately: the test CAS is not reset and rules
     * that were not served from the cache get no statistics assigned.
     *
     * @param list rules to test; nothing happens for an empty list
     * @param textRulerExampleDocumentSet documents to test on
     */
    public void testRulesOnDocumentSet(List<? extends TextRulerRule> list, TextRulerExampleDocumentSet textRulerExampleDocumentSet) {
        if (list.isEmpty()) {
            return;
        }
        TextRulerExampleDocument[] documents = textRulerExampleDocumentSet.getSortedDocumentsInCacheOptimizedOrder();
        TextRulerTarget target = list.get(0).getTarget();
        List<TextRulerStatisticsCollector> results = new ArrayList<TextRulerStatisticsCollector>(list.size());
        CAS testCAS = getTestCAS();
        for (TextRulerRule rule : list) {
            String ruleString = rule.getRuleString();
            System.out.println("testing: " + ruleString);
            if (this.inducedRules.containsKey(ruleString)) {
                TextRulerStatisticsCollector cached = this.inducedRules.get(ruleString);
                rule.setCoveringStatistics(cached);
                System.out.println("skipped with " + cached);
                // Keep the cached result so the final assignment below does not replace it with
                // an empty collector.
                results.add(cached);
                continue;
            }
            TextRulerStatisticsCollector collector = new TextRulerStatisticsCollector();
            for (TextRulerExampleDocument document : documents) {
                document.resetAndFillTestCAS(testCAS, target);
                testRuleOnDocument(rule, document, collector, testCAS);
                // Divide in floating point: with integral counters (presumably ints - TODO confirm)
                // an integer division would truncate the rate before the comparison.
                double errorRate = (double) collector.n / Math.max(collector.p, 1);
                if (errorRate > this.maxErrorRate) {
                    System.out.println("stopped:" + collector);
                    break;
                }
                if (shouldAbort()) {
                    return;
                }
            }
            this.inducedRules.put(ruleString, collector);
            results.add(collector);
        }
        testCAS.reset();
        for (int i = 0; i < list.size(); i++) {
            list.get(i).setCoveringStatistics(results.get(i));
        }
    }

    /**
     * Tests several rules on one document and assigns each rule its covering statistics.
     *
     * <p>The target of the first rule is used to fill the test CAS for every rule. On an abort
     * request the method returns immediately, without resetting the test CAS and without
     * assigning any statistics.
     *
     * @param list rules to test; nothing happens for an empty list
     * @param textRulerExampleDocument document to test on
     */
    public void testRulesOnDocument(List<? extends TextRulerRule> list, TextRulerExampleDocument textRulerExampleDocument) {
        if (list.isEmpty()) {
            return;
        }
        List<TextRulerStatisticsCollector> collectors = new ArrayList<TextRulerStatisticsCollector>();
        TextRulerTarget sharedTarget = list.get(0).getTarget();
        // One collector per rule, created up front.
        for (int created = 0; created < list.size(); created++) {
            collectors.add(new TextRulerStatisticsCollector());
        }
        CAS sharedTestCas = getTestCAS();
        int ruleIndex = 0;
        while (ruleIndex < list.size()) {
            TextRulerRule rule = list.get(ruleIndex);
            TextRulerStatisticsCollector collector = collectors.get(ruleIndex);
            textRulerExampleDocument.resetAndFillTestCAS(sharedTestCas, sharedTarget);
            testRuleOnDocument(rule, textRulerExampleDocument, collector, sharedTestCas);
            if (shouldAbort()) {
                return;
            }
            ruleIndex++;
        }
        sharedTestCas.reset();
        for (int assignIndex = 0; assignIndex < list.size(); assignIndex++) {
            list.get(assignIndex).setCoveringStatistics(collectors.get(assignIndex));
        }
    }

    /**
     * Builds the header of a generated rules file by concatenating, in this order: package
     * statement, script import, filter command, dynamic-anchoring statement and boundary
     * declarations.
     *
     * @param z passed on to the script-import, dynamic-anchoring and boundary-declaration parts,
     *        each of which contributes nothing when it is {@code false}
     * @return the header text
     */
    public String getFileHeaderString(boolean z) {
        StringBuilder header = new StringBuilder();
        header.append(getPackageString());
        header.append(getScriptImport(z));
        header.append(getFilterCommandString());
        header.append(getUseDynamicAnchoring(z));
        header.append(getBoundaryDeclarations(z));
        return header.toString();
    }

    /**
     * Builds a single DECLARE statement listing the left and right boundary type of every slot,
     * using the last dot-separated segment of each slot name.
     *
     * @param z whether declarations are requested at all
     * @return the DECLARE statement, or {@code Whisk.STANDARD_CONSIDERED_FEATURES} when
     *         declarations are not requested, boundaries are not supported or there are no slots
     */
    private String getBoundaryDeclarations(boolean z) {
        if (z && this.supportBoundaries && this.slotNames.length > 0) {
            StringBuilder declaration = new StringBuilder();
            declaration.append("DECLARE ");
            for (int slot = 0; slot < this.slotNames.length; slot++) {
                if (slot > 0) {
                    declaration.append(", ");
                }
                String[] segments = this.slotNames[slot].split("[.]");
                String shortName = segments[segments.length - 1];
                declaration.append(shortName);
                declaration.append(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION);
                declaration.append(", ");
                declaration.append(shortName);
                declaration.append(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION);
            }
            declaration.append(";\n");
            return declaration.toString();
        }
        return Whisk.STANDARD_CONSIDERED_FEATURES;
    }

    /**
     * Returns the script statement that switches dynamic anchoring on.
     *
     * @param z whether the statement is requested
     * @return the statement when dynamic anchoring is enabled and requested, otherwise
     *         {@code Whisk.STANDARD_CONSIDERED_FEATURES}
     */
    private String getUseDynamicAnchoring(boolean z) {
        if (this.useDynamicAnchoring && z) {
            return "Document{-> DYNAMICANCHORING(true)};\n";
        }
        return Whisk.STANDARD_CONSIDERED_FEATURES;
    }

    /**
     * Builds the SCRIPT import for the preprocessor file and, unless preprocessing is skipped, the
     * statement that CALLs the preprocessor module.
     *
     * @param z whether the import is requested
     * @return the import text, or {@code Whisk.STANDARD_CONSIDERED_FEATURES} when the import is not
     *         requested or the script's qualified name could not be determined
     */
    private String getScriptImport(boolean z) {
        if (!z) {
            return Whisk.STANDARD_CONSIDERED_FEATURES;
        }
        IPath scriptPath = Path.fromOSString(this.preprocessorFile);
        String scriptWithPackage = null;
        try {
            scriptWithPackage = RutaProjectUtils.getScriptWithPackage(scriptPath, ResourcesPlugin.getWorkspace().getRoot().getContainerForLocation(scriptPath.removeLastSegments(1)).getProject());
        } catch (CoreException e) {
            // Still fall back to "no import", but leave a trace of why it is missing.
            TextRulerPlugin.error(e);
        }
        if (scriptWithPackage == null) {
            return Whisk.STANDARD_CONSIDERED_FEATURES;
        }
        String scriptImport = "SCRIPT " + scriptWithPackage + ";\n";
        if (!this.skip) {
            // The module name is only needed for the CALL statement.
            String moduleName = RutaProjectUtils.getModuleName(scriptPath);
            scriptImport = scriptImport + "Document{-> CALL(" + moduleName + ")};\n";
        }
        return scriptImport;
    }

    /**
     * Builds the PACKAGE statement for generated scripts.
     *
     * <p>The package name is derived from the workspace container of the preprocessor file's
     * folder: its project-relative path without the first segment, with {@code /} replaced by
     * {@code .}.
     *
     * @return the PACKAGE statement followed by a blank line
     */
    public String getPackageString() {
        IPath projectRelativeFolder = ResourcesPlugin.getWorkspace().getRoot()
                .getContainerForLocation(Path.fromOSString(this.preprocessorFile).removeLastSegments(1))
                .getProjectRelativePath();
        String folderBelowFirstSegment = projectRelativeFolder.removeFirstSegments(1).toPortableString();
        String packageName = folderBelowFirstSegment.replaceAll("/", ".");
        return "PACKAGE " + packageName + ";\n\n";
    }

    /**
     * Builds the FILTERTYPE command listing the short names of all filter types.
     *
     * @return the command followed by a blank line, or {@code Whisk.STANDARD_CONSIDERED_FEATURES}
     *         when the filter set is {@code null} or empty or default filtering applies
     */
    public String getFilterCommandString() {
        boolean nothingToEmit = this.filterSet == null || this.filterSet.size() <= 0 || isDefaultFiltering();
        if (nothingToEmit) {
            return Whisk.STANDARD_CONSIDERED_FEATURES;
        }
        StringBuilder typeList = new StringBuilder(Whisk.STANDARD_CONSIDERED_FEATURES);
        for (String typeName : this.filterSet) {
            // Separate entries with a comma once the list has content.
            if (typeList.length() != 0) {
                typeList.append(", ");
            }
            typeList.append(TextRulerToolkit.getTypeShortName(typeName));
        }
        return "Document{->FILTERTYPE(" + typeList + ")};\n\n";
    }

    /**
     * @return the default-filtering flag computed in the constructor
     */
    private boolean isDefaultFiltering() {
        boolean defaultFiltering = this.useDefaultFiltering;
        return defaultFiltering;
    }

    /**
     * Returns the shared test CAS, allocating it from {@code GlobalCASSource} on first use.
     *
     * @return the test CAS, or {@code null} when allocation failed (the exception is logged)
     */
    @Override // org.apache.uima.ruta.textruler.extension.TextRulerLearner
    public CAS getTestCAS() {
        if (this.algTestCAS != null) {
            return this.algTestCAS;
        }
        try {
            this.algTestCAS = GlobalCASSource.allocCAS(this.ae);
        } catch (Exception allocationFailure) {
            TextRulerPlugin.error(allocationFailure);
            return null;
        }
        return this.algTestCAS;
    }

    /**
     * Appends the learner's settings to {@code settings.txt} in the temporary directory.
     *
     * <p>The text lists the input directory, the temporary directory and the preprocessor file,
     * followed by one {@code key = value} line per map entry. Nothing is written when the
     * temporary directory cannot be created.
     *
     * @param map additional parameters to record; {@code null} is treated like an empty map
     */
    protected void saveParametersToTempFolder(Map<String, Object> map) {
        // One check is enough; nothing between here and the write can remove the directory.
        if (!createTempDirIfNeccessary()) {
            return;
        }
        StringBuilder settings = new StringBuilder();
        settings.append("\nSettings:\n\ninputDir: ").append(this.inputDirectory);
        settings.append("\ntempDir: ").append(this.tempDirectory);
        settings.append("\npreprocessTMFile: ").append(this.preprocessorFile);
        settings.append("\n");
        if (map != null) {
            for (Map.Entry<String, Object> entry : map.entrySet()) {
                settings.append(entry.getKey()).append(" = ").append(entry.getValue()).append("\n");
            }
        }
        TextRulerToolkit.appendStringToFile(tempDirectory() + "settings.txt", settings.toString());
    }

    /**
     * @return the filter set passed to the constructor (the same instance, not a copy)
     */
    public Set<String> getFilterSet() {
        Set<String> configuredFilterSet = this.filterSet;
        return configuredFilterSet;
    }
}
