package org.apache.uima.ruta.engine;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;

/* loaded from: input_file:ruta-core-2.6.1.jar:org/apache/uima/ruta/engine/PlainTextAnnotator.class */
public class PlainTextAnnotator extends JCasAnnotator_ImplBase {
    public static final String TYPE_LINE = "org.apache.uima.ruta.type.Line";
    public static final String TYPE_WSLINE = "org.apache.uima.ruta.type.WSLine";
    public static final String TYPE_EMPTYLINE = "org.apache.uima.ruta.type.EmptyLine";
    public static final String TYPE_PARAGRAPH = "org.apache.uima.ruta.type.Paragraph";

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        CAS cas = jCas.getCas();
        String documentText = cas.getDocumentText();
        BufferedReader bufferedReader = new BufferedReader(new StringReader(documentText));
        Type type = cas.getTypeSystem().getType(TYPE_LINE);
        Type type2 = cas.getTypeSystem().getType(TYPE_WSLINE);
        Type type3 = cas.getTypeSystem().getType(TYPE_EMPTYLINE);
        Type type4 = cas.getTypeSystem().getType(TYPE_PARAGRAPH);
        int i = 0;
        int i2 = -1;
        int i3 = 0;
        boolean z = true;
        while (true) {
            try {
                String readLine = bufferedReader.readLine();
                if (readLine == null) {
                    return;
                }
                boolean isBlank = StringUtils.isBlank(readLine);
                if (!isBlank && StringUtils.isBlank(readLine.trim().replaceAll(" | |\ufeff| |\u180e", ""))) {
                    isBlank = true;
                }
                boolean isEmpty = StringUtils.isEmpty(readLine);
                int length = i + readLine.length();
                int i4 = 1;
                if (documentText.length() >= length + 2 && documentText.substring(length, length + 2).equals(IOUtils.LINE_SEPARATOR_WINDOWS)) {
                    i4 = 2;
                }
                if (z && !isBlank) {
                    i2 = i;
                }
                if (isBlank && isEmpty) {
                    cas.addFsToIndexes(cas.createAnnotation(type3, i, i + i4));
                } else if (isBlank && !isEmpty) {
                    cas.addFsToIndexes(cas.createAnnotation(type2, i, i + readLine.length()));
                } else if (!isEmpty) {
                    cas.addFsToIndexes(cas.createAnnotation(type, i, i + readLine.length()));
                    z = false;
                    i3 = i + readLine.length();
                }
                if (isBlank && !z && i3 != 0) {
                    cas.addFsToIndexes(cas.createAnnotation(type4, i2, i3));
                } else if (length + i4 == documentText.length()) {
                    cas.addFsToIndexes(cas.createAnnotation(type4, i2, length));
                } else if (length == documentText.length()) {
                    cas.addFsToIndexes(cas.createAnnotation(type4, i2, length));
                }
                if (isBlank) {
                    z = true;
                }
                i = i + readLine.length() + i4;
            } catch (IOException e) {
                throw new AnalysisEngineProcessException(e);
            }
        }
    }
}
