package com.acrolinx.client.oXygen.extraction.text;

import com.acrolinx.client.oXygen.extraction.CheckRangeFactory;
import com.acrolinx.client.oXygen.extraction.RangeInOxygenDocument;
import com.acrolinx.client.oXygen.extraction.text.SimpleOxygenDOMNode;
import com.acrolinx.client.oXygen.sdkextensions.OxygenClientExtensionProvider;
import com.acrolinx.javasdk.api.extraction.Authors;
import com.acrolinx.javasdk.api.extraction.DocumentsFactory;
import com.acrolinx.javasdk.api.extraction.FileFormats;
import com.acrolinx.javasdk.api.extraction.FileName;
import com.acrolinx.javasdk.api.extraction.Filenames;
import com.acrolinx.javasdk.api.extraction.TypedCheckInformation;
import com.acrolinx.javasdk.api.extraction.documents.block.Block;
import com.acrolinx.javasdk.api.extraction.documents.block.BlockDocument;
import com.acrolinx.javasdk.api.extraction.documents.block.BlockDocumentBuilder;
import com.acrolinx.javasdk.api.factory.AcrolinxFactory;
import com.acrolinx.javasdk.api.factory.AcrolinxFactoryInstantiator;
import com.acrolinx.javasdk.api.validation.Preconditions;
import com.acrolinx.javasdk.core.extraction.AbstractSimpleDOMNode;
import com.acrolinx.javasdk.core.extraction.DOMGlobalSettings;
import com.acrolinx.javasdk.core.extraction.DOMNodeWrapper;
import com.acrolinx.javasdk.core.extraction.DocumentTypeIdentifier;
import com.acrolinx.javasdk.core.extraction.FilterExtensionSettings;
import com.acrolinx.javasdk.core.extraction.SegmentationExtensionSettings;
import com.acrolinx.javasdk.core.extraction.Tag;
import com.acrolinx.javasdk.core.extraction.XMLDocumentExtractor;
import com.acrolinx.javasdk.gui.checking.CheckCapabilities;
import com.acrolinx.javasdk.gui.sessions.impl.ExtractionScope;
import com.acrolinx.javasdk.gui.sessions.impl.Extractor;
import com.acrolinx.javasdk.gui.sessions.impl.SelectionScope;
import com.acrolinx.javasdk.gui.settings.client.ClientSettings;
import com.acrolinx.util.extraction.segmentation.csd.SegmentationSettings;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.JTextArea;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.logging.Log;
import ro.sync.contentcompletion.xml.CIElement;
import ro.sync.exml.workspace.api.PluginWorkspace;
import ro.sync.exml.workspace.api.editor.WSEditor;
import ro.sync.exml.workspace.api.editor.page.text.WSTextEditorPage;
import ro.sync.exml.workspace.api.editor.page.text.WSTextXMLSchemaManager;

/* loaded from: input_file:Acrolinx oXygen plugin/lib/oxygen-standalone-3.5.1.160.jar:com/acrolinx/client/oXygen/extraction/text/TextDocumentExtractor.class */
/**
 * {@link Extractor} for an oXygen text editor page.
 *
 * <p>Reads the XML text of the page, turns it into a {@link BlockDocument} whose blocks carry
 * {@link RangeInOxygenDocument} positions, and additionally reports the tags and the document
 * type of the edited document (see {@link #provideTags()} and {@link #getDocumentType()}).
 *
 * <p>Instances are created through {@link #create}.
 */
public class TextDocumentExtractor implements Extractor<BlockDocument<RangeInOxygenDocument>> {
    /**
     * DOCTYPE declaration with a PUBLIC identifier. Group 2 is the public id, group 3 the system id.
     * Compiled once instead of on every call.
     */
    private static final Pattern DOCTYPE_PUBLIC_PATTERN = Pattern.compile("<!\\s*DOCTYPE\\s+(\\S+)\\s+PUBLIC\\s+\"([^\"]*)\"\\s*\"([^\"]*)\"", Pattern.MULTILINE);

    /**
     * DOCTYPE declaration with only a SYSTEM identifier. Group 2 is the system id.
     * Compiled once instead of on every call.
     */
    private static final Pattern DOCTYPE_SYSTEM_PATTERN = Pattern.compile("<!\\s*DOCTYPE\\s+(\\S+)\\s+SYSTEM\\s+\"([^\"]*)\".", Pattern.MULTILINE);

    private final Log log = AcrolinxFactoryInstantiator.getLoggerFactory().getLogger(TextDocumentExtractor.class);
    private final OxygenClientExtensionProvider oxygenClientExtensionProvider;
    private final WSTextEditorPage textEditorPage;
    private final PluginWorkspace workspace;
    private final WSEditor editorAccess;
    private final CheckRangeFactory checkRangeFactory;
    private final AcrolinxFactory acrolinxFactory;

    /**
     * Stores the collaborators after verifying that none of them is null.
     *
     * @param wSTextEditorPage text page whose content is extracted
     * @param wSEditor editor used to determine the file name
     * @param checkRangeFactory factory used to compute the selection scope
     * @param oxygenClientExtensionProvider source of the segmentation and filter extensions
     * @param pluginWorkspace workspace providing the entity resolver used for XML parsing
     * @param acrolinxFactory factory used to obtain the documents factory while reading the document
     */
    private TextDocumentExtractor(WSTextEditorPage wSTextEditorPage, WSEditor wSEditor, CheckRangeFactory checkRangeFactory, OxygenClientExtensionProvider oxygenClientExtensionProvider, PluginWorkspace pluginWorkspace, AcrolinxFactory acrolinxFactory) {
        Preconditions.checkNotNull(oxygenClientExtensionProvider, "oxygenClientExtensionProvider should not be null");
        Preconditions.checkNotNull(wSTextEditorPage, "textEditorPage should not be null");
        Preconditions.checkNotNull(wSEditor, "editorAccess should not be null");
        Preconditions.checkNotNull(pluginWorkspace, "workspace should not be null");
        Preconditions.checkNotNull(checkRangeFactory, "checkRangeFactory should not be null");
        Preconditions.checkNotNull(acrolinxFactory, "acrolinxFactory should not be null");
        this.checkRangeFactory = checkRangeFactory;
        this.oxygenClientExtensionProvider = oxygenClientExtensionProvider;
        this.textEditorPage = wSTextEditorPage;
        this.workspace = pluginWorkspace;
        this.editorAccess = wSEditor;
        this.acrolinxFactory = acrolinxFactory;
    }

    /**
     * Creates an extractor for the given text page. All arguments must be non-null.
     *
     * @param wSTextEditorPage text page whose content is extracted
     * @param wSEditor editor used to determine the file name
     * @param checkRangeFactory factory used to compute the selection scope
     * @param oxygenClientExtensionProvider source of the segmentation and filter extensions
     * @param pluginWorkspace workspace providing the entity resolver used for XML parsing
     * @param acrolinxFactory factory used to obtain the documents factory while reading the document
     * @return a new extractor bound to the given page
     */
    public static TextDocumentExtractor create(WSTextEditorPage wSTextEditorPage, WSEditor wSEditor, CheckRangeFactory checkRangeFactory, OxygenClientExtensionProvider oxygenClientExtensionProvider, PluginWorkspace pluginWorkspace, AcrolinxFactory acrolinxFactory) {
        Preconditions.checkNotNull(acrolinxFactory, "acrolinxFactory should not be null");
        return new TextDocumentExtractor(wSTextEditorPage, wSEditor, checkRangeFactory, oxygenClientExtensionProvider, pluginWorkspace, acrolinxFactory);
    }

    /**
     * Returns the tags known for the edited document.
     *
     * <p>If the page has a schema manager that reports global elements, one tag per global element
     * name is returned. Otherwise the tags are collected by parsing the document text itself.
     *
     * @return the set of tags, taken from the schema or from the document text
     */
    @Override // com.acrolinx.javasdk.gui.extensions.segmentation.TagListProvider
    public Set<Tag> provideTags() {
        WSTextXMLSchemaManager xMLSchemaManager = this.textEditorPage.getXMLSchemaManager();
        // Query the schema only once; a null list is treated like an empty one (defensive guard).
        List<?> globalElements = xMLSchemaManager == null ? null : xMLSchemaManager.getGlobalElements();
        if (globalElements == null || globalElements.isEmpty()) {
            return getTagsFromXmlDocument();
        }
        Set<Tag> tags = Sets.newHashSet();
        for (Object globalElement : globalElements) {
            tags.add(new Tag(((CIElement) globalElement).getName()));
        }
        return tags;
    }

    /**
     * Collects the tags by parsing the current document text with a tag-collecting handler.
     */
    private Set<Tag> getTagsFromXmlDocument() {
        TagCollectingXmlReaderContentHandler tagCollector = new TagCollectingXmlReaderContentHandler();
        parseXml(getTextContent(), tagCollector);
        return tagCollector.getCollectedTags();
    }

    /**
     * Builds a document type identifier from the root tag of the document and enriches it with the
     * PUBLIC and SYSTEM ids found in the given text.
     *
     * <p>A SYSTEM-only declaration, if found, overrides the system id taken from a PUBLIC declaration.
     *
     * @param str text that is searched for a DOCTYPE declaration
     * @return the identifier, with SYSTEM_ID and/or PUBLIC_ID details added when they were found
     */
    DocumentTypeIdentifier getDocumentTypeIdentifierFromDoctypeDefinition(String str) {
        DocumentTypeIdentifier identifier = getFirstNodeInDocumentType();
        String publicId = null;
        String systemId = null;
        // After a successful find() both groups exist in the pattern, so group(..) cannot throw.
        Matcher publicMatcher = DOCTYPE_PUBLIC_PATTERN.matcher(str);
        if (publicMatcher.find()) {
            publicId = publicMatcher.group(2);
            systemId = publicMatcher.group(3);
        }
        Matcher systemMatcher = DOCTYPE_SYSTEM_PATTERN.matcher(str);
        if (systemMatcher.find()) {
            systemId = systemMatcher.group(2);
        }
        if (systemId != null) {
            identifier.withDetail(DocumentTypeIdentifier.Detail.Type.SYSTEM_ID, systemId);
        }
        if (publicId != null) {
            identifier.withDetail(DocumentTypeIdentifier.Detail.Type.PUBLIC_ID, publicId);
        }
        return identifier;
    }

    /**
     * Determines the document type of the edited document.
     *
     * <p>The identifier is based on the root tag, enriched with DOCTYPE ids when a
     * {@code <!DOCTYPE} declaration is found outside of XML comments, and with a SCHEMA_ID detail
     * when the schema manager reports grammar URLs.
     *
     * @return the document type identifier of the current document text
     */
    @Override // com.acrolinx.javasdk.gui.extensions.segmentation.TagListProvider
    public DocumentTypeIdentifier getDocumentType() {
        String textContent = getTextContent();
        int doctypeOffset = new XMLCommentSkipper(textContent, 0).moveToNextOccurenceOf("<!DOCTYPE", 0);
        DocumentTypeIdentifier identifier;
        if (doctypeOffset >= 0) {
            identifier = getDocumentTypeIdentifierFromDoctypeDefinition(textContent.substring(doctypeOffset));
        } else {
            identifier = getFirstNodeInDocumentType();
        }
        WSTextXMLSchemaManager xMLSchemaManager = this.textEditorPage.getXMLSchemaManager();
        if (xMLSchemaManager == null) {
            return identifier;
        }
        URL[] grammarURLs = xMLSchemaManager.getGrammarURLs();
        if (grammarURLs == null) {
            // NOTE(review): this drops any PUBLIC/SYSTEM ids collected above and returns a bare
            // root-tag identifier, unlike the null-schema-manager path. Kept as-is because changing
            // it would alter document type matching - confirm whether this is intended.
            return getFirstNodeInDocumentType();
        }
        identifier.withDetail(DocumentTypeIdentifier.Detail.Type.SCHEMA_ID, getSchemaIdList(grammarURLs));
        return identifier;
    }

    /**
     * Creates an identifier from the root tag of the current document text, resolved with the
     * workspace's entity resolver.
     */
    private DocumentTypeIdentifier getFirstNodeInDocumentType() {
        return new DocumentTypeIdentifier(SimpleXMLEventReader.getRootTag(getTextContent(), this.workspace.getXMLUtilAccess().getEntityResolver()));
    }

    /**
     * Joins the file names of the given grammar URLs into one sorted, separator-delimited string.
     * URLs whose file name is null or empty are skipped.
     *
     * @param urlArr grammar URLs, must not be null
     * @return the sorted file names joined with {@code SegmentationSettings.PAIRS_SEPARATOR}
     */
    private static String getSchemaIdList(URL[] urlArr) {
        List<String> schemaFileNames = Lists.newArrayList();
        for (URL url : urlArr) {
            String name = new File(url.getFile()).getName();
            if (!Strings.isNullOrEmpty(name)) {
                schemaFileNames.add(name);
            }
        }
        // Sorting makes the resulting id independent of the order in which the URLs are reported.
        Collections.sort(schemaFileNames);
        return Joiner.on(SegmentationSettings.PAIRS_SEPARATOR).join(schemaFileNames);
    }

    /**
     * Creates the DOM settings from the segmentation and filter settings that the client settings
     * hold for the current document type.
     */
    private DOMGlobalSettings createDOMGlobalSettings(ClientSettings clientSettings) {
        SegmentationExtensionSettings segmentationExtensionSettings = this.oxygenClientExtensionProvider.getSegmentationExtension().fromExtensionSettings(clientSettings.getExtensionSettings());
        FilterExtensionSettings filterExtensionSettings = this.oxygenClientExtensionProvider.getFilterExtension().fromExtensionSettings(clientSettings.getExtensionSettings());
        DocumentTypeIdentifier documentType = getDocumentType();
        return new DOMGlobalSettings(segmentationExtensionSettings.getSegmentationSettings(documentType), filterExtensionSettings.getFilterSettings(documentType));
    }

    /**
     * Looks up the segmentation and filter settings for the current document type again and feeds
     * them into the given settings object.
     *
     * @param dOMGlobalSettings settings object to update
     * @param clientSettings client settings holding the extension settings
     * @return the same {@code dOMGlobalSettings} instance that was passed in
     */
    private DOMGlobalSettings rereadDOMGlobalSettings(DOMGlobalSettings dOMGlobalSettings, ClientSettings clientSettings) {
        SegmentationExtensionSettings segmentationExtensionSettings = this.oxygenClientExtensionProvider.getSegmentationExtension().fromExtensionSettings(clientSettings.getExtensionSettings());
        FilterExtensionSettings filterExtensionSettings = this.oxygenClientExtensionProvider.getFilterExtension().fromExtensionSettings(clientSettings.getExtensionSettings());
        DocumentTypeIdentifier documentType = getDocumentType();
        SegmentationExtensionSettings.DocumentTypeSegmentationSettings segmentationSettings = segmentationExtensionSettings.getSegmentationSettings(documentType);
        FilterExtensionSettings.DocumentTypeFilterSettings filterSettings = filterExtensionSettings.getFilterSettings(documentType);
        // Guarded so the message strings are only built when debug logging is enabled.
        if (this.log.isDebugEnabled()) {
            this.log.debug("Extract with segmentation settings: " + segmentationExtensionSettings);
            this.log.debug("Extract with filter settings: " + filterExtensionSettings);
            this.log.debug("Document type is: " + documentType);
        }
        dOMGlobalSettings.readSettings(segmentationSettings, filterSettings);
        return dOMGlobalSettings;
    }

    /**
     * Reads the whole document into a DOM node tree. Currently delegates to
     * {@link #readWholeDocumentWithXMLReader}.
     */
    DOMNodeWrapper<RangeInOxygenDocument> readWholeDocument(DOMGlobalSettings dOMGlobalSettings, DocumentsFactory documentsFactory) {
        return readWholeDocumentWithXMLReader(dOMGlobalSettings, documentsFactory);
    }

    /**
     * Parses the current document text and builds a node tree below a synthetic {@code #document}
     * root element.
     *
     * @param dOMGlobalSettings settings handed to the node factory
     * @param documentsFactory documents factory handed to the node factory
     * @return the finished tree
     */
    DOMNodeWrapper<RangeInOxygenDocument> readWholeDocumentWithXMLReader(DOMGlobalSettings dOMGlobalSettings, DocumentsFactory documentsFactory) {
        SimpleOxygenDOMNode.Factory nodeFactory = new SimpleOxygenDOMNode.Factory(dOMGlobalSettings, documentsFactory);
        AbstractSimpleDOMNode.Chain<RangeInOxygenDocument> documentRoot = nodeFactory.element("#document", Collections.emptyMap());
        String textContent = getTextContent();
        parseXml(textContent, SimpleXMLReaderContentHandler.create(nodeFactory, documentRoot, textContent, TextNodeHandler.getEntities(this.textEditorPage.getXMLSchemaManager())));
        return documentRoot.endOfTree();
    }

    /**
     * Parses the given XML text, reporting events to the given handler.
     *
     * <p>An {@link XMLStreamException} is logged at debug level and not rethrown, so the handler
     * may have received only the events up to the point of failure.
     *
     * @param str XML text to parse
     * @param xmlReaderContentHandler handler receiving the parse events
     */
    protected void parseXml(String str, XmlReaderContentHandler xmlReaderContentHandler) {
        try {
            SimpleXMLEventReader.create(str, xmlReaderContentHandler, this.workspace.getXMLUtilAccess().getEntityResolver()).parse();
        } catch (XMLStreamException e) {
            // Deliberately best-effort: a document being edited is not necessarily well-formed.
            this.log.debug("Could not completely parse xml-text.", e);
        }
    }

    /**
     * Returns the current text of the editor page.
     *
     * <p>The page's text component is cast to {@link JTextArea}; any other component type fails
     * with a {@link ClassCastException}.
     */
    protected String getTextContent() {
        return ((JTextArea) this.textEditorPage.getTextComponent()).getText();
    }

    /**
     * Extracts the document (or the blocks touching the current selection) into a block document
     * and wraps it into check information.
     *
     * <p>For {@link ExtractionScope#WHOLE} all extracted blocks are used and the check is flagged
     * as complete. For any other scope only blocks overlapping the editor selection are used.
     *
     * <p>Raw SDK types are used here because the generic signatures of the SDK builder and
     * extractor are not visible in this file.
     *
     * @param acrolinxFactory factory used to build the block document and the check information
     * @param extractionScope scope to extract
     * @param clientSettings settings providing the extension settings and the user name
     * @param checkCapabilities capabilities deciding between hard exclusion and soft skipping
     * @return check information wrapping the built block document
     */
    @Override // com.acrolinx.javasdk.gui.sessions.impl.Extractor
    @SuppressWarnings({"rawtypes", "unchecked"})
    public TypedCheckInformation<BlockDocument<RangeInOxygenDocument>> extract(AcrolinxFactory acrolinxFactory, ExtractionScope extractionScope, ClientSettings clientSettings, CheckCapabilities checkCapabilities) {
        Preconditions.checkNotNull(clientSettings, "clientSettings should not be null");
        Preconditions.checkNotNull(extractionScope, "extractionScope should not be null");
        Preconditions.checkNotNull(acrolinxFactory, "factory should not be null");
        // Fail fast like the other arguments instead of failing late while configuring the builder.
        Preconditions.checkNotNull(checkCapabilities, "checkCapabilities should not be null");
        DOMGlobalSettings domGlobalSettings = createDOMGlobalSettings(clientSettings);
        // NOTE(review): reading uses the factory passed to create(), building uses the parameter.
        DOMNodeWrapper<RangeInOxygenDocument> documentRoot = readWholeDocument(domGlobalSettings, this.acrolinxFactory.documents());
        // Statement order (read, then re-read settings, then extract) is kept from the original.
        rereadDOMGlobalSettings(domGlobalSettings, clientSettings);
        boolean wholeDocument = extractionScope == ExtractionScope.WHOLE;
        List allBlocks = XMLDocumentExtractor.extract(documentRoot, this.acrolinxFactory.documents());
        List blocksToCheck = wholeDocument ? allBlocks : selectBlocksOverlappingSelection(allBlocks);
        BlockDocumentBuilder createBlockDocumentBuilder = acrolinxFactory.documents().createBlockDocumentBuilder();
        createBlockDocumentBuilder.withFileFormat(FileFormats.applicationXml());
        createBlockDocumentBuilder.withAuthor(Authors.newAuthor(clientSettings.getConnectionSettings().getUser().getUsername()));
        createBlockDocumentBuilder.withFileName(getFileName());
        createBlockDocumentBuilder.add(blocksToCheck);
        createBlockDocumentBuilder.withSoftSkipping(!checkCapabilities.isUseHardExclusion());
        BlockDocument build = createBlockDocumentBuilder.build();
        if (this.log.isDebugEnabled()) {
            this.log.debug("Extracted Text:\n" + build.getText());
            this.log.debug("Extracted contexts:\n" + build.getContextInformation());
        }
        return acrolinxFactory.checkInformations().create(build).withCompleteCheck(wholeDocument).build();
    }

    /**
     * Returns the blocks whose range overlaps the current editor selection.
     *
     * <p>The selection bounds are read once, so all blocks are compared against the same values.
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private List selectBlocksOverlappingSelection(List allBlocks) {
        int selectionStart = this.textEditorPage.getSelectionStart();
        int selectionEnd = this.textEditorPage.getSelectionEnd();
        List selectedBlocks = Lists.newArrayList();
        for (Object candidate : allBlocks) {
            Block block = (Block) candidate;
            RangeInOxygenDocument range = (RangeInOxygenDocument) block.getPositionInDocument();
            if (range.getBegin() < selectionEnd && range.getEnd() > selectionStart) {
                selectedBlocks.add(block);
            }
        }
        return selectedBlocks;
    }

    /**
     * Returns the selection scope of a check range created for the text page with
     * {@link ExtractionScope#SELECTION}.
     */
    @Override // com.acrolinx.javasdk.gui.sessions.impl.Extractor
    public SelectionScope getSelectionScope() {
        return this.checkRangeFactory.create(this.textEditorPage, ExtractionScope.SELECTION).getSelectionScope();
    }

    /**
     * Returns the file name derived from the editor location.
     */
    @Override // com.acrolinx.javasdk.gui.sessions.impl.Extractor
    public FileName getFileName() {
        return Filenames.filenameFrom(this.editorAccess.getEditorLocation());
    }
}
