package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.international.french.FrenchMorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.process.SerializableFunction;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.international.french.DybroFrenchHeadFinder;
import edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory;
import edu.stanford.nlp.trees.international.french.FrenchTreebankLanguagePack;
import edu.stanford.nlp.trees.international.french.FrenchXMLTreeReaderFactory;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexParseException;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import jregex.WildcardPattern;
import org.apache.commons.io.IOUtils;
import weka.gui.GenericObjectEditorHistory;
import weka.gui.arffviewer.ArffViewerMainPanel;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/FrenchTreebankParserParams.class */
public class FrenchTreebankParserParams extends AbstractTreebankParserParams {
    /** Serialization version identifier for this class. */
    private static final long serialVersionUID = -6976724734594763986L;
    /** Text buffer describing this configuration; seeded with the class name in the constructor. */
    private final StringBuilder optionsString;
    /** Head finder, created on first call to {@link #headFinder()} unless replaced via setHeadFinder. */
    private HeadFinder headFinder;
    /** Every known annotation, keyed by its option name (e.g. "-markInf"): a tregex pattern plus the function that yields the suffix. */
    private final Map<String, Pair<TregexPattern, Function<TregexMatcher, String>>> annotationPatterns;
    /** The annotations that transformTree actually applies. */
    private final List<Pair<TregexPattern, Function<TregexMatcher, String>>> activeAnnotations;
    /** Selects the tree reader factory: FrenchTreeReaderFactory when true, FrenchXMLTreeReaderFactory otherwise. */
    private boolean readPennFormat;
    /** Passed to the TreeCollinizer built by collinizerEvalb(). */
    private boolean collinizerRetainsPunctuation;
    /** Counts keyed by (node value, POS sequence), filled by loadMWMap and read by AddPOSSequenceFunction; null until loaded. */
    private TwoDimensionalCounter<String, String> mwCounter;
    /** When non-null, lex() builds a FactoredLexicon with this specification instead of a BaseLexicon. */
    private MorphoFeatureSpecification morphoSpec;
    /** When non-null, transformTree rewrites preterminal tags from the word's morphological analysis. */
    private MorphoFeatureSpecification tagSpec;
    /** Option names of the annotations activated first by setHeadFinder. */
    private final List<String> baselineFeatures;
    /** Option names of further annotations activated after the baseline ones by setHeadFinder. */
    private final List<String> additionalFeatures;
    /** Compiler-generated flag backing {@code assert} statements (decompiler artifact). */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/FrenchTreebankParserParams$AddPOSSequenceFunction.class */
    /**
     * Annotation function that tags a matched node with the sequence of its
     * children's POS tags, provided that sequence is frequent enough in the
     * enclosing instance's multi-word counter.
     */
    public class AddPOSSequenceFunction implements SerializableFunction<TregexMatcher, String> {
        private final String annotationMark;
        private final boolean doBasicCat;
        private final double cutoff;
        private static final long serialVersionUID = 1;

        /**
         * @param str prefix placed in front of the generated annotation
         * @param i   count threshold; the annotation is produced only when the
         *            counter value is strictly greater than this
         * @param z   whether child tags are reduced to their basic category first
         */
        public AddPOSSequenceFunction(String str, int i, boolean z) {
            this.annotationMark = str;
            this.doBasicCat = z;
            this.cutoff = i;
        }

        /**
         * Builds the space-separated tag sequence of the matched node's children and
         * looks it up in the counter under the matched node's value.
         *
         * @param tregexMatcher matcher positioned on the node to annotate
         * @return the annotation mark followed by the lower-cased, whitespace-free
         *         tag sequence, or the empty string when the count does not exceed the cutoff
         * @throws RuntimeException if no counter has been loaded, or if any child of
         *         the matched node is not a preterminal
         */
        @Override // edu.stanford.nlp.util.Function
        public String apply(TregexMatcher tregexMatcher) {
            if (FrenchTreebankParserParams.this.mwCounter == null) {
                throw new RuntimeException("Cannot enable POSSequence features without POS sequence map. Use option -frenchMWMap.");
            }
            Tree match = tregexMatcher.getMatch();
            StringBuilder sb = new StringBuilder();
            for (Tree tree : match.children()) {
                if (!tree.isPreTerminal()) {
                    throw new RuntimeException("Not POS sequence for tree: " + match.toString());
                }
                String tag = this.doBasicCat ? FrenchTreebankParserParams.this.tlp.basicCategory(tree.value()) : tree.value();
                sb.append(tag).append(" ");
            }
            String posSequence = sb.toString();
            if (FrenchTreebankParserParams.this.mwCounter.getCount(match.value(), posSequence.trim()) > this.cutoff) {
                // Locale.ROOT keeps the generated state name identical on every machine;
                // the default-locale overload maps "I" to a dotless i under Turkish/Azeri locales.
                return this.annotationMark + posSequence.replaceAll("\\s+", "").toLowerCase(java.util.Locale.ROOT);
            }
            return "";
        }

        public String toString() {
            return "AddPOSSequenceFunction[" + this.annotationMark + ',' + this.cutoff + ',' + this.doBasicCat + ']';
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/FrenchTreebankParserParams$AddRelativeNodeFunction.class */
    /**
     * Annotation function that appends the label value of one or two named
     * nodes of a tregex match, each preceded by the annotation mark.
     */
    public class AddRelativeNodeFunction implements SerializableFunction<TregexMatcher, String> {
        private String annotationMark;
        private String key;
        private String key2;
        private boolean doBasicCat;
        private boolean toLower;
        private static final long serialVersionUID = 1;

        /**
         * @param str  annotation mark placed before each node value
         * @param str2 name of the tregex node whose label is used
         * @param z    whether the label is reduced to its basic category
         */
        public AddRelativeNodeFunction(String str, String str2, boolean z) {
            this.annotationMark = str;
            this.key = str2;
            this.key2 = null;
            this.doBasicCat = z;
        }

        /**
         * Two-node variant. The first parameter is not used by the body; it is kept
         * because existing call sites pass the enclosing instance explicitly.
         *
         * @param str3 name of the second tregex node whose label is appended
         */
        public AddRelativeNodeFunction(FrenchTreebankParserParams frenchTreebankParserParams, String str, String str2, String str3, boolean z) {
            this(str, str2, z);
            this.key2 = str3;
        }

        /**
         * Single-node variant with optional lower-casing. The first parameter is not
         * used by the body; it is kept because existing call sites pass the enclosing
         * instance explicitly.
         *
         * @param z2 whether the finished annotation is lower-cased
         */
        public AddRelativeNodeFunction(FrenchTreebankParserParams frenchTreebankParserParams, String str, String str2, boolean z, boolean z2) {
            this(str, str2, z);
            this.toLower = z2;
        }

        /**
         * @param tregexMatcher matcher that has bound the configured node name(s)
         * @return the mark plus the first node's value, followed by the mark plus the
         *         second node's value when a second key is configured; lower-cased if requested
         */
        @Override // edu.stanford.nlp.util.Function
        public String apply(TregexMatcher tregexMatcher) {
            String annotation = this.annotationMark + nodeCategory(tregexMatcher, this.key);
            if (this.key2 != null) {
                annotation = annotation + this.annotationMark + nodeCategory(tregexMatcher, this.key2);
            }
            // Locale.ROOT keeps the annotation independent of the JVM's default locale.
            return this.toLower ? annotation.toLowerCase(java.util.Locale.ROOT) : annotation;
        }

        /** Returns the label value of the named node, reduced to its basic category when configured. */
        private String nodeCategory(TregexMatcher tregexMatcher, String nodeName) {
            String value = tregexMatcher.getNode(nodeName).label().value();
            return this.doBasicCat ? FrenchTreebankParserParams.this.tlp.basicCategory(value) : value;
        }

        public String toString() {
            return this.key2 == null ? "AddRelativeNodeFunction[" + this.annotationMark + ',' + this.key + ']' : "AddRelativeNodeFunction[" + this.annotationMark + ',' + this.key + ',' + this.key2 + ']';
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/FrenchTreebankParserParams$AnnotatePunctuationFunction.class */
    /**
     * Annotation function that maps the punctuation token bound to the
     * {@code term} node onto a coarse punctuation class suffix.
     */
    public static class AnnotatePunctuationFunction implements SerializableFunction<TregexMatcher, String> {
        /** Name of the tregex node that holds the punctuation token. */
        static final String key = "term";
        private static final long serialVersionUID = 1;

        private AnnotatePunctuationFunction() {
        }

        /**
         * @param tregexMatcher matcher that has bound the {@code term} node
         * @return "-fs" for a full stop, "-quest" for a question mark, "-comma" for a
         *         comma, "-colon" for a colon or semicolon, and the empty string otherwise
         */
        @Override // edu.stanford.nlp.util.Function
        public String apply(TregexMatcher tregexMatcher) {
            String value = tregexMatcher.getNode(key).value();
            // The full stop is spelled as a literal rather than borrowed from a
            // regex library's wildcard constant, which only coincidentally has the same text.
            switch (value) {
                case ".":
                    return "-fs";
                case "?":
                    return "-quest";
                case ",":
                    return "-comma";
                case ":":
                case ";":
                    return "-colon";
                default:
                    return "";
            }
        }

        public String toString() {
            return "AnnotatePunctuationFunction";
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/FrenchTreebankParserParams$SimpleStringFunction.class */
    /**
     * Annotation function that ignores the match and always yields the same
     * fixed suffix.
     */
    public static class SimpleStringFunction implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1;
        private String annotationMark;

        /**
         * @param str the suffix returned for every match
         */
        public SimpleStringFunction(String str) {
            this.annotationMark = str;
        }

        /** Returns the configured suffix regardless of the matcher's state. */
        @Override // edu.stanford.nlp.util.Function
        public String apply(TregexMatcher tregexMatcher) {
            return this.annotationMark;
        }

        public String toString() {
            StringBuilder text = new StringBuilder("SimpleStringFunction[");
            text.append(this.annotationMark).append(']');
            return text.toString();
        }
    }

    public FrenchTreebankParserParams() {
        super(new FrenchTreebankLanguagePack());
        this.readPennFormat = true;
        this.collinizerRetainsPunctuation = false;
        this.baselineFeatures = new ArrayList();
        this.baselineFeatures.add("-tagPAFr");
        this.baselineFeatures.add("-markInf");
        this.baselineFeatures.add("-markPart");
        this.baselineFeatures.add("-markVN");
        this.baselineFeatures.add("-coord1");
        this.baselineFeatures.add("-de2");
        this.baselineFeatures.add("-markP1");
        this.baselineFeatures.add("-MWAdvS");
        this.baselineFeatures.add("-MWADVSel1");
        this.baselineFeatures.add("-MWADVSel2");
        this.baselineFeatures.add("-MWNSel1");
        this.baselineFeatures.add("-MWNSel2");
        this.baselineFeatures.add("-splitPUNC");
        this.additionalFeatures = new ArrayList();
        setInputEncoding("UTF-8");
        this.optionsString = new StringBuilder();
        this.optionsString.append("FrenchTreebankParserParams\n");
        this.annotationPatterns = Generics.newHashMap();
        this.activeAnnotations = new ArrayList();
        initializeAnnotationPatterns();
    }

    /**
     * Compiles every supported annotation pattern and stores it in
     * {@code annotationPatterns} under its option name, paired with the function
     * that produces the annotation suffix for a match.
     *
     * <p>Existing entries with the same key are overwritten, so the method can be
     * called again after the head finder changes. A {@link TregexParseException}
     * is reported on standard error and aborts the remaining registrations; it is
     * not rethrown.
     */
    private void initializeAnnotationPatterns() {
        try {
            // The compiler is bound to the current head finder (created lazily if absent).
            TregexPatternCompiler tregexPatternCompiler = new TregexPatternCompiler(headFinder());
            this.annotationPatterns.put("-markInf", new Pair<>(tregexPatternCompiler.compile("@V > (@VN > @VPinf)"), new SimpleStringFunction("-infinitive")));
            this.annotationPatterns.put("-markPart", new Pair<>(tregexPatternCompiler.compile("@V > (@VN > @VPpart)"), new SimpleStringFunction("-participle")));
            this.annotationPatterns.put("-markVN", new Pair<>(tregexPatternCompiler.compile("__ << @VN"), new SimpleStringFunction("-withVN")));
            this.annotationPatterns.put("-tagPAFr", new Pair<>(tregexPatternCompiler.compile("!@PUNC < (__ !< __) > __=parent"), new AddRelativeNodeFunction("-", "parent", true)));
            this.annotationPatterns.put("-coord1", new Pair<>(tregexPatternCompiler.compile("@COORD <2 __=word"), new AddRelativeNodeFunction("-", "word", true)));
            this.annotationPatterns.put("-de2", new Pair<>(tregexPatternCompiler.compile("@P < /^([Dd]es?|du|d')$/"), new SimpleStringFunction("-de2")));
            this.annotationPatterns.put("-de3", new Pair<>(tregexPatternCompiler.compile("@NP|PP|COORD >+(@NP|PP) (@PP <, (@P < /^([Dd]es?|du|d')$/))"), new SimpleStringFunction("-de3")));
            this.annotationPatterns.put("-markP1", new Pair<>(tregexPatternCompiler.compile("@P > (@PP > @NP)"), new SimpleStringFunction("-n")));
            this.annotationPatterns.put("-MWAdvS", new Pair<>(tregexPatternCompiler.compile("@MWADV > /S/"), new SimpleStringFunction("-mwadv-s")));
            this.annotationPatterns.put("-MWADVSel1", new Pair<>(tregexPatternCompiler.compile("@MWADV <1 @P <2 @N !<3 __"), new SimpleStringFunction("-mwadv1")));
            this.annotationPatterns.put("-MWADVSel2", new Pair<>(tregexPatternCompiler.compile("@MWADV <1 @P <2 @D <3 @N !<4 __"), new SimpleStringFunction("-mwadv2")));
            this.annotationPatterns.put("-MWNSel1", new Pair<>(tregexPatternCompiler.compile("@MWN <1 @N <2 @A !<3 __"), new SimpleStringFunction("-mwn1")));
            this.annotationPatterns.put("-MWNSel2", new Pair<>(tregexPatternCompiler.compile("@MWN <1 @N <2 @P <3 @N !<4 __"), new SimpleStringFunction("-mwn2")));
            this.annotationPatterns.put("-MWNSel3", new Pair<>(tregexPatternCompiler.compile("@MWN <1 @N <2 @- <3 @N !<4 __"), new SimpleStringFunction("-mwn3")));
            this.annotationPatterns.put("-splitPUNC", new Pair<>(tregexPatternCompiler.compile("@PUNC < __=term"), new AnnotatePunctuationFunction()));
            this.annotationPatterns.put("-mweTag", new Pair<>(tregexPatternCompiler.compile("!@PUNC < (__ !< __) > /MW/=parent"), new AddRelativeNodeFunction("-", "parent", true)));
            this.annotationPatterns.put("-sq", new Pair<>(tregexPatternCompiler.compile("@SENT << /\\?/"), new SimpleStringFunction("-Q")));
            this.annotationPatterns.put("-hasVP", new Pair<>(tregexPatternCompiler.compile("!@ROOT|SENT << /^VP/"), new SimpleStringFunction("-hasVP")));
            this.annotationPatterns.put("-hasVP2", new Pair<>(tregexPatternCompiler.compile("__ << /^VP/"), new SimpleStringFunction("-hasVP")));
            this.annotationPatterns.put("-npCOORD", new Pair<>(tregexPatternCompiler.compile("@NP < @COORD"), new SimpleStringFunction("-coord")));
            this.annotationPatterns.put("-npVP", new Pair<>(tregexPatternCompiler.compile("@NP < /VP/"), new SimpleStringFunction("-vp")));
            this.annotationPatterns.put("-baseNP1", new Pair<>(tregexPatternCompiler.compile("@NP <1 @D <2 @N !<3 __"), new SimpleStringFunction("-np1")));
            this.annotationPatterns.put("-baseNP2", new Pair<>(tregexPatternCompiler.compile("@NP <1 @D <2 @MWN !<3 __"), new SimpleStringFunction("-np2")));
            this.annotationPatterns.put("-baseNP3", new Pair<>(tregexPatternCompiler.compile("@NP <1 @MWD <2 @N !<3 __ "), new SimpleStringFunction("-np3")));
            this.annotationPatterns.put("-npMWN1", new Pair<>(tregexPatternCompiler.compile("@NP < (@MWN < @A)"), new SimpleStringFunction("-mwna")));
            this.annotationPatterns.put("-npMWN2", new Pair<>(tregexPatternCompiler.compile("@NP <1 @D <2 @MWN <3 @PP !<4 __"), new SimpleStringFunction("-mwn2")));
            this.annotationPatterns.put("-npMWN3", new Pair<>(tregexPatternCompiler.compile("@NP <1 @D <2 (@MWN <1 @N <2 @A !<3 __) !<3 __"), new SimpleStringFunction("-mwn3")));
            this.annotationPatterns.put("-npMWN4", new Pair<>(tregexPatternCompiler.compile("@PP <, @P <2 (@NP <1 @D <2 (@MWN <1 @N <2 @A !<3 __) !<3 __) !<3 __"), new SimpleStringFunction("-mwn3")));
            // POS-sequence annotations: the integer argument is the count cutoff.
            // NOTE(review): ArffViewerMainPanel.HEIGHT, TrainOptions.DEFAULT_BATCH_SIZE and
            // GenericObjectEditorHistory.MAX_HISTORY_LENGTH belong to unrelated classes (two are
            // Weka GUI classes); they look like decompiler substitutions for plain integer
            // literals. Confirm the intended cutoff values before relying on them.
            this.annotationPatterns.put("-MWNSel", new Pair<>(tregexPatternCompiler.compile("@MWN"), new AddPOSSequenceFunction("-", ArffViewerMainPanel.HEIGHT, true)));
            this.annotationPatterns.put("-MWADVSel", new Pair<>(tregexPatternCompiler.compile("@MWADV"), new AddPOSSequenceFunction("-", TrainOptions.DEFAULT_BATCH_SIZE, true)));
            this.annotationPatterns.put("-MWASel", new Pair<>(tregexPatternCompiler.compile("@MWA"), new AddPOSSequenceFunction("-", 100, true)));
            this.annotationPatterns.put("-MWCSel", new Pair<>(tregexPatternCompiler.compile("@MWC"), new AddPOSSequenceFunction("-", 400, true)));
            this.annotationPatterns.put("-MWDSel", new Pair<>(tregexPatternCompiler.compile("@MWD"), new AddPOSSequenceFunction("-", 100, true)));
            this.annotationPatterns.put("-MWPSel", new Pair<>(tregexPatternCompiler.compile("@MWP"), new AddPOSSequenceFunction("-", ArffViewerMainPanel.HEIGHT, true)));
            this.annotationPatterns.put("-MWPROSel", new Pair<>(tregexPatternCompiler.compile("@MWPRO"), new AddPOSSequenceFunction("-", 60, true)));
            this.annotationPatterns.put("-MWVSel", new Pair<>(tregexPatternCompiler.compile("@MWV"), new AddPOSSequenceFunction("-", GenericObjectEditorHistory.MAX_HISTORY_LENGTH, true)));
            this.annotationPatterns.put("-mwn1", new Pair<>(tregexPatternCompiler.compile("@MWN <1 @N <2 @A !<3 __"), new SimpleStringFunction("-na")));
            this.annotationPatterns.put("-mwn2", new Pair<>(tregexPatternCompiler.compile("@MWN <1 @N <2 @P <3 @N !<4 __"), new SimpleStringFunction("-npn")));
            this.annotationPatterns.put("-mwn3", new Pair<>(tregexPatternCompiler.compile("@MWN <1 @N <2 @- <3 @N !<4 __"), new SimpleStringFunction("-n-n")));
            this.annotationPatterns.put("-mwn4", new Pair<>(tregexPatternCompiler.compile("@MWN <1 @N <2 @N !<3 __"), new SimpleStringFunction("-nn")));
            this.annotationPatterns.put("-mwn5", new Pair<>(tregexPatternCompiler.compile("@MWN <1 @D <2 @N !<3 __"), new SimpleStringFunction("-dn")));
            this.annotationPatterns.put("-hasWH", new Pair<>(tregexPatternCompiler.compile("__ < /^(qui|quoi|comment|quel|quelle|quels|quelles|où|combien|que|pourquoi|quand)$/"), new SimpleStringFunction("-wh")));
            this.annotationPatterns.put("-markNNP2", new Pair<>(tregexPatternCompiler.compile("@N < /^[A-Z]/"), new SimpleStringFunction("-nnp")));
            this.annotationPatterns.put("-markD1", new Pair<>(tregexPatternCompiler.compile("@D > (__ > @PP)"), new SimpleStringFunction("-p")));
            this.annotationPatterns.put("-markD2", new Pair<>(tregexPatternCompiler.compile("@D > (__ > @NP)"), new SimpleStringFunction("-n")));
            this.annotationPatterns.put("-markD3", new Pair<>(tregexPatternCompiler.compile("@D > (__ > /^VP/)"), new SimpleStringFunction("-v")));
            this.annotationPatterns.put("-markD4", new Pair<>(tregexPatternCompiler.compile("@D > (__ > /^S/)"), new SimpleStringFunction("-s")));
            this.annotationPatterns.put("-markD5", new Pair<>(tregexPatternCompiler.compile("@D > (__ > @COORD)"), new SimpleStringFunction("-c")));
            this.annotationPatterns.put("-app1", new Pair<>(tregexPatternCompiler.compile("@NP < /[,]/"), new SimpleStringFunction("-app1")));
            this.annotationPatterns.put("-app2", new Pair<>(tregexPatternCompiler.compile("/[^,\\-:;\"]/ > (@NP < /^[,]$/) $,, /^[,]$/"), new SimpleStringFunction("-app2")));
            this.annotationPatterns.put("-coord2", new Pair<>(tregexPatternCompiler.compile("@COORD !< @C"), new SimpleStringFunction("-nonC")));
            this.annotationPatterns.put("-hasCOORD", new Pair<>(tregexPatternCompiler.compile("__ < @COORD"), new SimpleStringFunction("-hasCOORD")));
            this.annotationPatterns.put("-hasCOORDLS", new Pair<>(tregexPatternCompiler.compile("@SENT <, @COORD"), new SimpleStringFunction("-hasCOORDLS")));
            this.annotationPatterns.put("-hasCOORDNonS", new Pair<>(tregexPatternCompiler.compile("__ < @COORD !<, @COORD"), new SimpleStringFunction("-hasCOORDNonS")));
            this.annotationPatterns.put("-pp1", new Pair<>(tregexPatternCompiler.compile("@P < /^(du|des|au|aux)$/=word"), new AddRelativeNodeFunction("-", "word", false)));
            this.annotationPatterns.put("-vpinf1", new Pair<>(tregexPatternCompiler.compile("@VPinf <, __=word"), new AddRelativeNodeFunction("-", "word", false)));
            this.annotationPatterns.put("-vpinf2", new Pair<>(tregexPatternCompiler.compile("@VPinf <, __=word"), new AddRelativeNodeFunction("-", "word", true)));
            this.annotationPatterns.put("-splitIN", new Pair<>(tregexPatternCompiler.compile("@PP <, (P < /^([Dd]e|[Dd]'|[Dd]es|[Dd]u|à|[Aa]u|[Aa]ux|[Ee]n|[Dd]ans|[Pp]ar|[Ss]ur|[Pp]our|[Aa]vec|[Ee]ntre)$/=word)"), new AddRelativeNodeFunction(this, "-", "word", false, true)));
            this.annotationPatterns.put("-splitP", new Pair<>(tregexPatternCompiler.compile("@P < /^([Dd]e|[Dd]'|[Dd]es|[Dd]u|à|[Aa]u|[Aa]ux|[Ee]n|[Dd]ans|[Pp]ar|[Ss]ur|[Pp]our|[Aa]vec|[Ee]ntre)$/=word"), new AddRelativeNodeFunction(this, "-", "word", false, true)));
            this.annotationPatterns.put("-hasde", new Pair<>(tregexPatternCompiler.compile("@NP|PP <+(@NP|PP) (P < de)"), new SimpleStringFunction("-hasDE")));
            this.annotationPatterns.put("-hasde2", new Pair<>(tregexPatternCompiler.compile("@PP < de"), new SimpleStringFunction("-hasDE2")));
            this.annotationPatterns.put("-np1", new Pair<>(tregexPatternCompiler.compile("@NP < /^,$/"), new SimpleStringFunction("-np1")));
            this.annotationPatterns.put("-np2", new Pair<>(tregexPatternCompiler.compile("@NP <, (@D < le|la|les)"), new SimpleStringFunction("-np2")));
            this.annotationPatterns.put("-np3", new Pair<>(tregexPatternCompiler.compile("@D < le|la|les"), new SimpleStringFunction("-def")));
            this.annotationPatterns.put("-baseNP", new Pair<>(tregexPatternCompiler.compile("@NP <, @D <- (@N , @D)"), new SimpleStringFunction("-baseNP")));
            this.annotationPatterns.put("-markP2", new Pair<>(tregexPatternCompiler.compile("@P > (@PP > @AP)"), new SimpleStringFunction("-a")));
            this.annotationPatterns.put("-markP3", new Pair<>(tregexPatternCompiler.compile("@P > (@PP > @SENT|Ssub|VPinf|VPpart)"), new SimpleStringFunction("-v")));
            this.annotationPatterns.put("-markP4", new Pair<>(tregexPatternCompiler.compile("@P > (@PP > @Srel)"), new SimpleStringFunction("-r")));
            this.annotationPatterns.put("-markP5", new Pair<>(tregexPatternCompiler.compile("@P > (@PP > @COORD)"), new SimpleStringFunction("-c")));
            this.annotationPatterns.put("-markP6", new Pair<>(tregexPatternCompiler.compile("@P > @VPinf"), new SimpleStringFunction("-b")));
            this.annotationPatterns.put("-markP7", new Pair<>(tregexPatternCompiler.compile("@P > @VPpart"), new SimpleStringFunction("-b")));
            this.annotationPatterns.put("-markP8", new Pair<>(tregexPatternCompiler.compile("@P > /^MW|NP/"), new SimpleStringFunction("-internal")));
            this.annotationPatterns.put("-markP9", new Pair<>(tregexPatternCompiler.compile("@P > @COORD"), new SimpleStringFunction("-c")));
            this.annotationPatterns.put("-hasMWP", new Pair<>(tregexPatternCompiler.compile("!/S/ < @MWP"), new SimpleStringFunction("-mwp")));
            this.annotationPatterns.put("-hasMWP2", new Pair<>(tregexPatternCompiler.compile("@PP < @MWP"), new SimpleStringFunction("-mwp2")));
            this.annotationPatterns.put("-hasMWN2", new Pair<>(tregexPatternCompiler.compile("@PP <+(@NP) @MWN"), new SimpleStringFunction("-hasMWN2")));
            this.annotationPatterns.put("-hasMWN3", new Pair<>(tregexPatternCompiler.compile("@NP < @MWN"), new SimpleStringFunction("-hasMWN3")));
            this.annotationPatterns.put("-hasMWADV", new Pair<>(tregexPatternCompiler.compile("/^A/ < @MWADV"), new SimpleStringFunction("-hasmwadv")));
            this.annotationPatterns.put("-hasC1", new Pair<>(tregexPatternCompiler.compile("__ < @MWC"), new SimpleStringFunction("-hasc1")));
            this.annotationPatterns.put("-hasC2", new Pair<>(tregexPatternCompiler.compile("@MWC > /S/"), new SimpleStringFunction("-hasc2")));
            this.annotationPatterns.put("-hasC3", new Pair<>(tregexPatternCompiler.compile("@COORD < @MWC"), new SimpleStringFunction("-hasc3")));
            this.annotationPatterns.put("-uMWN", new Pair<>(tregexPatternCompiler.compile("@NP <: @MWN"), new SimpleStringFunction("-umwn")));
            this.annotationPatterns.put("-splitC", new Pair<>(tregexPatternCompiler.compile("@C < __=word"), new AddRelativeNodeFunction("-", "word", false)));
            this.annotationPatterns.put("-splitD", new Pair<>(tregexPatternCompiler.compile("@D < /^[^\\d+]{1,4}$/=word"), new AddRelativeNodeFunction("-", "word", false)));
            this.annotationPatterns.put("-de1", new Pair<>(tregexPatternCompiler.compile("@D < /^([Dd]es?|du|d')$/"), new SimpleStringFunction("-de1")));
            this.annotationPatterns.put("-markNNP1", new Pair<>(tregexPatternCompiler.compile("@NP < (N < /^[A-Z]/) !< /^[^NA]/"), new SimpleStringFunction("-nnp")));
            this.annotationPatterns.put("-markPP1", new Pair<>(tregexPatternCompiler.compile("@PP > @NP"), new SimpleStringFunction("-n")));
            this.annotationPatterns.put("-markPP2", new Pair<>(tregexPatternCompiler.compile("@PP > @AP"), new SimpleStringFunction("-a")));
            this.annotationPatterns.put("-markPP3", new Pair<>(tregexPatternCompiler.compile("@PP > @SENT|Ssub|VPinf|VPpart"), new SimpleStringFunction("-v")));
            this.annotationPatterns.put("-markPP4", new Pair<>(tregexPatternCompiler.compile("@PP > @Srel"), new SimpleStringFunction("-r")));
            this.annotationPatterns.put("-markPP5", new Pair<>(tregexPatternCompiler.compile("@PP > @COORD"), new SimpleStringFunction("-c")));
            this.annotationPatterns.put("-dominateCC", new Pair<>(tregexPatternCompiler.compile("__ << @COORD"), new SimpleStringFunction("-withCC")));
            this.annotationPatterns.put("-dominateIN", new Pair<>(tregexPatternCompiler.compile("__ << @PP"), new SimpleStringFunction("-withPP")));
            this.annotationPatterns.put("-markContainsVP", new Pair<>(tregexPatternCompiler.compile("__ << /^VP/"), new SimpleStringFunction("-hasV")));
            this.annotationPatterns.put("-markContainsVP2", new Pair<>(tregexPatternCompiler.compile("__ << /^VP/=word"), new AddRelativeNodeFunction("-hasV-", "word", false)));
            this.annotationPatterns.put("-markVNArgs", new Pair<>(tregexPatternCompiler.compile("@VN $+ __=word1"), new AddRelativeNodeFunction("-", "word1", false)));
            this.annotationPatterns.put("-markVNArgs2", new Pair<>(tregexPatternCompiler.compile("@VN > __=word1 $+ __=word2"), new AddRelativeNodeFunction(this, "-", "word1", "word2", false)));
            this.annotationPatterns.put("-markContainsMW", new Pair<>(tregexPatternCompiler.compile("__ << /^MW/"), new SimpleStringFunction("-hasMW")));
            this.annotationPatterns.put("-markContainsMW2", new Pair<>(tregexPatternCompiler.compile("__ << /^MW/=word"), new AddRelativeNodeFunction("-has-", "word", false)));
            this.annotationPatterns.put("-mwStart", new Pair<>(tregexPatternCompiler.compile("__ >, /^MW/"), new SimpleStringFunction("-mwStart")));
            this.annotationPatterns.put("-mwMiddle", new Pair<>(tregexPatternCompiler.compile("__ !>- /^MW/ !>, /^MW/ > /^MW/"), new SimpleStringFunction("-mwMid")));
            this.annotationPatterns.put("-mwMiddle2", new Pair<>(tregexPatternCompiler.compile("__ !>- /^MW/ !>, /^MW/ > /^MW/ , __=pos"), new AddRelativeNodeFunction("-", "pos", true)));
            this.annotationPatterns.put("-mwEnd", new Pair<>(tregexPatternCompiler.compile("__ >- /^MW/"), new SimpleStringFunction("-mwEnd")));
            this.annotationPatterns.put("-nonNAP", new Pair<>(tregexPatternCompiler.compile("@AP !$, @N|AP"), new SimpleStringFunction("-nap")));
            this.annotationPatterns.put("-markNPTMP", new Pair<>(tregexPatternCompiler.compile("@NP < (@N < /^(lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche|Lundi|Mardi|Mercredi|Jeudi|Vendredi|Samedi|Dimanche|janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre|Janvier|Février|Mars|Avril|Mai|Juin|Juillet|Août|Septembre|Octobre|Novembre|Décembre)$/)"), new SimpleStringFunction("-tmp")));
            this.annotationPatterns.put("-markSing1", new Pair<>(tregexPatternCompiler.compile("@NP < (D < /^(ce|cette|une|la|le|un|sa|son|ma|mon|ta|ton)$/)"), new SimpleStringFunction("-sing")));
            this.annotationPatterns.put("-markSing2", new Pair<>(tregexPatternCompiler.compile("@AP < (A < (/[^sx]$/ !< __))"), new SimpleStringFunction("-sing")));
            this.annotationPatterns.put("-markSing3", new Pair<>(tregexPatternCompiler.compile("@VPpart < (V < /(e|é)$/)"), new SimpleStringFunction("-sing")));
            this.annotationPatterns.put("-markPl1", new Pair<>(tregexPatternCompiler.compile("@NP < (D < /s$/)"), new SimpleStringFunction("-pl")));
            this.annotationPatterns.put("-markPl2", new Pair<>(tregexPatternCompiler.compile("@AP < (A < /[sx]$/)"), new SimpleStringFunction("-pl")));
            this.annotationPatterns.put("-markPl3", new Pair<>(tregexPatternCompiler.compile("@VPpart < (V < /(es|és)$/)"), new SimpleStringFunction("-pl")));
        } catch (TregexParseException e) {
            // The failing pattern's position is estimated from how many entries the map holds.
            // NOTE(review): this only identifies the pattern on the first population of the map;
            // when setHeadFinder re-runs this method the map is already full, so the ordinal is
            // misleading. Ordinals other than 1 and 2 are always rendered with "th".
            int size = this.annotationPatterns.size() + 1;
            System.err.println("Parse exception on " + (size == 1 ? "1st" : size == 2 ? "2nd" : size + "th") + " annotation pattern initialization:" + e);
        }
    }

    /**
     * Returns the head finder in use, creating a {@link DybroFrenchHeadFinder}
     * for this language pack the first time it is requested.
     */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams, edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public HeadFinder headFinder() {
        if (this.headFinder != null) {
            return this.headFinder;
        }
        this.headFinder = new DybroFrenchHeadFinder(treebankLanguagePack());
        return this.headFinder;
    }

    /** Returns the same head finder as {@link #headFinder()}; no separate one is kept for typed dependencies. */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams, edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public HeadFinder typedDependencyHeadFinder() {
        final HeadFinder sharedFinder = headFinder();
        return sharedFinder;
    }

    /**
     * Installs a new head finder, recompiles the annotation patterns against it,
     * and rebuilds the active annotation list from the baseline features followed
     * by the additional features.
     *
     * <p>A feature name with no registered pattern (for example because pattern
     * compilation was aborted) is reported on standard error and skipped, rather
     * than being stored as {@code null} and failing later in transformTree.
     *
     * @param headFinder the head finder to use; must not be null
     * @throws IllegalArgumentException if {@code headFinder} is null
     */
    private void setHeadFinder(HeadFinder headFinder) {
        if (headFinder == null) {
            throw new IllegalArgumentException("headFinder must not be null");
        }
        this.headFinder = headFinder;
        // The pattern compiler is constructed from the head finder, so rebuild every pattern.
        initializeAnnotationPatterns();
        this.activeAnnotations.clear();

        // Baseline features first, then additional ones, preserving each list's order.
        List<String> requestedFeatures = new ArrayList<>(this.baselineFeatures);
        requestedFeatures.addAll(this.additionalFeatures);
        for (String feature : requestedFeatures) {
            Pair<TregexPattern, Function<TregexMatcher, String>> annotation = this.annotationPatterns.get(feature);
            if (annotation == null) {
                System.err.printf("%s: No annotation pattern registered for feature %s; skipping it%n", getClass().getName(), feature);
                continue;
            }
            this.activeAnnotations.add(annotation);
        }
    }

    /**
     * Builds the lexicon for this language. If the options name no unknown-word
     * model trainer, the French trainer is written into them first. A factored
     * lexicon is returned when a morphological specification is set, a base
     * lexicon otherwise.
     */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams, edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public Lexicon lex(Options options, Index<String> index, Index<String> index2) {
        if (options.lexOptions.uwModelTrainer == null) {
            options.lexOptions.uwModelTrainer = "edu.stanford.nlp.parser.lexparser.FrenchUnknownWordModelTrainer";
        }
        if (this.morphoSpec == null) {
            return new BaseLexicon(options, index, index2);
        }
        return new FactoredLexicon(options, this.morphoSpec, index, index2);
    }

    /** Returns a new empty array on each call: no sister splitters are defined here. */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams, edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public String[] sisterSplitters() {
        return new String[] {};
    }

    /** Returns a new {@link TreeCollinizer} built from this instance's language pack. */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams, edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public TreeTransformer collinizer() {
        final TreeTransformer collinizer = new TreeCollinizer(treebankLanguagePack());
        return collinizer;
    }

    /**
     * Returns a new {@link TreeCollinizer} for evalb-style scoring, built from this
     * instance's language pack, the punctuation-retention flag, and {@code false}
     * as the third constructor argument.
     */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams, edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public TreeTransformer collinizerEvalb() {
        final TreeTransformer collinizer =
                new TreeCollinizer(treebankLanguagePack(), this.collinizerRetainsPunctuation, false);
        return collinizer;
    }

    /** Returns a new disk-backed treebank using this instance's tree reader factory and input encoding. */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams, edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public DiskTreebank diskTreebank() {
        final TreeReaderFactory readerFactory = treeReaderFactory();
        return new DiskTreebank(readerFactory, this.inputEncoding);
    }

    /** Returns a new in-memory treebank using this instance's tree reader factory and input encoding. */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams, edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public MemoryTreebank memoryTreebank() {
        final TreeReaderFactory readerFactory = treeReaderFactory();
        return new MemoryTreebank(readerFactory, this.inputEncoding);
    }

    /**
     * Returns a new tree reader factory: the Penn-format French factory when
     * {@code readPennFormat} is set, otherwise the French XML factory constructed
     * with {@code false}.
     */
    @Override // edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public TreeReaderFactory treeReaderFactory() {
        if (this.readPennFormat) {
            return new FrenchTreeReaderFactory();
        }
        return new FrenchXMLTreeReaderFactory(false);
    }

    /**
     * Returns a short French sentence ("Ceci est seulement un test .") as a word
     * list, for use as a default test input.
     */
    @Override // edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public List<HasWord> defaultTestSentence() {
        // The final token is a literal full stop, not a regex-library wildcard constant.
        return Sentence.toWordList("Ceci", "est", "seulement", "un", "test", ".");
    }

    /**
     * Rewrites the category of {@code tree} in place and returns it.
     *
     * <p>The suffix of every active annotation whose pattern matches at
     * {@code tree} (searching within {@code tree2}) is collected. For a
     * preterminal, when a tag specification is set, the base category is first
     * replaced by the tag derived from the morphological analysis stored as the
     * word label's original text. The new value is the base category followed by
     * the collected suffixes; for a preterminal whose label is a {@link HasTag},
     * the tag is set to the same value.
     *
     * @param tree  the node to relabel
     * @param tree2 the tree the patterns are matched against
     * @return {@code tree}, after relabeling
     * @throws RuntimeException if a tag specification is set and a preterminal's
     *         word label is not a {@link CoreLabel} or carries no original text
     */
    @Override // edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams, edu.stanford.nlp.parser.lexparser.TreebankLangParserParams
    public Tree transformTree(Tree tree, Tree tree2) {
        String baseCategory = tree.value();

        // Collect the suffix of every active annotation that matches at this node.
        StringBuilder suffixes = new StringBuilder();
        for (Pair<TregexPattern, Function<TregexMatcher, String>> annotation : this.activeAnnotations) {
            TregexMatcher matcher = annotation.first().matcher(tree2);
            if (!matcher.matchesAt(tree)) {
                continue;
            }
            suffixes.append(annotation.second().apply(matcher));
        }

        // For POS tags, derive the base category from the morphological analysis.
        if (tree.isPreTerminal() && this.tagSpec != null) {
            CoreLabel wordLabel = null;
            if (tree.firstChild().label() instanceof CoreLabel) {
                wordLabel = (CoreLabel) tree.firstChild().label();
            }
            String morphAnalysis = (wordLabel == null) ? null : wordLabel.originalText();
            if (morphAnalysis == null) {
                throw new RuntimeException(String.format("%s: Term lacks morpho analysis: %s", getClass().getName(), tree.toString()));
            }
            baseCategory = this.tagSpec.strToFeatures(MorphoFeatureSpecification.splitMorphString("", morphAnalysis).second()).getTag(baseCategory);
        }

        // Write the new category back to the node, and to its tag where applicable.
        String annotatedCategory = baseCategory + suffixes.toString();
        tree.setValue(annotatedCategory);
        if (tree.isPreTerminal() && (tree.label() instanceof HasTag)) {
            HasTag tagged = (HasTag) tree.label();
            tagged.setTag(annotatedCategory);
        }
        return tree;
    }

    /**
     * Loads multi-word expression counts from a tab-separated UTF-8 file into
     * {@code mwCounter}.
     *
     * <p>Each line supplies three tab-separated fields: the two counter keys followed by a
     * count parsed with {@link Double#parseDouble(String)}; every field is trimmed.
     * {@code mwCounter} is replaced by a fresh counter before reading, so after a failure it
     * holds only the lines read up to that point. I/O failures are reported on
     * {@code System.err} and do not propagate; the file is closed in every case.
     *
     * @param str path of the file to read
     * @throws NumberFormatException if a count field is not a valid double
     */
    private void loadMWMap(String str) {
        this.mwCounter = new TwoDimensionalCounter<>();
        int lineCount = 0;
        // try-with-resources closes the file even when a line fails to parse part-way through.
        try (FileInputStream fileStream = new FileInputStream(new File(str));
             BufferedReader reader = new BufferedReader(new InputStreamReader(fileStream, "UTF-8"))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                final String[] fields = line.split("\t");
                // Assertion-only sanity check, active when assertions are enabled for this class.
                // It stays in this explicit form because the class declares $assertionsDisabled
                // itself; a real `assert` statement would clash with that field.
                if (!$assertionsDisabled && fields.length != 3) {
                    throw new AssertionError("Expected 3 tab-separated fields but found " + fields.length + ": " + line);
                }
                this.mwCounter.setCount(fields[0].trim(), fields[1].trim(), Double.parseDouble(fields[2].trim()));
                lineCount++;
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException and UnsupportedEncodingException, which are
            // IOException subclasses and were previously handled identically.
            System.err.printf("%s: Could not load MWE counter from %s%n", getClass().getName(), str);
            e.printStackTrace();
            return;
        }
        System.err.printf("%s: Loaded %d lines from %s into MWE counter%n", getClass().getName(), lineCount, str);
    }

    /**
     * Installs the language pack's morphological feature specification as {@code morphoSpec}
     * and activates the requested feature types on it.
     *
     * @param str comma-separated {@code MorphoFeatureType} constant names; surrounding
     *     whitespace around each name is ignored
     * @return the string form of the resulting specification
     * @throws IllegalArgumentException if a name is not a {@code MorphoFeatureType} constant
     */
    private String setupMorphoFeatures(String str) {
        final String[] featureNames = str.split(",");
        this.morphoSpec = this.tlp.morphFeatureSpec();
        for (int k = 0; k < featureNames.length; k++) {
            final MorphoFeatureSpecification.MorphoFeatureType featureType =
                    MorphoFeatureSpecification.MorphoFeatureType.valueOf(featureNames[k].trim());
            this.morphoSpec.activate(featureType);
        }
        return this.morphoSpec.toString();
    }

    /**
     * Drops a feature from the baseline set and deactivates its annotation pattern.
     * Does nothing when the feature is not currently a baseline feature.
     *
     * @param str the option name of the feature, e.g. {@code -markVN}
     */
    private void removeBaselineFeature(String str) {
        if (!this.baselineFeatures.contains(str)) {
            return;
        }
        this.baselineFeatures.remove(str);
        final Pair<TregexPattern, Function<TregexMatcher, String>> annotation = this.annotationPatterns.get(str);
        this.activeAnnotations.remove(annotation);
    }

    /**
     * Prints the accumulated option summary ({@code optionsString}) to {@code System.err}.
     */
    @Override
    public void display() {
        final String optionSummary = this.optionsString.toString();
        System.err.println(optionSummary);
    }

    /**
     * Consumes the option at {@code strArr[i]}, plus its argument where one is required, and
     * applies it to this parameter set.
     *
     * <p>Recognized options are the keys of {@code annotationPatterns} and the flags
     * {@code -collinizerRetainsPunctuation}, {@code -headFinder <class>}, {@code -xmlFormat},
     * {@code -frenchFactored}, {@code -frenchMWMap <file>}, {@code -tsg},
     * {@code -factlex <features>}, {@code -noFeatures} and {@code -ccTagsetAnnotations}.
     * A flag that needs an argument but is the last element of the array is treated as
     * unrecognized.
     *
     * @param strArr the full argument array
     * @param i index of the option to examine
     * @return the index of the first element not consumed; equal to {@code i} when the option
     *     was not recognized
     */
    @Override
    public int setOptionFlag(String[] strArr, int i) {
        final String flag = strArr[i];
        final boolean hasArgument = i + 1 < strArr.length;

        if (this.annotationPatterns.containsKey(flag)) {
            // Track features that are switched on beyond the baseline set.
            if (!this.baselineFeatures.contains(flag)) {
                this.additionalFeatures.add(flag);
            }
            Pair<TregexPattern, Function<TregexMatcher, String>> pair = this.annotationPatterns.get(flag);
            this.activeAnnotations.add(pair);
            this.optionsString.append("Option " + flag + " added annotation pattern " + pair.first() + " with annotation " + pair.second() + '\n');
            i++;
        } else if (flag.equals("-collinizerRetainsPunctuation")) {
            this.optionsString.append("Collinizer retains punctuation.\n");
            this.collinizerRetainsPunctuation = true;
            i++;
        } else if (flag.equalsIgnoreCase("-headFinder") && hasArgument) {
            final String headFinderClass = strArr[i + 1];
            try {
                setHeadFinder((HeadFinder) Class.forName(headFinderClass).newInstance());
                this.optionsString.append("HeadFinder: " + headFinderClass + "\n");
            } catch (Exception e) {
                // Best effort: report the failure and keep the current head finder.
                System.err.println(e);
                System.err.println(getClass().getName() + ": Could not load head finder " + headFinderClass);
            }
            i += 2;
        } else if (flag.equals("-xmlFormat")) {
            this.optionsString.append("Reading trees in XML format.\n");
            this.readPennFormat = false;
            setInputEncoding(this.tlp.getEncoding());
            i++;
        } else if (flag.equals("-frenchFactored")) {
            // Switch on every baseline feature by feeding each one back through this method.
            for (String baselineFeature : this.baselineFeatures) {
                setOptionFlag(new String[]{baselineFeature}, 0);
            }
            i++;
        } else if (flag.equals("-frenchMWMap") && hasArgument) {
            // Guarded like -headFinder and -factlex, so a trailing flag without its file
            // argument is left unconsumed instead of indexing past the end of the array.
            loadMWMap(strArr[i + 1]);
            i += 2;
        } else if (flag.equals("-tsg")) {
            this.optionsString.append("Removing baseline features: ");
            removeBaselineFeature("-markVN");
            this.optionsString.append(" (removed -markVN)");
            removeBaselineFeature("-coord1");
            this.optionsString.append(" (removed -coord1)\n");
            i++;
        } else if (flag.equals("-factlex") && hasArgument) {
            this.optionsString.append("Factored Lexicon: active features: ").append(setupMorphoFeatures(strArr[i + 1]));
            removeBaselineFeature("-tagPAFr");
            this.optionsString.append(" (removed -tagPAFr)\n");
            setOptionFlag(new String[]{"-mweTag"}, 0);
            i += 2;
        } else if (flag.equals("-noFeatures")) {
            this.activeAnnotations.clear();
            this.optionsString.append("Removed all manual features.\n");
            i++;
        } else if (flag.equals("-ccTagsetAnnotations")) {
            this.tagSpec = new FrenchMorphoFeatureSpecification();
            this.tagSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.OTHER);
            this.optionsString.append("Adding CC tagset as POS state splits.\n");
            i++;
        }
        return i;
    }

    static {
        $assertionsDisabled = !FrenchTreebankParserParams.class.desiredAssertionStatus();
    }
}
