package cc.mallet.util;

import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.IDSorter;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import gnu.trove.TIntIntHashMap;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Iterator;
import java.util.logging.Logger;

/* loaded from: input_file:cc/mallet/util/FeatureCooccurrenceCounter.class */
/**
 * Counts, over an {@link InstanceList} of {@link FeatureSequence} documents, how many
 * documents each feature occurs in and how many documents each pair of features shares,
 * then writes a tab-separated file of weighted links between features.
 *
 * <p>Typical use is {@link #count()} followed by {@link #printCounts()}; {@link #main}
 * does exactly that using the command-line options declared below.
 */
public class FeatureCooccurrenceCounter {
    private static final Logger logger = MalletLogger.getLogger(FeatureCooccurrenceCounter.class.getName());

    /** Progress is reported to stderr every this many documents. */
    private static final int PROGRESS_INTERVAL = 1000;

    /** Features occurring in at most this many documents are written without any links. */
    private static final int MAX_UNLINKED_DOCUMENT_FREQUENCY = 5;

    /** At most this many links are written per feature. */
    private static final int MAX_LINKS_PER_FEATURE = 10;

    /** Links whose weight is below this value are not written. */
    private static final double MIN_LINK_WEIGHT = 0.05d;

    static CommandOption.String inputFile = new CommandOption.String(FeatureCooccurrenceCounter.class, "input", "FILENAME", true, null, "The filename from which to read the list of training instances.  Use - for stdin.  The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);
    static CommandOption.String weightsFile = new CommandOption.String(FeatureCooccurrenceCounter.class, "weights-filename", "FILENAME", true, null, "The filename to write the word-word weights file.", null);
    // NOTE(review): Expressions.TYPE_NUMBER belongs to an unrelated Stanford NLP class; it is
    // presumably a string literal that a decompiler replaced with a matching constant -- confirm
    // its value and inline the literal so this class no longer depends on that library.
    static CommandOption.Double idfCutoff = new CommandOption.Double(FeatureCooccurrenceCounter.class, "idf-cutoff", Expressions.TYPE_NUMBER, true, 3.0d, "Words with IDF below this threshold will not be linked to any other word.", null);
    static CommandOption.String unlinkedFile = new CommandOption.String(FeatureCooccurrenceCounter.class, "unlinked-filename", "FILENAME", true, null, "A file to write words that were not linked.", null);

    /** For each feature index, a map from other feature index to the number of documents containing both. */
    TIntIntHashMap[] featureFeatureCounts;
    InstanceList instances;
    /** Size of the data alphabet at construction time. */
    int numFeatures;
    /** For each feature index, the number of documents in which the feature occurs at least once. */
    int[] documentFrequencies;

    /**
     * Allocates empty count tables sized to the data alphabet of {@code instanceList}.
     * No counting happens until {@link #count()} is called.
     *
     * @param instanceList the documents to analyse; each instance's data is cast to
     *                     {@link FeatureSequence} by {@link #count()}
     */
    public FeatureCooccurrenceCounter(InstanceList instanceList) {
        this.instances = instanceList;
        this.numFeatures = instanceList.getDataAlphabet().size();
        this.featureFeatureCounts = new TIntIntHashMap[this.numFeatures];
        for (int feature = 0; feature < this.numFeatures; feature++) {
            this.featureFeatureCounts[feature] = new TIntIntHashMap();
        }
        this.documentFrequencies = new int[this.numFeatures];
    }

    /**
     * Makes one pass over all instances, incrementing the document frequency of every
     * distinct feature in a document and the symmetric co-occurrence count of every
     * unordered pair of distinct features in that document. The number of documents
     * processed so far is printed to stderr every {@value #PROGRESS_INTERVAL} documents.
     *
     * <p>Counts accumulate: calling this method again adds to the existing totals.
     *
     * @throws ClassCastException if an instance's data is not a {@link FeatureSequence}
     */
    public void count() {
        int docsProcessed = 0;
        for (Instance instance : this.instances) {
            FeatureSequence tokens = (FeatureSequence) instance.getData();

            // Collapse the token sequence to its distinct feature types; only the
            // key set is used below, the per-document token counts are not.
            TIntIntHashMap typeCounts = new TIntIntHashMap();
            for (int position = 0; position < tokens.getLength(); position++) {
                typeCounts.adjustOrPutValue(tokens.getIndexAtPosition(position), 1, 1);
            }
            int[] types = typeCounts.keys();

            // Each unordered pair is visited once and recorded in both directions.
            for (int a = 0; a < types.length - 1; a++) {
                int first = types[a];
                for (int b = a + 1; b < types.length; b++) {
                    int second = types[b];
                    this.featureFeatureCounts[first].adjustOrPutValue(second, 1, 1);
                    this.featureFeatureCounts[second].adjustOrPutValue(first, 1, 1);
                }
            }

            for (int type : types) {
                this.documentFrequencies[type]++;
            }

            docsProcessed++;
            if (docsProcessed % PROGRESS_INTERVAL == 0) {
                System.err.println(docsProcessed);
            }
        }
    }

    /**
     * Computes a smoothed log-likelihood-ratio style association score for two events
     * from their 2x2 contingency counts: the sum over the four cells of
     * {@code cell * ln(P(cell) / (P(row) * P(column)))}. No factor of 2 is applied.
     *
     * <p>Smoothing is additive: 0.01 on the "left only", "right only" and "both" cells,
     * 0.02 in total on the "neither" cell, and 0.04 on the normalising total. These
     * values are kept exactly as they were so results are numerically unchanged.
     *
     * @param leftCount  number of observations containing the first event
     * @param rightCount number of observations containing the second event
     * @param bothCount  number of observations containing both events
     * @param totalCount total number of observations
     * @return the association score described above
     */
    public double g2(double leftCount, double rightCount, double bothCount, double totalCount) {
        double leftOnly = (leftCount - bothCount) + 0.01d;
        double rightOnly = (rightCount - bothCount) + 0.01d;
        double both = bothCount + 0.01d;
        double neither = ((totalCount - leftCount) - rightCount) + both + 0.01d;
        double total = totalCount + 0.04d;

        double leftProb = (leftOnly + both) / total;
        double rightProb = (rightOnly + both) / total;
        double logLeft = Math.log(leftProb);
        double logRight = Math.log(rightProb);
        double logNotLeft = Math.log(1.0d - leftProb);
        double logNotRight = Math.log(1.0d - rightProb);

        double bothTerm = both * ((Math.log(both / total) - logLeft) - logRight);
        double leftOnlyTerm = leftOnly * ((Math.log(leftOnly / total) - logLeft) - logNotRight);
        double rightOnlyTerm = rightOnly * ((Math.log(rightOnly / total) - logNotLeft) - logRight);
        double neitherTerm = neither * ((Math.log(neither / total) - logNotLeft) - logNotRight);
        return bothTerm + leftOnlyTerm + rightOnlyTerm + neitherTerm;
    }

    /**
     * Writes the results of {@link #count()} to the files named by the command-line options.
     *
     * <p>If {@code unlinked-filename} is set, every feature whose IDF
     * ({@code ln(numDocs) - ln(documentFrequency)}) is below {@code idf-cutoff} is written
     * to it, one per line.
     *
     * <p>The {@code weights-filename} file receives one tab-separated line per feature:
     * the feature, a fixed second column, and then up to {@value #MAX_LINKS_PER_FEATURE}
     * {@code (otherFeature, weight)} pairs. Links are only written for features that occur
     * in more than {@value #MAX_UNLINKED_DOCUMENT_FREQUENCY} documents and whose IDF exceeds
     * the cutoff, and only while the weight is at least {@value #MIN_LINK_WEIGHT}.
     *
     * @throws IOException if an output file cannot be opened
     */
    public void printCounts() throws IOException {
        Alphabet alphabet = this.instances.getDataAlphabet();
        int numDocs = this.instances.size();
        double logNumDocs = Math.log(numDocs);

        // logCache[n] == ln(n) for n >= 1. logCache[0] is left at 0.0, so a feature that
        // never occurred is treated as if it had document frequency 1.
        double[] logCache = new double[numDocs + 1];
        for (int n = 1; n < logCache.length; n++) {
            logCache[n] = Math.log(n);
        }

        if (unlinkedFile.value != null) {
            PrintWriter unlinkedOut = new PrintWriter(unlinkedFile.value);
            try {
                for (int feature = 0; feature < this.numFeatures; feature++) {
                    if (idf(feature, logNumDocs, logCache) < idfCutoff.value) {
                        unlinkedOut.println(alphabet.lookupObject(feature));
                    }
                }
            } finally {
                unlinkedOut.close();
            }
        }

        PrintWriter weightsOut = new PrintWriter(weightsFile.value);
        try {
            for (int feature = 0; feature < this.numFeatures; feature++) {
                StringBuilder line = new StringBuilder();
                line.append(alphabet.lookupObject(feature));
                line.append("\t");
                // NOTE(review): TaggerConfig.DEFAULT_REG_L1 belongs to an unrelated Stanford NLP
                // class; presumably a decompiler substituted it for a string literal -- confirm
                // its value and inline the literal.
                line.append(TaggerConfig.DEFAULT_REG_L1);

                double featureIdf = idf(feature, logNumDocs, logCache);
                boolean frequentEnough = this.documentFrequencies[feature] > MAX_UNLINKED_DOCUMENT_FREQUENCY;
                if (frequentEnough && featureIdf - idfCutoff.value > 0.0d) {
                    appendLinks(line, feature, featureIdf, logNumDocs, logCache, alphabet);
                }
                weightsOut.println(line);
            }
        } finally {
            weightsOut.close();
        }
    }

    /** Returns {@code ln(numDocs) - ln(documentFrequency)} for {@code feature}, using the cached logs. */
    private double idf(int feature, double logNumDocs, double[] logCache) {
        return logNumDocs - logCache[this.documentFrequencies[feature]];
    }

    /**
     * Appends the strongest links of {@code feature} to {@code line} as
     * {@code "\t" + otherFeature + "\t" + weight} pairs.
     */
    private void appendLinks(StringBuilder line, int feature, double featureIdf,
                             double logNumDocs, double[] logCache, Alphabet alphabet) {
        TIntIntHashMap cooccurrences = this.featureFeatureCounts[feature];
        int[] others = cooccurrences.keys();
        IDSorter[] links = new IDSorter[others.length];

        for (int i = 0; i < others.length; i++) {
            int other = others[i];
            double otherIdf = idf(other, logNumDocs, logCache);
            if (otherIdf - idfCutoff.value > 0.0d) {
                // Ratio of the two IDF margins above the cutoff, scaled by the fraction of
                // this feature's documents that also contain the other feature. The cast
                // keeps that fraction from being truncated by integer division.
                double idfRatio = (otherIdf - idfCutoff.value) / (featureIdf - idfCutoff.value);
                double sharedFraction = (double) cooccurrences.get(other) / this.documentFrequencies[feature];
                links[i] = new IDSorter(other, idfRatio * sharedFraction);
            } else {
                links[i] = new IDSorter(other, 0);
            }
        }

        // Uses IDSorter's natural ordering. The early break below presumes the largest
        // weights come first -- NOTE(review): confirm against IDSorter.compareTo.
        Arrays.sort(links);

        for (int rank = 0; rank < MAX_LINKS_PER_FEATURE && rank < links.length; rank++) {
            double weight = links[rank].getWeight();
            if (weight < MIN_LINK_WEIGHT) {
                break;
            }
            Object otherName = alphabet.lookupObject(links[rank].getID());
            line.append("\t" + otherName + "\t" + weight);
        }
    }

    /**
     * Command-line entry point: parses options, loads the instance list named by
     * {@code input}, counts co-occurrences and writes the output files.
     *
     * @param args command-line arguments handed to {@link CommandOption#process}
     * @throws IllegalArgumentException if {@code input} or {@code weights-filename} was not given
     * @throws Exception if loading the instances or writing the output fails
     */
    public static void main(String[] args) throws Exception {
        CommandOption.setSummary(FeatureCooccurrenceCounter.class, "Build a file containing weights between word types");
        CommandOption.process(FeatureCooccurrenceCounter.class, args);

        // Fail fast with a clear message instead of a NullPointerException, and do so
        // before the potentially long counting pass.
        if (inputFile.value == null) {
            throw new IllegalArgumentException("Option 'input' is required.");
        }
        if (weightsFile.value == null) {
            throw new IllegalArgumentException("Option 'weights-filename' is required.");
        }

        FeatureCooccurrenceCounter counter = new FeatureCooccurrenceCounter(InstanceList.load(new File(inputFile.value)));
        counter.count();
        counter.printCounts();
    }
}
