package cc.mallet.pipe;

import cc.mallet.types.Instance;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;

/* loaded from: input_file:cc/mallet/pipe/SimpleTokenizer.class */
/**
 * A pipe that converts an instance's {@link CharSequence} data into an
 * {@code ArrayList<String>} of word tokens, dropping tokens that appear in a stoplist.
 *
 * <p>Tokenization is driven by the Unicode general category of each code point:
 * <ul>
 *   <li>letters, combining marks and the underscore are appended to the current token;</li>
 *   <li>separators (space/line/paragraph) and punctuation end the current token;</li>
 *   <li>every other category (for example digits, symbols and control characters) is skipped
 *       and does <em>not</em> end the current token.</li>
 * </ul>
 * No case folding is performed, and stoplist matching is an exact, case-sensitive lookup.
 */
public class SimpleTokenizer extends Pipe {
    /** Constructor flag: start with an empty stoplist. */
    public static final int USE_EMPTY_STOPLIST = 0;
    /** Constructor flag: start with the small built-in English stoplist. */
    public static final int USE_DEFAULT_ENGLISH_STOPLIST = 1;

    /** Maximum number of code points in one token; longer runs are split at this length. */
    private static final int MAX_TOKEN_CODE_POINTS = 1000;

    /** Built-in English stopwords, in the order they are registered. */
    private static final String[] DEFAULT_ENGLISH_STOPWORDS = {
        "the", "a", "an", "and", "or", "of", "for", "in", "on", "to", "with",
        "by", "this", "that", "these", "those", "some", "other", "it", "its", "we",
        "our", "as", "but", "not", "do", "does", "is", "be", "are", "can",
        "was", "were"
    };

    /** Tokens contained in this set are removed from the output of {@link #pipe(Instance)}. */
    protected HashSet<String> stoplist;
    static final long serialVersionUID = 1;

    /**
     * Creates a tokenizer with either an empty or the built-in English stoplist.
     *
     * @param languageFlag {@link #USE_DEFAULT_ENGLISH_STOPLIST} to preload the built-in
     *                     stopwords; any other value (including {@link #USE_EMPTY_STOPLIST})
     *                     leaves the stoplist empty
     */
    public SimpleTokenizer(int languageFlag) {
        this.stoplist = new HashSet<>();
        if (languageFlag == USE_DEFAULT_ENGLISH_STOPLIST) {
            for (String word : DEFAULT_ENGLISH_STOPWORDS) {
                stop(word);
            }
        }
    }

    /**
     * Creates a tokenizer whose stoplist is read from a UTF-8 text file, one entry per line.
     * Each line is added verbatim (no trimming or case folding).
     *
     * <p>Loading is best-effort: if the file cannot be read, a message is printed to
     * {@code System.err} and the tokenizer keeps whatever entries were read before the failure.
     *
     * @param stopfile the file containing one stopword per line
     */
    public SimpleTokenizer(File stopfile) {
        this.stoplist = new HashSet<>();
        // try-with-resources closes the reader even when reading fails part-way through.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(stopfile), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                stop(line);
            }
        } catch (Exception e) {
            // Deliberately broad: construction has always been best-effort for any load failure.
            System.err.println("problem loading stoplist: " + e);
        }
    }

    /**
     * Creates a tokenizer that uses the given set as its stoplist.
     * The set is stored by reference, not copied, so later changes to it are visible here.
     *
     * @param stoplist the stoplist to use
     */
    public SimpleTokenizer(HashSet<String> stoplist) {
        this.stoplist = stoplist;
    }

    /**
     * Returns a new tokenizer with an independent copy of this tokenizer's stoplist.
     *
     * @return a tokenizer whose stoplist can be modified without affecting this one
     */
    public SimpleTokenizer deepClone() {
        HashSet<String> copy = new HashSet<>(this.stoplist);
        return new SimpleTokenizer(copy);
    }

    /**
     * Adds a word to the stoplist.
     *
     * @param word the exact token text to suppress
     */
    public void stop(String word) {
        this.stoplist.add(word);
    }

    /**
     * Replaces the instance's {@link CharSequence} data with the list of tokens found in it.
     *
     * <p>The text is scanned one Unicode code point at a time, so supplementary characters
     * (encoded as surrogate pairs) are handled as single characters. Tokens longer than
     * {@value #MAX_TOKEN_CODE_POINTS} code points are split at that length.
     *
     * @param instance the instance whose data is a {@code CharSequence}
     * @return the same instance, with its data set to an {@code ArrayList<String>} of tokens
     * @throws IllegalArgumentException if the instance data is {@code null} or not a
     *                                  {@code CharSequence}
     */
    @Override // cc.mallet.pipe.Pipe
    public Instance pipe(Instance instance) {
        Object data = instance.getData();
        if (!(data instanceof CharSequence)) {
            throw new IllegalArgumentException(
                    "Looking for a CharSequence, found a " + (data == null ? "null" : data.getClass()));
        }
        CharSequence text = (CharSequence) data;
        ArrayList<String> tokens = new ArrayList<>();
        int[] buffer = new int[MAX_TOKEN_CODE_POINTS];
        int count = 0;

        int length = text.length();
        int offset = 0;
        while (offset < length) {
            int codePoint = Character.codePointAt(text, offset);
            // Advance by one or two chars so the next read starts on a code point boundary.
            offset += Character.charCount(codePoint);

            int type = Character.getType(codePoint);
            // The underscore is checked first: its category is connector punctuation,
            // but it is kept as a word character.
            if (codePoint == '_' || isWordCategory(type)) {
                buffer[count++] = codePoint;
                if (count == buffer.length) {
                    // Buffer is full: emit what we have and keep scanning.
                    count = emitToken(buffer, count, tokens);
                }
            } else if (isBoundaryCategory(type)) {
                count = emitToken(buffer, count, tokens);
            }
            // Any other category is skipped without ending the current token.
        }
        emitToken(buffer, count, tokens);

        instance.setData(tokens);
        return instance;
    }

    /** Adds the buffered token to {@code tokens} unless it is empty or stoplisted; returns the new count (0). */
    private int emitToken(int[] buffer, int count, ArrayList<String> tokens) {
        if (count > 0) {
            String token = new String(buffer, 0, count);
            if (!this.stoplist.contains(token)) {
                tokens.add(token);
            }
        }
        return 0;
    }

    /** Returns true for general categories whose code points belong inside a token (letters and marks). */
    private static boolean isWordCategory(int type) {
        switch (type) {
            case Character.UPPERCASE_LETTER:
            case Character.LOWERCASE_LETTER:
            case Character.TITLECASE_LETTER:
            case Character.MODIFIER_LETTER:
            case Character.OTHER_LETTER:
            case Character.NON_SPACING_MARK:
            case Character.ENCLOSING_MARK:
            case Character.COMBINING_SPACING_MARK:
                return true;
            default:
                return false;
        }
    }

    /** Returns true for general categories that end the current token (separators and punctuation). */
    private static boolean isBoundaryCategory(int type) {
        switch (type) {
            case Character.SPACE_SEPARATOR:
            case Character.LINE_SEPARATOR:
            case Character.PARAGRAPH_SEPARATOR:
            case Character.DASH_PUNCTUATION:
            case Character.START_PUNCTUATION:
            case Character.END_PUNCTUATION:
            case Character.CONNECTOR_PUNCTUATION:
            case Character.OTHER_PUNCTUATION:
            case Character.INITIAL_QUOTE_PUNCTUATION:
            case Character.FINAL_QUOTE_PUNCTUATION:
                return true;
            default:
                return false;
        }
    }
}
