/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.attribute;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.SparseInstance;
import weka.core.Stopwords;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

public class StringToWordVector
extends Filter
implements UnsupervisedFilter,
OptionHandler {
    /** Delimiter characters handed to StringTokenizer when splitting text into words. */
    private String delimiters = " \n\t.,:'\"()?!";
    /** Attributes to convert; null means "pick every string attribute" (resolved in determineSelectedRange). */
    protected Range m_SelectedRange = null;
    /** Maps each kept word to the index of its attribute in the output format. */
    private TreeMap m_Dictionary = new TreeMap();
    /** True once the first batch has been processed and the dictionary is fixed. */
    private boolean m_FirstBatchDone = false;
    /** If true, emit word counts; otherwise emit 1.0 for presence. */
    private boolean m_OutputCounts = false;
    /** Prefix prepended to the name of every generated word attribute. */
    private String m_Prefix = "";
    /** Per output-attribute index: number of first-batch documents containing that word. */
    private int[] docsCounts;
    /** Number of instances in the first batch (used by the IDF transform); -1 until known. */
    private int numInstances = -1;
    /** Average Euclidean length of the first-batch word vectors; -1 until computed. */
    private double avgDocLength = -1.0;
    /** Approximate number of words to keep (applied per class value when a class is set). */
    private int m_WordsToKeep = 1000;
    /** Whether to replace each word value f by log(1 + f). */
    private boolean m_TFTransform;
    /** Whether to rescale each document's word vector to the average document length. */
    private boolean m_normalizeDocLength;
    /** Whether to multiply each word value by log(numInstances / docsCounts[word]). */
    private boolean m_IDFTransform;
    /** Whether to tokenize on maximal runs of ASCII letters instead of using the delimiter set. */
    private boolean m_onlyAlphabeticTokens;
    /** Whether to lower-case tokens before dictionary lookup/insertion. */
    private boolean m_lowerCaseTokens;
    /** Whether to drop tokens that Stopwords.isStopword reports as stop words. */
    private boolean m_useStoplist;

    /**
     * Describes every command-line option this filter understands.
     *
     * @return an enumeration of {@link Option} objects, one per supported switch
     */
    public Enumeration listOptions() {
        Vector<Option> options = new Vector<Option>(11);

        options.add(new Option(
                "\tOutput word counts rather than boolean word presence.\n",
                "C", 0, "-C"));
        options.add(new Option(
                "\tString containing the set of delimiter characters\n"
                        + "\t(default: \" \\n\\t.,:'\\\"()?!\")",
                "D", 1, "-D <delimiter set>"));
        options.add(new Option(
                "\tSpecify list of string attributes to convert to words (as weka Range).\n"
                        + "\t(default: select all string attributes)",
                "R", 1, "-R <index1,index2-index4,...>"));
        options.add(new Option(
                "\tSpecify a prefix for the created attribute names.\n"
                        + "\t(default: \"\")",
                "P", 1, "-P <attribute name prefix>"));
        options.add(new Option(
                "\tSpecify approximate number of word fields to create.\n"
                        + "\tSurplus words will be discarded..\n"
                        + "\t(default: 1000)",
                "W", 1, "-W <number of words to keep>"));
        options.add(new Option(
                "\tTransform the word frequencies into log(1+fij)\n"
                        + "\twhere fij is the frequency of word i in jth document(instance).\n",
                "T", 0, "-T"));
        options.add(new Option(
                "\tTransform each word frequency into:\n"
                        + "\tfij*log(num of Documents/num of  documents containing word i)\n"
                        + "\t  where fij if frequency of word i in  jth document(instance)",
                "I", 0, "-I"));
        options.add(new Option(
                "\tNormalize word frequencies of each document(instance) to average length of documents.",
                "N", 0, "-N"));
        options.add(new Option(
                "\tOnly form tokens from contiguous alphabetic sequences (The delimiter string is ignored if this is set).",
                "A", 0, "-A"));
        options.add(new Option(
                "\tConvert all tokens to lowercase before adding to the dictionary.",
                "L", 0, "-L"));
        options.add(new Option(
                "\tIgnore words that are in the stoplist.",
                "S", 0, "-S"));

        return options.elements();
    }

    /**
     * Configures the filter from a command-line style option array.
     *
     * Valued options (-D, -R, -P, -W) are applied only when present with a
     * non-empty value; the boolean switches (-C, -T, -I, -N, -L, -A, -S) are
     * always assigned, so an absent switch turns the feature off.
     *
     * @param stringArray the options to parse
     * @throws Exception if an option cannot be parsed
     */
    public void setOptions(String[] stringArray) throws Exception {
        String delimiterSet = Utils.getOption('D', stringArray);
        if (delimiterSet.length() > 0) {
            setDelimiters(delimiterSet);
        }

        String rangeSpec = Utils.getOption('R', stringArray);
        if (rangeSpec.length() > 0) {
            setSelectedRange(rangeSpec);
        }

        String namePrefix = Utils.getOption('P', stringArray);
        if (namePrefix.length() > 0) {
            setAttributeNamePrefix(namePrefix);
        }

        String wordLimit = Utils.getOption('W', stringArray);
        if (wordLimit.length() > 0) {
            setWordsToKeep(Integer.valueOf(wordLimit).intValue());
        }

        boolean outputCounts = Utils.getFlag('C', stringArray);
        setOutputWordCounts(outputCounts);
        boolean tfTransform = Utils.getFlag('T', stringArray);
        setTFTransform(tfTransform);
        boolean idfTransform = Utils.getFlag('I', stringArray);
        setIDFTransform(idfTransform);
        boolean normalize = Utils.getFlag('N', stringArray);
        setNormalizeDocLength(normalize);
        boolean lowerCase = Utils.getFlag('L', stringArray);
        setLowerCaseTokens(lowerCase);
        boolean alphabeticOnly = Utils.getFlag('A', stringArray);
        setOnlyAlphabeticTokens(alphabeticOnly);
        boolean stoplist = Utils.getFlag('S', stringArray);
        setUseStoplist(stoplist);
    }

    /**
     * Returns the current configuration as a command-line style option array.
     *
     * The array always has 16 slots; unused trailing slots are filled with
     * empty strings.
     *
     * @return the option array describing the current settings
     */
    public String[] getOptions() {
        // 4 valued options (2 slots each) + 7 flags = 15 slots at most.
        String[] options = new String[16];
        int current = 0;

        options[current++] = "-D";
        options[current++] = this.getDelimiters();

        if (this.getSelectedRange() != null) {
            options[current++] = "-R";
            // The input format is null until setInputFormat has been called
            // (input() and batchFinished() check for exactly that), so only
            // refresh the range's upper bound when a format is available.
            // Previously this dereferenced getInputFormat() unconditionally and
            // threw a NullPointerException for a freshly configured filter.
            // NOTE(review): assumes Range.getRanges() does not require the upper
            // bound to have been set — confirm against weka.core.Range.
            if (this.getInputFormat() != null) {
                this.m_SelectedRange.setUpper(this.getInputFormat().numAttributes() - 1);
            }
            options[current++] = this.getSelectedRange().getRanges();
        }

        if (!"".equals(this.getAttributeNamePrefix())) {
            options[current++] = "-P";
            options[current++] = this.getAttributeNamePrefix();
        }

        options[current++] = "-W";
        options[current++] = String.valueOf(this.getWordsToKeep());

        if (this.getOutputWordCounts()) {
            options[current++] = "-C";
        }
        if (this.getTFTransform()) {
            options[current++] = "-T";
        }
        if (this.getIDFTransform()) {
            options[current++] = "-I";
        }
        if (this.getNormalizeDocLength()) {
            options[current++] = "-N";
        }
        if (this.getLowerCaseTokens()) {
            options[current++] = "-L";
        }
        if (this.getOnlyAlphabeticTokens()) {
            options[current++] = "-A";
        }
        if (this.getUseStoplist()) {
            options[current++] = "-S";
        }

        // Pad the remainder so callers never see null entries.
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Creates a filter with the default settings declared on the fields
     * (e.g. 1000 words to keep, default delimiter set).
     */
    public StringToWordVector() {
    }

    /**
     * Creates a filter that attempts to keep the given number of words.
     *
     * @param n the value stored as the words-to-keep setting
     */
    public StringToWordVector(int n) {
        this.m_WordsToKeep = n;
    }

    /**
     * Sets the format of the input instances and marks the first batch as not
     * yet processed, so the dictionary is rebuilt from the next batch.
     *
     * @param instances the input format, passed on to the superclass
     * @return always false: the output format is only set later, by
     *         determineDictionary() during the first batchFinished() call
     * @throws Exception if the superclass rejects the format
     */
    public boolean setInputFormat(Instances instances) throws Exception {
        super.setInputFormat(instances);
        this.m_FirstBatchDone = false;
        return false;
    }

    /**
     * Feeds one instance to the filter.
     *
     * During the first batch the instance is only buffered, because the
     * dictionary is not known until the batch is finished. In later batches it
     * is converted immediately.
     *
     * @param instance the instance to process
     * @return true if the instance was converted and is available for output,
     *         false if it was buffered
     * @throws IllegalStateException if no input format has been set
     * @throws Exception if the conversion fails
     */
    public boolean input(Instance instance) throws Exception {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }

        // A new batch starts with an empty output queue.
        if (m_NewBatch) {
            resetQueue();
            m_NewBatch = false;
        }

        if (!m_FirstBatchDone) {
            bufferInput(instance);
            return false;
        }

        convertInstance(instance);
        return true;
    }

    /**
     * Signals the end of the current batch.
     *
     * On the first batch the dictionary (and thereby the output format) is
     * determined from the buffered instances. The buffered instances are then
     * converted and queued for output. When document-length normalisation is
     * enabled, the first batch is converted without normalisation, the average
     * Euclidean length of the word vectors is computed and stored in
     * avgDocLength, and every word vector is rescaled to that length.
     *
     * @return true if converted instances are pending for output
     * @throws IllegalStateException if no input format has been set
     * @throws Exception if converting an instance fails
     */
    public boolean batchFinished() throws Exception {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (!this.m_FirstBatchDone) {
            this.determineDictionary();
        }

        if (!this.m_normalizeDocLength || this.m_FirstBatchDone) {
            // No normalisation, or avgDocLength is already known: convert directly.
            for (int i = 0; i < this.getInputFormat().numInstances(); ++i) {
                this.convertInstance(this.getInputFormat().instance(i));
            }
            this.flushInput();
        } else {
            FastVector converted = new FastVector();
            // Index of the first word attribute; everything below it is a copied attribute.
            int firstWordIndex = 0;
            Instances buffered = this.getInputFormat();
            this.avgDocLength = 0.0;
            for (int i = 0; i < buffered.numInstances(); ++i) {
                firstWordIndex = this.convertInstancewoDocNorm(buffered.instance(i), converted);
            }

            // Pass 1: scale each word vector to unit length and accumulate the lengths.
            for (int i = 0; i < converted.size(); ++i) {
                Instance doc = (Instance) converted.elementAt(i);
                double sumOfSquares = 0.0;
                for (int j = 0; j < doc.numValues(); ++j) {
                    if (doc.index(j) < firstWordIndex) {
                        continue;
                    }
                    double value = doc.valueSparse(j);
                    sumOfSquares += value * value;
                }
                double norm = Math.sqrt(sumOfSquares);
                this.avgDocLength += norm;
                if (norm == 0.0) {
                    // Nothing to scale; dividing would turn stored zeros into NaN (0/0).
                    continue;
                }
                for (int j = 0; j < doc.numValues(); ++j) {
                    // Read the attribute index BEFORE writing: the loop below relies on
                    // a zero write dropping the sparse entry (hence --j), after which
                    // position j refers to a different attribute or is out of range.
                    int attributeIndex = doc.index(j);
                    if (attributeIndex < firstWordIndex) {
                        continue;
                    }
                    double scaled = doc.valueSparse(j) / norm;
                    doc.setValueSparse(j, scaled);
                    if (scaled != 0.0) {
                        continue;
                    }
                    System.err.println("setting value " + attributeIndex + " to zero.");
                    --j;
                }
            }

            this.avgDocLength /= (double) buffered.numInstances();

            // Pass 2: stretch every unit vector to the average length and queue it.
            for (int i = 0; i < converted.size(); ++i) {
                Instance doc = (Instance) converted.elementAt(i);
                for (int j = 0; j < doc.numValues(); ++j) {
                    int attributeIndex = doc.index(j);
                    if (attributeIndex < firstWordIndex) {
                        continue;
                    }
                    double scaled = doc.valueSparse(j) * this.avgDocLength;
                    doc.setValueSparse(j, scaled);
                    if (scaled != 0.0) {
                        continue;
                    }
                    System.err.println("setting value " + attributeIndex + " to zero.");
                    --j;
                }
                this.push(doc);
            }
            this.flushInput();
        }

        this.m_NewBatch = true;
        this.m_FirstBatchDone = true;
        return this.numPendingOutput() != 0;
    }

    /**
     * Returns a short description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Converts String attributes into a set of attributes representing word occurrence information from the text contained in the strings. The set of words (attributes) is determined by the first batch filtered (typically training data).";
    }

    /**
     * @return true if word counts are output, false if only presence (1.0) is output
     */
    public boolean getOutputWordCounts() {
        return this.m_OutputCounts;
    }

    /**
     * @param bl true to output word counts instead of word presence
     */
    public void setOutputWordCounts(boolean bl) {
        this.m_OutputCounts = bl;
    }

    /**
     * @return the tip text describing the outputWordCounts property
     */
    public String outputWordCountsTipText() {
        return "Output word counts rather than boolean 0 or 1(indicating presence or absence of a word).";
    }

    /**
     * @return the delimiter characters used for tokenizing
     */
    public String getDelimiters() {
        return this.delimiters;
    }

    /**
     * @param string the delimiter characters to use for tokenizing
     */
    public void setDelimiters(String string) {
        this.delimiters = string;
    }

    /**
     * @return the tip text describing the delimiters property
     */
    public String delimitersTipText() {
        return "Set of delimiter characters to use in tokenizing (default: \" \\n\\t.,:'\\\"()?!\"). This option is ignored if onlyAlphabeticTokens option is set to true.";
    }

    /**
     * @return the range of attributes to convert, or null if none has been set
     *         or determined yet
     */
    public Range getSelectedRange() {
        return this.m_SelectedRange;
    }

    /**
     * Replaces the selected attribute range with a new Range built from the
     * given specification.
     *
     * @param string the range specification, passed to the Range constructor
     */
    public void setSelectedRange(String string) {
        this.m_SelectedRange = new Range(string);
    }

    /**
     * @return the prefix prepended to the names of the generated word attributes
     */
    public String getAttributeNamePrefix() {
        return this.m_Prefix;
    }

    /**
     * @param string the prefix to prepend to the names of the generated word attributes
     */
    public void setAttributeNamePrefix(String string) {
        this.m_Prefix = string;
    }

    /**
     * @return the tip text describing the attributeNamePrefix property
     */
    public String attributeNamePrefixTipText() {
        return "Prefix for the created attribute names. (default: \"\")";
    }

    /**
     * @return the number of words the filter attempts to keep
     */
    public int getWordsToKeep() {
        return this.m_WordsToKeep;
    }

    /**
     * @param n the number of words the filter should attempt to keep
     */
    public void setWordsToKeep(int n) {
        this.m_WordsToKeep = n;
    }

    /**
     * @return the tip text describing the wordsToKeep property
     */
    public String wordsToKeepTipText() {
        return "The number of words (per class if there is a class attribute assigned) to attempt to keep.";
    }

    /**
     * @return true if word values are transformed into log(1 + value)
     */
    public boolean getTFTransform() {
        return this.m_TFTransform;
    }

    /**
     * @param bl true to transform word values into log(1 + value)
     */
    public void setTFTransform(boolean bl) {
        this.m_TFTransform = bl;
    }

    /**
     * @return the tip text describing the TFTransform property
     */
    public String TFTransformTipText() {
        return "Sets whether if the word frequencies should be transformed into:\n    log(1+fij) \n       where fij is the frequency of word i in document (instance) j.";
    }

    /**
     * @return true if word values are multiplied by
     *         log(number of documents / number of documents containing the word)
     */
    public boolean getIDFTransform() {
        return this.m_IDFTransform;
    }

    /**
     * @param bl true to apply the inverse-document-frequency weighting
     */
    public void setIDFTransform(boolean bl) {
        this.m_IDFTransform = bl;
    }

    /**
     * @return the tip text describing the IDFTransform property
     */
    public String IDFTransformTipText() {
        return "Sets whether if the word frequencies in a document should be transformed into: \n   fij*log(num of Docs/num of Docs with word i) \n      where fij is the frequency of word i in document (instance) j.";
    }

    /**
     * @return true if each document's word vector is rescaled to the average
     *         document length
     */
    public boolean getNormalizeDocLength() {
        return this.m_normalizeDocLength;
    }

    /**
     * @param bl true to rescale each document's word vector to the average
     *           document length
     */
    public void setNormalizeDocLength(boolean bl) {
        this.m_normalizeDocLength = bl;
    }

    /**
     * @return the tip text describing the normalizeDocLength property
     */
    public String normalizeDocLengthTipText() {
        return "Sets whether if the word frequencies for a document (instance) should be normalized or not.";
    }

    /**
     * @return true if tokens are formed only from contiguous alphabetic
     *         sequences (the delimiter set is then not used)
     */
    public boolean getOnlyAlphabeticTokens() {
        return this.m_onlyAlphabeticTokens;
    }

    /**
     * @param bl true to form tokens only from contiguous alphabetic sequences
     */
    public void setOnlyAlphabeticTokens(boolean bl) {
        this.m_onlyAlphabeticTokens = bl;
    }

    /**
     * @return the tip text describing the onlyAlphabeticTokens property
     */
    public String onlyAlphabeticTokensTipText() {
        return "Sets whether if the word tokens are to be formed only from contiguous alphabetic sequences (The delimiter string is ignored if this option is set to true).";
    }

    /**
     * @return true if tokens are converted to lower case before dictionary use
     */
    public boolean getLowerCaseTokens() {
        return this.m_lowerCaseTokens;
    }

    /**
     * @param bl true to convert tokens to lower case before dictionary use
     */
    public void setLowerCaseTokens(boolean bl) {
        this.m_lowerCaseTokens = bl;
    }

    /**
     * @return the tip text describing the lowerCaseTokens property
     */
    public String lowerCaseTokensTipText() {
        return "If set then all the word tokens are converted to lower case before being added to the dictionary.";
    }

    /**
     * @return true if words on the stoplist are ignored when building the dictionary
     */
    public boolean getUseStoplist() {
        return this.m_useStoplist;
    }

    /**
     * @param bl true to ignore words on the stoplist when building the dictionary
     */
    public void setUseStoplist(boolean bl) {
        this.m_useStoplist = bl;
    }

    /**
     * @return the tip text describing the useStoplist property
     */
    public String useStoplistTipText() {
        return "Ignores all the words that are on the stoplist, if set to true.";
    }

    /**
     * Sorts the given array in place into ascending order.
     *
     * The previous hand-written shell sort used 1-based index bounds on a
     * 0-based array, so the element at index 0 was never compared or moved
     * (e.g. {5, 1, 2} stayed unsorted). It also referenced its inner loop
     * variable outside the loop's scope. The standard library sort has
     * neither problem.
     *
     * @param nArray the array to sort; modified in place
     */
    private static void sortArray(int[] nArray) {
        Arrays.sort(nArray);
    }

    /**
     * Resolves m_SelectedRange against the current input format.
     *
     * If no range was set, all attributes whose type code is 2 are selected
     * (per the -R option text, these are the string attributes). In every case
     * the range is then narrowed to the attributes that are both inside it and
     * of type 2, and its upper bound is set to the last attribute index.
     */
    private void determineSelectedRange() {
        Instances inputFormat = this.getInputFormat();
        int attributeCount = inputFormat.numAttributes();

        if (this.m_SelectedRange == null) {
            // Default selection: every attribute of type 2, as 1-based indices.
            StringBuilder everyString = new StringBuilder();
            for (int att = 0; att < attributeCount; att++) {
                if (inputFormat.attribute(att).type() == 2) {
                    everyString.append(att + 1).append(",");
                }
            }
            this.m_SelectedRange = new Range(everyString.toString());
        }
        this.m_SelectedRange.setUpper(attributeCount - 1);

        // Drop anything from the range that is not of type 2.
        StringBuilder stringsInRange = new StringBuilder();
        for (int att = 0; att < attributeCount; att++) {
            boolean selected = this.m_SelectedRange.isInRange(att);
            if (selected && inputFormat.attribute(att).type() == 2) {
                stringsInRange.append(att + 1).append(",");
            }
        }
        this.m_SelectedRange.setRanges(stringsInRange.toString());
        this.m_SelectedRange.setUpper(attributeCount - 1);
    }

    /**
     * Builds the word dictionary and the output format from the buffered first batch.
     *
     * Steps:
     * 1. Count, separately for each class value (or in a single dictionary when
     *    no class is set), how often every token occurs and in how many
     *    documents it occurs.
     * 2. For each of those dictionaries derive a pruning threshold so that
     *    roughly m_WordsToKeep words survive (threshold is at least 1).
     * 3. Build the output attributes: first copies of all attributes outside
     *    the selected range, then one attribute per surviving word.
     * 4. Store the word-to-attribute-index map in m_Dictionary, the document
     *    counts in docsCounts, the batch size in numInstances, and publish the
     *    output format.
     *
     * This rewrite restores properly typed locals: the decompiled version reused
     * Object-typed variables as arrays and iterators and wrote the int threshold
     * 1 as "(Object)true", which does not compile. It also uses
     * Hashtable.containsKey where contains (a value lookup) was called.
     */
    private void determineDictionary() {
        int classIndexIn = this.getInputFormat().classIndex();
        int numClassValues = 1;
        if (classIndexIn != -1) {
            numClassValues = this.getInputFormat().attribute(classIndexIn).numValues();
        }

        // One word -> Count dictionary per class value.
        TreeMap[] dictionaries = new TreeMap[numClassValues];
        for (int z = 0; z < numClassValues; ++z) {
            dictionaries[z] = new TreeMap();
        }

        this.determineSelectedRange();

        // Step 1: tokenize every buffered instance and update the counts.
        for (int i = 0; i < this.getInputFormat().numInstances(); ++i) {
            Instance instance = this.getInputFormat().instance(i);
            int classValue = 0;
            if (classIndexIn != -1) {
                classValue = (int) instance.classValue();
            }

            // Distinct words seen in this document, for the document counts.
            Hashtable<String, Integer> wordsInDocument = new Hashtable<String, Integer>();
            for (int j = 0; j < instance.numAttributes(); ++j) {
                if (!this.m_SelectedRange.isInRange(j) || instance.isMissing(j)) {
                    continue;
                }
                Enumeration tokens;
                if (this.m_onlyAlphabeticTokens) {
                    tokens = new AlphabeticStringTokenizer(instance.stringValue(j));
                } else {
                    tokens = new StringTokenizer(instance.stringValue(j), this.delimiters);
                }
                while (tokens.hasMoreElements()) {
                    String word = ((String) tokens.nextElement()).intern();
                    if (this.m_lowerCaseTokens) {
                        word = word.toLowerCase();
                    }
                    if (this.m_useStoplist && Stopwords.isStopword(word)) {
                        continue;
                    }
                    if (!wordsInDocument.containsKey(word)) {
                        wordsInDocument.put(word, Integer.valueOf(0));
                    }
                    Count count = (Count) dictionaries[classValue].get(word);
                    if (count == null) {
                        dictionaries[classValue].put(word, new Count(1));
                    } else {
                        ++count.count;
                    }
                }
            }

            // Each distinct word of this document counts once towards docCount.
            Enumeration distinctWords = wordsInDocument.keys();
            while (distinctWords.hasMoreElements()) {
                String word = (String) distinctWords.nextElement();
                Count count = (Count) dictionaries[classValue].get(word);
                if (count != null) {
                    ++count.docCount;
                } else {
                    System.err.println("Warning: A word should definitely be in the dictionary.Please check the code");
                }
            }
        }

        // Step 2: per dictionary, find the minimum count a word needs to be kept.
        int totalWords = 0;
        int[] pruneThreshold = new int[numClassValues];
        for (int z = 0; z < numClassValues; ++z) {
            totalWords += dictionaries[z].size();

            int[] counts = new int[dictionaries[z].size()];
            int pos = 0;
            Iterator words = dictionaries[z].keySet().iterator();
            while (words.hasNext()) {
                String word = (String) words.next();
                Count count = (Count) dictionaries[z].get(word);
                counts[pos] = count.count;
                ++pos;
            }

            StringToWordVector.sortArray(counts);
            if (counts.length < this.m_WordsToKeep) {
                // Fewer words than requested: keep everything seen at least once.
                pruneThreshold[z] = 1;
            } else {
                pruneThreshold[z] = Math.max(1, counts[counts.length - this.m_WordsToKeep]);
            }
        }

        // Step 3a: copy the attributes that are not converted, tracking where the class ends up.
        FastVector attributes = new FastVector(totalWords + this.getInputFormat().numAttributes());
        int classIndexOut = -1;
        for (int i = 0; i < this.getInputFormat().numAttributes(); ++i) {
            if (this.m_SelectedRange.isInRange(i)) {
                continue;
            }
            if (this.getInputFormat().classIndex() == i) {
                classIndexOut = attributes.size();
            }
            attributes.addElement(this.getInputFormat().attribute(i).copy());
        }

        // Step 3b: one new attribute per word that reaches its dictionary's threshold.
        TreeMap<String, Integer> newDictionary = new TreeMap<String, Integer>();
        int nextIndex = attributes.size();
        for (int z = 0; z < numClassValues; ++z) {
            Iterator words = dictionaries[z].keySet().iterator();
            while (words.hasNext()) {
                String word = (String) words.next();
                Count count = (Count) dictionaries[z].get(word);
                if (count.count < pruneThreshold[z] || newDictionary.get(word) != null) {
                    continue;
                }
                newDictionary.put(word, Integer.valueOf(nextIndex++));
                attributes.addElement(new Attribute(this.m_Prefix + word));
            }
        }

        // Step 4: document counts per output attribute, summed over all class dictionaries.
        this.docsCounts = new int[attributes.size()];
        Iterator<String> keptWords = newDictionary.keySet().iterator();
        while (keptWords.hasNext()) {
            String word = keptWords.next();
            int attributeIndex = newDictionary.get(word).intValue();
            int documents = 0;
            for (int z = 0; z < numClassValues; ++z) {
                Count count = (Count) dictionaries[z].get(word);
                if (count == null) {
                    continue;
                }
                documents += count.docCount;
            }
            this.docsCounts[attributeIndex] = documents;
        }

        attributes.trimToSize();
        this.m_Dictionary = newDictionary;
        this.numInstances = this.getInputFormat().numInstances();

        Instances outputFormat = new Instances(this.getInputFormat().relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndexOut);
        this.setOutputFormat(outputFormat);
    }

    /**
     * Converts one input instance into a sparse word-vector instance and pushes
     * it onto the output queue.
     *
     * Attributes outside the selected range are copied to the front of the
     * output instance; the text of the selected attributes is tokenized and each
     * token found in m_Dictionary contributes to its word attribute. The TF,
     * IDF and document-length transforms are then applied, in that order, to
     * the word attributes only.
     *
     * This rewrite restores typed locals (the decompiled version called
     * iterator methods on an Object-typed variable and did not compile), drops
     * unused counters, and skips length normalisation when the word vector has
     * zero length: dividing by sqrt(0) turned zero-valued entries (e.g. an IDF
     * weight of log(1) = 0) into NaN.
     *
     * @param instance the instance to convert
     * @throws Exception if length normalisation is requested before
     *                   avgDocLength has been computed
     */
    private void convertInstance(Instance instance) throws Exception {
        // Sparse output row under construction: output attribute index -> value.
        TreeMap<Integer, Double> contained = new TreeMap<Integer, Double>();

        // Copy all attributes that are not converted. firstWordIndex ends up as
        // the number of copied attributes, i.e. the index of the first word attribute.
        int firstWordIndex = 0;
        for (int i = 0; i < this.getInputFormat().numAttributes(); ++i) {
            if (this.m_SelectedRange.isInRange(i)) {
                continue;
            }
            // Type code 2 is the one determineSelectedRange treats as "string".
            if (this.getInputFormat().attribute(i).type() != 2) {
                if (instance.value(i) != 0.0) {
                    contained.put(Integer.valueOf(firstWordIndex), Double.valueOf(instance.value(i)));
                }
            } else if (instance.isMissing(i)) {
                contained.put(Integer.valueOf(firstWordIndex), Double.valueOf(Instance.missingValue()));
            } else {
                // Reserve string index 0 with a dummy so a real string never maps to
                // 0.0, which a sparse instance would not store.
                if (this.outputFormatPeek().attribute(firstWordIndex).numValues() == 0) {
                    this.outputFormatPeek().attribute(firstWordIndex).addStringValue("Hack to defeat SparseInstance bug");
                }
                int stringIndex = this.outputFormatPeek().attribute(firstWordIndex).addStringValue(instance.stringValue(i));
                contained.put(Integer.valueOf(firstWordIndex), Double.valueOf(stringIndex));
            }
            ++firstWordIndex;
        }

        // Tokenize the selected attributes and record dictionary words.
        for (int i = 0; i < instance.numAttributes(); ++i) {
            if (!this.m_SelectedRange.isInRange(i) || instance.isMissing(i)) {
                continue;
            }
            Enumeration tokens;
            if (this.m_onlyAlphabeticTokens) {
                tokens = new AlphabeticStringTokenizer(instance.stringValue(i));
            } else {
                tokens = new StringTokenizer(instance.stringValue(i), this.delimiters);
            }
            while (tokens.hasMoreElements()) {
                String word = (String) tokens.nextElement();
                if (this.m_lowerCaseTokens) {
                    word = word.toLowerCase();
                }
                Integer wordIndex = (Integer) this.m_Dictionary.get(word);
                if (wordIndex == null) {
                    continue;
                }
                Double soFar = contained.get(wordIndex);
                if (this.m_OutputCounts && soFar != null) {
                    contained.put(wordIndex, Double.valueOf(soFar.doubleValue() + 1.0));
                } else {
                    contained.put(wordIndex, Double.valueOf(1.0));
                }
            }
        }

        // Replacing the value of an existing key is not a structural change, so
        // updating the map while iterating over its key set is safe below.
        if (this.m_TFTransform) {
            for (Integer index : contained.keySet()) {
                if (index.intValue() >= firstWordIndex) {
                    double value = contained.get(index).doubleValue();
                    contained.put(index, Double.valueOf(Math.log(value + 1.0)));
                }
            }
        }

        if (this.m_IDFTransform) {
            for (Integer index : contained.keySet()) {
                if (index.intValue() >= firstWordIndex) {
                    double value = contained.get(index).doubleValue();
                    value *= Math.log((double) this.numInstances / (double) this.docsCounts[index.intValue()]);
                    contained.put(index, Double.valueOf(value));
                }
            }
        }

        if (this.m_normalizeDocLength) {
            if (this.avgDocLength < 0.0) {
                throw new Exception("Error. Average Doc Length not defined yet.");
            }
            double sumOfSquares = 0.0;
            for (Integer index : contained.keySet()) {
                if (index.intValue() >= firstWordIndex) {
                    double value = contained.get(index).doubleValue();
                    sumOfSquares += value * value;
                }
            }
            double norm = Math.sqrt(sumOfSquares);
            // A zero-length vector cannot be rescaled; leave its (zero) entries alone.
            if (norm > 0.0) {
                for (Integer index : contained.keySet()) {
                    if (index.intValue() >= firstWordIndex) {
                        double value = contained.get(index).doubleValue();
                        value /= norm;
                        value *= this.avgDocLength;
                        contained.put(index, Double.valueOf(value));
                    }
                }
            }
        }

        // Flatten the map (already ordered by attribute index) into parallel arrays.
        double[] values = new double[contained.size()];
        int[] indices = new int[contained.size()];
        int pos = 0;
        for (Integer index : contained.keySet()) {
            values[pos] = contained.get(index).doubleValue();
            indices[pos] = index.intValue();
            ++pos;
        }

        SparseInstance converted = new SparseInstance(instance.weight(), values, indices, this.outputFormatPeek().numAttributes());
        converted.setDataset(this.outputFormatPeek());
        this.push(converted);
    }

    /**
     * Converts one input instance into a sparse word-vector instance WITHOUT
     * document-length normalisation and appends it to the given vector instead
     * of pushing it to the output queue.
     *
     * Used by batchFinished() for the first batch when normalisation is
     * enabled, because the average document length is only known after every
     * instance of the batch has been converted.
     *
     * This rewrite restores typed locals (the decompiled version called
     * iterator methods on an Object-typed variable and did not compile) and
     * drops unused counters; the computed values are unchanged.
     *
     * @param instance   the instance to convert
     * @param fastVector receives the converted SparseInstance
     * @return the number of copied (non-converted) attributes, which is the
     *         index of the first word attribute in the output format
     */
    private int convertInstancewoDocNorm(Instance instance, FastVector fastVector) {
        // Sparse output row under construction: output attribute index -> value.
        TreeMap<Integer, Double> contained = new TreeMap<Integer, Double>();

        // Copy all attributes that are not converted.
        int firstWordIndex = 0;
        for (int i = 0; i < this.getInputFormat().numAttributes(); ++i) {
            if (this.m_SelectedRange.isInRange(i)) {
                continue;
            }
            // Type code 2 is the one determineSelectedRange treats as "string".
            if (this.getInputFormat().attribute(i).type() != 2) {
                if (instance.value(i) != 0.0) {
                    contained.put(Integer.valueOf(firstWordIndex), Double.valueOf(instance.value(i)));
                }
            } else if (instance.isMissing(i)) {
                contained.put(Integer.valueOf(firstWordIndex), Double.valueOf(Instance.missingValue()));
            } else {
                // Reserve string index 0 with a dummy so a real string never maps to
                // 0.0, which a sparse instance would not store.
                if (this.outputFormatPeek().attribute(firstWordIndex).numValues() == 0) {
                    this.outputFormatPeek().attribute(firstWordIndex).addStringValue("Hack to defeat SparseInstance bug");
                }
                int stringIndex = this.outputFormatPeek().attribute(firstWordIndex).addStringValue(instance.stringValue(i));
                contained.put(Integer.valueOf(firstWordIndex), Double.valueOf(stringIndex));
            }
            ++firstWordIndex;
        }

        // Tokenize the selected attributes and record dictionary words.
        for (int i = 0; i < instance.numAttributes(); ++i) {
            if (!this.m_SelectedRange.isInRange(i) || instance.isMissing(i)) {
                continue;
            }
            Enumeration tokens;
            if (this.m_onlyAlphabeticTokens) {
                tokens = new AlphabeticStringTokenizer(instance.stringValue(i));
            } else {
                tokens = new StringTokenizer(instance.stringValue(i), this.delimiters);
            }
            while (tokens.hasMoreElements()) {
                String word = (String) tokens.nextElement();
                if (this.m_lowerCaseTokens) {
                    word = word.toLowerCase();
                }
                Integer wordIndex = (Integer) this.m_Dictionary.get(word);
                if (wordIndex == null) {
                    continue;
                }
                Double soFar = contained.get(wordIndex);
                if (this.m_OutputCounts && soFar != null) {
                    contained.put(wordIndex, Double.valueOf(soFar.doubleValue() + 1.0));
                } else {
                    contained.put(wordIndex, Double.valueOf(1.0));
                }
            }
        }

        // Replacing the value of an existing key is not a structural change, so
        // updating the map while iterating over its key set is safe below.
        if (this.m_TFTransform) {
            for (Integer index : contained.keySet()) {
                if (index.intValue() >= firstWordIndex) {
                    double value = contained.get(index).doubleValue();
                    contained.put(index, Double.valueOf(Math.log(value + 1.0)));
                }
            }
        }

        if (this.m_IDFTransform) {
            for (Integer index : contained.keySet()) {
                if (index.intValue() >= firstWordIndex) {
                    double value = contained.get(index).doubleValue();
                    value *= Math.log((double) this.numInstances / (double) this.docsCounts[index.intValue()]);
                    contained.put(index, Double.valueOf(value));
                }
            }
        }

        // Flatten the map (already ordered by attribute index) into parallel arrays.
        double[] values = new double[contained.size()];
        int[] indices = new int[contained.size()];
        int pos = 0;
        for (Integer index : contained.keySet()) {
            values[pos] = contained.get(index).doubleValue();
            indices[pos] = index.intValue();
            ++pos;
        }

        SparseInstance converted = new SparseInstance(instance.weight(), values, indices, this.outputFormatPeek().numAttributes());
        converted.setDataset(this.outputFormatPeek());
        fastVector.addElement(converted);
        return firstWordIndex;
    }

    /**
     * Command-line entry point. Runs the filter in batch mode when the -b flag
     * is present and in single-file mode otherwise; any failure is reported via
     * its stack trace and its message on standard output.
     *
     * @param stringArray the command-line arguments
     */
    public static void main(String[] stringArray) {
        try {
            boolean batchMode = Utils.getFlag('b', stringArray);
            StringToWordVector filter = new StringToWordVector();
            if (!batchMode) {
                Filter.filterFile(filter, stringArray);
            } else {
                Filter.batchFilterFile(filter, stringArray);
            }
        }
        catch (Exception failure) {
            failure.printStackTrace();
            System.out.println(failure.getMessage());
        }
    }

    /**
     * Enumeration over the maximal runs of ASCII letters (a-z, A-Z) in a string.
     * Every other character acts as a separator.
     *
     * The previous nextElement() tried to skip leading separators with a
     * condition that could never be true (a character both below 'a' and above
     * 'z'), so calling it without a preceding hasMoreElements() returned an
     * empty token and did not advance. Both methods now share one skip routine.
     */
    private class AlphabeticStringTokenizer
    implements Enumeration {
        /** The characters being tokenized. */
        private char[] str;
        /** Index of the next character to examine. */
        int currentPos = 0;

        /**
         * @param string the text to split into alphabetic tokens
         */
        public AlphabeticStringTokenizer(String string) {
            this.str = string.toCharArray();
        }

        /** Returns true if the character at the given position is an ASCII letter. */
        private boolean isLetterAt(int pos) {
            char c = this.str[pos];
            return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        }

        /** Advances currentPos to the next letter, or to the end of the text. */
        private void skipSeparators() {
            while (this.currentPos < this.str.length && !this.isLetterAt(this.currentPos)) {
                ++this.currentPos;
            }
        }

        /**
         * @return true if at least one more alphabetic token remains
         */
        public boolean hasMoreElements() {
            this.skipSeparators();
            return this.currentPos < this.str.length;
        }

        /**
         * @return the next alphabetic token as a String
         * @throws NoSuchElementException if no token remains
         */
        public Object nextElement() {
            this.skipSeparators();
            if (this.currentPos >= this.str.length) {
                throw new NoSuchElementException("no more tokens present");
            }
            int start = this.currentPos;
            int end = start;
            while (end < this.str.length && this.isLetterAt(end)) {
                ++end;
            }
            this.currentPos = end;
            return new String(this.str, start, end - start);
        }
    }

    /**
     * Per-word tally used while building the dictionary: how often the word
     * occurred and in how many documents it occurred.
     */
    private class Count
    implements Serializable {
        /** Total number of occurrences of the word. */
        public int count;
        /** Number of documents (instances) containing the word; starts at 0. */
        public int docCount;

        /**
         * @param n the initial occurrence count
         */
        public Count(int n) {
            this.count = n;
        }
    }
}

