/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.examples;

import cc.mallet.pipe.CharSequence2TokenSequence;
import cc.mallet.pipe.FeatureSequence2FeatureVector;
import cc.mallet.pipe.Input2CharSequence;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.PrintInputAndTarget;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.Target2Label;
import cc.mallet.pipe.TokenSequence2FeatureSequence;
import cc.mallet.pipe.TokenSequenceLowercase;
import cc.mallet.pipe.TokenSequenceRemoveStopwords;
import cc.mallet.pipe.iterator.FileIterator;
import cc.mallet.types.InstanceList;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Pattern;

/**
 * Example importer that reads {@code .txt} files from one or more directories,
 * pushes them through a MALLET {@link Pipe} chain and collects the result in an
 * {@link InstanceList}.
 *
 * <p>The pipe chain is built once per importer by {@link #buildPipe()} and is
 * reused for every directory read through that importer.
 */
public class TextImporter {
    /**
     * Pipe chain applied to every imported instance. Initialised through
     * {@link #buildPipe()}, so a subclass override of that method runs during
     * construction, before the subclass's own fields are initialised.
     */
    Pipe pipe = buildPipe();

    /**
     * Assembles the processing chain used by this importer.
     *
     * <p>The stages are added in the order below; the per-stage descriptions follow
     * the MALLET pipe class names and API documentation rather than anything
     * visible in this file.
     *
     * @return a {@link SerialPipes} wrapping the stages in the order they were added
     */
    public Pipe buildPipe() {
        ArrayList<Pipe> stages = new ArrayList<Pipe>();

        // Read the raw input as a character sequence, decoding it as UTF-8.
        stages.add(new Input2CharSequence("UTF-8"));

        // A token is a maximal run of Unicode letters, Unicode digits or underscores.
        Pattern tokenPattern = Pattern.compile("[\\p{L}\\p{N}_]+");
        stages.add(new CharSequence2TokenSequence(tokenPattern));

        // Normalise token case before stopword filtering.
        stages.add(new TokenSequenceLowercase());

        // NOTE(review): per the MALLET API the two flags are (caseSensitive, markDeletions);
        // both are disabled here -- confirm against the MALLET version in use.
        stages.add(new TokenSequenceRemoveStopwords(false, false));

        // Convert the token sequence into a feature sequence.
        stages.add(new TokenSequence2FeatureSequence());

        // Convert the instance target into a label.
        stages.add(new Target2Label());

        // Collapse the feature sequence into a feature vector.
        stages.add(new FeatureSequence2FeatureVector());

        // Diagnostic stage: presumably echoes each instance's input and target as it
        // passes through -- remove it if the console output is unwanted.
        stages.add(new PrintInputAndTarget());

        return new SerialPipes(stages);
    }

    /**
     * Imports every {@code .txt} file found under a single directory.
     *
     * @param directory the directory to read
     * @return the instances produced by running the files through the pipe chain
     */
    public InstanceList readDirectory(File directory) {
        return readDirectories(new File[]{directory});
    }

    /**
     * Imports every {@code .txt} file found under the given directories.
     *
     * <p>Files are selected by {@link TxtFilter}. The iterator is configured with
     * {@code FileIterator.LAST_DIRECTORY}, which according to the MALLET
     * documentation takes each instance's target from the name of the directory
     * that directly contains the file.
     *
     * @param directories the directories to read
     * @return the instances produced by running the files through the pipe chain
     */
    public InstanceList readDirectories(File[] directories) {
        FileIterator files = new FileIterator(directories, new TxtFilter(), FileIterator.LAST_DIRECTORY);
        InstanceList imported = new InstanceList(pipe);
        imported.addThruPipe(files);
        return imported;
    }

    /**
     * Command-line entry point: imports a directory and saves the resulting
     * instance list to a file.
     *
     * @param args {@code args[0]} is the input directory, {@code args[1]} is the
     *             output file; any further arguments are ignored
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws IOException declared for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        // Fail with a usage hint instead of an opaque ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: TextImporter <input directory> <output file>");
        }
        File inputDirectory = new File(args[0]);
        File outputFile = new File(args[1]);

        InstanceList instances = new TextImporter().readDirectory(inputDirectory);
        instances.save(outputFile);
    }

    /**
     * Accepts files whose path string ends with {@code ".txt"}.
     *
     * <p>The comparison is case-sensitive and is made against the whole path
     * string returned by {@link File#toString()}. Kept as an inner (non-static)
     * class so existing code that instantiates it through an importer instance
     * keeps compiling.
     */
    class TxtFilter implements FileFilter {
        /**
         * @param file the candidate file
         * @return {@code true} if the file's path string ends with {@code ".txt"}
         */
        @Override
        public boolean accept(File file) {
            return file.toString().endsWith(".txt");
        }
    }
}

