/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.instance;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.StreamableFilter;
import weka.filters.UnsupervisedFilter;

public class ReservoirSample
extends Filter
implements UnsupervisedFilter,
OptionHandler,
StreamableFilter {
    static final long serialVersionUID = 3119607037607101160L;
    protected int m_SampleSize = 100;
    protected Instance[] m_subSample;
    protected int m_currentInst;
    protected int m_RandomSeed = 1;
    protected Random m_random;

    public String globalInfo() {
        return "Produces a random subsample of a dataset using the reservoir sampling Algorithm \"R\" by Vitter. The original data set does not have to fit into main memory, but the reservoir does. ";
    }

    public Enumeration listOptions() {
        Vector<Option> result = new Vector<Option>();
        result.addElement(new Option("\tSpecify the random number seed (default 1)", "S", 1, "-S <num>"));
        result.addElement(new Option("\tThe size of the output dataset - number of instances\n\t(default 100)", "Z", 1, "-Z <num>"));
        return result.elements();
    }

    public void setOptions(String[] options) throws Exception {
        String tmpStr = Utils.getOption('S', options);
        if (tmpStr.length() != 0) {
            this.setRandomSeed(Integer.parseInt(tmpStr));
        } else {
            this.setRandomSeed(1);
        }
        tmpStr = Utils.getOption('Z', options);
        if (tmpStr.length() != 0) {
            this.setSampleSize(Integer.parseInt(tmpStr));
        } else {
            this.setSampleSize(100);
        }
    }

    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        result.add("-S");
        result.add("" + this.getRandomSeed());
        result.add("-Z");
        result.add("" + this.getSampleSize());
        return result.toArray(new String[result.size()]);
    }

    public String randomSeedTipText() {
        return "The seed used for random sampling.";
    }

    public int getRandomSeed() {
        return this.m_RandomSeed;
    }

    public void setRandomSeed(int newSeed) {
        this.m_RandomSeed = newSeed;
    }

    public String sampleSizeTipText() {
        return "Size of the subsample (reservoir). i.e. the number of instances.";
    }

    public int getSampleSize() {
        return this.m_SampleSize;
    }

    public void setSampleSize(int newSampleSize) {
        this.m_SampleSize = newSampleSize;
    }

    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enableAllClasses();
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        result.enable(Capabilities.Capability.NO_CLASS);
        return result;
    }

    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        super.setInputFormat(instanceInfo);
        this.setOutputFormat(instanceInfo);
        this.m_subSample = new Instance[this.m_SampleSize];
        this.m_currentInst = 0;
        this.m_random = new Random(this.m_RandomSeed);
        return true;
    }

    protected void processInstance(Instance instance) {
        if (this.m_currentInst < this.m_SampleSize) {
            this.m_subSample[this.m_currentInst] = (Instance)instance.copy();
        } else {
            double r = this.m_random.nextDouble();
            if (r < (double)this.m_SampleSize / (double)this.m_currentInst) {
                r = this.m_random.nextDouble();
                int replace = (int)((double)this.m_SampleSize * r);
                this.m_subSample[replace] = (Instance)instance.copy();
            }
        }
        ++this.m_currentInst;
    }

    public boolean input(Instance instance) {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_NewBatch) {
            this.resetQueue();
            this.m_NewBatch = false;
        }
        if (this.isFirstBatchDone()) {
            this.push(instance);
            return true;
        }
        this.copyValues(instance, false);
        this.processInstance(instance);
        return false;
    }

    public boolean batchFinished() {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (!this.isFirstBatchDone()) {
            this.createSubsample();
        }
        this.flushInput();
        this.m_NewBatch = true;
        this.m_FirstBatchDone = true;
        return this.numPendingOutput() != 0;
    }

    protected void createSubsample() {
        for (int i = 0; i < this.m_SampleSize && this.m_subSample[i] != null; ++i) {
            Instance copy = (Instance)this.m_subSample[i].copy();
            this.push(copy);
        }
        this.m_subSample = null;
    }

    public String getRevision() {
        return RevisionUtils.extract("$Revision: 8034 $");
    }

    public static void main(String[] argv) {
        ReservoirSample.runFilter(new ReservoirSample(), argv);
    }
}

