/*
 * Created on Mar 17, 2008. To change the template for this generated file go to
 * Window > Preferences > Java > Code Generation > Code and Comments.
 */
package ohd.hseb.hefs.utils.dist.types;

import java.util.List;

import ohd.hseb.hefs.utils.dist.DataFittingDistributionException;
import ohd.hseb.hefs.utils.dist.Distribution;
import ohd.hseb.hefs.utils.tools.NumberTools;
import ohd.hseb.hefs.utils.xml.vars.XMLDouble;
import ohd.hseb.util.data.DataSet;

/**
 * Empirical distribution backed by a two-column {@link DataSet}: one column of sample values ({@link #VALUES}) and
 * one column of cumulative probabilities ({@link #PROBABILITIES}). The probabilities are either supplied by the
 * caller or computed via {@link DataSet#createCDFUsingWeibullPlottingPosition(int, int)}.
 * <p>
 * {@link #functionCDF(Double)} never returns exactly 0.0 or 1.0: whenever the underlying data set reports one of
 * those, the probability stored for the first or last sample, respectively, is returned instead.
 * <p>
 * An instance built with the no-argument constructor has no data set until {@link #fitToData(DataSet, double[])} is
 * called; until then only {@link #isEmpty()}, {@link #getCDFDataSet()} and {@link #equals(Object)} are safe to call.
 * 
 * @author hank
 */
public class EmpiricalDist extends ContinuousDist implements Distribution<Double>
{

    /** Index of the data set variable that holds the sample values. */
    public final static int VALUES = 0;

    /** Index of the data set variable that holds the cumulative probabilities. */
    public final static int PROBABILITIES = 1;

    /**
     * Records the empirical CDF data set. Null until the distribution has been given data.
     */
    private DataSet _cdfDataSet;

    /**
     * Creates a distribution with no data; {@link #_cdfDataSet} stays null until
     * {@link #fitToData(DataSet, double[])} is called.
     */
    public EmpiricalDist()
    {
        super(new XMLDouble("domain"));
        _cdfDataSet = null;
    }

    /**
     * @param values Array of values to which to fit Weibull plotting points.
     */
    public EmpiricalDist(final double[] values)
    {
        super(new XMLDouble("domain", 0.0d, null, null));
        setupCDFDataSet(values, null);
    }

    /**
     * @param values Values of the distribution.
     * @param probabilities Externally computed probabilities assigned to the values.
     * @throws IllegalArgumentException if the two arrays differ in length.
     */
    public EmpiricalDist(final double[] values, final double[] probabilities)
    {
        super(new XMLDouble("domain", 0.0d, null, null));
        setupCDFDataSet(values, probabilities);
    }

    /**
     * Same as {@link #EmpiricalDist(double[], double[])}, but accepting lists that are first converted to arrays via
     * {@link NumberTools#convertNumbersToDoublesArray}.
     * 
     * @param values Values of the distribution.
     * @param probabilities Externally computed probabilities assigned to the values.
     * @throws IllegalArgumentException if the converted arrays differ in length.
     */
    public EmpiricalDist(final List<Float> values, final List<Double> probabilities)
    {
        super(new XMLDouble("domain", 0.0d, null, null));
        setupCDFDataSet(NumberTools.convertNumbersToDoublesArray(values),
                        NumberTools.convertNumbersToDoublesArray(probabilities));
    }

    /**
     * Sets up {@link #_cdfDataSet} given the provided values and probabilities.
     * 
     * @param values Must contain the data to be modeled.
     * @param probabilities Either specifies the probabilities (of the same length as values) or is null implying that
     *            default probabilities are computed via {@link DataSet#createCDFUsingWeibullPlottingPosition(int, int)}
     *            .
     * @throws IllegalArgumentException if probabilities is non-null and its length differs from that of values.
     */
    private void setupCDFDataSet(final double[] values, final double[] probabilities)
    {
        //Check for validity.
        if((probabilities != null) && (values.length != probabilities.length))
        {
            throw new IllegalArgumentException("Length of values array, " + values.length
                + ", is different than the provided probabilities, " + probabilities.length + ".");
        }

        //The third constructor argument is deliberately false.  Passing true ("make rows variables") was tried
        //because extracting whole variables (all values or all probabilities) seemed more useful than extracting
        //single samples, but it was changed back because it causes bugs; DataSet has some holes in its implementation.
        _cdfDataSet = new DataSet(values.length, 2, false);
        _cdfDataSet.setSampleSize(values.length);
        _cdfDataSet.setVariable(VALUES, values);
        if(probabilities != null)
        {
            _cdfDataSet.setVariable(PROBABILITIES, probabilities);
        }
        else
        {
            _cdfDataSet.createCDFUsingWeibullPlottingPosition(VALUES, PROBABILITIES);
        }
        _cdfDataSet.setFitSampleVariable(VALUES);
        _cdfDataSet.setFitCDFVariable(PROBABILITIES);
    }

    /**
     * @return The {@link #VALUES} variable of the underlying data set.
     */
    public double[] getValues()
    {
        return _cdfDataSet.getVariable(VALUES);
    }

    /**
     * @return The {@link #PROBABILITIES} variable of the underlying data set.
     */
    public double[] getProbabilities()
    {
        return _cdfDataSet.getVariable(PROBABILITIES);
    }

    /**
     * @return The underlying data set itself (not a copy); null if no data has been provided yet.
     */
    public DataSet getCDFDataSet()
    {
        return this._cdfDataSet;
    }

    /**
     * @return True if this distribution has no data set yet (no-argument constructor without a later fit) or if the
     *         data set reports itself as empty.
     */
    public boolean isEmpty()
    {
        //A distribution that was never given data is empty; previously this case threw a NullPointerException.
        return (_cdfDataSet == null) || _cdfDataSet.isEmpty();
    }

    /**
     * Returns the probability the data set computes for the value, except that an exact 0.0 is replaced with the
     * probability stored for sample 0 and an exact 1.0 is replaced with the probability stored for the last sample.
     * Presumably those are the smallest and largest fitted probabilities (i.e., the samples are kept in ascending
     * order) -- confirm against {@link DataSet}.
     */
    @Override
    public double functionCDF(final Double value)
    {
        double prob = _cdfDataSet.getProbability(value, VALUES, PROBABILITIES);

        //This section was added to avoid returning 0 or 1 for a value.  It is how ESPADP functions.
        if(prob == 0.0d)
        {
            prob = _cdfDataSet.getValue(0, PROBABILITIES);
        }
        else if(prob == 1.0d)
        {
            prob = _cdfDataSet.getValue(_cdfDataSet.getSampleSize() - 1, PROBABILITIES);
        }
        return prob;
    }

    /**
     * @return The quantile the data set computes for the given probability.
     */
    @Override
    public double functionInverseCDF(final double prob)
    {
        return _cdfDataSet.getQuantile(prob, VALUES, PROBABILITIES);
    }

    /**
     * No density is computed for the empirical distribution.
     * 
     * @return Always the value of {@code getMissing()}.
     */
    @Override
    public double functionPDF(final Double value)
    {
        return getMissing();
    }

    /**
     * Replaces the current data set with one built from the fit sample variable of the provided data, computing the
     * probabilities via {@link DataSet#createCDFUsingWeibullPlottingPosition(int, int)}.
     * 
     * @param data Source data; its fit sample variable supplies the values.
     * @param fitParms Not used by this distribution.
     */
    @Override
    public void fitToData(final DataSet data, final double[] fitParms) throws DataFittingDistributionException
    {
        //NOTE(review): unlike setupCDFDataSet, this uses the two-argument DataSet constructor and does not call
        //setFitSampleVariable/setFitCDFVariable -- confirm that the resulting data set is laid out the same way.
        _cdfDataSet = new DataSet(data.getSampleSize(), 2);
        _cdfDataSet.setSampleSize(data.getSampleSize());
        _cdfDataSet.setVariable(VALUES, data.getVariable(data.getFitSampleVariable()));
        _cdfDataSet.createCDFUsingWeibullPlottingPosition(VALUES, PROBABILITIES);
    }

    /**
     * Returns true if the data set employed by the two distributions is completely equal. This calls
     * {@link DataSet#areDataTablesCompletelyEqual(DataSet)} on the two instances of {@link #_cdfDataSet}. Note that it
     * checks to see if the data is completely equal and is not limited at all by the number of samples within
     * {@link #_cdfDataSet}. Rather, it looks at the entire table of data. See
     * {@link DataSet#areDataTablesCompletelyEqual(DataSet)}.
     * <p>
     * Returns false, rather than throwing, when the argument is null or is not an {@link EmpiricalDist}. Two
     * distributions that both lack a data set are considered equal; a distribution with a data set never equals one
     * without.
     * <p>
     * NOTE(review): this class does not override {@code hashCode()}, so instances that are equal by this method may
     * hash differently unless a superclass handles it -- avoid using instances as hash keys until that is confirmed.
     */
    @Override
    public boolean equals(final Object arg0)
    {
        if(this == arg0)
        {
            return true;
        }
        //instanceof is false for null, so this also covers a null argument.
        if(!(arg0 instanceof EmpiricalDist))
        {
            return false;
        }

        final DataSet otherDataSet = ((EmpiricalDist)arg0)._cdfDataSet;
        if((_cdfDataSet == null) || (otherDataSet == null))
        {
            //Equal only if both are missing their data set.
            return _cdfDataSet == otherDataSet;
        }
        return _cdfDataSet.areDataTablesCompletelyEqual(otherDataSet);
    }

}
