package ohd.hseb.hefs.utils.dist;

import ohd.hseb.util.data.DataSet;

/**
 * General tools available for use by the implemented distributions.
 * 
 * @author Hank.Herr
 */
public abstract class DistributionTools
{
    /**
     * Generally useful constant defines how closely we want to fit continuous distributions if the estimation routine
     * is an optimization of some sort.
     */
    public static final double FIT_ACCURACY = 0.001D;

    /**
     * Value returned by {@link #returnTrimmedProbability(double)} in place of any probability that is 1 or larger.
     * Note that this field is not final, so it can be reassigned by callers.
     */
    public static double CLOSEST_PROB_TO_1 = 0.9999999d;

    /**
     * Value returned by {@link #returnTrimmedProbability(double)} in place of any probability that is 0 or smaller.
     * Note that this field is not final, so it can be reassigned by callers.
     */
    public static double CLOSEST_PROB_TO_0 = 0.0000001d;

    /**
     * Coefficient used to place the two interior probes of the golden section search. Do not change this value: it
     * is what allows one of the two probes to be reused from one iteration to the next, so that only one new fit is
     * needed per iteration.
     */
    private static final double GOLDEN_SECTION_COEFFICIENT = 0.618D;

    /**
     * Trims a probability so that it is strictly inside the interval (0, 1).
     * 
     * @param baseProb The probability to trim.
     * @return {@link #CLOSEST_PROB_TO_1} if the provided probability is greater than or equal to 1,
     *         {@link #CLOSEST_PROB_TO_0} if it is less than or equal to 0, and the provided probability unchanged
     *         otherwise. A NaN is returned unchanged because both comparisons are false for it.
     */
    public static double returnTrimmedProbability(final double baseProb)
    {
        if(baseProb >= 1)
        {
            return DistributionTools.CLOSEST_PROB_TO_1;
        }
        if(baseProb <= 0)
        {
            return DistributionTools.CLOSEST_PROB_TO_0;
        }
        return baseProb;
    }

    /**
     * Fits the distribution to the data, searching for the shift that yields the smallest mean squared error between
     * the fitted distribution and the empirical CDF of the sample. The search is a golden section search over the
     * allowed shift range and stops when that range is no wider than {@link #FIT_ACCURACY}. When the method returns
     * normally, the distribution has been fitted using the best shift that was evaluated; if the allowed range is
     * already no wider than {@link #FIT_ACCURACY}, no search is needed and the distribution is fitted using the
     * midpoint of the range.
     * 
     * @param distribution A {@link Distribution} that implements {@link ShiftOptimizationFittingDistribution} and is
     *            bounded below.
     * @param data The {@link DataSet} that provides the sample values. Note that if an empirical variable is not
     *            defined, it will be created as part of this method, which may result in the data being sorted after
     *            this call is done.
     * @param fitParms Either null (i.e., shift is fixed to 0), a 1-d array in which the only element specifies the
     *            smallest allowed value for the shift, or a 2-element array in which the second element specifies
     *            the largest allowed value for the shift. If the largest allowed shift is not provided, the smallest
     *            sample value is used.
     * @throws DataFittingDistributionException If fitParms has a length other than 1 or 2, if the smallest allowed
     *             shift is larger than the upper limit of the shift, or if the distribution fails to estimate its
     *             parameters.
     * @throws IllegalArgumentException If the fit sample variable of the data is negative or is the same as the fit
     *             CDF variable.
     */
    public static void optimizeShiftFitForBoundedBelowDistribution(final ShiftOptimizationFittingDistribution distribution,
                                                                   final DataSet data,
                                                                   final double[] fitParms) throws DataFittingDistributionException
    {
        //First, if fitparms is null, then I am going to assume that you are fitting the distribution with a fixed 0 shift.
        if(fitParms == null)
        {
            distribution.estimateParameters(data, 0.0D);
            return;
        }

        //Otherwise, make sure fitparms is of length 1 (smallest shift only) or 2 (smallest and largest shift).
        if((fitParms.length != 1) && (fitParms.length != 2))
        {
            throw new DataFittingDistributionException("When optimizing the shift-parameter, the provided fit-parms must specify a single value that is the smallest "
                + "allowed shift and an optional second value providing the largest allowed shift.");
        }

        //Get the smallest shift from fitparms[0] and note if the largest shift is given in fitparms[1].
        final double smallestShift = fitParms[0];
        final boolean largestShiftProvided = (fitParms.length == 2);

        data.resetPtr();

        //Some QC checks...

        //Check the sample variable and empirical variable.
        final int samplevar = data.getFitSampleVariable();
        int empvar = data.getFitCDFVariable();
        if((samplevar < 0) || (samplevar == empvar))
        {
            throw new IllegalArgumentException("Sample variable number, "
                + samplevar + ", or empirical variable number, " + empvar
                + " is invalid.");
        }

        //Estimate the CDF if needed.
        if(empvar < 0)
        {
            data.addNewVariable();
            empvar = data.getNumberOfVariables() - 1;
            data.createCDFUsingWeibullPlottingPosition(samplevar, empvar);
        }

        //Set the min and max on the shift parameter.  The min is the smallest allowed shift.  The max is the largest
        //allowed shift, if provided, or the smallest data value otherwise.  Check the values.
        double min = smallestShift;
        double max;
        if(largestShiftProvided)
        {
            max = fitParms[1];
        }
        else
        {
            max = data.getSmallest(samplevar);
        }
        if(min > max)
        {
            //Report the source of the upper limit accurately so that the message is not misleading.
            if(largestShiftProvided)
            {
                throw new DataFittingDistributionException("Largest allowed shift provided, "
                    + max
                    + ", is smaller than this distribution's smallest allowed shift, "
                    + min);
            }
            throw new DataFittingDistributionException("Smallest data value provided, "
                + max
                + ", is smaller than this distribution's smallest allowed shift, "
                + min);
        }

        //At this point, the data is set up for a search.
        //Find the best shift for the data set.  This uses Golden Section search to find the smallest MSE value.
        //End the search when max and min are no more than FIT_ACCURACY units apart.
        double lower = 0.0D;
        double higher = 0.0D;
        double lowermse = 0.0D;
        double highermse = 0.0D;

        //Explicit flags track which probe must be fitted in the current iteration.  A flag is used instead of a
        //special shift value so that a probe that is legitimately equal to 0 is not mistaken for a missing probe.
        boolean lowerNeeded = true;
        boolean higherNeeded = true;

        //Track the shift used for the most recent fit and the shift with the smallest MSE found so far.
        boolean anyProbeEvaluated = false;
        double lastFittedShift = 0.0D;
        double bestShift = 0.0D;

        while((max - min) > FIT_ACCURACY)
        {
            data.resetPtr();

            //Determine the lower guess, if it was not carried over from the previous iteration.
            if(lowerNeeded)
            {
                lower = -GOLDEN_SECTION_COEFFICIENT * (max - min) + max;
                distribution.estimateParameters(data, lower);
                lowermse = data.meanSquaredError(samplevar,
                                                 empvar,
                                                 (Distribution)distribution);
                lastFittedShift = lower;
                lowerNeeded = false;
            }

            //Determine the higher guess, if it was not carried over from the previous iteration.
            if(higherNeeded)
            {
                higher = GOLDEN_SECTION_COEFFICIENT * (max - min) + min;
                distribution.estimateParameters(data, higher);
                highermse = data.meanSquaredError(samplevar,
                                                  empvar,
                                                  (Distribution)distribution);
                lastFittedShift = higher;
                higherNeeded = false;
            }
            anyProbeEvaluated = true;

            //If the error of the lower guess is no larger than that for the higher guess,
            //then the minimum is not between higher and max.  Reset max as higher, 
            //and reuse the lower guess as the next higher guess.
            if(lowermse <= highermse)
            {
                bestShift = lower;
                max = higher;
                higher = lower;
                highermse = lowermse;
                lowerNeeded = true;
            }

            //Otherwise, the minimum must not be between min and lower.  Reset min as lower,
            //and reuse the higher guess as the next lower guess.
            else
            {
                bestShift = higher;
                min = lower;
                lower = higher;
                lowermse = highermse;
                higherNeeded = true;
            }
        }

        //Make sure the distribution is left with the parameters of the best shift.  The probe kept at the end of each
        //iteration is always the one with the smaller MSE, but the distribution itself holds the parameters of
        //whichever probe was fitted last, which may be the one that was just rejected.
        if(!anyProbeEvaluated)
        {
            //The allowed range was too narrow for the search to run, so nothing has been fitted yet.
            data.resetPtr();
            distribution.estimateParameters(data, min + 0.5D * (max - min));
        }
        else if(lastFittedShift != bestShift)
        {
            data.resetPtr();
            distribution.estimateParameters(data, bestShift);
        }
    }

    /**
     * Call {@link DataFittingDistribution#fitToData(DataSet, double[])} and ignore the exception.
     * 
     * @param dist Distribution to fit.
     * @param data Data to pass in.
     * @param fitParms Fit parameters to pass in.
     */
    public static void fitToDataDiscardException(final DataFittingDistribution dist,
                                                 final DataSet data,
                                                 final double[] fitParms)
    {
        try
        {
            dist.fitToData(data, fitParms);
        }
        catch(final DataFittingDistributionException ignored)
        {
            //Ignored!  Discarding the exception is the documented purpose of this method.
        }
    }

}
