package ohd.hseb.util.data;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;
import java.util.Vector;

import nl.wldelft.util.timeseries.TimeSeriesArray;
import ohd.hseb.hefs.utils.dist.Distribution;
import ohd.hseb.hefs.utils.dist.types.NormalDist;
import ohd.hseb.util.misc.HCalendar;

import org.jfree.chart.JFreeChart;

/**
 * ================================== <br>
 * The DataSet Class. <br>
 * ================================== <br>
 * <br>
 * This class provides tools for accessing and manipulating a two-dimensional table of double values (double[][]). The
 * constructor accepts as arguments either double arrays, numbers of rows and columns, or other DataSets. <br>
 * <br>
 * -------------------------------------- <br>
 * RECOMMENDED USAGE<br>
 * -------------------------------------- <br>
 * <br>
 * When you create a new program, just create a subclass of this class which will provide a means to fill the data set,
 * either from a file or some other source. Then, you can use all the functionality provided here for the data from that
 * data source -- and there is a lot of functionality. <br>
 * <br>
 * This class will not throw Exceptions. Once the data is in place, it really shouldn't be necessary. Still, there are a
 * lot of false/null/MISSING return functions, so you can use these for error checking. Then, the subclass can do
 * quality control and throw exceptions as desired. <br>
 * <br>
 * -------------------------------------- <br>
 * FITTING DISTRIBUTIONS <br>
 * -------------------------------------- <br>
 * <br>
 * To fit a Distribution subclass object using this DataSet, do the following: <br>
 * <br>
 * 1. Create the CDF by calling addNewVariable() followed by createCDF(...), passing in the variable index you want
 * to fit a distribution to and the newly created variable, which will hold the CDF data. <br>
 * <br>
 * 2. Set the Fit parameters by calling setFitSampleVariable(...) and setFitCDFVariable(...) to specify the variable
 * containing the sample and the new CDF variable. <br>
 * <br>
 * 3. Create an instance of a Distribution subclass. <br>
 * <br>
 * 4. Call its fitting routine, fitToData(...). You must pass in the data and the parameters for the fitting routine. <br>
 * <br>
 * <br>
 * To see how the CDF is constructed, lookup the createCDF(...) method in the code. <br>
 * <br>
 * -------------------------------------- <br>
 * USING CONDITIONING <br>
 * -------------------------------------- <br>
 * <br>
 * The static variables LESS_THAN, GREATER_THAN, LESS_THAN_OR_EQUAL_TO, NOT_EQUAL_TO, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO,
 * NO_CONDITION, AND, and OR are all used to extract subsets, count samples, and calculate probabilities of detection,
 * false alarm, and so forth. Methods which take them in as arguments will have a parameter list that includes: <br>
 * <br>
 * int var, int cond1, double val1, boolean and, int cond2, double val2 <br>
 * <br>
 * where var is the variable being considered and cond1, val1, "and", cond2, and val2 define the conditioning using
 * static variables. For example, to find the subset where variable 0 satisfies the condition "a &lt;= var0 &lt; b",
 * pass in the arguments: <br>
 * <br>
 * 0, GREATER_THAN_OR_EQUAL_TO, a, AND, LESS_THAN, b. <br>
 * <br>
 * -------------------------------------- <br>
 * DATES WITHIN DATASET <br>
 * -------------------------------------- <br>
 * The mechanism built into DataSet to handle dates as a variable works using julian hours. This implies that the
 * minutes and seconds are discarded if you use the built-in numbers. If you need to allow for dates that include
 * minutes or seconds, then you should convert the dates (either Date or Calendar) into milliseconds (i.e. the Calendar
 * getTimeInMillis method), making sure that the Calendar is in GMT so that all data is stored in GMT. When you acquire
 * the variable value for a sample, convert it back into a Calendar by using
 * HCalendar.computeCalendarFromMilliseconds(...). <br>
 * <br>
 * -------------------------------------- <br>
 * METHOD SUMMARY <br>
 * -------------------------------------- <br>
 * <br>
 * Methods can be classified as follows: <br>
 * <br>
 * BASIC STATISTICS: <br>
 * <br> {@link #mean}, {@link #sum}, {@link #sumSquares}, {@link #sumProduct}, {@link #sampleCovariance},
 * {@link #sampleVariance}, {@link #sampleStandardDeviation}, {@link #covariance}, {@link #variance},
 * {@link #standardDeviation}, {@link #correlation} <br>
 * <br>
 * FORECAST/OBSERVED ERROR STATISTICS <br>
 * <br> {@link #meanSquaredError(int, int)}, {@link #maximumError(int, int)}, {@link #averageAbsError},
 * {@link #averageError} <br>
 * <br>
 * CATEGORICAL STATISTICS <br>
 * <br> {@link #probabilityOfDetection}, {@link #probabilityOfFalseAlarm}, {@link #probabilityOfHydroFalseAlarm},
 * {@link #probabilityOfOverForecast}, {@link #probabilityOfUnderForecast}, {@link #calculateCSI} <br>
 * <br>
 * COUNTS <br>
 * <br> {@link #countValues(int , int , double )}, {@link #countValues(int , int , double , boolean , int , double )} <br>
 * <br>
 * DISTRIBUTION TOOLS <br>
 * <br> {@link #sortBy}, {@link #createCDF}, {@link #meanSquaredError(int , int , Distribution )},
 * {@link #maximumError(int , int , Distribution )},
 * {@link #maximumError(int , int , DataSet , int , int )}, {@link #getQuantile}, {@link #getProbability} <br>
 * <br>
 * FANCY STATISTICS <br>
 * <br> {@link #multipleRegression(int , int[] , boolean )} <br>
 * <br>
 * TRANSFORMATIONS <br>
 * <br> {@link #applyDistTransform(int , Distribution )},
 * {@link #applyDistTransform(int , int , Distribution ) },
 * {@link #applyDistTransform(int , Distribution , Distribution )},
 * {@link #applyNormalQuantileTransform(int ) }, {@link #applyNormalQuantileTransform(int , int )},
 * {@link #applyShiftTransform}, {@link #applyScaleTransform}, {@link #applyLogTransform(int ) },
 * {@link #applyLogTransform(int , double ) }, {@link #applyPowerTransform}, {@link #applySquareTransform} ,
 * {@link #applySquareRootTransform}, {@link #applyInverseTransform}, {@link #applyExponentTransform},
 * {@link #shiftVariable}, {@link #scaleVariable}, {@link #sumVariables}, {@link #multiplyVariables} <br>
 * <br>
 * BUILD AND NAVIGATE <br>
 * <br> {@link #addSample}, {@link #next}, {@link #resetPtr}, {@link #findSampleIndex(int , double )},
 * {@link #findSampleIndex(double[] )}, {@link #findSampleIndex(int , Calendar )}, {@link #gotoNextSample(double[] )},
 * {@link #gotoNextSample(int , int , double )}, {@link #gotoNextSample(int , int , double , boolean , int , double )} <br>
 * <br>
 * ATTRIBUTE SETS <br>
 * <br> {@link #setSample}, {@link #setSampleBlindly}, {@link #setVariable}, {@link #setVariableBlindly},
 * {@link #setCurrentValue}, {@link #setValue}, {@link #setValueBlindly},
 * {@link #setSampleSize}, {@link #setFitSampleVariable}, {@link #setFitCDFVariable} <br>
 * <br>
 *       COPYING <br>
 * <br> {@link #copyVarToVar}, {@link #copySampleToSample} <br>
 * <br>
 *       MERGING TWO DATASETS <br>
 * <br> {@link #mergeDataSetsAsVariables(DataSet )}, {@link #mergeDataSetsAsSamples} <br>
 * <br>
 *       RESIZING TABLE <br>
 * <br> {@link #removeVariable}, {@link #addNewVariable()}, {@link #addNewVariable(double )}, {@link #removeSample},
 *       {@link #removeCurrentSample}, {@link #changeMaximumNumberOfSamples} <br>
 * <br>
 *       ADVANCED SETTING <br>
 * <br> {@link #makeSampleValuesEqualTo}, {@link #makeVariableValuesEqualTo}, {@link #clearSample}, {@link #clearVariable},
 *       {@link #clearAllData}, {@link #makeSampleZero}, {@link #makeVariableZero} <br>
 * <br>
 *       ATTRIBUTE/ADVANCED GETS <br>
 * <br> {@link #getSmallest(int , int , double )}, {@link #getLargest(int , int , double )}, {@link #getSmallest(int )},
 *       {@link #getLargest(int )}, {@link #getSmallest(int[] , int , double )}, {@link #getLargest(int[] , int , double )}
 *       , {@link #getSampleSize}, {@link #getMaximumSampleSize}, {@link #getNumberOfVariables}, {@link #getData},
 *       {@link #getDataTable}, {@link #getCurrentSample}, {@link #getSample}, {@link #getCopyOfCurrentSample},
 *       {@link #getCopyOfSample}, {@link #getVariable}, {@link #getCurrentSampleIndex}, {@link #getValue},
 *       {@link #getCurrentValue}, {@link #getValueBlindly}, {@link #getValueAsDate}, {@link #getCurrentValueAsDate},
 *       {@link #getFitSampleVariable}, {@link #getFitCDFVariable}, {@link #getMakeRowsVariables} <br>
 * <br>
 *       SUBSETTING <br>
 * <br> {@link #extractSubset(int , int , double )}, {@link #extractSubset(int , int , double , boolean , int , double )},
 *       {@link #extractSubset(int[] , int[] , double[] , boolean[] , int[] ,double[] )},
 *       {@link #extractSubset(int , int , int )}, {@link #extractSubset(int[] )}, {@link #union}, {@link #intersection}
 *       , {@link #extractUnique} <br>
 * <br>
 *       TEMPORAL AVERAGING <br>
 * <br> {@link #temporalAverage}, {@link #dailyAverage(int )}, {@link #dailyAverage(int , Calendar )} <br>
 * <br>
 *       QUALITY CONTROL <br>
 * <br> {@link #isAnyDataMissing}, {@link #isAnySampleDataMissing}, {@link #isAnyVariableDataMissing},
 *       {@link #isSampleEqualTo}<br>
 * <br>
 * @author Hank Herr
 */
public class DataSet
{
    /**
     * Name of this class. NOTE(review): not referenced in the visible part of the file -- presumably used when
     * building messages; confirm before removing.
     */
    final String CLASSNAME = "DataSet";
    /**
     * Sentinel meaning "no value". The statistics methods return it when they cannot compute a result (for example
     * when a variable index is out of range), and table values equal to it are skipped when accumulating sums.
     */
    public static final double MISSING = -999.0d;

    /**
     * NOTE(review): presumably marks "no variable selected"; its use is not visible in this part of the file. It has
     * the same numeric value as {@link #NO_CONDITION}.
     */
    public static final int NO_VARIABLE = -999;
    // Condition codes. Per the class documentation, these are passed as the cond1/cond2 arguments of the methods
    // that extract subsets, count samples and compute categorical probabilities.
    /** Condition code: value &lt; bound. */
    public static final int LESS_THAN = -1;
    /** Condition code: value &gt; bound. */
    public static final int GREATER_THAN = 1;
    /** Condition code: value &lt;= bound. */
    public static final int LESS_THAN_OR_EQUAL_TO = -2;
    /** Condition code: value &gt;= bound. */
    public static final int GREATER_THAN_OR_EQUAL_TO = 2;
    /** Condition code: value == bound. */
    public static final int EQUAL_TO = 0;
    /** Condition code: value != bound. */
    public static final int NOT_EQUAL_TO = -3;
    /**
     * Condition code listed with the other conditioning constants in the class documentation. NOTE(review):
     * presumably means "apply no condition"; confirm against the condition-checking code.
     */
    public static final int NO_CONDITION = -999;
    /** Value for the "and" argument of conditioned methods: both conditions must hold. */
    public static final boolean AND = true;
    /** Value for the "and" argument of conditioned methods: either condition may hold. */
    public static final boolean OR = false;

    /**
     * NOTE(review): presumably the default number of samples to allocate; not referenced in the visible part of the
     * file.
     */
    public static final int DEFAULT_DATA_SIZE = 100;

    //General Attributes

    /**
     * Core attribute storing the data in a double array.
     */
    protected DataTable _dataTable;

    /**
     * The number of samples in the data set.
     */
    protected int _sampleSize;

    /**
     * A pointer to the "current" sample, facilitating looping via next(), resetPtr(), and goto methods.
     */
    protected int _currentSample;

    /**
     * The number of variables in the table.
     */
    protected int _numberOfVariables;

    /**
     * The largest number of samples allowed. This is used to provide a size for the double array and can be increased
     * if necessary.
     */
    protected int _maximumSize;

    //Distribution fitting variables

    /**
     * Index of the variable holding the sample used for distribution fitting (see setFitSampleVariable in the class
     * documentation). Initialized to (int)MISSING by the sizing constructor and by initialize(...).
     */
    protected int _fitSample;
    /**
     * Index of the variable holding the empirical CDF used for distribution fitting (see setFitCDFVariable in the
     * class documentation). Initialized to (int)MISSING by the sizing constructor and by initialize(...).
     */
    protected int _fitCDF;

    /**
     * If this flag is true, then the first index of the _data double array will be variables, and the second will be
     * samples. This means that, to acquire the values for a variable, it can return the array itself (_data[i]) instead
     * of needing to make a copy ({_data[0...n][i]}). However, for samples, a copy must always be made.
     */
    protected boolean _makeRowsVariables;

    //===============================================================================================
    // CONSTRUCTORS
    //===============================================================================================////

    /**
     * Empty constructor. Leaves the data table unallocated (null); every other attribute keeps its Java default
     * value. Call {@link #initialize(int, int, boolean)} to allocate a table afterwards.
     */
    public DataSet()
    {
        _dataTable = null;
    }

    /**
     * Constructor for known dimensions of DataSet. Delegates to {@link #DataSet(int, int, boolean)} with the
     * makeRowsVariables flag set to false.
     * 
     * @param samples The number of samples (rows) to allow for.
     * @param vars The number of variables (columns) to allow for.
     */
    public DataSet(final int samples, final int vars)
    {
        this(samples, vars, false);
    }

    /**
     * Constructor for known dimensions, with optional makeRowsVariables flag. The new data set holds no samples, its
     * current-sample pointer is at 0, and both fit variables are set to (int)MISSING.
     * <p>
     * If either dimension is not positive, no table is allocated: the data table stays null and the recorded
     * dimensions are zero, which is the same table-less state the empty constructor produces. Use
     * {@link #initialize(int, int, boolean)} to allocate a table later.
     * 
     * @param maxSamples The number of samples (rows) to allow for.
     * @param vars The number of variables (columns) to allow for.
     * @param makeRowsVariables True if you want the first dimension of _data to be variables. False for samples.
     */
    public DataSet(final int maxSamples, final int vars, final boolean makeRowsVariables)
    {
        _makeRowsVariables = makeRowsVariables;
        _sampleSize = 0;
        _currentSample = 0;
        _fitSample = (int)MISSING;
        _fitCDF = (int)MISSING;

        if((maxSamples <= 0) || (vars <= 0))
        {
            // The original guard set the table to null but then fell through and built a table with the invalid
            // dimensions anyway. Stop here so the guard actually takes effect.
            _dataTable = null;
            _numberOfVariables = 0;
            _maximumSize = 0;
            return;
        }

        _numberOfVariables = vars;
        _maximumSize = maxSamples;
        _dataTable = DataSet.createDataAttribute(maxSamples, vars, _makeRowsVariables);
        clearAllData();
    }

    /**
     * Copy constructor. This is literally a copy!
     * 
     * @param base The base DataSet.
     */
    public DataSet(final DataSet base)
    {
        _makeRowsVariables = base.getMakeRowsVariables();
        _sampleSize = base.getSampleSize();
        _currentSample = 0;
        _numberOfVariables = base.getNumberOfVariables();
        _maximumSize = base.getMaximumSampleSize();
        _fitSample = base.getFitSampleVariable();
        _fitCDF = base.getFitCDFVariable();

        _dataTable = DataSet.createDataAttribute(_maximumSize, _numberOfVariables, _makeRowsVariables);

        int i, j;
        for(i = 0; i < _sampleSize; i++)
        {
            for(j = 0; j < _numberOfVariables; j++)
            {
                setValueBlindly(i, j, base.getValueBlindly(i, j));
            }
        }
    }

    /**
     * Copy constructor. This is literally a copy!
     * 
     * @param base The base DataSet.
     */
    public DataSet(final DataSet base, final int newNumberOfVariables)
    {
        _makeRowsVariables = base.getMakeRowsVariables();
        _sampleSize = base.getSampleSize();
        _currentSample = 0;
        _numberOfVariables = newNumberOfVariables;
        _maximumSize = base.getMaximumSampleSize();
        _fitSample = base.getFitSampleVariable();
        _fitCDF = base.getFitCDFVariable();

        _dataTable = DataSet.createDataAttribute(_maximumSize, _numberOfVariables, _makeRowsVariables);

        int i, j;
        for(i = 0; i < _sampleSize; i++)
        {
            for(j = 0; j < base.getNumberOfVariables(); j++)
            {
                setValueBlindly(i, j, base.getValueBlindly(i, j));
            }
        }
    }

    /**
     * Copies the content of the provided time series into this {@link DataSet}, with column 0 being the time (long
     * recorded as double) and column 1 being the value (float cast to double). The table is sized to hold exactly
     * ts.size() samples and is created with rows as variables. Any columns beyond the first two hold 0.0 in every
     * added sample.
     * 
     * @param ts {@link TimeSeriesArray} to copy into this {@link DataSet}.
     * @param numberOfVariables The number of variables (columns) to allocate. Columns 0 and 1 are always written, so
     *            this needs to be at least 2 for a non-empty time series.
     * @throws IllegalStateException if a sample cannot be added to the data set.
     */
    public DataSet(final TimeSeriesArray ts, final int numberOfVariables)
    {
        this(ts.size(), numberOfVariables, true);

        for(int step = 0; step < ts.size(); step++)
        {
            final double[] row = new double[numberOfVariables];
            row[0] = ts.getTime(step);
            row[1] = ts.getValue(step);

            final boolean added = addSample(row);
            if(added)
            {
                continue;
            }
            throw new IllegalStateException("The method addSample returned false.  Based on how the DataSet is sized, this should never happen.");
        }
    }

    /**
     * Copies the content of the provided time series into this {@link DataSet}, with column 0 being the time (long
     * recorded as double) and column 1 being the value (float cast to double). Delegates to
     * {@link #DataSet(TimeSeriesArray, int)} with exactly two variables.
     * 
     * @param ts {@link TimeSeriesArray} to copy into this {@link DataSet}.
     */
    public DataSet(final TimeSeriesArray ts)
    {
        this(ts, 2);
    }

    /**
     * Routine to initialize the data table to have the specified dimensions and be empty. Any existing table is
     * replaced, the sample count and current-sample pointer are reset to 0, and both fit variables are reset to
     * (int)MISSING. If either dimension is not positive, the call does nothing and the data set is left unchanged.
     * 
     * @param samples The number of samples (rows) to allow for.
     * @param vars The number of variables (columns) to allow for.
     * @param makeRowsVariables True if you want the first dimension of _data to be variables. False for samples.
     */
    public void initialize(final int samples, final int vars, final boolean makeRowsVariables)
    {
        if((samples <= 0) || (vars <= 0))
        {
            //PRINT ERROR MESSAGE HERE
            return;
        }

        // Record the new dimensions before clearing, in the same order the sizing constructor uses, so that
        // clearAllData() runs against attributes that already describe the new table.
        _makeRowsVariables = makeRowsVariables;
        _dataTable = DataSet.createDataAttribute(samples, vars, _makeRowsVariables);
        _sampleSize = 0;
        _currentSample = 0;
        _numberOfVariables = vars;
        _maximumSize = samples;
        _fitSample = (int)MISSING;
        _fitCDF = (int)MISSING;
        clearAllData();
    }

    //===============================================================================================
    // STATISTICS
    //===============================================================================================

    /////////////////////////////////////////////////////////////////////////
    //Mean, Sums, Variances, Correlation
    /////////////////////////////////////////////////////////////////////////

    /**
     * Compute the mean over all samples (rows) of the passed in variable (column). The numerator is {@link #sum(int)},
     * which skips MISSING values; the divisor is the full sample size, so skipped values still count in the
     * denominator.
     * 
     * @param var The variable or column to use.
     * @return The mean, or MISSING if the variable index is out of range or the data set holds no samples.
     */
    public double mean(final int var)
    {
        // The index is validated here rather than by comparing sum(var) with MISSING: a legitimate total that happens
        // to equal -999.0 must not be mistaken for the error sentinel.
        if((var >= _numberOfVariables) || (var < 0))
        {
            return DataSet.MISSING;
        }

        // With no samples the division would be 0/0 (NaN); report MISSING like the other uncomputable statistics.
        if(_sampleSize <= 0)
        {
            return DataSet.MISSING;
        }

        return sum(var) / _sampleSize;
    }

    /**
     * Add up the values of one variable across all stored samples. Values equal to MISSING are left out of the total.
     * 
     * @param var The variable to use.
     * @return The sum, or MISSING if the variable index is out of range.
     */
    public double sum(final int var)
    {
        final boolean indexInRange = (var >= 0) && (var < _numberOfVariables);
        if(!indexInRange)
        {
            return MISSING;
        }

        double accumulated = 0;
        for(int row = 0; row < _sampleSize; row++)
        {
            final double value = getValueBlindly(row, var);
            if(value == MISSING)
            {
                continue;
            }
            accumulated += value;
        }
        return accumulated;
    }

    /**
     * Compute the sum of squares over all samples for the passed in variable. Implemented as the sum-product of the
     * variable with itself, so it inherits the behavior of {@link #sumProduct(int, int)}.
     * 
     * @param var The variable to use.
     * @return The sum of each value squared, or MISSING if the variable index is out of range.
     */
    public double sumSquares(final int var)
    {
        return sumProduct(var, var);
    }

    /**
     * Add up, across all stored samples, the product of two variables. A sample contributes only when neither of its
     * two values is MISSING.
     * 
     * @param var1 The first variable.
     * @param var2 The second variable.
     * @return The sum over all samples of the first variable times the second variable, or MISSING if either
     *         variable index is out of range.
     */
    public double sumProduct(final int var1, final int var2)
    {
        final boolean firstInRange = (var1 >= 0) && (var1 < _numberOfVariables);
        final boolean secondInRange = (var2 >= 0) && (var2 < _numberOfVariables);
        if(!firstInRange || !secondInRange)
        {
            return MISSING;
        }

        double accumulated = 0;
        for(int row = 0; row < _sampleSize; row++)
        {
            final double first = getValueBlindly(row, var1);
            if(first == MISSING)
            {
                continue;
            }
            final double second = getValueBlindly(row, var2);
            if(second == MISSING)
            {
                continue;
            }
            accumulated += first * second;
        }
        return accumulated;
    }

    /**
     * Compute the sample covariance between the two variables specified. Sample covariance uses (N - 1) as the divisor
     * after calculating the sum, where N is the full sample size. Samples in which either value is MISSING add nothing
     * to the sum but are still counted in N.
     * 
     * @param var1 The first variable.
     * @param var2 The second variable.
     * @return The covariance between the two. MISSING if bad parameters or fewer than two samples.
     */
    public double sampleCovariance(final int var1, final int var2)
    {
        if(((var1 >= _numberOfVariables) || (var1 < 0)) || ((var2 >= _numberOfVariables) || (var2 < 0)))
        {
            return MISSING;
        }

        // The (N - 1) divisor needs at least two samples. The original only rejected N == 1, so an empty data set
        // divided by -1 and returned -0.0.
        if(_sampleSize <= 1)
        {
            return MISSING;
        }

        double total = 0;
        final double mean1 = mean(var1);
        final double mean2 = mean(var2);

        //This is the sum of the product of var1[] minus mean1 and var2[] minus mean2
        for(int i = 0; i < _sampleSize; i++)
        {
            if((getValueBlindly(i, var1) != MISSING) && (getValueBlindly(i, var2) != MISSING))
            {
                total += (getValueBlindly(i, var1) - mean1) * (getValueBlindly(i, var2) - mean2);
            }
        }

        return total / (_sampleSize - 1);
    }

    /**
     * Compute the sample variance, which is the sample covariance of the variable with itself. The result, including
     * any MISSING return, is exactly what {@link #sampleCovariance(int, int)} returns for (var1, var1).
     * 
     * @param var1 The variable to use.
     * @return The sample variance.
     */
    public double sampleVariance(final int var1)
    {
        return sampleCovariance(var1, var1);
    }

    /**
     * Take the square root of the sample variance of a variable.
     * 
     * @param var1 The variable to use.
     * @return The sample standard deviation, or MISSING if the sample variance is MISSING.
     */
    public double sampleStandardDeviation(final int var1)
    {
        final double spread = sampleVariance(var1);
        // Pass the sentinel through untouched instead of taking the root of a negative number.
        return (spread == MISSING) ? MISSING : Math.sqrt(spread);
    }

    /**
     * Covariance using N as the divisor. Obtained by rescaling the result of {@link #sampleCovariance(int, int)} by
     * (N - 1) / N, where N is the sample size.
     * 
     * @param var1 The first variable.
     * @param var2 The second variable.
     * @return The covariance between the two, or MISSING if the sample covariance is MISSING.
     */
    public double covariance(final int var1, final int var2)
    {
        final double unbiased = sampleCovariance(var1, var2);
        if(unbiased != MISSING)
        {
            // Undo the (N - 1) divisor first, then divide by N.
            final double sumOfProducts = unbiased * (_sampleSize - 1);
            return sumOfProducts / _sampleSize;
        }
        return MISSING;
    }

    /**
     * Computes the variance, which is the covariance of a variable with itself. The result, including any MISSING
     * return, is exactly what {@link #covariance(int, int)} returns for (var1, var1).
     * 
     * @param var1 The variable to use.
     * @return The variance.
     */
    public double variance(final int var1)
    {
        return covariance(var1, var1);
    }

    /**
     * Take the square root of the (divide-by-N) variance of a variable.
     * 
     * @param var1 The variable to use.
     * @return The standard deviation, or MISSING if the variance is MISSING.
     */
    public double standardDeviation(final int var1)
    {
        final double spread = variance(var1);
        // Pass the sentinel through untouched instead of taking the root of a negative number.
        return (spread == MISSING) ? MISSING : Math.sqrt(spread);
    }

    /**
     * Pearson's correlation coefficient between two variables: the sample covariance divided by the product of the
     * two sample standard deviations.
     * 
     * @param var1 The first variable.
     * @param var2 The second variable.
     * @return The Pearson's correlation coefficient. MISSING if either index is out of range, if any of the underlying
     *         sample (co)variances is MISSING, or if either sample variance is zero.
     */
    public double correlation(final int var1, final int var2)
    {
        final boolean firstInRange = (var1 >= 0) && (var1 < _numberOfVariables);
        final boolean secondInRange = (var2 >= 0) && (var2 < _numberOfVariables);
        if(!(firstInRange && secondInRange))
        {
            //PRINT ERROR MESSAGE HERE
            return MISSING;
        }

        final double spread1 = sampleVariance(var1);
        final double spread2 = sampleVariance(var2);
        final double joint = sampleCovariance(var1, var2);

        final boolean anyMissing = (spread1 == DataSet.MISSING) || (spread2 == DataSet.MISSING)
            || (joint == DataSet.MISSING);
        // A constant variable has zero variance, which would make the denominator zero.
        final boolean degenerate = (spread1 == 0) || (spread2 == 0);
        if(anyMissing || degenerate)
        {
            return DataSet.MISSING;
        }

        final double denominator = Math.sqrt(spread1) * Math.sqrt(spread2);
        return joint / denominator;
    }

    /////////////////////////////////////////////////////////////////////////
    //Forecast/Observed Error Statistics
    /////////////////////////////////////////////////////////////////////////

    /**
     * Uses the sumSquaredError method to calculate the mean squared error. The divisor is the full sample size, even
     * though samples with a MISSING value add nothing to the sum.
     * 
     * @param obsindex The first variable (predictand/predictor).
     * @param fcstindex The second variable (predictor/predictand).
     * @return The mean squared error, or MISSING if either index is out of range or the data set holds no samples.
     */
    public double meanSquaredError(final int obsindex, final int fcstindex)
    {
        // With no samples the division below would be 0/0 (NaN); report MISSING instead.
        if(_sampleSize <= 0)
        {
            return DataSet.MISSING;
        }

        final double sse = sumSquaredError(obsindex, fcstindex);
        if(sse != DataSet.MISSING)
        {
            return sse / _sampleSize;
        }
        return DataSet.MISSING;
    }

    /**
     * Computes the sum of squared errors between two variables: the squared difference between the two variables,
     * summed over all samples. A sample contributes only when neither of its two values is MISSING.
     * 
     * @param obsindex The first variable (predictand/predictor).
     * @param fcstindex The second variable (predictor/predictand).
     * @return The sum of squared errors, or MISSING if either variable index is out of range.
     */
    public double sumSquaredError(final int obsindex, final int fcstindex)
    {
        final boolean fcstInRange = (fcstindex >= 0) && (fcstindex < _numberOfVariables);
        final boolean obsInRange = (obsindex >= 0) && (obsindex < _numberOfVariables);
        if(!fcstInRange || !obsInRange)
        {
            return MISSING;
        }

        double accumulated = 0;
        for(int row = 0; row < _sampleSize; row++)
        {
            final double forecast = getValueBlindly(row, fcstindex);
            if(forecast == MISSING)
            {
                continue;
            }
            final double observed = getValueBlindly(row, obsindex);
            if(observed == MISSING)
            {
                continue;
            }
            accumulated += Math.pow(forecast - observed, 2);
        }
        return accumulated;
    }

    /**
     * Finds the largest absolute difference between the two variables over all samples. A sample is considered only
     * when neither of its two values is MISSING; if no sample qualifies the result is 0.
     * 
     * @param obsindex The first variable (predictand/predictor).
     * @param fcstindex The second variable (predictor/predictand).
     * @return The maximum error, or MISSING if either variable index is out of range.
     */
    public double maximumError(final int obsindex, final int fcstindex)
    {
        final boolean fcstInRange = (fcstindex >= 0) && (fcstindex < _numberOfVariables);
        final boolean obsInRange = (obsindex >= 0) && (obsindex < _numberOfVariables);
        if(!fcstInRange || !obsInRange)
        {
            return MISSING;
        }

        double worst = 0;
        for(int row = 0; row < _sampleSize; row++)
        {
            final double forecast = getValueBlindly(row, fcstindex);
            if(forecast == MISSING)
            {
                continue;
            }
            final double observed = getValueBlindly(row, obsindex);
            if(observed == MISSING)
            {
                continue;
            }

            final double gap = Math.abs(forecast - observed);
            if(gap > worst)
            {
                worst = gap;
            }
        }
        return worst;
    }

    /**
     * Computes the average of the absolute value of the errors. Samples in which either value is MISSING add nothing
     * to the sum, but the divisor is still the full sample size.
     * 
     * @param obsindex The first variable (predictand/predictor).
     * @param fcstindex The second variable (predictor/predictand).
     * @return The average absolute error, or MISSING if either index is out of range or the data set holds no
     *         samples.
     */
    public double averageAbsError(final int obsindex, final int fcstindex)
    {
        if(((fcstindex >= _numberOfVariables) || (fcstindex < 0))
            || ((obsindex >= _numberOfVariables) || (obsindex < 0)))
        {
            return MISSING;
        }

        // With no samples the division below would be 0/0 (NaN); report MISSING instead.
        if(_sampleSize <= 0)
        {
            return MISSING;
        }

        double sum = 0;

        for(int i = 0; i < _sampleSize; i++)
        {
            if((getValueBlindly(i, fcstindex) != MISSING) && (getValueBlindly(i, obsindex) != MISSING))
            {
                sum += Math.abs(getValueBlindly(i, fcstindex) - getValueBlindly(i, obsindex));
            }
        }

        return sum / _sampleSize;
    }

    /**
     * Computes the average of the errors, where each error is the forecast value minus the observed value. Samples in
     * which either value is MISSING add nothing to the sum, but the divisor is still the full sample size.
     * 
     * @param obsindex The first variable (predictand/predictor).
     * @param fcstindex The second variable (predictor/predictand).
     * @return The average error, or MISSING if either index is out of range or the data set holds no samples.
     */
    public double averageError(final int obsindex, final int fcstindex)
    {
        if(((fcstindex >= _numberOfVariables) || (fcstindex < 0))
            || ((obsindex >= _numberOfVariables) || (obsindex < 0)))
        {
            //PRINT ERROR MESSAGE HERE
            return MISSING;
        }

        // With no samples the division below would be 0/0 (NaN); report MISSING instead.
        if(_sampleSize <= 0)
        {
            return MISSING;
        }

        double sum = 0;

        for(int i = 0; i < _sampleSize; i++)
        {
            if((getValueBlindly(i, fcstindex) != MISSING) && (getValueBlindly(i, obsindex) != MISSING))
            {
                sum += getValueBlindly(i, fcstindex) - getValueBlindly(i, obsindex);
            }
        }

        return sum / _sampleSize;
    }

    /////////////////////////////////////////////////////////////////////////
    //Forecast/Observed Categorical Statistics
    /////////////////////////////////////////////////////////////////////////

    /**
     * Probability of detection for a category: among the samples whose observed value lies in the category, the
     * fraction whose forecast value lies in the category as well.
     * 
     * @param obsindex Column number of observed variable.
     * @param fcstindex Column number of forecast variable.
     * @param cond1 Condition on the lower bound that defines the category.
     * @param lb The lower bound value.
     * @param and The and/or condition defining how to join the lower and upper bounds.
     * @param cond2 Condition on the upper bound that defines the category.
     * @param ub The upper bound value.
     * @return The probability of detection. MISSING if either index is out of range or no observed value falls in the
     *         category.
     */
    public double probabilityOfDetection(final int obsindex,
                                         final int fcstindex,
                                         final int cond1,
                                         final double lb,
                                         final boolean and,
                                         final int cond2,
                                         final double ub)
    {
        final boolean obsInRange = (obsindex >= 0) && (obsindex < _numberOfVariables);
        final boolean fcstInRange = (fcstindex >= 0) && (fcstindex < _numberOfVariables);
        if(!obsInRange || !fcstInRange)
        {
            //PRINT ERROR MESSAGE HERE
            return MISSING;
        }

        // Keep only the samples whose observation is inside the category; without any there is nothing to detect.
        final DataSet observedInCategory = extractSubset(obsindex, cond1, lb, and, cond2, ub);
        if((observedInCategory == null) || (observedInCategory.getSampleSize() <= 0))
        {
            return MISSING;
        }

        // Of those, count the forecasts that are inside the category too.
        final int hits = observedInCategory.countValues(fcstindex, cond1, lb, and, cond2, ub);
        final double events = observedInCategory.getSampleSize();

        return hits / events;
    }

    /**
     * Probability of false alarm for a category: among the samples whose forecast value lies in the category, the
     * fraction whose observed value does not.
     * 
     * @param obsindex Column number of observed variable.
     * @param fcstindex Column number of forecast variable.
     * @param cond1 Condition on the lower bound that defines the category.
     * @param lb The lower bound value.
     * @param and The and/or condition defining how to join the lower and upper bounds.
     * @param cond2 Condition on the upper bound that defines the category.
     * @param ub The upper bound value.
     * @return The probability of false alarm. MISSING if either index is out of range or no forecast value falls in
     *         the category.
     */
    public double probabilityOfFalseAlarm(final int obsindex,
                                          final int fcstindex,
                                          final int cond1,
                                          final double lb,
                                          final boolean and,
                                          final int cond2,
                                          final double ub)
    {
        final boolean obsInRange = (obsindex >= 0) && (obsindex < _numberOfVariables);
        final boolean fcstInRange = (fcstindex >= 0) && (fcstindex < _numberOfVariables);
        if(!obsInRange || !fcstInRange)
        {
            //PRINT ERROR MESSAGE HERE
            return MISSING;
        }

        // Keep only the samples whose forecast is inside the category; without any there can be no alarm.
        final DataSet forecastInCategory = extractSubset(fcstindex, cond1, lb, and, cond2, ub);
        if((forecastInCategory == null) || (forecastInCategory.getSampleSize() <= 0))
        {
            return MISSING;
        }

        // Observations that also fall in the category are correct forecasts; the remainder are the false alarms.
        final int confirmed = forecastInCategory.countValues(obsindex, cond1, lb, and, cond2, ub);
        final int falseAlarms = forecastInCategory.getSampleSize() - confirmed;
        final double alarms = forecastInCategory.getSampleSize();

        return falseAlarms / alarms;
    }

    /**
     * The probability of a "hydrologic" false alarm: given that the forecast is in the category, the probability that
     * the observed value is BELOW the category. Samples whose observed value fails the upper bound condition are left
     * out of both the numerator and the denominator.
     * 
     * @param obsindex Column number of observed variable.
     * @param fcstindex Column number of forecast variable.
     * @param cond1 Condition on the lower bound that defines the category.
     * @param lb The lower bound value.
     * @param and The and/or condition defining how to join the lower and upper bounds.
     * @param cond2 Condition on the upper bound that defines the category.
     * @param ub The upper bound value.
     * @return Probability of hydrologic false alarm, or MISSING if an index is invalid, no forecast is in the
     *         category, or no observed value in the forecast subset satisfies the upper bound condition.
     */
    public double probabilityOfHydroFalseAlarm(final int obsindex,
                                               final int fcstindex,
                                               final int cond1,
                                               final double lb,
                                               final boolean and,
                                               final int cond2,
                                               final double ub)
    {
        final boolean obsValid = (obsindex < _numberOfVariables) && (obsindex >= 0);
        final boolean fcstValid = (fcstindex < _numberOfVariables) && (fcstindex >= 0);
        if(!obsValid || !fcstValid)
        {
            return MISSING;
        }

        //Only samples whose forecast is in the category take part in the statistic.
        final DataSet forecastInCategory = extractSubset(fcstindex, cond1, lb, and, cond2, ub);
        if((forecastInCategory == null) || (forecastInCategory.getSampleSize() <= 0))
        {
            return MISSING;
        }

        //Observed values that are inside the category (hits).
        final int hits = forecastInCategory.countValues(obsindex, cond1, lb, and, cond2, ub);

        //Observed values satisfying the upper bound condition alone, i.e. those that are either below
        //or inside the category.  This equals the subset size minus the "too high" count, so it is the
        //population the probability is taken over.
        final int notTooHigh = forecastInCategory.countValues(obsindex, cond2, ub);
        if(notTooHigh <= 0)
        {
            return MISSING;
        }

        //Whatever is not a hit within that population lies below the category.
        return (double)(notTooHigh - hits) / (double)notTooHigh;
    }

    /**
     * Compute the probability of over forecasting: given that the observed value is in the category, the probability
     * that the forecast is above the category.
     * 
     * @param obsindex Column number of observed variable.
     * @param fcstindex Column number of forecast variable.
     * @param cond1 Condition on the lower bound that defines the category.
     * @param lb The lower bound value.
     * @param and The and/or condition defining how to join the lower and upper bounds.
     * @param cond2 Condition on the upper bound that defines the category.
     * @param ub The upper bound value.
     * @return The probability of over forecasting, or MISSING if an index is invalid or no observed value is in the
     *         category.
     */
    public double probabilityOfOverForecast(final int obsindex,
                                            final int fcstindex,
                                            final int cond1,
                                            final double lb,
                                            final boolean and,
                                            final int cond2,
                                            final double ub)
    {
        final boolean obsValid = (obsindex < _numberOfVariables) && (obsindex >= 0);
        final boolean fcstValid = (fcstindex < _numberOfVariables) && (fcstindex >= 0);
        if(!obsValid || !fcstValid)
        {
            return MISSING;
        }

        //Only samples whose observed value is in the category take part in the statistic.
        final DataSet observedInCategory = extractSubset(obsindex, cond1, lb, and, cond2, ub);
        if((observedInCategory == null) || (observedInCategory.getSampleSize() <= 0))
        {
            return MISSING;
        }

        //Forecasts that are inside the category (hits).
        final int hits = observedInCategory.countValues(fcstindex, cond1, lb, and, cond2, ub);

        //Forecasts satisfying the lower bound condition alone, i.e. those inside or above the
        //category.  This equals the subset size minus the count of forecasts that are too low.
        final int notTooLow = observedInCategory.countValues(fcstindex, cond1, lb);

        //Removing the hits leaves the over forecasts; the probability is taken over the whole subset.
        return (double)(notTooLow - hits) / (double)observedInCategory.getSampleSize();
    }

    /**
     * Compute the probability of under forecasting: given that the observed value is in the category, the probability
     * that the forecast is below the category.
     * 
     * @param obsindex Column number of observed variable.
     * @param fcstindex Column number of forecast variable.
     * @param cond1 Condition on the lower bound that defines the category.
     * @param lb The lower bound value.
     * @param and The and/or condition defining how to join the lower and upper bounds.
     * @param cond2 Condition on the upper bound that defines the category.
     * @param ub The upper bound value.
     * @return The probability of under forecasting, or MISSING if an index is invalid or no observed value is in the
     *         category.
     */
    public double probabilityOfUnderForecast(final int obsindex,
                                             final int fcstindex,
                                             final int cond1,
                                             final double lb,
                                             final boolean and,
                                             final int cond2,
                                             final double ub)
    {
        final boolean obsValid = (obsindex < _numberOfVariables) && (obsindex >= 0);
        final boolean fcstValid = (fcstindex < _numberOfVariables) && (fcstindex >= 0);
        if(!obsValid || !fcstValid)
        {
            return MISSING;
        }

        //Only samples whose observed value is in the category take part in the statistic.
        final DataSet observedInCategory = extractSubset(obsindex, cond1, lb, and, cond2, ub);
        if((observedInCategory == null) || (observedInCategory.getSampleSize() <= 0))
        {
            return MISSING;
        }

        //Forecasts that are inside the category (hits).
        final int hits = observedInCategory.countValues(fcstindex, cond1, lb, and, cond2, ub);

        //Forecasts satisfying the upper bound condition alone, i.e. those below or inside the
        //category.  This equals the subset size minus the count of forecasts that are too high.
        final int notTooHigh = observedInCategory.countValues(fcstindex, cond2, ub);

        //Removing the hits leaves the under forecasts; the probability is taken over the whole subset.
        return (double)(notTooHigh - hits) / (double)observedInCategory.getSampleSize();
    }

    /**
     * Compute the critical success index based on a single bound: hits / (hits + misses + false alarms), where an
     * "event" is a value satisfying the condition relative to the bound.
     * 
     * @param obsindex Column number of observed variable.
     * @param fcstindex Column number of forecast variable.
     * @param condition The condition on the boundary.
     * @param bound The boundary value.
     * @return The CSI, or MISSING if an index is invalid or if either the forecast or the observed variable has no
     *         sample satisfying the condition.
     */
    public double calculateCSI(final int obsindex, final int fcstindex, final int condition, final double bound)
    {
        final boolean obsValid = (obsindex < _numberOfVariables) && (obsindex >= 0);
        final boolean fcstValid = (fcstindex < _numberOfVariables) && (fcstindex >= 0);
        if(!obsValid || !fcstValid)
        {
            return MISSING;
        }

        //Samples for which the forecast satisfies the condition.
        final DataSet forecastEvents = extractSubset(fcstindex, condition, bound);
        if((forecastEvents == null) || (forecastEvents.getSampleSize() <= 0))
        {
            return MISSING;
        }

        //Hits: forecast and observed both satisfy the condition.
        //False alarms: forecast satisfies the condition but the observed is not counted as satisfying it.
        final int hits = forecastEvents.countValues(obsindex, condition, bound);
        final int falseAlarms = forecastEvents.getSampleSize() - hits;

        //Samples for which the observed satisfies the condition.
        final DataSet observedEvents = extractSubset(obsindex, condition, bound);
        if((observedEvents == null) || (observedEvents.getSampleSize() <= 0))
        {
            return MISSING;
        }

        //Misses: observed satisfies the condition but the forecast does not.  The samples in which both
        //satisfy it are the hits already counted, so they are simply removed from the subset size.
        final int misses = observedEvents.getSampleSize() - hits;

        return (double)hits / (double)(hits + misses + falseAlarms);
    }

    /////////////////////////////////////////////////////////////////////////
    //Counts
    /////////////////////////////////////////////////////////////////////////

    /**
     * Count the number of samples satisfying a single condition on one specific variable. This is a convenience form
     * of the two-condition countValues in which the second condition is NO_CONDITION.
     * 
     * @param var The variable to use.
     * @param condition The condition (greater than, less than, etc.) to use.
     * @param value The boundary value being checked against.
     * @return The number of samples satisfying the condition.
     */
    public int countValues(final int var, final int condition, final double value)
    {
        //The two conditions are joined with OR: NO_CONDITION is never satisfied, so the outcome is
        //decided by the first condition alone.
        final int matches = countValues(var, condition, value, OR, NO_CONDITION, MISSING);
        return matches;
    }

    /**
     * Count the number of samples satisfying a double condition, typically lower and upper bounds. Samples whose value
     * for the variable equals MISSING are never counted.
     * 
     * @param var The variable checked.
     * @param cond1 The condition on the first bound.
     * @param val1 The first bound value.
     * @param and The and/or condition linking the two boundary conditions.
     * @param cond2 The condition on the second bound.
     * @param val2 The second bound value.
     * @return The number of samples satisfying the condition, or MISSING cast to int if the variable is invalid.
     */
    public int countValues(final int var,
                           final int cond1,
                           final double val1,
                           final boolean and,
                           final int cond2,
                           final double val2)
    {
        if((var < 0) || (var >= _numberOfVariables))
        {
            return (int)MISSING;
        }

        int matches = 0;
        for(int row = 0; row < _sampleSize; row++)
        {
            final double sampleValue = getValueBlindly(row, var);

            //Missing data never counts.
            if(sampleValue == MISSING)
            {
                continue;
            }

            final boolean firstHolds = doesValueSatisfyConditions(sampleValue, val1, cond1);
            final boolean secondHolds = doesValueSatisfyConditions(sampleValue, val2, cond2);

            //NOTE: a NO_CONDITION comparison is never satisfied.  When the parts are joined with OR
            //(and == false) such a part therefore has no influence on the outcome, which is what lets
            //the single-condition overload reuse this method.
            final boolean counted = and ? (firstHolds && secondHolds) : (firstHolds || secondHolds);
            if(counted)
            {
                matches++;
            }
        }
        return matches;
    }

    /**
     * Counts the number of samples in which a julian hour variable has a calendar field equal to a set value; for
     * example, all samples for July or all samples for 1993. Samples whose julian hour equals MISSING are skipped.
     * 
     * @param jhourvar The julian hour variable.
     * @param calendar_field The Calendar (class) field to check.
     * @param value The value it needs to be.
     * @return The number of samples satisfying the condition, or MISSING cast to int if the variable is invalid.
     */
    public int countValues(final int jhourvar, final int calendar_field, final int value)
    {
        if((jhourvar < 0) || (jhourvar >= _numberOfVariables))
        {
            return (int)MISSING;
        }

        int matches = 0;
        for(int row = 0; row < _sampleSize; row++)
        {
            final double julianHour = getValueBlindly(row, jhourvar);
            if(julianHour != MISSING)
            {
                //Convert the julian hour to a calendar and compare the requested field.
                final Calendar date = HCalendar.computeCalendarFromJulianHour((int)julianHour);
                if(date.get(calendar_field) == value)
                {
                    matches++;
                }
            }
        }

        return matches;
    }

    /**
     * Basic tool checking whether a passed in value satisfies a given condition (LESS_THAN, GREATER_THAN, etc.)
     * relative to a base value. Any condition code other than the six comparisons handled here yields false.
     * 
     * @param value The value to check.
     * @param compvalue The value checked against.
     * @param condition The condition checked (see DataSet static variables).
     * @return true if the comparison named by condition holds between value and compvalue; false otherwise.
     */
    public boolean doesValueSatisfyConditions(final double value, final double compvalue, final int condition)
    {
        //Each clause pairs one condition code with the comparison it stands for.
        return ((condition == LESS_THAN) && (value < compvalue))
            || ((condition == LESS_THAN_OR_EQUAL_TO) && (value <= compvalue))
            || ((condition == GREATER_THAN) && (value > compvalue))
            || ((condition == GREATER_THAN_OR_EQUAL_TO) && (value >= compvalue))
            || ((condition == EQUAL_TO) && (value == compvalue))
            || ((condition == NOT_EQUAL_TO) && (value != compvalue));
    }

    /**
     * Same check as the (value, compvalue, condition) overload with the second and third parameters swapped. Probably
     * kept for backward compatibility.
     * 
     * @param value The value to check.
     * @param condition The condition checked (see DataSet static variables).
     * @param compvalue The value checked against.
     * @return true if the comparison named by condition holds between value and compvalue; false otherwise.
     */
    public boolean doesValueSatisfyConditions(final double value, final int condition, final double compvalue)
    {
        final boolean satisfied = doesValueSatisfyConditions(value, compvalue, condition);
        return satisfied;
    }

    /**
     * Check to see if a value satisfies a two part condition.
     * 
     * @param value The value to check.
     * @param cond1 The condition on the first bound.
     * @param val1 The bound value.
     * @param and The and/or connecting the two parts: true requires both parts to hold, false requires at least one.
     * @param cond2 The condition on the second bound.
     * @param val2 The second bound value.
     * @return Either true or false.
     */
    public boolean doesValueSatisfyConditions(final double value,
                                              final int cond1,
                                              final double val1,
                                              final boolean and,
                                              final int cond2,
                                              final double val2)
    {
        if(and)
        {
            return doesValueSatisfyConditions(value, cond1, val1) && doesValueSatisfyConditions(value, cond2, val2);
        }
        return doesValueSatisfyConditions(value, cond1, val1) || doesValueSatisfyConditions(value, cond2, val2);
    }

    /////////////////////////////////////////////////////////////////////////
    //CDF Tools 
    /////////////////////////////////////////////////////////////////////////

    /**
     * Sorts all samples by the passed in variable. The samples are copied into a list, ordered with
     * {@link Collections#sort(List, java.util.Comparator)} using a DataSetSortComparator built for the variable, and
     * then written back in the sorted order.
     * 
     * @param var The variable to sort by.
     * @return Returns false only if the variable is invalid.
     */
    @SuppressWarnings("unchecked")
    public boolean sortBy(final int var)
    {
        if((var < 0) || (var >= _numberOfVariables))
        {
            return false;
        }

        //Collect every sample so the standard library can do the ordering.
        final int rowCount = this.getSampleSize();
        @SuppressWarnings("rawtypes")
        final List rows = new ArrayList();
        for(int row = 0; row < rowCount; row++)
        {
            rows.add(this.getSample(row));
        }

        Collections.sort(rows, new DataSetSortComparator(var));

        //Store the samples back in their sorted positions.
        int target = 0;
        for(final Object sorted: rows)
        {
            this.setSampleBlindly(target, (double[])sorted);
            target++;
        }

        return true;
    }

    /**
     * Functional tool used in computing an empirical CDF. The value depends only on the sample size, which selects one
     * of three power-law fits (3-14, 15-100 and 101-500 samples); outside those ranges the result is 1.
     * 
     * @return The value of the Tn in a Bayesian plotting position.
     */
    public double returnTn()
    {
        final double coefficient;
        final double exponent;

        if((_sampleSize >= 3) && (_sampleSize <= 14))
        {
            coefficient = 1.0633;
            exponent = -0.7180;
        }
        else if((_sampleSize >= 15) && (_sampleSize <= 100))
        {
            coefficient = 1.1837;
            exponent = -0.7585;
        }
        else if((_sampleSize >= 101) && (_sampleSize <= 500))
        {
            coefficient = 1.4809;
            exponent = -0.8069;
        }
        else
        {
            return 1;
        }

        return coefficient * Math.pow(_sampleSize, exponent) + 1;
    }

    /**
     * Calculates the empirical cdf for a variable. It does the following: (1) it sorts the samples by the passed in
     * base variable (var); (2) it walks the samples from first to last and stores a Bayesian plotting position in the
     * passed in empirical variable. It does NOT create a new variable to store the empirical CDF. This variable MUST
     * already exist!!!
     * 
     * @param var The variable.
     * @param empvar The column storing the empirical cdf.
     * @return Returns false only if a passed in param is invalid.
     */
    public boolean createCDFUsingBayesianPlottingPositions(final int var, final int empvar)
    {
        final boolean varValid = (var >= 0) && (var < _numberOfVariables);
        final boolean empvarValid = (empvar >= 0) && (empvar < _numberOfVariables);
        if(!varValid || !empvarValid)
        {
            return false;
        }

        //The plotting positions are assigned by rank, so the data must be in ascending order.
        if(!sortBy(var))
        {
            return false;
        }

        //Exponent of the estimator; it depends only on the sample size.
        final double tn = returnTn();

        //Walk the samples with the internal pointer, pairing each with its cdf value.
        resetPtr();
        for(int i = 0; i < _sampleSize; i++)
        {
            //Recalibrated estimator from "A Bayesian Estimator of an Empirical Distribution",
            //by Dr. Roman Krzysztofowicz, evaluated for the 1-based rank of the sample.
            final int rank = i + 1;
            final double ratio = ((double)_sampleSize - rank + 1) / rank;
            final double cdf = Math.pow(Math.pow(ratio, tn) + 1, -1);
            setCurrentValue(empvar, cdf);
            next();
        }

        return true;
    }

    /**
     * Creates a CDF using the Weibull plotting position: after sorting by the variable, the sample at 0-based position
     * i receives (i + 1) / (n + 1), where n is the sample size. The empirical variable must already exist.
     * 
     * @param var The variable.
     * @param empvar The column storing the empirical cdf.
     * @return Returns false only if a passed in param is invalid.
     */
    public boolean createCDFUsingWeibullPlottingPosition(final int var, final int empvar)
    {
        final boolean varValid = (var >= 0) && (var < _numberOfVariables);
        final boolean empvarValid = (empvar >= 0) && (empvar < _numberOfVariables);
        if(!varValid || !empvarValid)
        {
            return false;
        }

        //The plotting positions are assigned by rank, so the data must be in ascending order.
        if(!sortBy(var))
        {
            return false;
        }

        resetPtr();
        final int count = this.getSampleSize();
        final double denominator = (double)(count + 1);
        for(int row = 0; row < count; row++)
        {
            setValue(row, empvar, (double)(row + 1) / denominator);
        }

        return true;
    }

    /**
     * Fills a variable with the CDF of a passed in Distribution evaluated at each sample's value of another variable.
     * The samples are not sorted and the CDF variable must already exist.
     * 
     * @param var The variable whose values are passed to the distribution.
     * @param cdfVar The column that receives the CDF values.
     * @param dist The Distribution whose functionCDF is evaluated.
     * @return Returns false only if a passed in variable index is invalid.
     */
    @SuppressWarnings("unchecked")
    public boolean createCDFVariableUsingProbabilityDistribution(final int var,
                                                                 final int cdfVar,
                                                                 @SuppressWarnings("rawtypes") final Distribution dist)
    {
        final boolean varValid = (var >= 0) && (var < _numberOfVariables);
        final boolean cdfVarValid = (cdfVar >= 0) && (cdfVar < _numberOfVariables);
        if(!varValid || !cdfVarValid)
        {
            return false;
        }

        final int count = getSampleSize();
        for(int row = 0; row < count; row++)
        {
            final double probability = dist.functionCDF(getValue(row, var));
            setValue(row, cdfVar, probability);
        }

        return true;
    }

    /**
     * Calculates the mean squared error between an empirical cdf and a passed in Distribution instance. For every
     * sample the distribution's CDF is evaluated at the variable value and compared with the stored empirical cdf
     * value; the squared differences are averaged over all samples.
     * 
     * @param var The variable.
     * @param empvar The empirical CDF of the variable.
     * @param dist The Distribution being compared with the empirical cdf.
     * @return The mean squared error of the difference, or MISSING if a variable index is invalid or the data set
     *         contains no samples.
     */
    @SuppressWarnings("unchecked")
    public double meanSquaredError(final int var,
                                   final int empvar,
                                   @SuppressWarnings("rawtypes") final Distribution dist)
    {
        if((var >= _numberOfVariables) || (var < 0))
        {
            //PRINT ERROR MESSAGE HERE
            return MISSING;
        }
        if((empvar >= _numberOfVariables) || (empvar < 0))
        {
            //PRINT ERROR MESSAGE HERE
            return MISSING;
        }

        //With no samples the average below would be 0.0 / 0, i.e. NaN.  Report MISSING instead, as the
        //other statistics of this class do for an empty sample.
        if(_sampleSize <= 0)
        {
            return MISSING;
        }

        //Find the sum of the squared errors.
        double totalerr = 0;
        for(int i = 0; i < _sampleSize; i++)
        {
            final double error = dist.functionCDF(getValueBlindly(i, var)) - getValueBlindly(i, empvar);
            totalerr += Math.pow(error, 2);
        }

        return totalerr / _sampleSize;
    }

    /**
     * Finds the maximum absolute error between an empirical cdf and a passed in Distribution instance. Only values for
     * which the empirical cdf is calculated are used in finding the maximum error.
     * 
     * @param var The variable.
     * @param empvar The empirical CDF of the variable.
     * @param dist The Distribution being compared with the empirical cdf.
     * @return The largest error found (0 if there are no samples); MISSING if a variable index is invalid; NaN if the
     *         distribution's CDF evaluates to MISSING or NaN for any sample.
     */
    public double maximumError(final int var, final int empvar, @SuppressWarnings("rawtypes") final Distribution dist)
    {
        final boolean varValid = (var >= 0) && (var < _numberOfVariables);
        final boolean empvarValid = (empvar >= 0) && (empvar < _numberOfVariables);
        if(!varValid || !empvarValid)
        {
            return MISSING;
        }

        //Absolute errors are never negative, so starting from 0 lets any real error replace it.
        double largest = 0;
        for(int row = 0; row < _sampleSize; row++)
        {
            @SuppressWarnings("unchecked")
            final double fitted = dist.functionCDF(getValueBlindly(row, var));

            //A distribution that cannot be evaluated at some sample invalidates the whole statistic.
            if((fitted == MISSING) || Double.isNaN(fitted))
            {
                return Double.NaN;
            }

            final double gap = Math.abs(getValueBlindly(row, empvar) - fitted);
            if(gap > largest)
            {
                largest = gap;
            }
        }

        return largest;
    }

    /**
     * Kolmogorov-Smirnov test statistic between two empirical distributions! It is computed in two passes. First,
     * otherset.getProbability is evaluated at every value of var1 in this data set and compared with empvar1. Then
     * this object's getProbability is evaluated at every value of var2 in the other data set and compared with
     * empvar2 of otherset. The largest absolute difference seen in either pass is returned.
     * 
     * @param var1 The variable in this object.
     * @param empvar1 The empirical cdf for the variable in this object.
     * @param otherset The DataSet to compare against.
     * @param var2 The variable in the other DataSet.
     * @param empvar2 The empirical cdf variable in the other DataSet.
     * @return The K-S test statistic, or largest error, found; MISSING if any variable index is invalid.
     */
    public double maximumError(final int var1,
                               final int empvar1,
                               final DataSet otherset,
                               final int var2,
                               final int empvar2)
    {
        //The variables must exist in their respective data sets.
        final boolean variablesValid = (var1 < _numberOfVariables) && (var1 >= 0)
            && (var2 < otherset.getNumberOfVariables()) && (var2 >= 0);
        if(!variablesValid)
        {
            return MISSING;
        }

        //So must the empirical cdf columns.
        final boolean cdfColumnsValid = (empvar1 < _numberOfVariables) && (empvar1 >= 0)
            && (empvar2 < otherset.getNumberOfVariables()) && (empvar2 >= 0);
        if(!cdfColumnsValid)
        {
            return MISSING;
        }

        double largest = 0;

        //Pass 1: compare the two empirical CDFs at the points of THIS data set only.
        for(int row = 0; row < _sampleSize; row++)
        {
            final double otherProb = otherset.getProbability(getValueBlindly(row, var1), var2, empvar2);
            final double gap = Math.abs(getValueBlindly(row, empvar1) - otherProb);
            if(gap > largest)
            {
                largest = gap;
            }
        }

        //Pass 2: compare them at the points of the OTHER data set only.
        for(int row = 0; row < otherset.getSampleSize(); row++)
        {
            final double thisProb = getProbability(otherset.getValue(row, var2), var1, empvar1);
            final double gap = Math.abs(otherset.getValue(row, empvar2) - thisProb);
            if(gap > largest)
            {
                largest = gap;
            }
        }

        return largest;
    }

    /**
     * Get the variable value corresponding to a passed in probability, using the passed in variable and empirical cdf
     * variable. Linear interpolation is used between the two samples whose empirical probabilities bracket the passed
     * in probability. It is assumed that this DataSet has already been sorted by the variable and therefore the
     * empirical variable is also sorted.
     * 
     * @param prob The probability.
     * @param var The variable.
     * @param empvar The empirical cdf variable.
     * @return The variable value corresponding to the passed in probability. The first sample's value is returned if
     *         its empirical probability is already at or above prob, and the last sample's value if no empirical
     *         probability is at or above prob. MISSING is returned if an index is invalid or there are no samples.
     */
    public double getQuantile(final double prob, final int var, final int empvar)
    {
        final boolean varValid = (var >= 0) && (var < _numberOfVariables);
        final boolean empvarValid = (empvar >= 0) && (empvar < _numberOfVariables);
        if(!varValid || !empvarValid || (_sampleSize == 0))
        {
            return MISSING;
        }

        //Advance to the first sample whose empirical probability is at or above prob.
        int upper = 0;
        while((upper < _sampleSize) && !(getValueBlindly(upper, empvar) >= prob))
        {
            upper++;
        }

        //Off the low end: no empirical probability is below prob, so return the smallest data value.
        //I should try to extend it with a logarithmic curve.
        if(upper == 0)
        {
            return getValueBlindly(upper, var);
        }

        //Off the high end: no empirical probability is at or above prob, so return the largest data
        //value.  I should try to extend it with a log curve.
        if(upper == _sampleSize)
        {
            return getValueBlindly(upper - 1, var);
        }

        //Bracketed: interpolate between the sample found and the one just before it.
        final int lower = upper - 1;
        final double upperValue = getValueBlindly(upper, var);
        final double lowerValue = getValueBlindly(lower, var);
        if(upperValue == lowerValue)
        {
            return upperValue;
        }

        //Relative position of prob between the two bracketing empirical probabilities.
        final double fraction = (prob - getValueBlindly(lower, empvar))
            / (getValueBlindly(upper, empvar) - getValueBlindly(lower, empvar));
        return fraction * (upperValue - lowerValue) + lowerValue;
    }

    /**
     * Get the probability corresponding to a passed in variable value, using the passed in variable and empirical cdf
     * variable. Linear interpolation is used. It is assumed that this DataSet has already been sorted by the variable
     * and therefore the empirical variable is also sorted. If the value is off the end of the empirical distribution,
     * then 0.0 or 1.0 will be returned depending on if it is off the low end or hi end, respectively.
     * 
     * @param value The variable value for which the empirical cdf is evaluated.
     * @param var The variable.
     * @param empvar The empirical cdf variable.
     * @return The probability corresponding to the passed in variable value, or MISSING if a variable index is invalid
     *         or the data set contains no samples.
     */
    public double getProbability(final double value, final int var, final int empvar)
    {
        if((var >= _numberOfVariables) || (var < 0))
        {
            //PRINT ERROR MESSAGE HERE
            return MISSING;
        }
        if((empvar >= _numberOfVariables) || (empvar < 0))
        {
            //PRINT ERROR MESSAGE HERE
            return MISSING;
        }

        //An empty data set has no empirical distribution to evaluate.  Stop here so that no sample is
        //read blindly from a data set that has none.
        if(_sampleSize <= 0)
        {
            return MISSING;
        }

        //Find the first sample whose variable value is strictly greater than the passed in value.  The
        //bound is checked before each read so the scan can never run past the last sample.
        int hi = 0;
        while((hi < _sampleSize) && (getValueBlindly(hi, var) <= value))
        {
            hi++;
        }

        //Cond 1, 0 = hi: Even the smallest data value is greater than value, so value is off the low end.
        //I should try to extend it with a logarithmic curve.
        if(hi == 0)
        {
            return 0.0;
        }

        //Cond 2, _sampleSize = hi: No data value is greater than value, so value is off the hi end.
        //I should try to extend it with a log curve.
        if(hi == _sampleSize)
        {
            return 1.0;
        }

        //Cond 3, 0 < hi < _sampleSize: value lies between sample lo and sample hi.  Get the relative
        //distance p from lo to value, relative to the distance from lo to hi, and apply it to the
        //distance between the two empirical probabilities.  This is a linear interpolation.
        final int lo = hi - 1;
        final double p = (value - getValueBlindly(lo, var)) / (getValueBlindly(hi, var) - getValueBlindly(lo, var));
        return p * (getValueBlindly(hi, empvar) - getValueBlindly(lo, empvar)) + getValueBlindly(lo, empvar);
    }

    /////////////////////////////////////////////////////////////////////////
    //Higher Statistics
    /////////////////////////////////////////////////////////////////////////

    /**
     * Return the regression coefficients corresponding to the predictors. Note that if constant is true, then the
     * coefficients will be shifted one notch to make room for the constant term (which will be in position 0 of the
     * returned array).
     * 
     * @param response The response variable (only one can be used).
     * @param predictors An array of indexes specifying predictor variables.
     * @param constant Either true to have a constant or false for no constant.
     * @return An array of coefficients for each predictor. Constant is in index 0.
     */
    public double[] multipleRegression(final int response, final int[] predictors, final boolean constant)
    {
        //Validate the arguments; null is returned for any invalid argument, including a null predictor list.
        if((response < 0) || (response >= _numberOfVariables))
        {
            return null;
        }
        if((predictors == null) || (predictors.length <= 0))
        {
            return null;
        }
        for(final int predictor: predictors)
        {
            if((predictor < 0) || (predictor >= _numberOfVariables))
            {
                return null;
            }
        }

        //When a constant is requested, column 0 of X holds 1s and the predictors are shifted one column right.
        final int offset = constant ? 1 : 0;

        //Build the response vector Y (as a one-column matrix) and the predictor matrix X, one row per sample.
        final double[][] y = new double[_sampleSize][1];
        final double[][] x = new double[_sampleSize][predictors.length + offset];
        for(int row = 0; row < _sampleSize; row++)
        {
            y[row][0] = getValueBlindly(row, response);
            if(constant)
            {
                x[row][0] = 1;
            }
            for(int p = 0; p < predictors.length; p++)
            {
                x[row][p + offset] = getValueBlindly(row, predictors[p]);
            }
        }

        //X' is needed for both X'X and X'Y, so it is computed once and reused.
        final double[][] xTranspose = MatrixMath.matrixTranspose(x);

        //(X'X)-1
        final double[][] xtxInverse = MatrixMath.matrixInverse(MatrixMath.matrixMultiplication(xTranspose, x));

        //X'Y
        final double[][] xty = MatrixMath.matrixMultiplication(xTranspose, y);

        //(X'X)-1 (X'Y) --> the coefficient vector, held as a one-column matrix.
        final double[][] coefficients = MatrixMath.matrixMultiplication(xtxInverse, xty);

        //Transpose so that the single column becomes row 0, which is the array to return.
        return MatrixMath.matrixTranspose(coefficients)[0];
    }

    //===============================================================================================
    // VARIABLE TRANSFORM AND MANIPULATION
    //===============================================================================================

    /**
     * Applies a distribution transform, Z = Finv(G(X)) where: (1) Finv is the inverse CDF of the passed in
     * distribution; (2) G is the empirical CDF (computed internal to this routine and stored in a new variable); (3) Z
     * is the new transformed variable. This creates TWO new variables.
     * 
     * @param var The variable to transform.
     * @param f The distribution to use.
     * @return The index of the transformed variable, or MISSING if error.
     */
    public int applyDistTransform(final int var, final Distribution f)
    {
        //The variable to transform must be an existing column.
        final boolean validVariable = (0 <= var) && (var < _numberOfVariables);
        if(!validVariable)
        {
            return (int)MISSING;
        }

        //Append a column for the empirical cdf, then fill that (now last) column from var.  Either step failing
        //aborts the transform.
        if(!addNewVariable() || !createCDFUsingBayesianPlottingPositions(var, _numberOfVariables - 1))
        {
            return (int)MISSING;
        }

        //Hand over to the overload that takes an explicit cdf variable; it creates the transformed variable.
        return applyDistTransform(var, _numberOfVariables - 1, f);
    }

    /**
     * Applies a distribution transform, Z = Finv(G(X)) where: (1) Finv is the inverse CDF of the passed in
     * distribution; (2) G is the empirical CDF (its index is specified by cdfvar); (3) Z is the new transformed
     * variable.
     * 
     * @param var The variable to transform.
     * @param cdfvar Its empirical cdf variable.
     * @param f The distribution to use.
     * @return The index of the transformed variable, or MISSING if error.
     */
    public int applyDistTransform(final int var, final int cdfvar, final Distribution f)
    {
        //Both the variable and its cdf variable must be existing columns.
        final boolean varInRange = (0 <= var) && (var < _numberOfVariables);
        final boolean cdfInRange = (0 <= cdfvar) && (cdfvar < _numberOfVariables);
        if(!(varInRange && cdfInRange))
        {
            return (int)MISSING;
        }

        //If the cdf column has any missing data, rebuild it from var; a failed rebuild aborts the transform.
        if(isAnyVariableDataMissing(cdfvar) && !createCDFUsingBayesianPlottingPositions(var, cdfvar))
        {
            return (int)MISSING;
        }

        //Append the column that will receive the transformed values.
        if(!addNewVariable())
        {
            return (int)MISSING;
        }

        //Fill the new (last) column with Finv applied to each sample's cdf value.
        final int target = _numberOfVariables - 1;
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            setValueBlindly(sample, target, f.functionInverseCDF(getValueBlindly(sample, cdfvar)));
        }
        return target;
    }

    /**
     * Applies a distribution transform, Z = Finv(G(X)) where: (1) Finv is the inverse CDF of the passed in distribution
     * f; (2) G is the distribution (passed in as g) of the variable var; (3) Z is the new transformed variable.
     * 
     * @param var The variable being transformed.
     * @param f The distribution being transformed to.
     * @param g The distribution of the variable being transformed.
     * @return The index of the transformed variable.
     */
    @SuppressWarnings("unchecked")
    public int applyDistTransform(final int var,
                                  @SuppressWarnings("rawtypes") final Distribution f,
                                  @SuppressWarnings("rawtypes") final Distribution g)
    {
        //Valid variable indices are 0 .. _numberOfVariables - 1, so var == _numberOfVariables must be rejected too.
        if((var >= _numberOfVariables) || (var < 0))
        {
            return (int)MISSING;
        }

        //Add a new variable to store the transformed values.
        if(!addNewVariable())
        {
            return (int)MISSING;
        }

        //Do the transform, filling in the new (last) variable with Finv(G(x)) for each sample.
        final int target = _numberOfVariables - 1;
        for(int i = 0; i < _sampleSize; i++)
        {
            setValueBlindly(i, target, f.functionInverseCDF(g.functionCDF(getValueBlindly(i, var))));
        }

        return target;
    }

    /**
     * Applies a distribution transform, Z = Finv(G(X)) where: (1) Finv is the inverse CDF of the passed in distribution
     * f; (2) G is the distribution (passed in as g) of the variable var; (3) Z is the new transformed variable.
     * 
     * @param var The variable being transformed.
     * @param targetVar The variable to store Z.
     * @param f The distribution being transformed to.
     * @param g The distribution of the variable being transformed.
     * @return The index of the transformed variable.
     */
    @SuppressWarnings("unchecked")
    public int applyDistTransform(final int var,
                                  final int targetVar,
                                  @SuppressWarnings("rawtypes") final Distribution f,
                                  @SuppressWarnings("rawtypes") final Distribution g)
    {
        //Valid variable indices are 0 .. _numberOfVariables - 1; an index equal to _numberOfVariables is out of range.
        if((var >= _numberOfVariables) || (var < 0))
        {
            return (int)MISSING;
        }
        if((targetVar >= _numberOfVariables) || (targetVar < 0))
        {
            return (int)MISSING;
        }

        //Do the transform, overwriting targetVar with Finv(G(x)) for each sample.  No variable is created here.
        for(int i = 0; i < _sampleSize; i++)
        {
            setValueBlindly(i, targetVar, f.functionInverseCDF(g.functionCDF(getValueBlindly(i, var))));
        }

        //The transformed values live in targetVar, which is not necessarily the last variable.
        return targetVar;
    }

    /**
     * Special case of applyDistributionTransform where the variable to transform is passed in and applyDistTransform is
     * called with the variable and an instance of a standard normal distribution. This creates two variables: the
     * empirical cdf of the passed in var and the transformed variable.
     * 
     * @param var The variable to transform.
     * @return The index of the new transformed variable.
     */
    public int applyNormalQuantileTransform(final int var)
    {
        //Target distribution is a default-constructed NormalDist; the empirical cdf is built by the callee.
        return applyDistTransform(var, new NormalDist());
    }

    /**
     * Applies a normal quantile transform (as method above) but with an already defined empirical cdf.
     * 
     * @param var The variable to transform.
     * @param cdfvar The empirical cdf variable for the variable being transformed.
     * @return The index of the new transformed variable.
     */
    public int applyNormalQuantileTransform(final int var, final int cdfvar)
    {
        //Target distribution is a default-constructed NormalDist; the caller supplies the empirical cdf variable.
        return applyDistTransform(var, cdfvar, new NormalDist());
    }

    /////////////////////////////////////////////////////////////////////////
    //Basic Transforms
    /////////////////////////////////////////////////////////////////////////
    //  The rest all transform in place (i.e. do not create a new variable,
    //  but change var, itself) unless stated otherwise (see sumVariables and 
    //  multiplyVariables). 
    /////////////////////////////////////////////////////////////////////////

    /**
     * Shifts the variable specified by the value specified.
     * 
     * @param var The variable to transform.
     * @param value The shift value.
     * @return Returns false if a problem occurs.
     */
    public boolean applyShiftTransform(final int var, final double value)
    {
        //Alias for shiftVariable, which validates var and shifts the column in place.
        final boolean shifted = shiftVariable(var, value);
        return shifted;
    }

    /**
     * Multiplies a variable specified by the value specified.
     * 
     * @param var The variable to scale.
     * @param value The scalar.
     * @return Returns false if a problem occurs.
     */
    public boolean applyScaleTransform(final int var, final double value)
    {
        //Alias for scaleVariable, which validates var and scales the column in place.
        final boolean scaled = scaleVariable(var, value);
        return scaled;
    }

    /**
     * Applies a natural-log transform to the variable specified.
     * 
     * @param var Variable to transform.
     * @return Returns false if the variable is invalid or if any value to transform is zero or negative.
     */
    public boolean applyLogTransform(final int var)
    {
        final boolean validVariable = (0 <= var) && (var < _numberOfVariables);
        if(!validVariable)
        {
            return false;
        }

        //First pass: refuse the whole transform if any value is zero or negative, so nothing is changed on failure.
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double current = getValueBlindly(sample, var);
            if(current <= 0)
            {
                return false;
            }
        }

        //Second pass: replace each value with its natural log.
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double logged = Math.log(getValueBlindly(sample, var));
            setValueBlindly(sample, var, logged);
        }
        return true;
    }

    /**
     * Applies a log transform with a base as specified.
     * 
     * @param var Variable to transform.
     * @param base Base of the log computation.
     * @return Returns false if a parameter is invalid or if a value to transform is zero or negative.
     */
    public boolean applyLogTransform(final int var, final double base)
    {
        if((var < 0) || (var >= _numberOfVariables))
        {
            return false;
        }

        //The base must be positive and must not be 1: ln(1) is 0, which would make the change-of-base division
        //below a division by zero.  Writing the test as !(base > 0) also rejects a NaN base.
        if(!(base > 0) || (base == 1.0))
        {
            return false;
        }

        //Refuse the whole transform if any value is zero or negative, so nothing is changed on failure.
        int i;
        for(i = 0; i < _sampleSize; i++)
        {
            if(getValueBlindly(i, var) <= 0)
            {
                return false;
            }
        }

        //Do the transform using the change-of-base formula: log_base(x) = ln(x) / ln(base).
        final double denom = Math.log(base);
        for(i = 0; i < _sampleSize; i++)
        {
            setValueBlindly(i, var, Math.log(getValueBlindly(i, var)) / denom);
        }
        return true;
    }

    /**
     * Applies a power transform using the power specified.
     * 
     * @param var The variable to transform.
     * @param power The power to use.
     * @return Returns false if the variable is invalid or if a value to be transformed is invalid (e.g. 0^-1 or
     *         (-1)^0.5).
     */
    public boolean applyPowerTransform(final int var, final double power)
    {
        if((var < 0) || (var >= _numberOfVariables))
        {
            return false;
        }

        //0 raised to a negative power is a division by zero.
        final boolean zeroIsInvalid = power < 0;

        //A negative value is rejected for powers strictly between -1 and 1 (the rule this method has always
        //applied) and also for every other non-integer power, because Math.pow yields NaN for a negative base with
        //a non-integer exponent (e.g. -8^1.5).
        final boolean negativeIsInvalid = ((-1 < power) && (power < 1)) || (power != Math.rint(power));

        //Validate every value before changing anything, so that a failure leaves the variable untouched.
        if(zeroIsInvalid || negativeIsInvalid)
        {
            for(int i = 0; i < _sampleSize; i++)
            {
                final double current = getValueBlindly(i, var);
                if((zeroIsInvalid && (current == 0)) || (negativeIsInvalid && (current < 0)))
                {
                    return false;
                }
            }
        }

        //Do the transform in place.
        for(int i = 0; i < _sampleSize; i++)
        {
            setValueBlindly(i, var, Math.pow(getValueBlindly(i, var), power));
        }
        return true;
    }

    /**
     * Applies a square transform to the variable specified.
     * 
     * @param var The variable.
     * @return Returns false if the variable is invalid.
     */
    public boolean applySquareTransform(final int var)
    {
        //Squaring is the power transform with exponent 2.
        final double squarePower = 2.0;
        return applyPowerTransform(var, squarePower);
    }

    /**
     * Applies a square root transform.
     * 
     * @param var The variable to transform.
     * @return Returns false if the variable is invalid or if a negative number is to be transformed.
     */
    public boolean applySquareRootTransform(final int var)
    {
        //The square root is the power transform with exponent 1/2.
        final double squareRootPower = 0.5;
        return applyPowerTransform(var, squareRootPower);
    }

    /**
     * Applies an inverse transform.
     * 
     * @param var Variable to transform.
     * @return Returns false if the variable is invalid or if 0 is to be transformed.
     */
    public boolean applyInverseTransform(final int var)
    {
        //The reciprocal is the power transform with exponent -1.
        final double inversePower = -1.0;
        return applyPowerTransform(var, inversePower);
    }

    /**
     * Applies an exponential transform, using the variable values as exponents on e.
     * 
     * @param var The variable.
     * @return Returns false if the variable is invalid.
     */
    public boolean applyExponentTransform(final int var)
    {
        final boolean validVariable = (0 <= var) && (var < _numberOfVariables);
        if(!validVariable)
        {
            return false;
        }

        //Replace each value x of the variable with e^x, in place.
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double exponent = getValueBlindly(sample, var);
            setValueBlindly(sample, var, Math.exp(exponent));
        }
        return true;
    }

    /////////////////////////////////////////////////////////////////////////
    //Shifting, Scaling, Summing, and Multiplying Variables
    /////////////////////////////////////////////////////////////////////////

    /**
     * Shift the variable (column) value for all samples (rows) by the amount.
     * 
     * @param var The variable to shift.
     * @param amt The amount to shift the variable.
     * @return Returns false if the variable is invalid.
     */
    public boolean shiftVariable(final int var, final double amt)
    {
        final boolean validVariable = (0 <= var) && (var < _numberOfVariables);
        if(!validVariable)
        {
            return false;
        }

        //Add amt to the variable's value in every sample, in place.
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double shifted = getValueBlindly(sample, var) + amt;
            setValueBlindly(sample, var, shifted);
        }
        return true;
    }

    /**
     * Scales a variable value (column) for all samples (rows).
     * 
     * @param var The variable to scale.
     * @param scale The scalar factor.
     * @return Returns false if the variable is invalid.
     */
    public boolean scaleVariable(final int var, final double scale)
    {
        final boolean validVariable = (0 <= var) && (var < _numberOfVariables);
        if(!validVariable)
        {
            return false;
        }

        //Multiply the variable's value in every sample by scale, in place.
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double scaled = scale * getValueBlindly(sample, var);
            setValueBlindly(sample, var, scaled);
        }
        return true;
    }

    /**
     * Creates a new variable which is a linear combination of two existing variables.
     * 
     * @param scale1 The coefficient on the first variable.
     * @param var1 The first variable.
     * @param scale2 The coefficient on the second variable.
     * @param var2 The second variable.
     * @return The column index of the new variable, or MISSING if a problem occurs.
     */
    public int sumVariables(final double scale1, final int var1, final double scale2, final int var2)
    {
        //Both operands must be existing columns.
        final boolean var1Valid = (0 <= var1) && (var1 < _numberOfVariables);
        final boolean var2Valid = (0 <= var2) && (var2 < _numberOfVariables);
        if(!(var1Valid && var2Valid))
        {
            return (int)MISSING;
        }

        //Append the column that will hold the linear combination.
        if(!addNewVariable())
        {
            return (int)MISSING;
        }

        //Fill the new (last) column with scale1 * var1 + scale2 * var2 for each sample.
        final int result = _numberOfVariables - 1;
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double combined = scale1 * getValueBlindly(sample, var1) + scale2 * getValueBlindly(sample, var2);
            setValueBlindly(sample, result, combined);
        }
        return result;
    }

    /**
     * Creates a new variable which is the product of two existing variables.
     * 
     * @param scale A constant scalar to multiply the product by.
     * @param var1 The first variable.
     * @param var2 The second variable.
     * @return The column index of the new variable, or MISSING if a problem occurs.
     */
    public int multiplyVariables(final double scale, final int var1, final int var2)
    {
        //Both operands must be existing columns.
        final boolean var1Valid = (0 <= var1) && (var1 < _numberOfVariables);
        final boolean var2Valid = (0 <= var2) && (var2 < _numberOfVariables);
        if(!(var1Valid && var2Valid))
        {
            return (int)MISSING;
        }

        //Append the column that will hold the scaled product.
        if(!addNewVariable())
        {
            return (int)MISSING;
        }

        //Fill the new (last) column with scale * var1 * var2 for each sample.
        final int result = _numberOfVariables - 1;
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double product = scale * getValueBlindly(sample, var1) * getValueBlindly(sample, var2);
            setValueBlindly(sample, result, product);
        }
        return result;
    }

    //===============================================================================================
    // BUILD AND NAVIGATE DATA SET
    //===============================================================================================

    /////////////////////////////////////////////////////////////////////////
    //Add To The DataSet
    /////////////////////////////////////////////////////////////////////////

    /**
     * Add the sample provided in the array of doubles to the table. If the sample length does not equal the number of
     * variables in the table, an error occurs. If the number of samples already equals the maximum number of samples,
     * an error occurs.<br>
     * <br>
     * When called, if makeRowsVariables is true it will copy the array values into place within the data table. If
     * false, it will use pointers. So, BE CAREFUL... the sample must be reconstructed before calling this method if
     * _makeRowsVariables is false!!!
     * 
     * @param sample The sample to add.
     * @return Returns false if an error occurs.
     */
    public boolean addSample(final double[] sample)
    {
        //The sample must supply exactly one value per variable, and the table must still have room.
        final boolean wrongLength = sample.length != _numberOfVariables;
        final boolean tableFull = _sampleSize == _maximumSize;
        if(wrongLength || tableFull)
        {
            return false;
        }

        //Store the sample in the first unused row, then count it.
        setSampleBlindly(_sampleSize, sample);
        _sampleSize += 1;
        return true;
    }

    /////////////////////////////////////////////////////////////////////////
    //Move Through The DataSet
    /////////////////////////////////////////////////////////////////////////

    /**
     * Basic tool that allows for moving from one sample to the next. It increments a current sample pointer.
     * 
     * @return Returns false if there are no more samples.
     */
    public boolean next()
    {
        //There is nowhere to go if the pointer is on (or beyond) the last sample.  Using >= rather than == also
        //covers an empty data set, where _sampleSize - 1 is -1 and an equality test would let the pointer advance
        //indefinitely.
        if(_currentSample >= _sampleSize - 1)
        {
            return false;
        }
        _currentSample++;
        return true;
    }

    /**
     * Resets the current sample pointer to be 0 (for the first sample).
     */
    public void resetPtr()
    {
        //Point back at the first sample (index 0).
        this._currentSample = 0;
    }

    /////////////////////////////////////////////////////////////////////////
    //Find Sample Index
    /////////////////////////////////////////////////////////////////////////

    /**
     * Returns the index of the first sample, searching from the current sample pointer (inclusive) onward, that has the
     * variable value specified. If such a sample is found, the current sample pointer is moved to it.
     * 
     * @param var The variable to check.
     * @param value The value it must be.
     * @return Returns MISSING if the variable is invalid or there are no more samples that qualify.
     */
    public int findSampleIndex(final int var, final double value)
    {
        final boolean validVariable = (0 <= var) && (var < _numberOfVariables);
        if(!validVariable)
        {
            return (int)MISSING;
        }

        //Scan forward from the current sample (inclusive) for an exact match on the variable's value.
        int candidate = _currentSample;
        while(candidate < _sampleSize)
        {
            if(getValueBlindly(candidate, var) == value)
            {
                //Found: move the current sample pointer to the match and report its index.
                _currentSample = candidate;
                return candidate;
            }
            candidate++;
        }

        //No sample from the current one onward has the value.
        return (int)MISSING;
    }

    /**
     * This method should only be called if the data is sorted in ASCENDING order by the variable specified as the first
     * argument.<br>
     * <br>
     * Binary search the data for the insertion point of the searchNumber provided. The insertion point is the 'sample'
     * index of the first value equal or larger than the provided searchNumber.
     * 
     * @param variable Variable to search.
     * @param searchValue The value to look for.
     * @return The insertion point into the list if the number were to be put into it in sorted order. This is either
     *         the sample index of the first sample value found EQUAL to the searchNumber, or the first value found
     *         LARGER than the searchNumber.
     */
    public int binarySearch(final int variable, final double searchValue)
    {
        final int size = getSampleSize();

        //An empty data set has only one possible insertion point, and no sample may be read.
        if(size <= 0)
        {
            return 0;
        }

        //At or below the smallest value: sample 0 is the first value equal to or larger than searchValue.
        if(searchValue <= getValue(0, variable))
        {
            return 0;
        }

        //Strictly above the largest value: nothing is equal or larger, so the insertion point is past the end.
        //A searchValue equal to the largest value falls through to the search so that the index of the first
        //equal sample is returned, as documented.
        if(searchValue > getValue(size - 1, variable))
        {
            return size;
        }

        //Invariant: the value at regionLB is smaller than searchValue, and regionUB is either the sample size or
        //the index of a value equal to or larger than searchValue.  The region is halved until the two are
        //adjacent, at which point regionUB is the first index whose value is equal to or larger than searchValue.
        int regionLB = 0;
        int regionUB = size;
        while(regionUB - regionLB > 1)
        {
            final int halfIndex = regionLB + (regionUB - regionLB) / 2;
            if(getValue(halfIndex, variable) >= searchValue)
            {
                regionUB = halfIndex;
            }
            else
            {
                regionLB = halfIndex;
            }
        }
        return regionUB;
    }

    /**
     * Returns the index of the first sample, searching from the current sample pointer (inclusive) onward, that equals
     * the sample passed in. If such a sample is found, the current sample pointer is moved to it.
     * 
     * @param sample The sample to find.
     * @return Returns MISSING if the sample is invalid or if there are no more samples that equal.
     */
    public int findSampleIndex(final double[] sample)
    {
        //The sample must supply exactly one value per variable.
        if(sample.length != _numberOfVariables)
        {
            return (int)MISSING;
        }

        //Scan forward from the current sample (inclusive), comparing every variable of each row to the sample.
        for(int row = _currentSample; row < _sampleSize; row++)
        {
            //Stop comparing a row at its first mismatching variable.
            boolean matches = true;
            for(int col = 0; matches && (col < _numberOfVariables); col++)
            {
                matches = (getValueBlindly(row, col) == sample[col]);
            }

            if(matches)
            {
                //Found: move the current sample pointer to the match and report its index.
                _currentSample = row;
                return row;
            }
        }

        //No row from the current one onward equals the sample.
        return (int)MISSING;
    }

    /**
     * Returns the index of the first sample, searching from the current sample pointer (inclusive) onward, that has the
     * julian hour variable value specified. The date is converted into GMT prior to calculating its julian hour.
     * 
     * @param var The variable to check.
     * @param date The Calendar value it must be.
     * @return Returns MISSING if the variable is invalid or there are no more samples that qualify.
     */
    public int findSampleIndex(final int var, final Calendar date)
    {
        //Convert the date to its julian hour and search for that value with the numeric overload.
        final int sampleIndex = findSampleIndex(var, HCalendar.computeJulianHourFromCalendar(date, false));
        return sampleIndex;
    }

    /**
     * Goes to the sample at the index passed in, setting _currentSample to it.
     */
    public boolean gotoSample(final int index)
    {
        //Only move the pointer when index refers to an existing sample; otherwise leave it where it is.
        final boolean inRange = (0 <= index) && (index < getSampleSize());
        if(inRange)
        {
            _currentSample = index;
        }
        return inRange;
    }

    /**
     * Goes to the next sample AFTER that sample corresponding to the current sample pointer that is equal to the passed
     * in sample. The current sample pointer is adjusted to point to the found sample.
     * 
     * @param sample The sample to search for.
     * @return Returns false if the sample is not found.
     */
    public boolean gotoNextSample(final double[] sample)
    {
        //Make sure the sample is present and of the exact correct length.
        if((sample == null) || (sample.length != _numberOfVariables))
        {
            return false;
        }

        //Advance one sample at a time, starting with the sample after the current one.  next() returns false when
        //there is nothing left to advance to, which ends the search; the current sample is only ever compared
        //after a successful advance.
        while(next())
        {
            if(isSampleEqualTo(sample))
            {
                return true;
            }
        }

        //I never found it!
        return false;
    }

    /**
     * Goes to the next sample AFTER that sample corresponding to the current sample pointer that satisfies a passed in
     * one part condition. The current sample pointer is updated.
     * 
     * @param var The variable to check.
     * @param condition The condition on the bound.
     * @param value The bound value.
     * @return Returns false if a sample is not found.
     */
    public boolean gotoNextSample(final int var, final int condition, final double value)
    {
        //A one part condition is run as a two part condition whose second part is NO_CONDITION.  The two parts are
        //joined with OR so that the first part alone decides the outcome (countValues relies on this as well).
        final boolean found = gotoNextSample(var, condition, value, OR, NO_CONDITION, MISSING);
        return found;
    }

    /**
     * Goes to the next sample AFTER that sample corresponding to the current sample pointer that satisfies a passed in
     * two part condition. The current sample pointer is updated.
     * 
     * @param var The variable to check.
     * @param cond1 The condition on the first bound.
     * @param val1 The first bound value.
     * @param and The and/or connecting the two parts.
     * @param cond2 The condition on the second bound.
     * @param val2 The second bound value.
     * @return Returns false if a sample is not found.
     */
    public boolean gotoNextSample(final int var,
                                  final int cond1,
                                  final double val1,
                                  final boolean and,
                                  final int cond2,
                                  final double val2)
    {
        if((var >= _numberOfVariables) || (var < 0))
        {
            return false;
        }

        //Check the samples one by one, starting with the one after the current sample.  The advance is done in the
        //loop condition so that EVERY pass moves the pointer, including passes that skip missing data; next()
        //returns false when there is nothing left to advance to.
        while(next())
        {
            final double currentvalue = getCurrentValue(var);

            //Skip missing data.
            if(isMissingValue(currentvalue))
            {
                continue;
            }

            //Evaluate both parts of the condition against the current value.
            final boolean result1 = doesValueSatisfyConditions(currentvalue, val1, cond1);
            final boolean result2 = doesValueSatisfyConditions(currentvalue, val2, cond2);

            //NOTE: If either cond1 or cond2 is NO_CONDITION, then doesValueSatisfyConditions
            //will always return false.  SO, if the and variable is set to false (OR), then
            //the NO_CONDITION part has no impact, i.e. if b is false, then (a OR b) is the same as (a).

            //Combine the two parts with AND or OR as requested; stop on the first sample that qualifies.
            final boolean satisfied = and ? (result1 && result2) : (result1 || result2);
            if(satisfied)
            {
                return true;
            }
        }
        return false;
    }

    //===============================================================================================
    // EDIT ALREADY BUILT DATA SET
    //===============================================================================================

    /////////////////////////////////////////////////////////////////////////
    //Setting Attributes
    /////////////////////////////////////////////////////////////////////////

    /**
     * Sets a specified sample (row) of the data table. The specified sample index must be one that already exists (i.e.
     * less than the sample size). The sample array must be non-null and have exactly getNumberOfVariables() elements. <br>
     * <br>
     * If _makeRowsVariables is true, the values are stored through DataTable.setColumnAsCopy (a physical copy,
     * according to the original documentation). Otherwise they are stored through DataTable.setRow, which the original
     * documentation describes as keeping the passed-in array itself (pointer copy).
     * 
     * @param numsample The sample index.
     * @param sample The sample values, one per variable.
     * @return Returns false if the sample index is out of range, or the array is null or of the wrong length.
     */
    public boolean setSample(final int numsample, final double[] sample)
    {
        if((numsample >= _sampleSize) || (numsample < 0))
        {
            return false;
        }

        //This class reports problems through return values rather than exceptions, so treat null as invalid input
        //instead of letting sample.length throw.
        if((sample == null) || (sample.length != _numberOfVariables))
        {
            return false;
        }

        if(_makeRowsVariables)
        {
            _dataTable.setColumnAsCopy(numsample, sample);
        }
        else
        {
            _dataTable.setRow(numsample, sample);
        }
        return true;
    }

    /**
     * Writes a sample straight into the data table with no validation of the index or of the array. <br>
     * <br>
     * When _makeRowsVariables is true the sample occupies a table column and is stored with setColumnAsCopy;
     * otherwise it occupies a table row and is stored with setRow (described by the original documentation as a
     * pointer copy of the passed-in array).
     * 
     * @param numsample The sample index.
     * @param sample The sample values.
     */
    public void setSampleBlindly(final int numsample, final double[] sample)
    {
        //Samples are table rows unless rows have been designated as variables.
        if(!_makeRowsVariables)
        {
            _dataTable.setRow(numsample, sample);
            return;
        }
        _dataTable.setColumnAsCopy(numsample, sample);
    }

    /**
     * Replaces all values of one variable (column) of the data table. The variable index must be valid and the array
     * length must equal the current sample size (NOT the maximum sample size). Passing a null array sets every value
     * of the variable to DataSet.MISSING and is treated as success. <br>
     * <br>
     * When _makeRowsVariables is false the values are stored with setColumnAsCopy; otherwise they are stored with
     * setRow (described by the original documentation as a pointer copy of the passed-in array).
     * 
     * @param numvar The variable number.
     * @param var The data array, or null to blank the variable out.
     * @return Returns false if the variable number is out of range or the array has the wrong length.
     */
    public boolean setVariable(final int numvar, final double[] var)
    {
        final boolean validIndex = (numvar >= 0) && (numvar < _numberOfVariables);
        if(!validIndex)
        {
            return false;
        }

        //A null array is shorthand for "make this variable entirely MISSING".
        if(var == null)
        {
            makeVariableValuesEqualTo(numvar, DataSet.MISSING);
            return true;
        }

        if(var.length == _sampleSize)
        {
            if(_makeRowsVariables)
            {
                _dataTable.setRow(numvar, var);
            }
            else
            {
                _dataTable.setColumnAsCopy(numvar, var);
            }
            return true;
        }

        return false;
    }

    /**
     * Writes a variable straight into the data table with no validation of the index or of the array. <br>
     * <br>
     * When _makeRowsVariables is true the variable occupies a table row and is stored with setRow (described by the
     * original documentation as a pointer copy); otherwise it occupies a table column and is stored with
     * setColumnAsCopy.
     * 
     * @param numvar The variable index.
     * @param var The values of the variable.
     */
    public void setVariableBlindly(final int numvar, final double[] var)
    {
        //Variables are table columns unless rows have been designated as variables.
        if(!_makeRowsVariables)
        {
            _dataTable.setColumnAsCopy(numvar, var);
            return;
        }
        _dataTable.setRow(numvar, var);
    }

    /**
     * Stores a value for one variable of the sample the current sample pointer refers to. Only the variable index is
     * validated; the pointer itself is used as is.
     * 
     * @param var The variable to set.
     * @param value The value to store.
     * @return Return false if the variable is invalid.
     */
    public boolean setCurrentValue(final int var, final double value)
    {
        final boolean validVariable = (var >= 0) && (var < _numberOfVariables);
        if(validVariable)
        {
            setValueBlindly(_currentSample, var, value);
        }
        return validVariable;
    }

    /**
     * Sets a specific value within the data table. The sample number must be for an already existing sample (less than
     * the sample size) and the variable index must be less than the number of variables.
     * 
     * @param numsample The sample (row) index.
     * @param var The variable (column) index.
     * @param value The value to set it to.
     * @return Returns false if the sample index or variable index are invalid.
     */
    public boolean setValue(final int numsample, final int var, final double value)
    {
        if((numsample >= _sampleSize) || (numsample < 0))
        {
            return false;
        }

        //Valid variable indices are 0 .. _numberOfVariables - 1; an index equal to the count must be rejected too.
        if((var >= _numberOfVariables) || (var < 0))
        {
            return false;
        }

        setValueBlindly(numsample, var, value);
        return true;
    }

    /**
     * Stores a single value in the data table without validating either index. The (sample, variable) pair is mapped
     * onto the table's two indices according to _makeRowsVariables.
     * 
     * @param numsample The sample index.
     * @param var The variable index.
     * @param value The value to store.
     */
    public void setValueBlindly(final int numsample, final int var, final double value)
    {
        //When rows are variables, the variable index comes first and the sample index second; otherwise the reverse.
        final int first = _makeRowsVariables ? var : numsample;
        final int second = _makeRowsVariables ? numsample : var;
        _dataTable.setValue(first, second, value);
    }

    /**
     * Stores a julian hour, computed from a Calendar, for one variable of the sample the current sample pointer refers
     * to. The conversion is done by HCalendar.computeJulianHourFromCalendar(date, false); according to the original
     * documentation the Calendar is converted to GMT before the julian hour is computed.
     * 
     * @param var The variable, assumed to hold julian hours.
     * @param date The Calendar object to convert.
     * @return Returns false if the variable is invalid.
     */
    public boolean setCurrentValue(final int var, final Calendar date)
    {
        final boolean validVariable = (var >= 0) && (var < _numberOfVariables);
        if(!validVariable)
        {
            return false;
        }

        final double julianHour = HCalendar.computeJulianHourFromCalendar(date, false);
        setValueBlindly(_currentSample, var, julianHour);
        return true;
    }

    /**
     * Sets a specific value within the data table, assuming it is a julian hour variable. The sample number must be
     * for an already existing sample (less than the sample size) and the variable index must be less than the number
     * of variables. The julian hour is computed by HCalendar.computeJulianHourFromCalendar(date, false); according to
     * the original documentation the Calendar is converted to GMT first.
     * 
     * @param numsample The sample (row) index.
     * @param var The variable (column) index.
     * @param date The Calendar object used to compute the julian hour.
     * @return Returns false if the sample index or variable index is invalid.
     */
    public boolean setValue(final int numsample, final int var, final Calendar date)
    {
        if((numsample >= _sampleSize) || (numsample < 0))
        {
            return false;
        }

        //Valid variable indices are 0 .. _numberOfVariables - 1; an index equal to the count must be rejected too.
        if((var >= _numberOfVariables) || (var < 0))
        {
            return false;
        }

        setValueBlindly(numsample, var, HCalendar.computeJulianHourFromCalendar(date, false));
        return true;
    }

    /**
     * Manually overrides the current sample size. No data values are touched.
     * 
     * @param size The new sample size; must lie between 0 and the maximum size, inclusive.
     * @return Returns false if the size is negative or larger than the maximum size.
     */
    public boolean setSampleSize(final int size)
    {
        final boolean withinCapacity = (size >= 0) && (size <= _maximumSize);
        if(withinCapacity)
        {
            _sampleSize = size;
        }
        return withinCapacity;
    }

    /**
     * Sets the variable to which a Distribution is to be fit.
     * 
     * @param var The variable; must be at least 0 and less than the number of variables.
     * @return Returns false if the variable is invalid.
     */
    public boolean setFitSampleVariable(final int var)
    {
        //Valid variable indices are 0 .. _numberOfVariables - 1; an index equal to the count must be rejected too.
        if((var >= _numberOfVariables) || (var < 0))
        {
            return false;
        }

        _fitSample = var;
        return true;
    }

    /**
     * Sets the variable that contains the empirical CDF to which a Distribution is to be fit.
     * 
     * @param var The variable; must be at least 0 and less than the number of variables.
     * @return Returns false if the variable is invalid.
     */
    public boolean setFitCDFVariable(final int var)
    {
        //Valid variable indices are 0 .. _numberOfVariables - 1; an index equal to the count must be rejected too.
        if((var >= _numberOfVariables) || (var < 0))
        {
            return false;
        }

        _fitCDF = var;
        return true;
    }

    /////////////////////////////////////////////////////////////////////////
    //Copying Variables and Samples
    /////////////////////////////////////////////////////////////////////////

    /**
     * Copies every sample's value of one variable (column) into another variable, overwriting the destination. Only
     * the first getSampleSize() samples are copied.
     * 
     * @param fromvar The index of the source variable.
     * @param tovar The index of the destination variable.
     * @return Returns false if either variable is invalid.
     */
    public boolean copyVarToVar(final int fromvar, final int tovar)
    {
        final boolean sourceOk = (fromvar >= 0) && (fromvar < _numberOfVariables);
        final boolean targetOk = (tovar >= 0) && (tovar < _numberOfVariables);
        if(!(sourceOk && targetOk))
        {
            return false;
        }

        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double copied = getValueBlindly(sample, fromvar);
            setValueBlindly(sample, tovar, copied);
        }
        return true;
    }

    /**
     * Copies every variable's value of one sample (row) into another sample, overwriting the destination.
     * 
     * @param fromsample The index of the source sample.
     * @param tosample The index of the destination sample.
     * @return Returns false if either index points to a non-existent sample.
     */
    public boolean copySampleToSample(final int fromsample, final int tosample)
    {
        final boolean sourceOk = (fromsample >= 0) && (fromsample < _sampleSize);
        final boolean targetOk = (tosample >= 0) && (tosample < _sampleSize);
        if(!(sourceOk && targetOk))
        {
            return false;
        }

        for(int variable = 0; variable < _numberOfVariables; variable++)
        {
            final double copied = getValueBlindly(fromsample, variable);
            setValueBlindly(tosample, variable, copied);
        }
        return true;
    }

    /////////////////////////////////////////////////////////////////////////
    //Merge With Another DataSet
    /////////////////////////////////////////////////////////////////////////

    /**
     * Appends the variables of another DataSet to this one. Both DataSets must have an identical sample size. A new
     * core DataTable wide enough for both sets of variables is created, this set's contents are copied into it, and
     * the values of thedata are then copied in, value by value, after this set's existing variables.
     * 
     * @param thedata The DataSet instance containing the table of data to merge.
     * @return Returns false if the given DataSet does not have an identical sample size.
     */
    public boolean mergeDataSetsAsVariables(final DataSet thedata)
    {
        //Sample sizes must agree or the two tables cannot be placed side by side.
        if(_sampleSize != thedata.getSampleSize())
        {
            //PRINT ERROR HERE
            return false;
        }

        //Build the wider table, seed it with the existing contents and make it the core table.
        final int addedVars = thedata.getNumberOfVariables();
        final int combinedVars = addedVars + _numberOfVariables;
        final DataTable widened = DataSet.createDataAttribute(_maximumSize, combinedVars, _makeRowsVariables);
        widened.copyContentsFromDataTable(_dataTable);
        _dataTable = widened;

        //Copy the incoming variables into the new slots.  _numberOfVariables still holds the old count here,
        //so it doubles as the offset of the first appended variable.
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            for(int offset = 0; offset < addedVars; offset++)
            {
                setValueBlindly(sample, _numberOfVariables + offset, thedata.getValueBlindly(sample, offset));
            }
        }

        _numberOfVariables = combinedVars;
        return true;
    }

    /**
     * Merges the passed in DataSet with this one by joining samples on a key variable. For every sample of this set,
     * the value of thiskey is looked up in the thatkey variable of data (via findSampleIndex). When a match is found a
     * combined sample is written: all variables of this set followed by all variables of data except thatkey. Samples
     * of this set with no match are dropped. On success the core DataTable, the number of variables and the sample
     * size are all replaced; on failure this set is left unchanged. The pointer of data is reset once per sample.
     * <br>
     * <br>
     * NOTE(review): the combined values are written directly into newTable.getData()[sample][variable], which does not
     * consult _makeRowsVariables the way setValueBlindly does -- confirm this is correct when rows are variables (see
     * also the TODO near the end of the method). <br>
     * NOTE(review): the found index (an int) is compared against MISSING; this assumes findSampleIndex reports "not
     * found" with a value equal to MISSING -- confirm against findSampleIndex.
     * 
     * @param data The DataSet to merge into this one.
     * @param thiskey The key variable within this DataSet.
     * @param thatkey The key variable within data; it is not copied into the merged table.
     * @return Returns false, after printing an error to System.err, if no sample of this set had a match in data.
     */
    public boolean mergeDataSets(final DataSet data, final int thiskey, final int thatkey)
    {
        final int newnumvars = data.getNumberOfVariables() + _numberOfVariables - 1; // -1 because the key variable of data is not copied
        final DataTable newTable = DataSet.createDataAttribute(_maximumSize, newnumvars, _makeRowsVariables);

//        double[][] newdata = new double[_maximumSize][newnumvars];

        int i, j, adjj;
        double currentvalue;
        int index;
        int copyindex = 0;

        //Loop through each sample of this set, looking up its key in the other set and, when found,
        //writing the combined sample into the new table.  copyindex counts the samples written so far.
        for(i = 0; i < _sampleSize; i++)
        {
            currentvalue = getValue(i, thiskey);
            data.resetPtr();
//            System.out.println(HCalendar.computeCalendarFromJulianHour((int)currentvalue).get(Calendar.HOUR));

            index = data.findSampleIndex(thatkey, currentvalue);

            //If the current key value was found in the passed in DataSet, then...
            if(index != MISSING)
            {
                //Create the combined sample in the new data.
                for(j = 0; j < newnumvars; j++)
                {
                    //If j is less than _numberOfVariables, then the value comes from this data set.
                    if(j < _numberOfVariables)
                    {
                        newTable.getData()[copyindex][j] = getValue(i, j);
                    }
                    else
                    {
                        //Otherwise it comes from data: translate j into an index within data, stepping over
                        //thatkey so that the key variable is not duplicated.
                        adjj = j - _numberOfVariables;
                        if(adjj >= thatkey)
                        {
                            adjj++;
                        }
                        newTable.getData()[copyindex][j] = data.getValue(index, adjj);
                    }
                }
                copyindex++;
            }
        }

        //Nothing matched: report it and leave this data set untouched.
        if(copyindex == 0)
        {
            System.err.println("DataSet >> ERROR: I found nothing to merge");
            return false;
        }

        //TODO Fix this (left by the original author; presumably refers to the direct getData() writes above)

//        _dataTable.getD = newdata;
        this._dataTable = newTable;
        _numberOfVariables = newnumvars;
        _sampleSize = copyindex;
        return true;
    }

    /**
     * Appends the samples of another DataSet to this one. The passed in DataSet must have the same number of
     * variables. If the combined sample count exceeds the current maximum size, the maximum is raised to exactly that
     * count and a new core DataTable is created and filled with the existing contents. Each sample of thedata is then
     * added through addSample using a copy obtained from getCopyOfSample.
     * 
     * @param thedata The data to merge.
     * @return Returns false if the number of variables do not match.
     */
    public boolean mergeDataSetsAsSamples(final DataSet thedata)
    {
        if(_numberOfVariables != thedata.getNumberOfVariables())
        {
            //PRINT ERROR HERE
            return false;
        }

        //Grow the core table first if the combined samples will not fit.
        final int combinedSamples = thedata.getSampleSize() + _sampleSize;
        if(combinedSamples > _maximumSize)
        {
            _maximumSize = combinedSamples;
            final DataTable enlarged = DataSet.createDataAttribute(_maximumSize,
                                                                   _numberOfVariables,
                                                                   _makeRowsVariables);
            enlarged.copyContentsFromDataTable(_dataTable);
            _dataTable = enlarged;
        }

        //Append the incoming samples one at a time.  The bound is deliberately re-read on every pass.
        int next = 0;
        while(next < thedata.getSampleSize())
        {
            addSample(thedata.getCopyOfSample(next));
            next++;
        }

        return true;
    }

    /////////////////////////////////////////////////////////////////////////
    //Changing Data Table Size
    /////////////////////////////////////////////////////////////////////////

    /**
     * Removes the specified variable from the table and reduces the number of variables by one. The removal itself
     * is delegated to the core DataTable: a row is removed when rows are variables, a column otherwise.
     * 
     * @param var The variable to discard.
     * @return Returns false if the variable is invalid.
     */
    public boolean removeVariable(final int var)
    {
        final boolean validVariable = (var >= 0) && (var < _numberOfVariables);
        if(!validVariable)
        {
            return false;
        }

        if(!_makeRowsVariables)
        {
            _dataTable.removeColumn(var);
        }
        else
        {
            _dataTable.removeRow(var);
        }

        _numberOfVariables -= 1;
        return true;
    }

    /**
     * Adds one new variable to the table of data and increases the number of variables by one. The growth itself is
     * delegated to the core DataTable: a row is added when rows are variables, a column otherwise. According to the
     * original documentation the new values are initially MISSING.
     * 
     * @return Always returns true.
     */
    public boolean addNewVariable()
    {
        if(!_makeRowsVariables)
        {
            _dataTable.addColumn();
        }
        else
        {
            _dataTable.addRow();
        }

        _numberOfVariables += 1;
        return true;
    }

    /**
     * Adds a new variable through addNewVariable() and then sets the value of that variable, for every current
     * sample, to the given value.
     * 
     * @param value The value to initialize the variable to.
     * @return Returns false if addNewVariable() or the subsequent initialization returns false.
     */
    public boolean addNewVariable(final double value)
    {
        if(addNewVariable())
        {
            //The variable just added is the last one.
            final int newestVariable = _numberOfVariables - 1;
            return makeVariableValuesEqualTo(newestVariable, value);
        }
        return false;
    }

    /**
     * Removes the specified sample and reduces the sample size by one. The removal itself is delegated to the core
     * DataTable: a column is removed when rows are variables, a row otherwise.
     * 
     * @param sample The sample to remove.
     * @return Returns false if the sample to remove does not exist.
     */
    public boolean removeSample(final int sample)
    {
        final boolean exists = (sample >= 0) && (sample < _sampleSize);
        if(!exists)
        {
            //PRINT ERROR: Illegal sample number
            return false;
        }

        if(!_makeRowsVariables)
        {
            _dataTable.removeRow(sample);
        }
        else
        {
            _dataTable.removeColumn(sample);
        }

        _sampleSize -= 1;
        return true;
    }

    /**
     * Removes the sample corresponding to the current sample pointer. On success the pointer is moved back to the
     * preceding sample in the table, so that it no longer refers to the removed sample. On failure the pointer is left
     * untouched.
     * 
     * @return Returns false if the current sample pointer does not refer to an existing sample.
     */
    public boolean removeCurrentSample()
    {
        final int target = _currentSample;

        //Only move the pointer once the removal is known to have worked; stepping it back for a failed removal
        //would push an already invalid pointer even further out of range.
        if(!removeSample(target))
        {
            return false;
        }

        _currentSample = target - 1;
        return true;
    }

    /**
     * Changes the maximum number of samples the table can hold. A new core DataTable of the requested capacity is
     * created, filled with DataSet.MISSING, and the existing samples are then transferred into it one by one. The old
     * table is discarded.
     * 
     * @param newmax The new maximum sample size.
     * @return Returns false if the new size is smaller than the current sample size.
     */
    public boolean changeMaximumNumberOfSamples(final int newmax)
    {
        if(newmax < _sampleSize)
        {
            return false;
        }

        final DataTable resized = DataSet.createDataAttribute(newmax, _numberOfVariables, _makeRowsVariables);
        resized.setAllValues(DataSet.MISSING);

        //Transfer every existing sample; a sample is a column when rows are variables, a row otherwise.
        for(int index = 0; index < _sampleSize; index++)
        {
            final double[] values = getSample(index);
            if(_makeRowsVariables)
            {
                resized.setColumnAsCopy(index, values);
            }
            else
            {
                resized.setRow(index, values);
            }
        }

        _dataTable = resized;
        _maximumSize = newmax;
        return true;
    }

    /////////////////////////////////////////////////////////////////////////
    //Setting Entire Sample And Variable Values
    /////////////////////////////////////////////////////////////////////////   

    /**
     * Assigns one value to every variable of a single sample.
     * 
     * @param sample The index of the sample to change.
     * @param value The value every variable of that sample is set to.
     * @return Returns false if the sample is invalid.
     */
    public boolean makeSampleValuesEqualTo(final int sample, final double value)
    {
        final boolean exists = (sample >= 0) && (sample < _sampleSize);
        if(!exists)
        {
            //PRINT ERROR: Illegal sample number
            return false;
        }

        for(int variable = 0; variable < _numberOfVariables; variable++)
        {
            setValueBlindly(sample, variable, value);
        }
        return true;
    }

    /**
     * Assigns one value to a single variable across all current samples (indices below the sample size).
     * 
     * @param var The variable to change.
     * @param value The value the variable is set to in every sample.
     * @return Returns false if the variable is invalid.
     */
    public boolean makeVariableValuesEqualTo(final int var, final double value)
    {
        final boolean validVariable = (var >= 0) && (var < _numberOfVariables);
        if(!validVariable)
        {
            //PRINT ERROR MESSAGE HERE
            return false;
        }

        for(int sample = 0; sample < _sampleSize; sample++)
        {
            setValueBlindly(sample, var, value);
        }
        return true;
    }

    /**
     * Sets every variable value of one sample to DataSet.MISSING.
     * 
     * @param sample The sample to change.
     * @return Returns false if makeSampleValuesEqualTo returns false.
     */
    public boolean clearSample(final int sample)
    {
        final boolean cleared = makeSampleValuesEqualTo(sample, DataSet.MISSING);
        return cleared;
    }

    /**
     * Sets the value of one variable to DataSet.MISSING in every current sample.
     * 
     * @param var The variable to change.
     * @return Returns false if makeVariableValuesEqualTo returns false.
     */
    public boolean clearVariable(final int var)
    {
        final boolean cleared = makeVariableValuesEqualTo(var, DataSet.MISSING);
        return cleared;
    }

    /**
     * Clears the whole table: every value of every variable, up to the maximum size, is set to MISSING and the sample
     * size is then set to zero. If clearing any variable fails, the method returns immediately and the sample size is
     * left equal to the maximum size.
     * 
     * @return Should always return true (if not, then an internal error has occurred).
     */
    public boolean clearAllData()
    {
        //Temporarily treat the table as full so that clearVariable reaches every slot, not just the used ones.
        _sampleSize = _maximumSize;

        int variable = 0;
        while(variable < _numberOfVariables)
        {
            if(!clearVariable(variable))
            {
                return false;
            }
            variable++;
        }

        //Everything is blank now, so the set holds no samples.
        _sampleSize = 0;
        return true;
    }

    /**
     * Sets every variable value of one sample to zero.
     * 
     * @param sample The sample to change.
     * @return Returns false if makeSampleValuesEqualTo returns false.
     */
    public boolean makeSampleZero(final int sample)
    {
        final double zero = 0.0;
        return makeSampleValuesEqualTo(sample, zero);
    }

    /**
     * Sets the value of one variable to zero in every current sample.
     * 
     * @param var The variable to change.
     * @return Returns false if makeVariableValuesEqualTo returns false.
     */
    public boolean makeVariableZero(final int var)
    {
        final double zero = 0.0;
        return makeVariableValuesEqualTo(var, zero);
    }

    //===============================================================================================
    // READ DATA SET
    //===============================================================================================

    /**
     * Tells whether a value should be treated as MISSING (or as a hole). Subclasses with additional missing markers
     * should override this. The default implementation recognizes DataSet.MISSING only.
     * 
     * @param value The value to test.
     * @return True if the value equals DataSet.MISSING; false otherwise.
     */
    public boolean isMissingValue(final double value)
    {
        return value == DataSet.MISSING;
    }

    /**
     * Return the smallest non-missing value of var1 over those samples in which the var2 value is strictly greater
     * than the keyvalue specified. A keyvalue of MISSING results in ignoring the var2 condition. Note that var1 must
     * be valid, but var2 can either be valid or DataSet.NO_VARIABLE (which also disables the var2 condition). <br>
     * <br>
     * The search walks the whole data set using the current sample pointer; the pointer is reset (resetPtr) both
     * before and after the search, so any previous pointer position is lost.
     * 
     * @param var1 The variable being searched.
     * @param var2 The variable used for comparison, or DataSet.NO_VARIABLE.
     * @param keyvalue The value var2 must be greater than, or MISSING for no condition.
     * @return The smallest var1 value such that var2 > keyvalue, or MISSING if none is found, the data set is empty,
     *         or a parameter is invalid.
     */
    public double getSmallest(final int var1, int var2, double keyvalue)
    {
        if(_sampleSize == 0)
        {
            return MISSING;
        }

        //var1 must be a real variable (0 .. _numberOfVariables - 1); var2 must be one too unless it is NO_VARIABLE.
        final boolean var1Invalid = (var1 >= _numberOfVariables) || (var1 < 0);
        final boolean var2Invalid = (var2 != NO_VARIABLE) && ((var2 >= _numberOfVariables) || (var2 < 0));
        if(var1Invalid || var2Invalid)
        {
            return MISSING;
        }

        //Setup for no variable usage if necessary.
        if(var2 == NO_VARIABLE)
        {
            var2 = var1;
            keyvalue = MISSING;
        }
        final boolean ignoreKey = (keyvalue == MISSING);

        //Single pass over all samples.  The candidate is only ever seeded from a sample that is both non-missing
        //and satisfies the key condition, so no placeholder value can leak into the result.
        double smallest = MISSING;
        boolean found = false;

        resetPtr();
        boolean notatend = true;
        while(notatend)
        {
            final double current = getCurrentValue(var1);
            if(!isMissingValue(current) && (ignoreKey || (getCurrentValue(var2) > keyvalue)))
            {
                if(!found || (current < smallest))
                {
                    smallest = current;
                    found = true;
                }
            }

            notatend = next();
        }

        resetPtr();
        return smallest;
    }

    /**
     * Return the largest non-missing value of var1 over those samples in which the var2 value is strictly less than
     * the keyvalue specified. A keyvalue of MISSING results in ignoring the var2 condition. Note that var1 must be
     * valid, but var2 can either be valid or DataSet.NO_VARIABLE (which also disables the var2 condition). <br>
     * <br>
     * The search walks the whole data set using the current sample pointer; the pointer is reset (resetPtr) both
     * before and after the search, so any previous pointer position is lost.
     * 
     * @param var1 The variable being searched.
     * @param var2 The variable used for comparison, or DataSet.NO_VARIABLE.
     * @param keyvalue The value var2 must be smaller than, or MISSING for no condition.
     * @return The largest var1 value such that var2 < keyvalue, or MISSING if none is found, the data set is empty,
     *         or a parameter is invalid.
     */
    public double getLargest(final int var1, int var2, double keyvalue)
    {
        if(_sampleSize == 0)
        {
            return MISSING;
        }

        //var1 must be a real variable (0 .. _numberOfVariables - 1); var2 must be one too unless it is NO_VARIABLE.
        final boolean var1Invalid = (var1 >= _numberOfVariables) || (var1 < 0);
        final boolean var2Invalid = (var2 != NO_VARIABLE) && ((var2 >= _numberOfVariables) || (var2 < 0));
        if(var1Invalid || var2Invalid)
        {
            //PRINT ERROR MESSAGE HERE
            return MISSING;
        }

        //Setup for no variable usage if necessary.
        if(var2 == NO_VARIABLE)
        {
            var2 = var1;
            keyvalue = MISSING;
        }
        final boolean ignoreKey = (keyvalue == MISSING);

        //Single pass over all samples.  The candidate is only ever seeded from a sample that is both non-missing
        //and satisfies the key condition, so no placeholder value can leak into the result.
        double largest = MISSING;
        boolean found = false;

        resetPtr();
        boolean notatend = true;
        while(notatend)
        {
            final double current = getCurrentValue(var1);
            if(!isMissingValue(current) && (ignoreKey || (getCurrentValue(var2) < keyvalue)))
            {
                if(!found || (current > largest))
                {
                    largest = current;
                    found = true;
                }
            }

            notatend = next();
        }

        resetPtr();
        return largest;
    }

    /**
     * Returns the smallest value for a variable. This is a thin wrapper: it forwards to the three-argument
     * getSmallest, passing NO_VARIABLE as the comparison variable and MISSING as the key value.
     * 
     * @param var The variable to search.
     * @return The smallest value, or MISSING if var is invalid.
     */
    public double getSmallest(final int var)
    {
        return getSmallest(var, NO_VARIABLE, MISSING);
    }

    /**
     * Returns the largest value for a variable. This is a thin wrapper: it forwards to the three-argument getLargest,
     * passing NO_VARIABLE as the comparison variable and MISSING as the key value, which requests a search that is
     * not conditioned on a second variable.
     * 
     * @param var The variable to search.
     * @return The largest value, or MISSING if var is invalid.
     */
    public double getLargest(final int var)
    {
        return getLargest(var, NO_VARIABLE, MISSING);
    }

    /**
     * Gets the smallest value over a group of variables including only samples for which a specified variable is no
     * less than a keyvalue. It calls the univariate version of getSmallest once per entry of var, ignores MISSING
     * results, and returns the smallest of the remaining values.
     * 
     * @param var An array of variable indices. A null or empty array yields MISSING.
     * @param var2 The variable used for comparison.
     * @param keyvalue The value compared against.
     * @return The smallest value, or MISSING if no variable produced a non-MISSING result (for example because the
     *         variables are invalid).
     */
    public double getSmallest(final int[] var, final int var2, final double keyvalue)
    {
        //Nothing to search in a null array; report MISSING instead of throwing, in keeping with the
        //no-exceptions convention of this class.
        if(var == null)
        {
            return MISSING;
        }

        double smallest = MISSING;
        for(final int variable: var)
        {
            final double candidate = getSmallest(variable, var2, keyvalue);

            //MISSING candidates never replace the running result.
            if(isMissingValue(candidate))
            {
                continue;
            }

            //The first non-MISSING candidate is always taken; afterwards only strictly smaller ones are.
            if(isMissingValue(smallest) || (candidate < smallest))
            {
                smallest = candidate;
            }
        }

        return smallest;
    }

    /**
     * Gets the largest value over a group of variables including only samples for which a specified variable is no
     * greater than a keyvalue. It calls the univariate version of getLargest once per entry of var, ignores MISSING
     * results, and returns the largest of the remaining values.
     * 
     * @param var An array of variable indices. A null or empty array yields MISSING.
     * @param var2 The variable used for comparison.
     * @param keyvalue The value compared against.
     * @return The largest value, or MISSING if no variable produced a non-MISSING result (for example because the
     *         variables are invalid).
     */
    public double getLargest(final int[] var, final int var2, final double keyvalue)
    {
        //Nothing to search in a null array; report MISSING instead of throwing, in keeping with the
        //no-exceptions convention of this class.
        if(var == null)
        {
            return MISSING;
        }

        double largest = MISSING;
        for(final int variable: var)
        {
            final double candidate = getLargest(variable, var2, keyvalue);

            //MISSING candidates never replace the running result.
            if(isMissingValue(candidate))
            {
                continue;
            }

            //The first non-MISSING candidate is always taken; afterwards only strictly larger ones are.
            if(isMissingValue(largest) || (candidate > largest))
            {
                largest = candidate;
            }
        }

        return largest;
    }

    /**
     * Gets the number of samples currently counted as part of this data set.
     * 
     * @return Returns the sample size (rows).
     */
    public int getSampleSize()
    {
        return _sampleSize;
    }

    /**
     * Indicates whether this data set currently holds no samples.
     * 
     * @return true if the sample size is 0, false otherwise.
     */
    public boolean isEmpty()
    {
        return _sampleSize == 0; // true exactly when there are no samples
    }

    /**
     * Gets the stored maximum size of this data set.
     * 
     * @return Returns the maximum allowed sample size (rows).
     */
    public int getMaximumSampleSize()
    {
        return _maximumSize;
    }

    /**
     * Gets the number of variables in this data set.
     * 
     * @return Returns the number of variables (columns).
     */
    public int getNumberOfVariables()
    {
        return _numberOfVariables;
    }

    /**
     * Builds a sample-by-variable copy of the data.
     * 
     * @return Returns a COPY of the table of data, not the data itself. The returned double array will ALWAYS have the
     *         first index be samples and the second index be variables, so that MatrixMath.printMatrix(...) will print
     *         it out as a table. Each row is obtained from getCopyOfSample(...). Use getDataTable to get a non-copy of
     *         the original table as seen in memory. If the sample size is 0, an array with no rows is returned.
     */
    public double[][] getData()
    {
        //Only the outer array is allocated here. Every row is supplied by getCopyOfSample(...), so
        //pre-allocating the inner arrays would only create garbage that is immediately replaced.
        final double[][] table = new double[_sampleSize][];

        for(int sample = 0; sample < _sampleSize; sample++)
        {
            table[sample] = getCopyOfSample(sample);
        }

        return table;
    }

    /**
     * Get the core DataTable object.
     * 
     * @return The DataTable field held by this DataSet, returned by reference (no copy is made here).
     */
    public DataTable getDataTable()
    {
        return _dataTable;
    }

    /**
     * Gets the sample at the current sample pointer by delegating to getSample(...). Because getSample(...) returns
     * the table row directly when rows are samples, the result is NOT guaranteed to be a copy; use
     * getCopyOfCurrentSample() when an independent array is required.
     * 
     * @return The sample corresponding to the current sample pointer, or null if the sample size is 0.
     */
    public double[] getCurrentSample()
    {
        return (_sampleSize == 0) ? null : getSample(_currentSample);
    }

    /**
     * Gets a sample, avoiding a copy when the table layout allows it. When rows of the table are variables the sample
     * is a table column and a copy of that column is returned; otherwise the table row is returned as handed back by
     * the DataTable.
     * 
     * @param index The sample to acquire.
     * @return Returns the specified sample, or null if the sample size is 0. NOTE(review): the index is not
     *         range-checked in this method; the result for an out-of-range index depends on DataTable -- confirm.
     */
    public double[] getSample(final int index)
    {
        if(_sampleSize == 0)
        {
            return null;
        }

        //Samples are columns when rows are variables; only a row can be handed back without copying.
        return _makeRowsVariables ? _dataTable.getCopyOfColumn(index) : _dataTable.getRow(index);
    }

    /**
     * Gets a copy of the sample at the current sample pointer by delegating to getCopyOfSample(...).
     * 
     * @return Returns a COPY of the sample corresponding the current sample pointer, or null if the sample size is 0.
     */
    public double[] getCopyOfCurrentSample()
    {
        return (_sampleSize == 0) ? null : getCopyOfSample(_currentSample);
    }

    /**
     * Gets a copy of a sample. The sample is read from a table column when rows are variables, and from a table row
     * otherwise.
     * 
     * @param index The sample to acquire.
     * @return Returns a COPY of the specified sample, or null if the sample size is 0. NOTE(review): the index is not
     *         range-checked in this method; the result for an out-of-range index depends on DataTable -- confirm.
     */
    public double[] getCopyOfSample(final int index)
    {
        if(_sampleSize == 0)
        {
            return null;
        }

        return _makeRowsVariables ? _dataTable.getCopyOfColumn(index) : _dataTable.getCopyOfRow(index);
    }

    /**
     * Gets a copy of a variable. The variable is read from a table row when rows are variables, and from a table
     * column otherwise.
     * 
     * @param var The variable index to acquire.
     * @return Returns a COPY of the specified variable, or null if the sample size is 0. NOTE(review): var is not
     *         range-checked in this method; the result for an invalid variable depends on DataTable -- confirm.
     */
    public double[] getCopyOfVariable(final int var)
    {
        if(_sampleSize == 0)
        {
            return null;
        }

        return _makeRowsVariables ? _dataTable.getCopyOfRow(var) : _dataTable.getCopyOfColumn(var);
    }

    /**
     * Gets a variable, avoiding a copy when the table layout allows it.
     * 
     * @param var The variable index to acquire.
     * @return Returns the variable double array, or null if the sample size is 0. If _makeRowsVariables is false,
     *         this will be a physical copy (of a table column). Otherwise, it will be the table row as handed back by
     *         the DataTable (a pointer copy).
     */
    public double[] getVariable(final int var)
    {
        if(_sampleSize == 0)
        {
            return null;
        }

        //Variables are rows when _makeRowsVariables is set; only a row can be handed back without copying.
        return _makeRowsVariables ? _dataTable.getRow(var) : _dataTable.getCopyOfColumn(var);
    }

    /**
     * Collects the values of one variable for every sample, reading each value through getValue(...), so an invalid
     * variable produces an array filled with MISSING.
     * 
     * @param var The variable index for which to acquire values.
     * @return An array of size {@link #getSampleSize()} containing the values for the specified variable, or null if
     *         the sample size is 0.
     */
    public double[] getSampleSizedVariable(final int var)
    {
        if(_sampleSize == 0)
        {
            return null;
        }

        final double[] values = new double[getSampleSize()];
        int sample = 0;
        while(sample < values.length)
        {
            values[sample] = getValue(sample, var);
            sample++;
        }
        return values;
    }

    /**
     * Gets the index of the sample that the current sample pointer refers to.
     * 
     * @return Returns the value of the current sample pointer.
     */
    public int getCurrentSampleIndex()
    {
        return _currentSample;
    }

    /**
     * Returns the value of a specified variable for the sample corresponding to the current sample pointer.
     * 
     * @param var The variable to acquire. Valid indices run from 0 to the number of variables minus 1.
     * @return The value, which may be MISSING, or MISSING if the variable is invalid or the sample size is 0.
     */
    public double getCurrentValue(final int var)
    {
        if(_sampleSize == 0)
        {
            return MISSING;
        }

        //An index equal to _numberOfVariables is already one past the last variable, so it must be rejected
        //here rather than passed on to the unchecked lookup.
        if((var < 0) || (var >= _numberOfVariables))
        {
            return MISSING;
        }

        return getValueBlindly(_currentSample, var);
    }

    /**
     * Gets a value from the data table after validating both indices.
     * 
     * @param numsample The index of the sample. Valid indices run from 0 to the sample size minus 1.
     * @param var The index of the variable. Valid indices run from 0 to the number of variables minus 1.
     * @return Returns a specific value from the table, or MISSING if the sample or variable is invalid.
     */
    public double getValue(final int numsample, final int var)
    {
        if((numsample < 0) || (numsample >= _sampleSize))
        {
            return MISSING;
        }

        //An index equal to _numberOfVariables is already one past the last variable, so it must be rejected
        //here rather than passed on to the unchecked lookup.
        if((var < 0) || (var >= _numberOfVariables))
        {
            return MISSING;
        }

        return getValueBlindly(numsample, var);
    }

    /**
     * Gets a value blindly, without checking for proper parameters. The (sample, variable) pair is mapped onto the
     * table's two indices according to the _makeRowsVariables flag: when the flag is set the variable is the first
     * table index, otherwise the sample is.
     * 
     * @param numsample The index of the sample.
     * @param var The index of the variable.
     * @return Returns a specific value from the table, or crashes if a parameter is invalid.
     */
    public double getValueBlindly(final int numsample, final int var)
    {
        return _makeRowsVariables ? _dataTable.getValue(var, numsample) : _dataTable.getValue(numsample, var);
    }

    /**
     * Gets a value from the data table, assuming that the value is a julian hour variable, and it returns it as a
     * Calendar (in GMT). The stored double is truncated to an int before conversion.
     * 
     * @param numsample The index of the sample. Valid indices run from 0 to the sample size minus 1.
     * @param var The index of the variable. Valid indices run from 0 to the number of variables minus 1.
     * @return Calendar or null if variable or sample is invalid.
     */
    public Calendar getValueAsDate(final int numsample, final int var)
    {
        if((numsample < 0) || (numsample >= _sampleSize))
        {
            return null;
        }

        //An index equal to _numberOfVariables is already one past the last variable, so it must be rejected
        //here rather than passed on to the unchecked lookup.
        if((var < 0) || (var >= _numberOfVariables))
        {
            return null;
        }

        return HCalendar.computeCalendarFromJulianHour((int)getValueBlindly(numsample, var));
    }

    /**
     * Returns the value of a specified variable for the sample corresponding to the current sample pointer. It assumes
     * the variable is a julian hour variable and returns a Calendar (in GMT). The stored double is truncated to an
     * int before conversion.
     * 
     * @param var The index of the variable. Valid indices run from 0 to the number of variables minus 1.
     * @return Calendar or null if variable is invalid or the sample size is 0.
     */
    public Calendar getCurrentValueAsDate(final int var)
    {
        if(_sampleSize == 0)
        {
            return null;
        }

        //An index equal to _numberOfVariables is already one past the last variable, so it must be rejected
        //here rather than passed on to the unchecked lookup.
        if((var < 0) || (var >= _numberOfVariables))
        {
            return null;
        }

        return HCalendar.computeCalendarFromJulianHour((int)getValueBlindly(_currentSample, var));
    }

    /**
     * Gets the variable used as the data to fit for a Distribution fit method. This simply reports the stored
     * _fitSample setting.
     * 
     * @return Index of the data variable in the table.
     */
    public int getFitSampleVariable()
    {
        return _fitSample;
    }

    /**
     * Gets the variable used as the empirical cdf to fit for a Distribution fit method. This simply reports the
     * stored _fitCDF setting.
     * 
     * @return Index of the cdf variable in the table.
     */
    public int getFitCDFVariable()
    {
        return _fitCDF;
    }

    /**
     * Gets the _makeRowsVariables flag. When the flag is true, the accessors of this class treat rows of the
     * underlying table as variables and columns as samples; when false, rows are samples and columns are variables.
     * 
     * @return _makeRowsVariables
     */
    public boolean getMakeRowsVariables()
    {
        return _makeRowsVariables;
    }

    //===============================================================================================
    // DataSet Creation Tools
    //===============================================================================================

    /////////////////////////////////////////////////////////////////////////
    //Set Theory:
    /////////////////////////////////////////////////////////////////////////

    /**
     * Returns the subset of the current DataSet between the two sample indices. The new DataSet is created with room
     * for (endIndex - startIndex) samples and receives a copy of each sample in the range, in order.
     * NOTE(review): the indices are not validated here; behavior for an out-of-range or reversed range depends on
     * the DataSet constructor, getCopyOfSample(...) and addSample(...) -- confirm before passing unchecked input.
     * 
     * @param startIndex The first index in the subset.
     * @param endIndex The last index, NON INCLUSIVE! It points to the index immediately after the last index.
     * @return A new DataSet holding copies of the samples in [startIndex, endIndex).
     */
    public DataSet extractSubset(final int startIndex, final int endIndex)
    {
        final DataSet subset = new DataSet(endIndex - startIndex, _numberOfVariables);

        int sample = startIndex;
        while(sample < endIndex)
        {
            subset.addSample(getCopyOfSample(sample));
            sample++;
        }

        return subset;
    }

    /**
     * Returns the subset of the current DataSet for which the samples satisfy the passed in condition. The relative
     * order of samples is maintained in the returned DataSet. <br>
     * <br>
     * This forwards to the two-sided version, joining the given condition by OR with a second condition of
     * NO_CONDITION (applied to MISSING). A NO_CONDITION side is expected never to match (see
     * doesValueSatisfyConditions), so only the given condition decides which samples are kept.
     * 
     * @param var The variable to subset based upon.
     * @param condition The condition.
     * @param value The value the condition is applied to.
     * @return A new DataSet, or null if the variable is invalid or if no samples satisfy the condition.
     */
    public DataSet extractSubset(final int var, final int condition, final double value)
    {
        return extractSubset(var, condition, value, OR, NO_CONDITION, MISSING);
    }

    /**
     * Returns a subset using a two-sided condition on a single variable in the DataSet. Matching samples are copied
     * into the new DataSet in their original relative order.
     * 
     * @param var The variable conditioned upon.
     * @param cond1 The first condition.
     * @param val1 The value the first condition is applied to.
     * @param and The and/or connecting the two conditions: true requires both conditions to hold, false requires at
     *            least one of them.
     * @param cond2 The second condition.
     * @param val2 The value the second condition is applied to.
     * @return A new DataSet, or null if the variable is invalid or if no samples satisfy condition.
     */
    public DataSet extractSubset(final int var,
                                 final int cond1,
                                 final double val1,
                                 final boolean and,
                                 final int cond2,
                                 final double val2)
    {
        //The number of matching samples sizes the result.  countValues is relied upon to validate var (it is
        //expected to return MISSING, which is not positive, for a bad variable), so var is not checked again here.
        final int matches = countValues(var, cond1, val1, and, cond2, val2);
        if(matches <= 0)
        {
            return null;
        }

        final DataSet subset = new DataSet(matches, _numberOfVariables);

        //Test the samples one by one, copying those that pass.
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double current = getValueBlindly(sample, var);
            final boolean firstHolds = doesValueSatisfyConditions(current, val1, cond1);
            final boolean secondHolds = doesValueSatisfyConditions(current, val2, cond2);

            //NOTE: A NO_CONDITION side is expected to evaluate to false.  When the two sides are joined by OR
            //that side therefore has no impact, i.e. (a OR false) is the same as (a).
            final boolean keep = and ? (firstHolds && secondHolds) : (firstHolds || secondHolds);
            if(keep)
            {
                subset.addSample(getCopyOfSample(sample));
            }
        }

        return subset;
    }

    /**
     * Returns a subset of the DataSet containing samples that satisfy all of the conditions specified. All passed in
     * arrays must be of equal length, and a single two-sided condition is specified from all arrays for a given index
     * (i.e. the first condition is given by var[0], cond1[0], val1[0], etc.). The conditions are applied one after
     * another, each to the result of the previous one.
     * 
     * @param var Array of indices of variables being conditioned on.
     * @param cond1 Array of first conditions in the two sided conditions.
     * @param val1 Array of values the first conditions are applied to.
     * @param and Array of and/or's connecting the two-sided conditions.
     * @param cond2 Array of second conditions in the two sided conditions.
     * @param val2 Array of values the second conditions are applied to.
     * @return A new DataSet, or null if a passed in parameter is invalid or no samples satisfy all conditions.
     */
    public DataSet extractSubset(final int[] var,
                                 final int[] cond1,
                                 final double[] val1,
                                 final boolean[] and,
                                 final int[] cond2,
                                 final double[] val2)
    {
        //Every array must be present.
        final boolean anyNull = (var == null) || (cond1 == null) || (val1 == null) || (and == null)
            || (cond2 == null) || (val2 == null);
        if(anyNull)
        {
            return null;
        }

        //Every array must have the same, non-zero, length.
        final int conditions = var.length;
        final boolean sameLength = (cond1.length == conditions) && (val1.length == conditions)
            && (and.length == conditions) && (cond2.length == conditions) && (val2.length == conditions);
        if(!sameLength || (conditions == 0))
        {
            return null;
        }

        //Narrow the data one condition at a time by calling the single-variable extractSubset repeatedly.
        //This makes many passes through a shrinking DataSet, trading speed for simplicity.  A null result
        //at any stage ends the narrowing and is returned as is.
        DataSet narrowed = this;
        int k = 0;
        while((narrowed != null) && (k < conditions))
        {
            narrowed = narrowed.extractSubset(var[k], cond1[k], val1[k], and[k], cond2[k], val2[k]);
            k++;
        }

        return narrowed;
    }

    /**
     * Return a subset of the DataSet for which the passed in julian hour variable contains dates whose calendar-field
     * equals any one of the listed values. Samples whose julian hour is MISSING are skipped, and the relative order
     * of the remaining samples is maintained.
     * 
     * @param jhourvar The julian hour variable conditioned on.
     * @param calendar_field The field, such as Calendar.MONTH or Calendar.DAY_OF_YEAR.
     * @param value The accepted values of the field. Null entries are ignored; a null or empty list yields null.
     * @return The new DataSet, or null if no samples satisfy the condition.
     */
    public DataSet extractSubset(final int jhourvar, final int calendar_field, final List<Integer> value)
    {
        //Without any accepted values nothing can match; report that the same way as "no samples found"
        //instead of throwing on a null list.
        if((value == null) || value.isEmpty())
        {
            return null;
        }

        //Size the result by summing the per-value counts.  countValues is relied upon to validate jhourvar
        //(it is expected to return MISSING, which is not positive, for a bad variable), so it is not checked
        //again here.  Repeated entries in the list only make the capacity larger than needed.
        int capacity = 0;
        for(final Integer fieldValue: value)
        {
            //A null entry can never equal a calendar field, and unboxing it would throw.
            if(fieldValue == null)
            {
                continue;
            }
            capacity += countValues(jhourvar, calendar_field, fieldValue);
        }

        if(capacity <= 0)
        {
            return null;
        }

        final DataSet newdata = new DataSet(capacity, _numberOfVariables);

        //Test the samples one by one, copying those whose field value is in the list.
        for(int i = 0; i < _sampleSize; i++)
        {
            final double jhour = getValueBlindly(i, jhourvar);

            //Skip missing data.
            if(jhour == MISSING)
            {
                continue;
            }

            //Get the calendar corresponding to the julian hour
            final Calendar date = HCalendar.computeCalendarFromJulianHour((int)jhour);

            if(value.contains(date.get(calendar_field)))
            {
                newdata.addSample(getCopyOfSample(i));
            }
        }

        return newdata;
    }

    /**
     * Return a subset of the DataSet for which the passed in julian hour variable contains dates with a calendar-field
     * as specified. Samples whose julian hour is MISSING are skipped, and the relative order of the remaining samples
     * is maintained.
     * 
     * @param jhourvar The julian hour variable conditioned on.
     * @param calendar_field The field, such as Calendar.MONTH or Calendar.DAY_OF_YEAR.
     * @param value The value the field must equal, such as 1 for January (if MONTH).
     * @return The new DataSet, or null if no samples satisfy the condition.
     */
    public DataSet extractSubset(final int jhourvar, final int calendar_field, final int value)
    {
        //The number of matching samples sizes the result.  countValues is relied upon to validate jhourvar
        //(it is expected to return MISSING, which is not positive, for a bad variable), so it is not checked
        //again here.
        final int matches = countValues(jhourvar, calendar_field, value);
        if(matches <= 0)
        {
            return null;
        }

        final DataSet subset = new DataSet(matches, _numberOfVariables);

        //Test the samples one by one, copying those whose field equals value.
        for(int sample = 0; sample < _sampleSize; sample++)
        {
            final double jhour = getValueBlindly(sample, jhourvar);

            //Only samples with a julian hour can be converted to a date.
            if(jhour != MISSING)
            {
                final Calendar when = HCalendar.computeCalendarFromJulianHour((int)jhour);
                if(when.get(calendar_field) == value)
                {
                    subset.addSample(getCopyOfSample(sample));
                }
            }
        }

        return subset;
    }

    /**
     * Return a subset of this DataSet that contains all of the samples, but only the specified variables. The new
     * DataSet is given the same maximum size and sample size as this one, and variable i of the result is a copy of
     * variable variables[i] of this DataSet.
     * 
     * @param variables An array of variable indices, each between 0 and the number of variables minus 1.
     * @return The new DataSet, or null if variables is null or empty or if any variable is invalid.
     */
    public DataSet extractSubset(final int[] variables)
    {
        //Make sure variables has something inside it.
        if((variables == null) || (variables.length == 0))
        {
            return null;
        }

        //Check the variables to make sure they are all valid.  An index equal to _numberOfVariables is
        //already one past the last variable, so it is rejected as well.
        for(final int variable: variables)
        {
            if((variable < 0) || (variable >= _numberOfVariables))
            {
                return null;
            }
        }

        //All the variables are acceptable, so create a data set to contain everything.
        final DataSet newdata = new DataSet(_maximumSize, variables.length);
        newdata.setSampleSize(_sampleSize);

        for(int i = 0; i < variables.length; i++)
        {
            newdata.setVariable(i, getCopyOfVariable(variables[i]));
        }

        return newdata;
    }

    /**
     * Creates a data set which is the union (i.e. A U B) of this DataSet and the given DataSet. The resulting DataSet
     * will contain only ONE copy of each sample present in either DataSet (there will be no samples that are equal).
     * The resulting DataSet will have a maximum size equal to the sum of the sample size of both DataSets, so some
     * samples may go unused, because repeat samples are eliminated. <br>
     * <br>
     * Samples of this DataSet are considered first, then those of otherdata, each in index order. As a side effect
     * the sample pointers of both this DataSet and otherdata are reset.
     * 
     * @param otherdata The DataSet to union with.
     * @return The union, or null if otherdata is null or the two DataSets do not have the same number of variables.
     */
    public DataSet union(final DataSet otherdata)
    {
        //There must be another data set, and the two data sets must have the same number of variables.
        if((otherdata == null) || (_numberOfVariables != otherdata.getNumberOfVariables()))
        {
            //PRINT ERROR HERE
            return null;
        }

        //Allocate the data set.
        final DataSet newdata = new DataSet(_sampleSize + otherdata.getSampleSize(), _numberOfVariables);
        newdata.clearAllData();

        //First, go through this data set.
        resetPtr();
        for(int i = 0; i < _sampleSize; i++)
        {
            //Copy the sample once and use that copy for both the lookup and the insertion.
            final double[] sample = getCopyOfSample(i);

            //If the sample is not part of newdata yet, then add it.
            if(newdata.findSampleIndex(sample) == (int)MISSING)
            {
                newdata.addSample(sample);
            }
        }

        //Now the other data set
        otherdata.resetPtr();
        for(int i = 0; i < otherdata.getSampleSize(); i++)
        {
            final double[] sample = otherdata.getCopyOfSample(i);

            //If the sample is not part of newdata yet, then add it.
            if(newdata.findSampleIndex(sample) == (int)MISSING)
            {
                newdata.addSample(sample);
            }
        }

        return newdata;
    }

    /**
     * Creates a data set which is the intersection of this DataSet and the given DataSet. The resulting DataSet will
     * contain only ONE copy of each sample present in both DataSets (there will be no samples that are equal); in
     * other words, if a sample is present 5 times in both, it will only appear once in the result. The resulting
     * DataSet will have a maximum size equal to the smaller of the two sample sizes, and some samples may go unused,
     * because repeat samples are eliminated.
     * 
     * @param otherdata The DataSet to intersect.
     * @return A new DataSet, or null if otherdata is null or the number of variables are not equal between the two
     *         DataSets.
     */
    public DataSet intersection(final DataSet otherdata)
    {
        //There must be another data set, and the two data sets must have the same number of variables.
        if((otherdata == null) || (_numberOfVariables != otherdata.getNumberOfVariables()))
        {
            //PRINT ERROR HERE
            return null;
        }

        //Allocate the data set.
        final DataSet newdata = new DataSet(Math.min(_sampleSize, otherdata.getSampleSize()), _numberOfVariables);
        newdata.clearAllData();

        for(int i = 0; i < _sampleSize; i++)
        {
            //Copy the sample once and use that copy for both lookups and the insertion.
            final double[] sample = getCopyOfSample(i);

            //If the sample from this data set is in other data AND it is not part of newdata yet,
            //then add it.  That way, I guarantee only one copy of every intersecting sample.
            if((otherdata.findSampleIndex(sample) != (int)MISSING)
                && (newdata.findSampleIndex(sample) == (int)MISSING))
            {
                newdata.addSample(sample);
            }
        }

        return newdata;
    }

    /**
     * Builds a DataSet in which every run of consecutive samples sharing the same value of var is collapsed to a
     * single sample, namely the last sample of that run. Only neighbouring samples are compared, so samples with
     * equal values that are not adjacent are all kept; presumably the data is expected to be sorted by var
     * beforehand -- confirm with callers. <br>
     * <br>
     * NOTE(review): samples are handed over via getSample(...), which may return a reference into this DataSet's
     * table rather than a copy; whether addSample/setSample copy the array is not visible here.
     * 
     * @param var The variable whose consecutive repeats are collapsed.
     * @return A new DataSet with room for as many samples as this one, or null if var is invalid.
     */
    public DataSet extractUniqueSubset(final int var)
    {
        if((var < 0) || (var >= _numberOfVariables))
        {
            //PRINT ERROR: Illegal variable number
            return null;
        }

        //Create the storage data set to hold up to _sampleSize many samples.
        final DataSet unique = new DataSet(_sampleSize, _numberOfVariables);

        //Index of the most recently added sample in unique; it stays at -1 until the first sample is added.
        int last = -1;
        for(int source = 0; source < _sampleSize; source++)
        {
            //A repeat is a sample whose var equals that of the last added sample.  Nothing has been added
            //yet while last is -1, so the very first sample can never be a repeat.
            final boolean repeat = (last >= 0) && (getValue(source, var) == unique.getValue(last, var));

            if(repeat)
            {
                //Overwrite the last added sample so the final sample of the run is the one that is kept.
                unique.setSample(last, getSample(source));
            }
            else
            {
                unique.addSample(getSample(source));
                last++;
            }
        }

        return unique;
    }

    /**
     * Extracts a DataSet containing one copy of each sample (it eliminates repeats). This is done by taking the union
     * of this DataSet with a freshly constructed DataSet that has room for one sample and the same number of
     * variables (expected to hold no samples yet); union(...) is what removes the repeats.
     * 
     * @return A new DataSet.
     */
    public DataSet extractUnique()
    {
        //The placeholder contributes nothing itself; union(...) keeps only one copy of each sample.
        final DataSet placeholder = new DataSet(1, _numberOfVariables);
        return union(placeholder);
    }

    /////////////////////////////////////////////////////////////////////////
    //Other Stuff:
    /////////////////////////////////////////////////////////////////////////

    /**
     * This is designed for instantaneous data only! <br>
     * <br>
     * Temporally average the data set over some specified time width. The resulting data set will have data at regular
     * intervals that is averaged over each interval, where all values within the time period were weighted according to
     * the proportion of the time pd associated with them, and then summed together. The variables of the resulting data
     * set are the same, except the julian hour variable is the hour marking the BEGINNING of the time period (e.g. if
     * daily average, it is hour 0 of each day!). <br>
     * <br>
     * A null DataSet will be returned if the variable is invalid or if there is no data within the first time interval
     * to average. <br>
     * <br>
     * Averaging stops when no more data is ABOVE the current interval. Averaging is done for ALL variables. If you
     * don't want all variables, then call extractSubset(variables[]) first to get the variables you want (make sure to
     * include the julian hour variable). <br>
     * <br>
     * NOTE: This assumes that NO DATA IS MISSING! In otherwords, between sample 0 and the _samplesize - 1 sample, there
     * are no MISSING values in the jhrvar. <br>
     * 
     * @param jhrvar The julian hour variable.
     * @param startjhr The lower bounding hour of the first time interval.
     * @param hourwidth The width of each interval in hours to which to average the data.
     * @param nodataavg If true, it will use the two closest values to determine average when no data is in an interval.
     * @param alreadysorted If false, this method will sort the data by jhr prior to averaging.
     * @return New DataSet or null if a problem is encountered (above).
     */
    public DataSet temporalAverage(final int jhrvar,
                                   final double startjhr,
                                   final int hourwidth,
                                   final boolean nodataavg,
                                   final boolean alreadysorted)
    {
        //Check the variable
        if((jhrvar >= _numberOfVariables) || (jhrvar < 0))
        {
            return null;
        }

        //A non-positive interval width is a problem: with a width of zero the [lbjhr, ubjhr] window below
        //never advances, so the main loop would never terminate and every weight would be computed as x/0.
        //Return null, like the other argument checks.
        if(hourwidth <= 0)
        {
            return null;
        }

        //Count the number of samples inside of the [startjhr, startjhr + hourwidth] interval.
        //If there isn't at least 1, then I'll return null.  I will require that the first interval
        //have at least one value inside it and leave it up to the user to properly define startjhr.
        if(countValues(jhrvar,
                       DataSet.GREATER_THAN_OR_EQUAL_TO,
                       startjhr,
                       DataSet.AND,
                       DataSet.LESS_THAN_OR_EQUAL_TO,
                       startjhr + hourwidth) <= 0)
        {
            return null;
        }

        //Sort the data unless already sorted.  Note that this reorders THIS data set, not a copy.
        if(!alreadysorted)
        {
            sortBy(jhrvar);
        }

        //Find the first sample whose jhrvar value is at least startjhr.
        resetPtr();
        if(getCurrentValue(jhrvar) < startjhr)
        {
            if(!gotoNextSample(jhrvar, DataSet.GREATER_THAN_OR_EQUAL_TO, startjhr))
            {
                //There is no sample with a julian hour larger than startjhr.
                return null;
            }
        }

        //A list to store the weights on each sample; a sample array; the data set to be built.
        //NOTE(review): avgdata is sized to the source sample count; with nodataavg = true a sparse series can
        //produce more intervals than there are samples -- confirm addSample copes with that.
        final List<Double> weights = new ArrayList<Double>(hourwidth + 2);
        double[] sample;
        final DataSet avgdata = new DataSet(getSampleSize(), getNumberOfVariables());

        //Some variables...
        boolean done = false;
        int index;
        int preceding;
        int postceding;
        double lbjhr = startjhr;
        double ubjhr = lbjhr + hourwidth;
        double prevmid;
        double thismid;
        double weight;
        double value;
        int i, j;

        //Loop until I have no more data.
        while(!done)
        {
            //Get the index of the current sample.
            index = getCurrentSampleIndex();

            //Is there a previous index?... if so, grab it.
            if(index > 0)
            {
                preceding = index - 1;
            }
            else
            {
                preceding = index;
            }

            //If the current sample (i.e. index) is already larger than or equal to ubjhr, then do nothing.
            //Otherwise, I need to get the next sample >= ubjhr.
            if(getCurrentValue(jhrvar) < ubjhr)
            {
                //Go to the next sample above or equal to the current interval.
                if(!gotoNextSample(jhrvar, DataSet.GREATER_THAN_OR_EQUAL_TO, ubjhr))
                {
                    //There are no samples above the working interval, so this is the last interval.
                    //Set the postceding index to be the last index with data in the DataSet.
                    done = true;
                    postceding = getSampleSize() - 1;
                }
                else
                {
                    //Get the postceding index as the new current index.
                    postceding = getCurrentSampleIndex();
                }
            }
            //So... I've got no data inside the current interval.  What I do depends on nodataavg.
            else
            {
                //If nodataavg is true...
                if(nodataavg)
                {
                    //Process this interval as a two point mean.
                    postceding = index;
                }
                //Otherwise...
                else
                {
                    //Setup for the next interval and don't process this one at all..
                    lbjhr = ubjhr;
                    ubjhr = lbjhr + hourwidth;
                    continue;
                }

            }

            //Build the weights list by looking at all samples from preceding to postceding.
            //Each weight is the width of the sample's associated time period divided by total width of interval.
            //The associated time period is the period for which this sample is the closest sample to any
            //time value within the period.  The midpoint between successive jhr's delineates between
            //associated time periods.
            prevmid = lbjhr;
            for(i = preceding; i <= postceding; i++)
            {
                //Get the midpoint between this sample jhr and the next jhr.
                if(i != postceding)
                {
                    thismid = (getValue(i, jhrvar) + getValue(i + 1, jhrvar)) / 2.0;
                }
                else
                {
                    thismid = ubjhr;
                }

                //Force the midpoint to be between lbjhr and ubjhr -- THIS IS IMPORTANT FOR THE NEXT IFs TO WORK!
                if(thismid < lbjhr)
                {
                    thismid = lbjhr;
                }
                if(thismid > ubjhr)
                {
                    thismid = ubjhr;
                }

                //Compute the weight as the distance from this mid to the previous mid divided by the
                //total interval width.  Combined with the previous ifs which guarantee thismid and prevmid
                //are between lbjhr and ubjhr, this works for any case.
                weight = (thismid - prevmid) / hourwidth;

                //Add the weight to the list and record the previous mid as this mid.
                weights.add(Double.valueOf(weight));
                prevmid = thismid;
            }

            //Put together the sample.
            //For each variable...
            sample = new double[getNumberOfVariables()];
            for(i = 0; i < getNumberOfVariables(); i++)
            {
                //For the julian hour variable, just store the lower bound of the interval.
                if(i == jhrvar)
                {
                    sample[i] = lbjhr;
                }
                //Otherwise, I need a weighted average...
                else
                {
                    //for each sample contributing to the current interval...
                    sample[i] = 0.00;
                    for(j = preceding; j <= postceding; j++)
                    {
                        //Add its weight times its sample value to this sample value.
                        //weights is indexed relative to preceding, the first contributing sample.
                        weight = weights.get(j - preceding).doubleValue();
                        value = getValue(j, i);
                        sample[i] += weight * (value);
                    }
                }
            }

            //Add the sample to the averaged data.
            avgdata.addSample(sample);

            //Reset the weights and set the lb and ub on the julian hours.
            weights.clear();
            lbjhr = ubjhr;
            ubjhr = lbjhr + hourwidth;

        } //End of loop on the data

        return avgdata;
    }

    /**
     * Runs the temporalAverage method to get the 24-hour average starting from the first julian hour with any data.
     * This will create a DataSet with samples for days on which data exists (i.e. nodataavg = false) and assuming the
     * data is not yet sorted. The first interval starts at hour 0 of the day containing the smallest value of the
     * julian hour variable.
     * 
     * @param jhrvar The julian hour variable. It is used both to find the starting hour and as the time variable for
     *            the averaging itself.
     * @return The new DataSet, as returned by temporalAverage, or null if the smallest julian hour is not positive
     *         (e.g. jhrvar is invalid).
     */
    public DataSet dailyAverage(final int jhrvar)
    {
        //Get the smallest julian hour and make sure its not MISSING (i.e. jhrvar is valid).
        double startjhr = getSmallest(jhrvar);
        if(startjhr <= 0)
        {
            return null;
        }

        //Compute the start date as hour 0 of the first day with data, in GMT of course.
        //Subtracting the hour-of-day rolls the calendar back to the start of that day.
        final Calendar startdate = HCalendar.computeCalendarFromJulianHour((int)startjhr);
        startdate.add(Calendar.HOUR, -1 * startdate.get(Calendar.HOUR_OF_DAY));
        startjhr = HCalendar.computeJulianHourFromCalendar(startdate, true);

        //Get the averaged data set.  The time variable must be jhrvar -- the same variable the start hour
        //was derived from -- not a hard-coded variable 0.
        return temporalAverage(jhrvar, startjhr, 24, false, false);
    }

    /**
     * Same as the single-argument dailyAverage, except the first interval is derived from the passed in date instead
     * of from the smallest julian hour in the data. The date is rolled back to hour 0 of its day before averaging.
     * 
     * @param jhrvar The julian hour variable, used as the time variable for the averaging.
     * @param firstdate The lower bound on the first interval to average over.
     * @return The new DataSet, as returned by temporalAverage, or null if firstdate is null or does not convert to a
     *         positive julian hour.
     */
    public DataSet dailyAverage(final int jhrvar, final Calendar firstdate)
    {
        //Without a date there is no first interval to work from; return null rather than throw.
        if(firstdate == null)
        {
            return null;
        }

        //The startjhr for this version is computed from the passed in Calendar.  The false means
        //the calendar's time zone will be considered.
        double startjhr = HCalendar.computeJulianHourFromCalendar(firstdate, false);
        if(startjhr <= 0)
        {
            return null;
        }

        //Compute the start date as hour 0 of the day of the passed in date, in GMT of course.
        final Calendar startdate = HCalendar.computeCalendarFromJulianHour((int)startjhr);
        startdate.add(Calendar.HOUR, -1 * startdate.get(Calendar.HOUR_OF_DAY));
        startjhr = HCalendar.computeJulianHourFromCalendar(startdate, true);

        //Get the averaged data set, using the caller's julian hour variable (not a hard-coded variable 0).
        return temporalAverage(jhrvar, startjhr, 24, false, false);
    }

    //===============================================================================================
    // QUALITY CONTROL FUNCTIONS
    //===============================================================================================

    /**
     * Scans the samples in order and stops at the first one reported by {@link #isAnySampleDataMissing(int)}.
     * 
     * @return Returns true if any samples have any MISSING values; false if none do.
     */
    public boolean isAnyDataMissing()
    {
        int row = 0;
        while(row < _sampleSize)
        {
            //One incomplete sample is enough to answer the question.
            if(isAnySampleDataMissing(row))
            {
                return true;
            }
            row++;
        }

        //Every sample was checked and none had a MISSING value.
        return false;
    }

    /**
     * Checks every variable of one sample for the MISSING value.
     * 
     * @param sample The index of sample to check.
     * @return Return true if any variable value for the sample is MISSING. An index outside of the current sample
     *         range is also reported as true.
     */
    public boolean isAnySampleDataMissing(final int sample)
    {
        //An out-of-range index is treated the same as a sample with missing data.
        final boolean indexInRange = (sample >= 0) && (sample < _sampleSize);
        if(!indexInRange)
        {
            return true;
        }

        //Walk the variables, stopping as soon as a MISSING value turns up.
        boolean foundMissing = false;
        for(int column = 0; !foundMissing && (column < _numberOfVariables); column++)
        {
            foundMissing = (getValueBlindly(sample, column) == MISSING);
        }
        return foundMissing;
    }

    /**
     * Checks every sample value of one variable for the MISSING value.
     * 
     * @param var The index of variable to check. Valid indices run from 0 to the number of variables minus one.
     * @return Returns true if variable is invalid or if any sample values are MISSING for the variable.
     */
    public boolean isAnyVariableDataMissing(final int var)
    {
        //Reject invalid indices.  The upper bound is exclusive: var == _numberOfVariables is one past the last
        //variable and must not reach getValueBlindly, which does no checking of its own.
        if((var >= _numberOfVariables) || (var < 0))
        {
            return true;
        }

        int i;
        for(i = 0; i < _sampleSize; i++)
        {
            if(getValueBlindly(i, var) == MISSING)
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Compares the passed in values, variable by variable, against the sample at the current sample pointer.
     * 
     * @param sample Sample to check against. Must hold exactly one value per variable.
     * @return Returns true if the sample corresponding to current sample pointer equals passed in sample. Returns
     *         false if the passed in sample is null or is not of the exact correct length.
     */
    public boolean isSampleEqualTo(final double[] sample)
    {
        //A null sample cannot equal anything; report false rather than throw.
        if(sample == null)
        {
            return false;
        }

        //The sample must be of the exact correct length.
        if(sample.length != _numberOfVariables)
        {
            return false;
        }

        //Check the sample one by one; the first difference settles it.
        int i;
        for(i = 0; i < sample.length; i++)
        {
            if(sample[i] != getCurrentValue(i))
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the {@link DataTable} that backs a DataSet. The flag decides which of the two sizes is handed to the
     * {@link DataTable} constructor first: the variable count (true) or the sample count (false). Putting variables
     * first is usually the better choice, because a whole variable can then be obtained without copying and passed
     * straight to graphical packages such as {@link JFreeChart}.
     * 
     * @param maximumSamples The maximum number of samples to allow for initially.
     * @param maximumVariables The maximum number of variables to allow for initially.
     * @param makeRowsVariables True to make variables the first element of the underlying 2-d array, false to make
     *            samples the first element.
     * @return DataTable to be used as _data in a DataSet object.
     */
    protected static DataTable createDataAttribute(final int maximumSamples,
                                                   final int maximumVariables,
                                                   final boolean makeRowsVariables)
    {
        //Pick the order of the two sizes up front, then construct once.
        final int firstSize = makeRowsVariables ? maximumVariables : maximumSamples;
        final int secondSize = makeRowsVariables ? maximumSamples : maximumVariables;
        return new DataTable(firstSize, secondSize);
    }

    /**
     * A test main. Builds two small 5-sample, 3-variable data sets by hand and prints the results of regression,
     * counting/subsetting, julian hour conversion, merging, removal, resizing, intersection, union and sorting to
     * standard output for visual inspection.
     * 
     * @param args Not used.
     */
    public static void main(final String[] args)
    {
        final DataSet thedata = new DataSet(5, 3);

        //Fill the first data set: setValue(sample, variable, value).
        thedata.setSampleSize(5);
        thedata.setValue(0, 0, 3);
        thedata.setValue(1, 0, 2);
        thedata.setValue(2, 0, 4);
        thedata.setValue(3, 0, 5);
        thedata.setValue(4, 0, 6);
        thedata.setValue(0, 1, 4);
        thedata.setValue(1, 1, 1);
        thedata.setValue(2, 1, 3);
        thedata.setValue(3, 1, 6);
        thedata.setValue(4, 1, 9);
        thedata.setValue(0, 2, 1);
        thedata.setValue(1, 2, 7);
        thedata.setValue(2, 2, 3);
        thedata.setValue(3, 2, 5);
        thedata.setValue(4, 2, 2);

        //Test regression of variable 0 on variables 1 and 2.
        final int[] preds = {1, 2};
        final double[] coeff = thedata.multipleRegression(0, preds, true);

        MatrixMath.printMatrix(thedata.getData());

        System.out.println("--------------------");
        System.out.println("  b0 = " + coeff[0] + ", b1 = " + coeff[1]);
        System.out.println("--------------------");

        //Test Subset Stuff
        System.out.println("");
        System.out.println("The Count for var 2 between 2 and 7 is "
            + thedata.countValues(1, DataSet.GREATER_THAN, 2.0, DataSet.AND, DataSet.LESS_THAN, 7.0));

        final DataSet newdata = thedata.extractSubset(1, DataSet.GREATER_THAN, 2.0, DataSet.AND, DataSet.LESS_THAN, 7.0);
        MatrixMath.printMatrix(newdata.getData());
        System.out.println("The number of samples is... " + thedata.getSampleSize());

        //Test the julian hour conversions with a date 100 years in the past.
        final Calendar today = Calendar.getInstance();
        today.add(Calendar.YEAR, -100);
        final int jh = HCalendar.computeJulianHourFromCalendar(today, false);
        System.out.println("JULIAN HOUR... " + jh);
        HCalendar.computeCalendarFromJulianHour(jh);

        ////////////////////// TESTING INTERSECTION AND UNION ///////////////////
        System.out.println("\n>>>>>>>>>> TESTING INTERSECTION AND UNION AND MERGE <<<<<<<<<<\n");
        final DataSet thedata2 = new DataSet(5, 3);

        //The second data set differs from the first in samples 2 and 4 only.
        thedata2.setSampleSize(5);
        thedata2.setValue(0, 0, 3);
        thedata2.setValue(1, 0, 2);
        thedata2.setValue(2, 0, 10);
        thedata2.setValue(3, 0, 5);
        thedata2.setValue(4, 0, 6);
        thedata2.setValue(0, 1, 4);
        thedata2.setValue(1, 1, 1);
        thedata2.setValue(2, 1, 3);
        thedata2.setValue(3, 1, 6);
        thedata2.setValue(4, 1, 25);
        thedata2.setValue(0, 2, 1);
        thedata2.setValue(1, 2, 7);
        thedata2.setValue(2, 2, 3);
        thedata2.setValue(3, 2, 5);
        thedata2.setValue(4, 2, 2);

        System.out.println("********************  ORIGINAL 1: *******************");
        MatrixMath.printMatrix(thedata.getData());
        System.out.println("********************  ORIGINAL 2: *******************");
        MatrixMath.printMatrix(thedata2.getData());

        DataSet thedata3 = new DataSet(thedata);
        if(thedata3.mergeDataSetsAsVariables(thedata2))
        {
            System.out.println("********************  MERGE AS VARIABLE: *******************");
            MatrixMath.printMatrix(thedata3.getData());
        }
        else
        {
            //This branch reports the variable merge; the message used to name the sample merge by mistake.
            System.out.println("####>> FAILED TO MERGE AS VARIABLE");
        }

        thedata3 = new DataSet(thedata);
        if(thedata3.mergeDataSetsAsSamples(thedata2))
        {
            System.out.println("********************  MERGE AS SAMPLE: *******************");
            MatrixMath.printMatrix(thedata3.getData());
        }
        else
        {
            System.out.println("####>> FAILED TO MERGE AS SAMPLE");
        }

        thedata3.removeVariable(2);
        System.out.println("********************  LAST VARIABLE REMOVED FROM MERGE AS SAMPLE:   (RAW DATA BELOW) *******************");
        MatrixMath.printMatrix(thedata3.getDataTable().getData());

        thedata3.removeSample(1);
        System.out.println("********************  SECOND SAMPLE REMOVED FROM MERGE AS SAMPLE:   (RAW DATA BELOW) *******************");
        MatrixMath.printMatrix(thedata3.getDataTable().getData());

        thedata3.changeMaximumNumberOfSamples(12);
        System.out.println("********************  Increased MAXIMUM number of samples to 12:  (RAW DATA BELOW) *******************");
        MatrixMath.printMatrix(thedata3.getDataTable().getData());

        final DataSet intersect = thedata.intersection(thedata2);
        System.out.println("********************  INTERSECTION: *******************  (RAW DATA BELOW) ");
        MatrixMath.printMatrix(intersect.getDataTable().getData());

        final DataSet union = thedata.union(thedata2);
        System.out.println("******************** UNION: ***************************  (RAW DATA BELOW) ");
        MatrixMath.printMatrix(union.getDataTable().getData());

        //Grow the second data set by one sample filled with MISSING.
        thedata2.changeMaximumNumberOfSamples(10);
        thedata2.setSampleSize(6);
        thedata2.makeSampleValuesEqualTo(5, DataSet.MISSING);
        System.out.println("\nThe new sample size is ... " + thedata2.getSampleSize());
        System.out.println("The max size is ... " + thedata2.getMaximumSampleSize());
        MatrixMath.printMatrix(thedata2.getDataTable().getData());

        ///////////////////////// TESTING SORTING /////////////////////////////

        System.out.println("******************** Data To Sort: ***************************  (RAW DATA BELOW) ");
        MatrixMath.printMatrix(thedata2.getDataTable().getData());
        thedata2.sortBy(2);
        System.out.println("******************** Sorted by 2: ***************************  (RAW DATA BELOW) ");
        MatrixMath.printMatrix(thedata2.getDataTable().getData());

        /*
         * thedata.createCDF(0, 1); thedata.resetPtr(); System.out.println("---------------------");
         * System.out.println(" (" + thedata.getCurrentValue(0) + ", " + thedata.getCurrentValue(1) + ")");
         * thedata.next(); System.out.println(" (" + thedata.getCurrentValue(0) + ", " + thedata.getCurrentValue(1) +
         * ")"); thedata.next(); System.out.println(" (" + thedata.getCurrentValue(0) + ", " +
         * thedata.getCurrentValue(1) + ")"); thedata.next(); System.out.println(" (" + thedata.getCurrentValue(0) +
         * ", " + thedata.getCurrentValue(1) + ")"); thedata.next(); System.out.println(" (" +
         * thedata.getCurrentValue(0) + ", " + thedata.getCurrentValue(1) + ")");
         * System.out.println("---------------------\n"); System.out.println("MEAN = " + thedata.mean(0));
         * System.out.println("SUM = " + thedata.sum(0)); System.out.println("SUMSQ = " + thedata.sumSquares(0));
         * System.out.println("SMALLEST = " + thedata.getSmallest(0, thedata.NO_VARIABLE, thedata.MISSING));
         */
    }

    /**
     * Rebuilds this data set without the rows of the underlying table whose first element equals the passed in
     * value. The data table is replaced by a new one sized to exactly the surviving rows, and the sample size and
     * maximum sample size are updated to match.
     * <br>
     * NOTE(review): every row of the underlying table is examined, not just the first {@link #_sampleSize} of them,
     * and each row is added to the new data set as a sample -- this presumably assumes rows are samples and the
     * table is full; confirm against callers.
     * 
     * @param valueToRemove Rows whose first element equals this value are dropped.
     */
    public void removeValue(final double valueToRemove)
    {
        //Collect the surviving rows, fetching the copy of each row only once.
        final List<double[]> keptRows = new ArrayList<double[]>();
        final int rowCount = _dataTable.getNumberOfRows();
        for(int index = 0; index < rowCount; index++)
        {
            final double[] row = _dataTable.getCopyOfRow(index);
            if(row[0] != valueToRemove)
            {
                keptRows.add(row);
            }
        }
        final int counter = keptRows.size();

        //Build the replacement data set sized to exactly the number of surviving rows.
        final DataSet newSet = new DataSet(counter, this.getNumberOfVariables());
        for(final double[] row: keptRows)
        {
            newSet.addSample(row);
        }

        //Swap in the new table.  The maximum size must follow the new, smaller table (as trim() does);
        //otherwise it would still describe the capacity of the table that was just discarded.
        this._dataTable = newSet.getDataTable();
        this._maximumSize = newSet.getMaximumSampleSize();
        _sampleSize = counter;
    }

    /**
     * Trims out empty/unused samples at the end of the dataset by replacing the data table with that of the subset
     * extracted via extractSubset(0, getSampleSize()), and adopting that subset's maximum sample size. If the subset
     * cannot be extracted (null is returned), this data set is left unchanged.
     */
    public void trim()
    {
        final DataSet subset = extractSubset(0, getSampleSize());

        //Many methods of this class signal a problem by returning null; do not replace the table in that case.
        if(subset == null)
        {
            return;
        }

        this._dataTable = subset.getDataTable();
        this._maximumSize = subset.getMaximumSampleSize();
    }

    /**
     * @param other The {@link DataSet} whose data table is compared against this one's. May be null.
     * @return If the {@link #_dataTable} instances in this and the provided {@link DataSet} are equal. This checks the
     *         entire array; it is not limited by {@link #_sampleSize}. Returns false if other is null.
     */
    public boolean areDataTablesCompletelyEqual(final DataSet other)
    {
        //Nothing to compare against; report "not equal" rather than throw.
        if(other == null)
        {
            return false;
        }
        return _dataTable.equals(other.getDataTable());
    }
}
