/*
 * Created on Jan 8, 2008 To change the template for this generated file go to Window>Preferences>Java>Code
 * Generation>Code and Comments
 */
package ohd.hseb.util.fews.ensmodels.climatology;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.TimeZone;

import ohd.hseb.measurement.Measurement;
import ohd.hseb.measurement.MeasuringUnit;
import ohd.hseb.util.Logger;
import ohd.hseb.util.data.DataSet;
import ohd.hseb.util.data.DatacardData;
import ohd.hseb.util.data.DatacardDataException;
import ohd.hseb.util.fews.Diagnostics;
import ohd.hseb.util.fews.FewsAdapterDAO;
import ohd.hseb.util.fews.FewsRegularTimeSeries;
import ohd.hseb.util.fews.ensmodels.FewsEnsemble;
import ohd.hseb.util.misc.HCalendar;

/**
 * This class reads in a datacard format file via a DatacardData object, and translates it into FewsRegularTimeSeries
 * objects based on specified time series start and end times. <br>
 * <br>
 * When dealing with leap days, it works as follows:<br>
 * <br>
 * 1. If the forecast window includes a leap day (2/29) but the year from the datacard file does not, then 3/1 in the
 * datacard file year will be mapped to 2/29, 3/2 will be mapped to 3/1, etc. <br>
 * 2. If the forecast window does NOT include a leap day, but the year from the datacard file does, then 2/29 in the
 * datacard window will be mapped to 3/1, 3/1 to 3/2, etc.<br>
 * <br>
 * In both cases, the data may appear to be shifted by 1 day, but remember that it is so because of leap day
 * complications.<br>
 * 
 * @author hank
 */
public class DatacardTimeSeriesGenerator
{

    /**
     * The DatacardData read in during construction (or handed in already loaded).
     */
    private final DatacardData _data;

    /**
     * Reads the named datacard file and keeps the resulting DatacardData as the data source. Progress messages are
     * written to standard out.
     * 
     * @param fileName The name of the datacard file that will serve as a source of data.
     * @param fileTimeZone The time zone to assume for the file. If null, use GMT.
     * @throws DatacardDataException if the DatacardData constructor fails for the given file.
     */
    public DatacardTimeSeriesGenerator(final String fileName, String fileTimeZone) throws DatacardDataException
    {
        //Apply the default BEFORE reporting, so that the message shows the zone that is actually used
        //instead of "null".
        if(fileTimeZone == null)
        {
            fileTimeZone = "GMT";
        }
        System.out.println("Reading in file " + fileName + " assuming time zone " + fileTimeZone);
        _data = new DatacardData(fileName, fileTimeZone);
        System.out.println("  Found " + _data.getSampleSize() + " many values within the datacard file.");
    }

    /**
     * @param data The DatacardData to use, which has already been read in.
     */
    public DatacardTimeSeriesGenerator(final DatacardData data)
    {
        _data = data;
    }

    /**
     * Walks forward through the first usable year of data, one datacard time step at a time, until a time is reached
     * for which no sample is found, and then maps the last time that had a sample back onto the forecast time frame.
     * 
     * @param targetForecastStartTime The start time of the forecast we wish to create. It is not modified.
     * @return Returns the end time that should be used given the targetForecastStartTime and assuming each year of data
     *         is a new time series. The result is adjusted by one day when exactly one of (the data walked, the
     *         forecast window) contains a leap day.
     */
    public Calendar computeLastDateWithValuesAssumingMultipleTimeSeries(final Calendar targetForecastStartTime)
    {
        final int firstDataYear = computeFirstYear(targetForecastStartTime);
        final Calendar lastDate = (Calendar)targetForecastStartTime.clone();
        lastDate.set(Calendar.YEAR, firstDataYear);

        //Step forward while samples can still be found, remembering if a Feb 29 was landed upon.
        boolean leapDayPassed = false;
        while(getData().findSampleIndex(DatacardData.JULIAN_HOUR, lastDate) != DataSet.MISSING)
        {
            lastDate.add(Calendar.HOUR, getData()._tsdt);
            final boolean onLeapDay = (lastDate.get(Calendar.MONTH) == Calendar.FEBRUARY)
                && (lastDate.get(Calendar.DAY_OF_MONTH) == 29);
            if(onLeapDay)
            {
                leapDayPassed = true;
            }
        }

        //The loop stops one step past the last time that had a sample; back up that one step.
        lastDate.add(Calendar.HOUR, -1 * getData()._tsdt);

        //Express the date relative to the year of the targetForecastStartTime, rolling into the
        //following year if it would otherwise fall before the forecast start.
        lastDate.set(Calendar.YEAR, targetForecastStartTime.get(Calendar.YEAR));
        if(lastDate.before(targetForecastStartTime))
        {
            lastDate.set(Calendar.YEAR, lastDate.get(Calendar.YEAR) + 1);
        }

        //If, when looking at the first year of data, a leap day was passed, but there is no leap day in the
        //target start to end time, then we need to add one day to the end time.
        if(leapDayPassed && !HCalendar.isLeapDayWithinInterval(targetForecastStartTime, lastDate))
        {
            lastDate.add(Calendar.DAY_OF_YEAR, 1);
        }

        //In the opposite case, we need to subtract one day since there is a leap day to account for
        //in the forecast time range.  This is evaluated after (and independently of) the adjustment above.
        if(!leapDayPassed && HCalendar.isLeapDayWithinInterval(targetForecastStartTime, lastDate))
        {
            lastDate.add(Calendar.DAY_OF_YEAR, -1);
        }
        return lastDate;
    }

    /**
     * Returns the first year available for time series construction given the forecast target time series start time.
     * 
     * @param start The target start time of the forecast time series. It is not modified.
     * @return Take the passed in start and replace its year with the year of the first data value within the _data
     *         DatacardData object. If this new date is BEFORE the first data value date, then return the year + 1.
     *         Otherwise, return the year.
     */
    public int computeFirstYear(final Calendar start)
    {
        final int firstDataYear = _data._firstdate.get(Calendar.YEAR);
        final Calendar startInFirstDataYear = (Calendar)start.clone();
        startInFirstDataYear.set(Calendar.YEAR, firstDataYear);

        //A start that lands before the first data value cannot be served from that year.
        if(startInFirstDataYear.before(_data._firstdate))
        {
            return firstDataYear + 1;
        }
        return firstDataYear;
    }

    /**
     * This method returns the number of time series that can be constructed from the datacard file given a forecast
     * time series target start time and end time.
     * 
     * @param start The target forecast time series start time.
     * @param end The target forecast time series end time.
     * @param firstYear the first year for which time series will be constructed. Pass in computeFirstYear(start) to get
     *            the largest possible number of time series.
     * @return The number of time series that can be constructed from the DatacardData object. A year only counts if
     *         its end time is strictly before the last date of the data. If 0 is returned, the requested forecast
     *         time series is too long for the DatacardData.
     */
    public int computeNumberOfTimeSeries(final Calendar start, final Calendar end, final int firstYear)
    {
        //The first candidate end time has a year of firstYear plus the difference between
        //the end year and start year of the target forecast time series.
        final int yearSpan = end.get(Calendar.YEAR) - start.get(Calendar.YEAR);
        final Calendar candidateEndTime = (Calendar)end.clone();
        candidateEndTime.set(Calendar.YEAR, firstYear + yearSpan);

        //Count candidates, advancing a year at a time, until one is too late.
        int count = 0;
        while(candidateEndTime.before(_data._lastdate))
        {
            count++;
            candidateEndTime.add(Calendar.YEAR, 1);
        }
        return count;
    }

    /**
     * Builds a single time series directly from the data between start and end, without any mapping of years.
     * 
     * @param start The start time of the time series to build; also the first data time to use.
     * @param end The end time of the time series to build; also the last data time to use.
     * @param timeStep The time step of the time series to build.
     * @return The time series holding the datacard values found from start through end.
     * @throws DatacardTimeSeriesGeneratorException if no data is found within the window, or if the window cannot be
     *             completely filled from the data. The message is prefixed with "Cannot acquire time series: " for
     *             conversion problems.
     */
    public FewsRegularTimeSeries acquireOneTimeSeries(final Calendar start, final Calendar end, final int timeStep) throws DatacardTimeSeriesGeneratorException
    {
        final Calendar timeSeriesStartTimeInGMT = HCalendar.computeCalendarFromMilliseconds(start.getTimeInMillis());

        //No buffer is applied here: exactly the requested window is extracted.  The window bounds are
        //derived from the GMT calendars, matching acquireTimeSeries(...) and the GMT start calendar
        //that the conversion below walks from.
        final DataSet timeSeriesDataSubset = extractDataSubset(start, end, 0);

        //Convert the DataSet into a FewsRegularTimeSeries.  Catch any translator exceptions and
        //prepend the message with context.
        try
        {
            return convertDataSetToRegularTimeSeries(timeSeriesDataSubset,
                                                     start,
                                                     end,
                                                     timeSeriesStartTimeInGMT,
                                                     timeStep);
        }
        catch(final DatacardTimeSeriesGeneratorException dte)
        {
            throw new DatacardTimeSeriesGeneratorException("Cannot acquire time series: " + dte.getMessage());
        }
    }

    /**
     * @param start The start time of the target forecast window. The time zone is important, as it will be used to
     *            adjust the datacard data julian hours into the target forecast window time zone.
     * @param end The end time of the target forecast window. The time zone is important, as it will be used to adjust
     *            the datacard data julian hours into the target forecast window time zone.
     * @param firstYear The first year from which to acquire data (4-digit year, of course). Use computeFirstYear(start)
     *            in order to determine what year to use to maximize the number of time series acquired.
     * @param lastYear The last year (inclusive) for which to acquire data. Use firstYear +
     *            computeNumberOfTimeSeries(...) - 1 to maximize the number of time series acquired.
     * @param timeStep The time step of the time series to build.
     * @return Ensemble of FewsRegularTimeSeries. Each time series will have a start time of start, an end time of end,
     *         and a time step of timeStep. The ensemble id will be set to "datacard", and the member index of each
     *         time series is the year its data was taken from. If lastYear is before firstYear the ensemble is empty.
     * @throws DatacardTimeSeriesGeneratorException if the combination of dates and years yield time series with missing
     *             or no data. This method will not allow for missing time series data.
     */
    public FewsEnsemble acquireTimeSeries(final Calendar start,
                                          final Calendar end,
                                          final int firstYear,
                                          final int lastYear,
                                          final int timeStep) throws DatacardTimeSeriesGeneratorException
    {
        final FewsEnsemble resultingTimeSeries = new FewsEnsemble();

        //Difference in years between the target forecast start and end; the same for every member.
        final int yearSpan = end.get(Calendar.YEAR) - start.get(Calendar.YEAR);

        for(int workingYear = firstYear; workingYear <= lastYear; workingYear++)
        {
            //Set the raw data time series start time and end time.  The year on the end time is
            //the same as the start time plus the difference in years between the passed in start
            //and end times (target forecast time series times).
            final Calendar tsStartTime = (Calendar)start.clone();
            tsStartTime.set(Calendar.YEAR, workingYear);
            final Calendar tsEndTime = (Calendar)end.clone();
            tsEndTime.set(Calendar.YEAR, workingYear + yearSpan);

            //I want to be guaranteed of being able to handle a forecast target window that
            //includes a leap day.  To do so, I need to add at least 1 day to the end time, so
            //that I always have an extra day's worth of data in case it is needed.  I've added 4
            //just in case.
            tsEndTime.add(Calendar.DAY_OF_YEAR, 4);

            //The DatacardData JULIAN_HOUR column is NEVER built using daylight time; it is always in
            //standard time regardless of the time of year.  The data is therefore addressed through
            //GMT calendars built from the millisecond values.
            final Calendar timeSeriesStartTimeInGMT = HCalendar.computeCalendarFromMilliseconds(tsStartTime.getTimeInMillis());

            //Buffer of timeStep is used to make sure I get enough data.
            final DataSet timeSeriesDataSubset = extractDataSubset(tsStartTime, tsEndTime, timeStep);

            //Convert the DataSet into a FewsRegularTimeSeries.  Catch any translator exceptions and
            //prepend the message with the year.
            try
            {
                final FewsRegularTimeSeries ts = convertDataSetToRegularTimeSeries(timeSeriesDataSubset,
                                                                                   start,
                                                                                   end,
                                                                                   timeSeriesStartTimeInGMT,
                                                                                   timeStep);
                //Assign year to ts here before adding to resultingTimeSeries.
                ts.setEnsembleMemberIndex(workingYear);
                resultingTimeSeries.add(ts);
            }
            catch(final DatacardTimeSeriesGeneratorException dte)
            {
                throw new DatacardTimeSeriesGeneratorException("For year " + workingYear + ": " + dte.getMessage());
            }
        }

        //Set the ensemble parameters.
        resultingTimeSeries.setEnsembleId("datacard");
        //TODO resultingTimeSeries.synchronizeMemberIndexToListOrder();

        return resultingTimeSeries;
    }

    /**
     * Extracts from _data the samples whose julian hour lies within the given window (inclusive on both ends).
     * 
     * @param windowStart The first time for which data is wanted.
     * @param windowEnd The last time for which data is wanted.
     * @param endBufferInHours Number of hours added to the end julian hour of the window; use 0 for no buffer.
     * @return The extracted subset; never null.
     * @throws DatacardTimeSeriesGeneratorException if extractSubset returns null for the window.
     */
    private DataSet extractDataSubset(final Calendar windowStart, final Calendar windowEnd, final int endBufferInHours) throws DatacardTimeSeriesGeneratorException
    {
        //NOTE(review): computeCalendarFromMilliseconds is taken to yield a GMT calendar, as the
        //variable naming throughout this class indicates -- confirm against HCalendar.
        final Calendar windowStartInGMT = HCalendar.computeCalendarFromMilliseconds(windowStart.getTimeInMillis());
        final Calendar windowEndInGMT = HCalendar.computeCalendarFromMilliseconds(windowEnd.getTimeInMillis());

        final int startJhr = HCalendar.computeJulianHourFromCalendar(windowStartInGMT, false);
        final int endJhr = HCalendar.computeJulianHourFromCalendar(windowEndInGMT, false);
        final DataSet subset = _data.extractSubset(DatacardData.JULIAN_HOUR,
                                                   DataSet.GREATER_THAN_OR_EQUAL_TO,
                                                   startJhr,
                                                   DataSet.AND,
                                                   DataSet.LESS_THAN_OR_EQUAL_TO,
                                                   (endJhr + endBufferInHours));

        //If the time series data subset is null, throw an exception.
        if(subset == null)
        {
            throw new DatacardTimeSeriesGeneratorException("Could not find any data in datacard file within "
                + HCalendar.buildDateTimeTZStr(windowStart) + " and " + HCalendar.buildDateTimeTZStr(windowEnd));
        }
        return subset;
    }

    /**
     * Method attempts to build a FewsRegularTimeSeries using the passed in DataSet, which is assumed to have two
     * columns: 0 is julian hour, 1 is value. The start time of the time series to build is given, as is the end time.
     * The time step is also provided. For every target time from start through end, the raw sample closest in julian
     * hour to the corresponding raw data time is used; no raw sample may be used twice.
     * 
     * @param rawTSData Cannot be null! Its sample pointer is reset and moved by this method.
     * @param start The target forecast start time.
     * @param end The target forecast end time.
     * @param rawDataStartTimeInGMT The start time for the time series to use with the given rawTSData.
     * @param timeStep The time series time step.
     * @return The populated time series.
     * @throws DatacardTimeSeriesGeneratorException if rawTSData has no usable sample, runs out of samples before end
     *             is reached, or if the same raw sample would have to be used for two target times.
     */
    private FewsRegularTimeSeries convertDataSetToRegularTimeSeries(final DataSet rawTSData,
                                                                    final Calendar start,
                                                                    final Calendar end,
                                                                    final Calendar rawDataStartTimeInGMT,
                                                                    final int timeStep) throws DatacardTimeSeriesGeneratorException
    {
        //The unit depends only on the datacard header, so determine it once (may be null if unrecognized).
        final MeasuringUnit unit = determineMeasuringUnit();

        //Initialize the ts.
        final FewsRegularTimeSeries fewsTS = new FewsRegularTimeSeries(start.getTimeInMillis(),
                                                                       end.getTimeInMillis(),
                                                                       timeStep,
                                                                       unit);

        //Populate the header info for the fewsTS.
        fewsTS.setIntervalInHours(timeStep);
        fewsTS.setName(_data._tsdesc);
        fewsTS.setTimeSeriesType(_data._datatype.trim());
        fewsTS.setLocationId("XXXXX");

        //Loop until we are after end.  workingTargetTSTime tracks the time that will be used
        //to put the value into the FewsRegularTimeSeries.  workingRawTSTimeJhr tracks the
        //corresponding time within the time frame of the raw time series data.
        final Calendar workingTargetTSTime = (Calendar)start.clone();
        int workingRawTSTimeJhr = HCalendar.computeJulianHourFromCalendar(rawDataStartTimeInGMT, false);
        rawTSData.resetPtr();
        boolean reachedTheEndOfAvailableData = false;
        int lastUsedSampleIndex = -1;
        while(!workingTargetTSTime.after(end))
        {
            //If we have reached the end of available data and we are attempting to continue anyway,
            //that means we would have a partial time series, which is not allowed.  The time series
            //under construction is discarded by the throw, so nothing further is written to it.
            if(reachedTheEndOfAvailableData)
            {
                throw new DatacardTimeSeriesGeneratorException("Reached the end of available data without filling out time series.");
            }

            //For the working time, I need to find the julian hour closest to it in the rawTSData.
            //The afterIndex is the index of the first sample, at or after the current sample, with a
            //julian hour greater than or equal to workingRawTSTimeJhr.
            rawTSData.gotoNextSample(DatacardData.JULIAN_HOUR, DataSet.GREATER_THAN_OR_EQUAL_TO, workingRawTSTimeJhr);
            int afterIndex = rawTSData.getCurrentSampleIndex();

            //If afterIndex is equal to the sample size, then we have reached the end of the available
            //data.  Move afterIndex back 1 and allow the program to use that index only once by setting
            //reachedTheEndOfAvailableData to true.
            if(afterIndex >= rawTSData.getSampleSize())
            {
                afterIndex--;
                reachedTheEndOfAvailableData = true;
            }

            //A negative index means there is no sample at all to read (e.g., an empty data set);
            //fail cleanly instead of reading a value at an invalid index.
            if(afterIndex < 0)
            {
                throw new DatacardTimeSeriesGeneratorException("No datacard values are available "
                    + "while searching for time " + HCalendar.buildDateTimeTZStr(workingTargetTSTime) + ".");
            }

            //Closest Time
            //If afterIndex is 0, accept it.  If afterIndex is not zero, check afterIndex and
            //afterIndex - 1 for the closest sample in time to workingRawTSTimeJhr.  If afterIndex - 1
            //is strictly closer, then back up the acceptedSampleIndex and the sample pointer in
            //rawTSData; on a tie, afterIndex is kept.
            int acceptedSampleIndex = afterIndex;
            if(afterIndex > 0)
            {
                if((workingRawTSTimeJhr - (rawTSData.getValue(afterIndex - 1, DatacardData.JULIAN_HOUR))) < ((rawTSData.getValue(afterIndex,
                                                                                                                                 DatacardData.JULIAN_HOUR)) - workingRawTSTimeJhr))
                {
                    acceptedSampleIndex--;
                    rawTSData.gotoSample(acceptedSampleIndex);
                }
            }

            //Check the accepted sample index to make sure we are not using the same one twice.
            if((acceptedSampleIndex == lastUsedSampleIndex) && (lastUsedSampleIndex >= 0))
            {
                final String workingTargetTimeStr = HCalendar.buildDateTimeTZStr(workingTargetTSTime);
                throw new DatacardTimeSeriesGeneratorException("Attempted to use time series value more than once "
                    + "while searching for time " + workingTargetTimeStr + "; "
                    + "make sure time series time step is at least as large as the datacard step, "
                    + "and make sure there are no MISSING required values in datacard.");
            }

            //So acceptedSampleIndex is the sample we want to use.
            final Measurement measurement = new Measurement(rawTSData.getValue(acceptedSampleIndex, DatacardData.VALUE),
                                                            unit);
            fewsTS.setMeasurementByTime(measurement, workingTargetTSTime.getTimeInMillis());
            lastUsedSampleIndex = acceptedSampleIndex;

            //Next...
            workingTargetTSTime.add(Calendar.HOUR_OF_DAY, timeStep);
            workingRawTSTimeJhr += timeStep;
        }

        return fewsTS;
    }

    /**
     * Maps the dimension (_datadim) and units (_dataunits) strings of the datacard data onto a MeasuringUnit. Both
     * strings are trimmed and compared ignoring case.
     * 
     * @return The matching MeasuringUnit, or null if the dimension/units combination is not recognized.
     */
    private MeasuringUnit determineMeasuringUnit()
    {
        final String dimension = _data._datadim.trim();

        if(dimension.equalsIgnoreCase("L"))
        {
            final String units = _data._dataunits.trim();
            if(units.equalsIgnoreCase("IN"))
            {
                return MeasuringUnit.inches;
            }
            if(units.equalsIgnoreCase("FT"))
            {
                return MeasuringUnit.feet;
            }
            if(units.equalsIgnoreCase("CM"))
            {
                return MeasuringUnit.cm;
            }
            if(units.equalsIgnoreCase("M"))
            {
                return MeasuringUnit.meters;
            }
            if(units.equalsIgnoreCase("MM"))
            {
                return MeasuringUnit.mm;
            }
            //TODO This is a workaround!  Degrees should be TEMP not L.
            if(units.equalsIgnoreCase("DEGF"))
            {
                return MeasuringUnit.degreesFahrenheit;
            }
            if(units.equalsIgnoreCase("DEGC"))
            {
                return MeasuringUnit.degreesCelsius;
            }
        }
        else if(dimension.equalsIgnoreCase("L3"))
        {
            final String units = _data._dataunits.trim();
            if(units.equalsIgnoreCase("CFSD"))
            {
                return MeasuringUnit.cfs;
            }
            if(units.equalsIgnoreCase("CMSD"))
            {
                return MeasuringUnit.cms;
            }
        }
        else if(dimension.equalsIgnoreCase("TEMP"))
        {
            final String units = _data._dataunits.trim();
            if(units.equalsIgnoreCase("DEGF"))
            {
                return MeasuringUnit.degreesFahrenheit;
            }
            if(units.equalsIgnoreCase("DEGC"))
            {
                return MeasuringUnit.degreesCelsius;
            }
        }

        //Unrecognized dimension/units combination.
        return null;
    }

    /**
     * @return The DatacardData serving as the source of data for this generator.
     */
    public DatacardData getData()
    {
        return _data;
    }

    /**
     * Command line entry point that converts a datacard file into a FEWS XML file, either as one time series ("one")
     * or as an ensemble with one member per year of data (any other value). Without explicit dates, the first and
     * last dates of the datacard data are used as the window. Any failure is reported via a stack trace, and the
     * collected diagnostics are always printed at the end.
     * 
     * @param args datacard file, time zone of data, output XML file, one/ensemble, and optionally start date, start
     *            hour, end date, end hour.
     */
    public static void main(final String[] args)
    {
        if((args.length != 4) && (args.length != 8))
        {
            System.out.println("USAGE: convDatacardToXML <datacard file> <time zone of data> <output XML file> <one/ensemble> [<start date> <start hour> <end date> <end hour>]");
            System.out.println("\nDates must be of format CCYY-MM-DD.  Hours must be of the format hh:mm:ss");
            return;
        }

        final String datacardFileName = args[0];
        final String timeZone = args[1];
        final String xmlFileName = args[2];
        final boolean one = args[3].equalsIgnoreCase("one");

        final Logger logger = new Diagnostics();

        try
        {
            final DatacardTimeSeriesGenerator generator = new DatacardTimeSeriesGenerator(datacardFileName, timeZone);

            //Default to the full extent of the data; override with the command line window if given.
            Calendar start = generator.getData()._firstdate;
            Calendar end = generator.getData()._lastdate;
            final int timeStep = generator.getData()._tsdt;
            if(args.length == 8)
            {
                start = HCalendar.processDate(args[4] + " " + args[5] + " " + timeZone);
                end = HCalendar.processDate(args[6] + " " + args[7] + " " + timeZone);
            }
            System.out.println("XML Ensemble Time Window: start = " + start.getTime() + " - end " + end.getTime());

            if(one)
            {
                final FewsRegularTimeSeries series = generator.acquireOneTimeSeries(start, end, timeStep);
                final List<FewsRegularTimeSeries> seriesList = new ArrayList<FewsRegularTimeSeries>();
                seriesList.add(series);
                System.out.println("One series found, generating file " + xmlFileName);
                FewsAdapterDAO.writeTimeSeriesToFewsXML(false,
                                                        seriesList,
                                                        xmlFileName,
                                                        logger,
                                                        TimeZone.getTimeZone(timeZone));
            }
            else
            {
                final int firstYear = generator.computeFirstYear(start);
                final int numberOfTS = generator.computeNumberOfTimeSeries(start, end, firstYear);
                final int lastYear = firstYear + numberOfTS - 1;

                final FewsEnsemble ens = generator.acquireTimeSeries(start, end, firstYear, lastYear, timeStep);

                System.out.println("Number of Members Found: " + ens.size());
                FewsAdapterDAO.writeTimeSeriesToFewsXML(false, ens, xmlFileName, logger, TimeZone.getTimeZone(timeZone));
            }
        }
        catch(final Exception e)
        {
            //Top-level boundary of a command line tool: report every failure the same way.
            e.printStackTrace();
        }
        finally
        {
            System.out.println("DIAGNOSTICS:");
            System.out.println(((Diagnostics)logger).getListOfDiagnosticsAsString());
        }

    }

}
