package ohd.hseb.hefs.mefp.sources.gefs;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;
import java.util.zip.GZIPInputStream;

import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.ParameterType;
import nl.wldelft.util.timeseries.SimpleEquidistantTimeStep;
import nl.wldelft.util.timeseries.TimeSeriesArray;
import nl.wldelft.util.timeseries.TimeSeriesArrays;
import ohd.hseb.hefs.pe.tools.HEFSTools;
import ohd.hseb.hefs.pe.tools.LocationAndDataTypeIdentifier;
import ohd.hseb.hefs.utils.tools.ListTools;
import ohd.hseb.hefs.utils.tsarrays.TimeSeriesArraysTools;
import ohd.hseb.hefs.utils.tsarrays.TimeSeriesEnsemble;
import ohd.hseb.hefs.utils.tsarrays.agg.AggregationTools;
import ohd.hseb.util.misc.HCalendar;
import ohd.hseb.util.misc.SegmentedLine;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

import com.google.common.collect.Lists;

/**
 * This assumes that all units are metric in the files. It also assumes all forecast times are at 0Z hours. This affects
 * the temperature methods primarily, which must compute 24h min/max values from 12-12Z. It also means that only 15 days
 * are available, since canonical events are always computed on a 12Z clock (currently). Note that all time series
 * returned in all cases will still have a 0Z forecast time.
 * 
 * @author Hank.Herr
 */
public abstract class ProcessedGEFSFileTools
{
    private static final Logger LOG = LogManager.getLogger(ProcessedGEFSFileTools.class);

    /**
     * Time step, in hours, assigned to every time series read from a processed file.
     */
    private static final int FILE_TIME_STEP_IN_HOURS = 6;

    /**
     * Number of split positions handed to {@link SegmentedLine}: one for the date field, one for the member index
     * field, and one for each of the 32 value fields (32 values at 6 hours each cover the 8 days held by one file).
     */
    private static final int NUMBER_OF_FIELD_POSITIONS = 34;

    /**
     * Values read from a file that are at or below this threshold are stored as {@link Float#NaN}.
     */
    private static final float MISSING_VALUE_THRESHOLD = -90.0f;

    /**
     * Builds the header for one time series read from a processed file.
     * <p>
     * The location id is used for the location id, name, and description; the parameter id is used for both the
     * parameter id and name; the ensemble id is always "GEFS". NOTE(review): the parameter type is set to
     * {@link ParameterType#ACCUMULATIVE} regardless of the data type, including for temperature -- confirm that this
     * is intended.
     * 
     * @param identifier Identifier supplying the location id.
     * @param dateString Forecast time as read from the first field of a line. " 00:00" is appended to it before it
     *            is parsed with the format "CCYYMMDDhh mm:ss" so that minutes and seconds are always zero.
     * @param parameterId Parameter id and name to assign.
     * @param unitStr Unit to assign.
     * @param memberIndex Ensemble member index to assign.
     * @return A new header; the time step is not set here.
     */
    private static DefaultTimeSeriesHeader prepareHeader(final LocationAndDataTypeIdentifier identifier,
                                                         final String dateString,
                                                         final String parameterId,
                                                         final String unitStr,
                                                         final int memberIndex)
    {
        //Making sure minutes and seconds are set to 0.
        final Calendar t0 = HCalendar.convertStringToCalendar(dateString + " 00:00", "CCYYMMDDhh mm:ss");

        final DefaultTimeSeriesHeader editableHeader = new DefaultTimeSeriesHeader();

        editableHeader.setForecastTime(t0.getTimeInMillis());
        editableHeader.setLocationDescription(identifier.getLocationId());
        editableHeader.setLocationId(identifier.getLocationId());
        editableHeader.setLocationName(identifier.getLocationId());

        editableHeader.setParameterId(parameterId);
        editableHeader.setParameterName(parameterId);
        editableHeader.setParameterType(ParameterType.ACCUMULATIVE);
        editableHeader.setUnit(unitStr);

        editableHeader.setEnsembleId("GEFS");
        editableHeader.setEnsembleMemberIndex(memberIndex);

        return editableHeader;
    }

    /**
     * Builds the positions used to split a line into its date field, member index field, and value fields.
     * 
     * @param numberFieldWidth Width of each value field.
     * @return Array in which the first two entries are 10 and 14 and each later entry is the previous entry plus
     *         the value field width.
     */
    private static int[] buildFieldPositions(final int numberFieldWidth)
    {
        final int[] positions = new int[NUMBER_OF_FIELD_POSITIONS];
        positions[0] = 10;
        positions[1] = 14;
        for(int i = 2; i < positions.length; i++)
        {
            positions[i] = positions[i - 1] + numberFieldWidth;
        }
        return positions;
    }

    /**
     * Reads one gzipped processed ASCII forecast file. Each line holds a forecast date, an ensemble member index, and
     * the member's values. A line with member index 1 starts a new ensemble and supplies the forecast date for all
     * lines up to the next member 1 line. Reading stops at the end of the file or at the first blank line.
     * <p>
     * The value in segment {@code i} (the first value is segment 2) is stored at forecast time +
     * {@code startTimeAdjustmentFactor} + {@code (i - 1)} time steps, so the first value is one 6-hour step after the
     * adjusted forecast time. Values that are NaN or at or below -90 are stored as {@link Float#NaN}.
     * 
     * @param identifier Identifier for which to read a file. It must be a temperature identifier (unit "DEGC", value
     *            field width 11) or a precipitation identifier (unit "MM", value field width 12).
     * @param assignedParameterId The parameterId to assign to the time series read.
     * @param gzipForecastFile The file to read.
     * @param startTimeAdjustmentFactor Offset, in milliseconds, added to the forecast time when computing the time
     *            of each value.
     * @param forecastTimesToRead The number of forecast times for which to read reforecasts. Negative value or 0
     *            implies reading all. When positive, at most this many ensembles are returned and reading stops
     *            before the first line of the next ensemble.
     * @return {@link List} of {@link TimeSeriesEnsemble} instances, one per forecast time, in file order.
     * @throws IllegalArgumentException If the identifier is neither precipitation nor temperature.
     * @throws Exception If the file cannot be read, a member index cannot be parsed, or the file does not start with
     *             member 1.
     */
    private static List<TimeSeriesEnsemble> readProcessedASCIIForecastFile(final LocationAndDataTypeIdentifier identifier,
                                                                           final String assignedParameterId,
                                                                           final File gzipForecastFile,
                                                                           final long startTimeAdjustmentFactor,
                                                                           final int forecastTimesToRead) throws Exception
    {
        //Data type specific information to be read in.
        final String assignedUnitStr;
        final int numberFieldWidth;
        if(identifier.isTemperatureDataType())
        {
            assignedUnitStr = "DEGC";
            numberFieldWidth = 11;
        }
        else if(identifier.isPrecipitationDataType())
        {
            assignedUnitStr = "MM";
            numberFieldWidth = 12;
        }
        else
        {
            throw new IllegalArgumentException("Identifier is neither precipitation nor temperature: "
                + identifier.buildStringToDisplayInTree());
        }

        final int[] positions = buildFieldPositions(numberFieldWidth);
        final List<TimeSeriesArrays> allTSs = new ArrayList<TimeSeriesArrays>();

        //Open the raw stream first so that it can still be closed if the gzip header turns out to be invalid and
        //the wrapping reader is never created.
        final FileInputStream fileStream = new FileInputStream(gzipForecastFile);
        BufferedReader buffReader = null;
        try
        {
            buffReader = new BufferedReader(new InputStreamReader(new GZIPInputStream(fileStream)));

            TimeSeriesArrays workingTSs = null;
            String workingDateStr = null;
            String line;

            //While I have lines to read; a blank line ends the reading.
            while(((line = buffReader.readLine()) != null) && (!line.trim().isEmpty()))
            {
                final SegmentedLine segLine = new SegmentedLine(line, positions);

                //Skip lines that yield no segments.
                if(segLine.getNumberOfSegments() <= 0)
                {
                    continue;
                }

                //Member index
                final int memberIndex;
                try
                {
                    memberIndex = Integer.parseInt(segLine.getSegment(1).trim());
                }
                catch(final NumberFormatException e)
                {
                    throw new Exception("Second component is not valid member index in this line: '" + line + "'.",
                                        e);
                }

                if(memberIndex == 1)
                {
                    //Member 1 starts a new ensemble.  Stop here, before reading any part of it, if the requested
                    //number of forecast times has already been read, so that only complete ensembles are returned.
                    if((forecastTimesToRead > 0) && (allTSs.size() >= forecastTimesToRead))
                    {
                        break;
                    }

                    //Set the date string, assuming that member 1 is always the first member of each lagged
                    //ensemble listed in the file.
                    workingDateStr = segLine.getSegment(0);
                }
                else if(workingTSs == null)
                {
                    //Without a preceding member 1 line there is neither a forecast date nor an ensemble to add to.
                    throw new Exception("File does not start with member index 1; found member index " + memberIndex
                        + " in this line: '" + line + "'.");
                }

                //Prepare a header, time step, and ts.  Populate it based on segLine.
                final DefaultTimeSeriesHeader header = prepareHeader(identifier,
                                                                     workingDateStr,
                                                                     assignedParameterId,
                                                                     assignedUnitStr,
                                                                     memberIndex);
                header.setTimeStep(SimpleEquidistantTimeStep.getInstance(HCalendar.MILLIS_IN_HR
                    * FILE_TIME_STEP_IN_HOURS));
                final TimeSeriesArray ts = new TimeSeriesArray(header);

                final long firstStepBaseTime = header.getForecastTime() + startTimeAdjustmentFactor;
                final long stepMillis = header.getTimeStep().getStepMillis();
                for(int i = 2; i < segLine.getNumberOfSegments(); i++)
                {
                    final float number = Float.parseFloat(segLine.getSegment(i).trim());
                    final boolean missing = (number <= MISSING_VALUE_THRESHOLD) || Float.isNaN(number);
                    ts.putValue(firstStepBaseTime + (i - 1) * stepMillis, missing ? Float.NaN : number);
                }

                //Member 1 opens a new set of arrays; any other member joins the current set.
                if(memberIndex == 1)
                {
                    workingTSs = new TimeSeriesArrays(ts);
                    allTSs.add(workingTSs);
                }
                else
                {
                    //XXX For the GEFS members used sensitivity study, make this add conditional on the member index
                    //being at or below a threshold value.
                    workingTSs.add(ts);
                }
            }
        }
        finally
        {
            if(buffReader != null)
            {
                buffReader.close();
            }
            else
            {
                fileStream.close();
            }
        }

        //Convert time series arrays to ensembles.
        final List<TimeSeriesEnsemble> results = Lists.newArrayList();
        for(final TimeSeriesArrays ts: allTSs)
        {
            results.add(new TimeSeriesEnsemble(ts));
        }

        return results;
    }

    /**
     * Reads the days 1-8 and days 9-16 files and extends each days 1-8 ensemble with the days 9-16 ensemble that has
     * the same forecast time. For this to work properly, the ensembles in the two files must be in order by forecast
     * time.
     * <p>
     * A days 1-8 ensemble for which no days 9-16 ensemble with the same forecast time exists results in a logged
     * warning. NOTE(review): the warning text says the ensemble is discarded, but it is left in the returned list
     * without being extended -- confirm which behavior is intended.
     * 
     * @param identifier Identifier for which to read time series.
     * @param assignedParameterId The parameter id to assign to the time series.
     * @param day1To8GzipFile The days 1-8 gzipped ASCII file.
     * @param day9To16GzipFile The days 9-16 gzipped ASCII file. Its values are placed 8 days after those of the days
     *            1-8 file.
     * @param forecastsTimesToRead The number of forecast times for which to read in reforecasts from each file.
     *            Negative value or 0 implies reading all.
     * @return A list of {@link TimeSeriesArrays} (casted from {@link TimeSeriesEnsemble} instances), one per ensemble
     *         in the days 1-8 file.
     * @throws Exception For various reasons.
     */
    public static List<TimeSeriesArrays> readProcessedASCIIForecastFilePair(final LocationAndDataTypeIdentifier identifier,
                                                                            final String assignedParameterId,
                                                                            final File day1To8GzipFile,
                                                                            final File day9To16GzipFile,
                                                                            final int forecastsTimesToRead) throws Exception
    {
        final List<TimeSeriesEnsemble> day1To8Ens = readProcessedASCIIForecastFile(identifier,
                                                                                   assignedParameterId,
                                                                                   day1To8GzipFile,
                                                                                   0L,
                                                                                   forecastsTimesToRead);
        final List<TimeSeriesEnsemble> day9To16Ens = readProcessedASCIIForecastFile(identifier,
                                                                                    assignedParameterId,
                                                                                    day9To16GzipFile,
                                                                                    8 * 24 * HCalendar.MILLIS_IN_HR,
                                                                                    forecastsTimesToRead);

        //Combine the series.  The looping is based on day1To8Ens.  For each ensemble found, look for the first
        //ensemble in day9To16Ens that has a forecast time at least equal to the first ensemble's forecast time.
        //If the found ensemble has a forecast time exactly equal to it, then use it to extend the first ensemble.
        //Otherwise, we did not find a match for one of the ensembles, so print a warning.
        final Iterator<TimeSeriesEnsemble> day9To16Iter = day9To16Ens.iterator();

        //Days 9-16 ensemble pulled from the iterator but not yet used.  It is carried over to the next days 1-8
        //ensemble when its forecast time is later than the current one, so that it is not lost.
        TimeSeriesEnsemble pending9To16 = null;
        for(final TimeSeriesEnsemble ensemble1To8: day1To8Ens)
        {
            final long forecastTime = ensemble1To8.getForecastTime();

            //Advance until the forecast time in the 9 to 16 is no longer smaller than the 1 - 8 ensemble, or until
            //there is nothing left to look at.
            while(((pending9To16 == null) || (pending9To16.getForecastTime() < forecastTime))
                && day9To16Iter.hasNext())
            {
                pending9To16 = day9To16Iter.next();
            }

            if((pending9To16 != null) && (pending9To16.getForecastTime() == forecastTime))
            {
                TimeSeriesArraysTools.extendFromOtherTimeSeries(ensemble1To8, pending9To16);

                //The match has been used; the next days 1-8 ensemble must start from a fresh element.
                pending9To16 = null;
            }
            else
            {
                LOG.warn("For day 1 to 8 ensemble with forecast time "
                    + HCalendar.buildDateTimeTZStr(ensemble1To8.getForecastTime())
                    + ", no corresponding ensemble was found for days 9 to 16; discarding ensemble.");
            }
        }

        //Ensemble for days 1 to 8 has now been extended to include 9 - 16.  Return it.
        return ListTools.convertCollection(day1To8Ens, (TimeSeriesArrays)null);
    }

    /**
     * Reads a pair of precipitation files, assigning {@link HEFSTools#FORECAST_PRECIP_PARAMETER_ID} to the time
     * series.
     * 
     * @param identifier Identifier for which to read time series.
     * @param gzipForecastFileDays1To8 The days 1-8 gzipped ASCII file.
     * @param gzipForecastFileDays9To16 The days 9-16 gzipped ASCII file.
     * @param forecastTimesToRead The number of forecast times to read. Negative value or 0 implies reading all.
     * @return {@link #readProcessedASCIIForecastFilePair(LocationAndDataTypeIdentifier, String, File, File, int)}
     *         results.
     * @throws Exception If the files cannot be read.
     */
    public static List<TimeSeriesArrays> readPrecipitationFile(final LocationAndDataTypeIdentifier identifier,
                                                               final File gzipForecastFileDays1To8,
                                                               final File gzipForecastFileDays9To16,
                                                               final int forecastTimesToRead) throws Exception
    {
        return readProcessedASCIIForecastFilePair(identifier,
                                                  HEFSTools.FORECAST_PRECIP_PARAMETER_ID,
                                                  gzipForecastFileDays1To8,
                                                  gzipForecastFileDays9To16,
                                                  forecastTimesToRead);
    }

    /**
     * Shared implementation of {@link #readMaxTemperatureFile(LocationAndDataTypeIdentifier, File, File, int)} and
     * {@link #readMinTemperatureFile(LocationAndDataTypeIdentifier, File, File, int)}. Reads the file pair and
     * aggregates every ensemble over "1 day" periods, with the aggregation starting 12 hours after the forecast time
     * of the ensemble's first member.
     * 
     * @param minimumTemperature If true, {@link HEFSTools#FORECAST_TMIN_PARAMETER_ID} is assigned and
     *            {@code AggregationTools.aggregateToMinimum} is used. Otherwise,
     *            {@link HEFSTools#FORECAST_TMAX_PARAMETER_ID} is assigned and
     *            {@code AggregationTools.aggregateToMaximum} is used.
     * @param identifier Identifier for which to read time series.
     * @param gzipForecastFileDays1To8 The days 1-8 gzipped ASCII file.
     * @param gzipForecastFileDays9To16 The days 9-16 gzipped ASCII file.
     * @param forecastTimesToRead The number of forecast times to read. Negative value or 0 implies reading all.
     * @return The aggregated time series, one {@link TimeSeriesArrays} per ensemble read.
     * @throws Exception If the files cannot be read.
     */
    private static List<TimeSeriesArrays> readDailyTemperatureExtremes(final boolean minimumTemperature,
                                                                       final LocationAndDataTypeIdentifier identifier,
                                                                       final File gzipForecastFileDays1To8,
                                                                       final File gzipForecastFileDays9To16,
                                                                       final int forecastTimesToRead) throws Exception
    {
        final String parameterId = minimumTemperature ? HEFSTools.FORECAST_TMIN_PARAMETER_ID
                                                      : HEFSTools.FORECAST_TMAX_PARAMETER_ID;
        final List<TimeSeriesArrays> ts6h = readProcessedASCIIForecastFilePair(identifier,
                                                                               parameterId,
                                                                               gzipForecastFileDays1To8,
                                                                               gzipForecastFileDays9To16,
                                                                               forecastTimesToRead);
        final List<TimeSeriesArrays> results = Lists.newArrayList();
        for(final TimeSeriesArrays tsIn: ts6h)
        {
            //Agg start time is the first 12Z after the start, which we know is hour 0Z always.
            final long startTime = tsIn.get(0).getHeader().getForecastTime() + 12 * HCalendar.MILLIS_IN_HR;
            if(minimumTemperature)
            {
                results.add(AggregationTools.aggregateToMinimum(tsIn,
                                                                startTime,
                                                                null,
                                                                "1 day",
                                                                null,
                                                                null,
                                                                false,
                                                                false));
            }
            else
            {
                results.add(AggregationTools.aggregateToMaximum(tsIn,
                                                                startTime,
                                                                null,
                                                                "1 day",
                                                                null,
                                                                null,
                                                                false,
                                                                false));
            }
        }
        TimeSeriesArraysTools.setAllParameterIds(results, parameterId);
        return results;
    }

    /**
     * This computes the 24h maximum values starting from 12Z of the first day. It assumes that all time series read in
     * have forecast times that are 0Z. Note that the output time series will still have 0Z forecast times!
     * 
     * @param identifier Identifier for which to read time series.
     * @param gzipForecastFileDays1To8 The days 1-8 gzipped ASCII file.
     * @param gzipForecastFileDays9To16 The days 9-16 gzipped ASCII file.
     * @param forecastTimesToRead The number of forecast times to read. Negative value or 0 implies reading all.
     * @return Calls {@link #readProcessedASCIIForecastFilePair(LocationAndDataTypeIdentifier, String, File, File, int)}.
     *         It takes the results and aggregates all time series to 24h maximums using
     *         {@code AggregationTools.aggregateToMaximum}. All results are assigned
     *         {@link HEFSTools#FORECAST_TMAX_PARAMETER_ID}.
     * @throws Exception If the files cannot be read.
     */
    public static List<TimeSeriesArrays> readMaxTemperatureFile(final LocationAndDataTypeIdentifier identifier,
                                                                final File gzipForecastFileDays1To8,
                                                                final File gzipForecastFileDays9To16,
                                                                final int forecastTimesToRead) throws Exception
    {
        return readDailyTemperatureExtremes(false,
                                            identifier,
                                            gzipForecastFileDays1To8,
                                            gzipForecastFileDays9To16,
                                            forecastTimesToRead);
    }

    /**
     * This computes the 24h minimum values starting from 12Z of the first day. It assumes that all time series read in
     * have forecast times that are 0Z. Note that the output time series will still have 0Z forecast times!
     * 
     * @param identifier Identifier for which to read time series.
     * @param gzipForecastFileDays1To8 The days 1-8 gzipped ASCII file.
     * @param gzipForecastFileDays9To16 The days 9-16 gzipped ASCII file.
     * @param forecastTimesToRead The number of forecast times to read. Negative value or 0 implies reading all.
     * @return Calls {@link #readProcessedASCIIForecastFilePair(LocationAndDataTypeIdentifier, String, File, File, int)}.
     *         It takes the results and aggregates all time series to 24h minimums using
     *         {@code AggregationTools.aggregateToMinimum}. All results are assigned
     *         {@link HEFSTools#FORECAST_TMIN_PARAMETER_ID}.
     * @throws Exception If the files cannot be read.
     */
    public static List<TimeSeriesArrays> readMinTemperatureFile(final LocationAndDataTypeIdentifier identifier,
                                                                final File gzipForecastFileDays1To8,
                                                                final File gzipForecastFileDays9To16,
                                                                final int forecastTimesToRead) throws Exception
    {
        return readDailyTemperatureExtremes(true,
                                            identifier,
                                            gzipForecastFileDays1To8,
                                            gzipForecastFileDays9To16,
                                            forecastTimesToRead);
    }

    /**
     * @param minimumTemperature If true, then minimum data is assumed. Otherwise, maximum data is assumed.
     * @param identifier Identifier for which to read time series.
     * @param gzipForecastFileDays1To8 The days 1-8 gzipped ASCII file.
     * @param gzipForecastFileDays9To16 The days 9-16 gzipped ASCII file.
     * @param forecastTimesToRead The number of forecast times to read. Negative value or 0 implies reading all.
     * @return Read in time series by calling either
     *         {@link #readMaxTemperatureFile(LocationAndDataTypeIdentifier, File, File, int)} or
     *         {@link #readMinTemperatureFile(LocationAndDataTypeIdentifier, File, File, int)}.
     * @throws Exception If the files cannot be read.
     */
    public static List<TimeSeriesArrays> readTemperatureFile(final boolean minimumTemperature,
                                                             final LocationAndDataTypeIdentifier identifier,
                                                             final File gzipForecastFileDays1To8,
                                                             final File gzipForecastFileDays9To16,
                                                             final int forecastTimesToRead) throws Exception
    {
        if(minimumTemperature)
        {
            return readMinTemperatureFile(identifier,
                                          gzipForecastFileDays1To8,
                                          gzipForecastFileDays9To16,
                                          forecastTimesToRead);
        }
        return readMaxTemperatureFile(identifier,
                                      gzipForecastFileDays1To8,
                                      gzipForecastFileDays9To16,
                                      forecastTimesToRead);
    }

}
