package ohd.hseb.hefs.mefp.tools.canonical;

import java.util.List;

import nl.wldelft.util.timeseries.TimeSeriesArray;
import ohd.hseb.hefs.utils.tools.ParameterId;
import ohd.hseb.hefs.utils.tsarrays.TimeSeriesArrayTools;
import ohd.hseb.hefs.utils.xml.CompositeXMLReader;
import ohd.hseb.hefs.utils.xml.CompositeXMLWriter;
import ohd.hseb.hefs.utils.xml.XMLReadable;
import ohd.hseb.hefs.utils.xml.XMLReader;
import ohd.hseb.hefs.utils.xml.XMLWritable;
import ohd.hseb.hefs.utils.xml.XMLWriter;
import ohd.hseb.hefs.utils.xml.vars.XMLInteger;
import ohd.hseb.util.misc.HCalendar;
import ohd.hseb.util.misc.HString;
import ohd.hseb.util.misc.SegmentedLine;

import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;

/**
 * Defines a canonical event by an event number, start lead period, end lead period, and number of lagged ensembles to
 * employ for computations. It reads and writes events to lines found in the canonical event files used by MEFP. It also
 * computes event values given a time series, t0, and time step. <br/>
 * <br/>
 * No {@link CanonicalEvent} with a start period of < 0 is considered equal with any other event.
 * 
 * @author hank.herr
 */
public class CanonicalEvent implements Comparable<CanonicalEvent>, XMLReadable, XMLWritable
{
    /**
     * The ordered event number relative to either other base or modulation events (or overall, though that is not
     * standard). Counting starts at 1, normally, but can be anything if the event numbers are set via nonstandard
     * means.
     */
    private final XMLInteger _eventNumber = new XMLInteger("number", -1);

    /**
     * Starts counting at 1, as in the first period (T0, T0 + 1 step) is period 1 and records the measurement at T0 + 1
     * step.
     */
    private final XMLInteger _startLeadPeriod = new XMLInteger("startPeriod", -1);

    /**
     * Starts counting at 1.
     */
    private final XMLInteger _endLeadPeriod = new XMLInteger("endPeriod", -1);

    /**
     * Used for CFSv2.
     */
    private final XMLInteger _numberOfLaggedEnsembleMembers = new XMLInteger("numberOfLaggedEnsembleMembers", -1);

    public CanonicalEvent()
    {
    }

    /**
     * For testing purposes
     * 
     * @param num
     * @param start
     * @param stop
     * @param members
     */
    public CanonicalEvent(final int num, final int start, final int stop, final int members)
    {
        setEventNumber(num);
        setStartLeadPeriod(start);
        setEndLeadPeriod(stop);
        setNumberOfLaggedEnsembleMembers(members);
    }

    public CanonicalEvent(final String line) throws Exception
    {
        readFromLine(line);
    }

    public long computeStartTime(final long t0, final int periodStepSizeInHours)
    {
        return t0 + getStartLeadPeriod() * periodStepSizeInHours * HCalendar.MILLIS_IN_HR;
    }

    public long computeEndTime(final long t0, final int periodStepSizeInHours)
    {
        return t0 + getEndLeadPeriod() * periodStepSizeInHours * HCalendar.MILLIS_IN_HR;
    }

    public int computeNumberOfPeriods()
    {
        return getEndLeadPeriod() - getStartLeadPeriod() + 1;
    }

    public int getEventNumber()
    {
        return _eventNumber.get();
    }

    public void setEventNumber(final int eventNumber)
    {
        this._eventNumber.set(eventNumber);
    }

    public int getStartLeadPeriod()
    {
        return _startLeadPeriod.get();
    }

    public void setStartLeadPeriod(final int startLeadPeriod)
    {
        this._startLeadPeriod.set(startLeadPeriod);
    }

    public int getEndLeadPeriod()
    {
        return _endLeadPeriod.get();
    }

    public void setEndLeadPeriod(final int endLeadPeriod)
    {
        this._endLeadPeriod.set(endLeadPeriod);
    }

    public int getNumberOfLaggedEnsembleMembers()
    {
        return _numberOfLaggedEnsembleMembers.get();
    }

    public void setNumberOfLaggedEnsembleMembers(final int numberOfLaggedEnsembleMembers)
    {
        this._numberOfLaggedEnsembleMembers.set(numberOfLaggedEnsembleMembers);
    }

    public int getDuration()
    {
        return getEndLeadPeriod() - getStartLeadPeriod() + 1;
    }

    /**
     * For reading from an Fortran EPP3 prototypes import text file.
     * 
     * @param line
     * @throws Exception
     */
    public void readFromLine(final String line) throws Exception
    {
        final SegmentedLine segline = new SegmentedLine(line, new int[]{8, 16, 24, 32, 34});
        setEventNumber(Integer.parseInt(segline.getSegment(0).trim()));
        setStartLeadPeriod(Integer.parseInt(segline.getSegment(1).trim()));
        setEndLeadPeriod(Integer.parseInt(segline.getSegment(2).trim()));
        setNumberOfLaggedEnsembleMembers(Integer.parseInt(segline.getSegment(4).trim()));
    }

    /**
     * For writing to the Fortran EPP3 prototype
     * 
     * @return Line to put in canonical events file.
     */
    public String writeToLine()
    {
        String result = HString.formatStringToFieldWidth(8, "" + getEventNumber(), true);
        result += HString.formatStringToFieldWidth(8, "" + getStartLeadPeriod(), true);
        result += HString.formatStringToFieldWidth(8, "" + getEndLeadPeriod(), true);
        result += HString.formatStringToFieldWidth(8, "" + getDuration(), true);
        result += HString.formatStringToFieldWidth(2, "" + getNumberOfLaggedEnsembleMembers(), false);
        return result;
    }

    /**
     * @throws Exception If this event is not validly defined.
     */
    public void validate() throws Exception
    {
        if(_endLeadPeriod.get() < _startLeadPeriod.get())
        {
            throw new Exception("Canonical event end time, " + _endLeadPeriod + ", is less than the start time, "
                + _startLeadPeriod + ", which is not allowed.");
        }
        if(_numberOfLaggedEnsembleMembers.get() < 0)
        {
            throw new Exception("The number of lagged ensemble members, " + _numberOfLaggedEnsembleMembers
                + ", is negative, which is not allowed.");
        }
    }

    /**
     * Calls {@link #computeEvent(List, long, int, boolean)} with an returnOnMissing of true.
     * 
     * @param ts The one time series for which to compute the event.
     * @param t0 The time series forecast time. This does not need to match the forecast times of the ensemble members,
     *            which could be lagged and therefore have differing forecast times.
     * @param periodStepSizeInHours The size of each period: 6h for precip, 24h for temp. This parameter is slightly
     *            redundant, since it must be the same as the time step of the ts, but I still like the idea of
     *            including it in order to make sure the caller knows what's going on.
     * @return The canonical event value for the time series. If the return is null, then the desired event's end time
     *         is after the end of the time series, meaning the canonical event cannot be computed.
     */
    public Double computeEvent(final TimeSeriesArray ts, final long t0, final int periodStepSizeInHours)
    {
        return computeEvent(Lists.newArrayList(ts), t0, periodStepSizeInHours, true, false);
    }

    /**
     * The provided ensemble must satisfy some requirements. Specifically, the start time for all time series must not
     * be after the start time of the canonical event, given the T0. This should never be a problem if the provided T0
     * matches the latest forecast time of the ensemble members. Also, ensemble members are skipped if the end time of
     * the member is before the end time of the event. This can happen with lagged ensembles.
     * 
     * @param ensemble The ensemble for which to compute the event.
     * @param t0 The computational forecast time, or 0th time step, from the event steps are determined. This does not
     *            need to match the forecast times of the ensemble members, which could be lagged and therefore have
     *            differing forecast times.
     * @param periodStepSizeInHours The size of each period: 6h for precip, 24h for temp. This parameter is slightly
     *            redundant, since it must be the same as the time step of the ts, but I still like the idea of
     *            including it in order to make sure the caller knows what's going on.
     * @param returnOnMissing If true, then if the ensemble mean ts contains a missing value, i.e. {@link Double#NaN},
     *            for any entry required for this event, a {@link Double#NaN} is returned. If false, then missings are
     *            skipped in the processing for the event. Focusing on the true case, the algorithm is straight forward
     *            if an ensemble mean (one time series) is provided: it stops computing the sum when a missing is found
     *            and then returns. If a lagged ensemble is provided, however, then an ensemble mean is considered
     *            computable if and only if at least one member allows for the event to be computed, including no
     *            missings for the event. So, if it finds a missing for a time series, is skips that time series and
     *            moves on to the next. If all time series are skipped, then it returns a NaN.
     * @param useMemberSizeLimit If true, a number of members equal to {@link #_numberOfLaggedEnsembleMembers} will be
     *            used. Otherwise, all members will be used.
     * @return The canonical event value for the time series. If the return is {@link Double#NaN}, then either one of
     *         the required values was missing and returnOnMissing (above) is true, -OR- all values were missing so no
     *         computation could be done.
     */
    public Double computeEvent(final List<TimeSeriesArray> ensemble,
                               final long t0,
                               final int periodStepSizeInHours,
                               final boolean returnOnMissing,
                               final boolean useMemberSizeLimit)
    {
        //Illegal checks include if the ensemble is empty, if the period steps size does not match the step
        //size of the provided time series.
        if(ensemble.isEmpty())
        {
            throw new IllegalArgumentException("The provided ensemble is empty.");
        }
        if(periodStepSizeInHours * HCalendar.MILLIS_IN_HR > ensemble.get(0).getHeader().getTimeStep().getStepMillis())
        {
            throw new IllegalArgumentException("The specified periodStepSizeInHours, " + periodStepSizeInHours
                + ", is larger than the time step of the time series, "
                + ensemble.get(0).getHeader().getTimeStep().getStepMillis() / HCalendar.MILLIS_IN_HR + ".");
        }

        //Flag indicating if it is a precip time series.
        final boolean precip = ParameterId.of(ensemble.get(0).getHeader()).isPrecipitation();

        //The first measurement used in the computation is that for the _startLeadPeriod.
        //This time is used to identify the index of the first value used in the aggregation,
        //so it should start from the _startLeadPeriod, not _startLeadPeriod -1.
        final long startTime = computeStartTime(t0, periodStepSizeInHours);
        final long endTime = computeEndTime(t0, periodStepSizeInHours);

        float workingTSValue;
        int workingTSIndex;
        long workingTSTime;
        int numPds;
        double sum = 0.0d;
        int count = 0;
        int memberCount = 0;

        //For each ensemble member.  The sum is across all members.
        for(final TimeSeriesArray ts: ensemble)
        {
            //We are done if the time series to process is the getNumberOfLaggedEnsembleMembers member.
            if((useMemberSizeLimit) && (memberCount >= this.getNumberOfLaggedEnsembleMembers()))
            {
                break;
            }
            memberCount++; //increment for the next check.

            //An illegal state exception identifying if the startTime is before one step prior to the time series start time.
            //Though it is possible for a value to apply to steps prior to it (such as monthly CFSv2), it will never apply 
            //more than one step prior to itself.  Hence, that marks an error.
            //
            //I'm not so sure why this merits an error instead of a continue like the others.  Hmmm...
            if(startTime < ts.getStartTime() - ts.getHeader().getTimeStep().getStepMillis())
            {
//XXX Should the below be an error?  It is triggered if the reforecast time series are before the historical data start time.
//For now, I've comment it out.
//                throw new IllegalStateException("The start time for the event, "
//                    + new Date(startTime)
//                    + ", is before the start time of the time series, "
//                    + new Date(ts.getStartTime())
//                    + "."
//                    + " The canonical event start time must be after or on the start time (i.e., first available data) for all provided time series!");
                continue;
            }

            //Skip all missing time series.  This avoids the missing event value issue by bypassing such time series.
            //Its assumed this is a case where the CFSv2 lagged ensemble does not include enough values.  
            if(TimeSeriesArrayTools.isAllMissing(ts))
            {
                continue;
            }

            //Continue to the next time series if the endTime of the event is after the time series end time.
            //This should not happen if this method is only called when appropriate.
            if(endTime > ts.getEndTime())
            {
                continue;
            }

            //A -1 workingTSIndex indicates the first time through the loop.  The workingTSTime is one step
            //before the startTime -- it is NOT necessarily T0 (for example, CFSv2 monthly time series).
            workingTSIndex = ts.firstIndexAfterOrAtTime(startTime) - 1;
            workingTSTime = ts.getTime(workingTSIndex + 1) - ts.getHeader().getTimeStep().getStepMillis();
            workingTSValue = Float.NaN;
            final double startingSum = sum;
            final int startingCount = count;
            for(long time = startTime; time <= endTime; time += periodStepSizeInHours * HCalendar.MILLIS_IN_HR)
            {
                //Compute the working value to use per event substep.  If the time being looked at is after the 
                //workingTSTime, then the working values need to be updated.
                if(time > workingTSTime)
                {
                    //Since the time series step size is not smaller than the canonical event size, incrementing
                    //by one step should guarantee that time <= workingTSTime.
                    workingTSTime += ts.getHeader().getTimeStep().getStepMillis();
                    workingTSIndex++;

                    //If the time series does not possess a value for workingTSTime, then the working value is NaN
                    //and we backup workingTSIndex so that the next time the working time needs updating, the index
                    //is checked again.  This should not happen if the time series are constructed so that all values
                    //are present, even if missing.
                    if(ts.getTime(workingTSIndex) != workingTSTime)
                    {
                        workingTSValue = Float.NaN;
                        workingTSIndex--;
                    }
                    //Otherwise, get the value and scale it for precip by the number of canonical event steps for each
                    //time series step.
                    else
                    {
                        workingTSValue = ts.getValue(workingTSIndex);

                        //Scale the precip value over the number of periods traversed by one time step in the time series.
                        if((precip) && !Float.isNaN(workingTSValue))
                        {
                            numPds = (int)(ts.getHeader().getTimeStep().getStepMillis() / (periodStepSizeInHours * HCalendar.MILLIS_IN_HR));
                            workingTSValue = workingTSValue / numPds;
                        }
                    }
                }

                //If the workingValue is missing, then return missing if returnOnMissing is true.  Otherwise, skip it.
                if(Float.isNaN(workingTSValue))
                {
                    if(returnOnMissing)
                    {
                        //In this case, the user wants this method to return if the event cannot be calculated at all
                        //using the provided ensemble/single ts.  To allow for this to work for both an ensemble (where
                        //the event cannot be computed across all members) and a single time series, the returnOnMissing
                        //reaction will depend on the count being 0.  Hence, if a missing is found for the working TS
                        //for this event, reset the sum and count to what it was before.  See if check below for count == 0.
                        sum = startingSum;
                        count = startingCount;
                        break;
                    }
                }
                else
                {
                    sum += workingTSValue;
                    count++;
                }
            }
        }

//XXX This is the old way and not valid if we have no data at all.
        //If the count is 0, then the event could not be computed for ANY member.  That means that the ensemble mean, which requires
        //at least one member to be valid for the entire event, could not be computed or includes a missing value so that we do what the
        //return on missing flag says to do.
//        if(count == 0)
//        {
//            if(returnOnMissing)
//            {
//                return Double.NaN;
//            }
//        }

        //There is no data to be used, so we need to return a NaN.  Otherwise, we'll divide by zero below... not good.
        if(count == 0)
        {
            return Double.NaN;
        }

// LW   return sum / count;

        //For precipitation, we need the average of the ensemble of the sum over a each member.  If only given one time series,
        //this is simply the sum of that time series.
        if(precip)
        {
            final double averageSumOfEnsembleMember = sum / count
                * ((endTime - startTime) / (periodStepSizeInHours * HCalendar.MILLIS_IN_HR) + 1);
            return averageSumOfEnsembleMember;
// LW            return sum;
        }

        //For temperature, return the step-average over all ensemble members.
        else
        {
            return sum / count;
        }

    }

    @Override
    public int compareTo(final CanonicalEvent o)
    {
        // Note the second one is reversed (o and this) to match the file sorting.
        return ComparisonChain.start()
                              .compare(this.getEndLeadPeriod(), o.getEndLeadPeriod())
                              .compare(o.getStartLeadPeriod(), this.getStartLeadPeriod())
                              .result();
    }

    @Override
    public String toString()
    {
        return "CanonicalEvent(number = " + _eventNumber.get() + ", start = " + _startLeadPeriod.get() + ", end = "
            + _endLeadPeriod.get() + ", numens = " + this._numberOfLaggedEnsembleMembers.get() + ")";
    }

    @Override
    public int hashCode()
    {
        final String hashCodeStr = this._startLeadPeriod + " " + this._endLeadPeriod;
        return hashCodeStr.hashCode();
    }

    @Override
    public boolean equals(final Object obj)
    {
        final CanonicalEvent other = (CanonicalEvent)obj;
        return (this.getStartLeadPeriod() == other.getStartLeadPeriod())
            && (this.getEndLeadPeriod() == other.getEndLeadPeriod());
    }

    public String getXMLTagName()
    {
        return "canonicalEvent";
    }

    @Override
    public XMLWriter getWriter()
    {
        return new CompositeXMLWriter(getXMLTagName(),
                                      _eventNumber,
                                      _startLeadPeriod,
                                      _endLeadPeriod,
                                      _numberOfLaggedEnsembleMembers);
    }

    @Override
    public XMLReader getReader()
    {
        return new CompositeXMLReader(getXMLTagName(),
                                      _eventNumber,
                                      _startLeadPeriod,
                                      _endLeadPeriod,
                                      _numberOfLaggedEnsembleMembers);
    }

    /**
     * The ability to compute the event unit is used in a few places. Since I need to put it somewhere, I'll put it here
     * as a static to be called within {@link CanonicalEvent}.
     * 
     * @param precipitation True for precipitation, false for temperature.
     * @return The number of hours to use for the canonical event units.
     */
    public static final int determineCanonicalEventPeriodUnitInHours(final boolean precipitation)
    {
        if(precipitation)
        {
            return 6;
        }
        else
        {
            return 24;
        }
    }

    /**
     * @return A string of the form (start,end).
     */
    public static String createXMLAttributeString(final CanonicalEvent event)
    {
        return "(" + event.getStartLeadPeriod() + "," + event.getEndLeadPeriod() + ")";
    }

    /**
     * @param str A string matching the format generated by {@link #createXMLAttributeString(CanonicalEvent)}.
     * @return A {@link CanonicalEvent} that only has its start and end period defined. It is assumed the returned value
     *         will be mapped later, somehow, to the true canonical events stored globally that include all fields. This
     *         return should be sufficient for use in mapping and equals.
     */
    public static CanonicalEvent parseXMLAttributeString(final String str)
    {
        final SegmentedLine segLine = new SegmentedLine(str, "(),", SegmentedLine.MODE_NO_EMPTY_SEGS);
        if(segLine.getNumberOfSegments() != 2)
        {
            throw new IllegalArgumentException("Provided attribute string does not contain the two required arguments.");
        }
        final int startPeriod = Integer.parseInt(segLine.getSegment(0));
        final int endPeriod = Integer.parseInt(segLine.getSegment(1));
        return new CanonicalEvent(-1, startPeriod, endPeriod, -1);
    }
}
