package ohd.hseb.util.data;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.ParsePosition;

import ohd.hseb.hefs.utils.tools.StreamTools;
import ohd.hseb.util.misc.SegmentedLine;

/**
 * THIS IS NOT A SUBCLASS OF DataSet!!! This class allows for reading in a DataSet object directly from a file. The file
 * must be a file of data only! Each line is a collection of numbers each of which is separated by a character in
 * {@link #FILE_COLUMN_SEPARATORS} or specified in a call to the constructor. Each row should have an identical number
 * of entries.<br>
 * <br>
 * After reading in data from a file, the DataSet read in can be acquired using the {@link #getData()} method. <br>
 * 
 * @author hank
 */
public class FileData
{

    final static String CLASSNAME = "FileData";

    /**
     * The default growth size for the DataSet. Whenever the max number of samples must be increased, it will be
     * increased by this amount.
     */
    public final static int GROWTH_SIZE = 500;

    /**
     * Valid separators for the numbers within each row. Comma is not included in case a comma is used in the numbers.
     * Call {@link #FileData(String, String, int[], int)} if you need to read in a CSV file and specify separators of
     * ",".
     */
    public final static String FILE_COLUMN_SEPARATORS = " ;|\t";

    /**
     * Stores the read in data. Remains null until the first non-blank line has been processed, unless the columns
     * to read were given to the constructor.
     */
    private DataSet _data;

    /**
     * The data file to read. Set by {@link #readData(String)}.
     */
    private File _dataFile;

    /**
     * The columns to be read, or null for all.
     */
    private int[] _columns;

    /**
     * The initial size of _data. Used whenever {@link #_data} must be created, either in the constructor (columns
     * given) or while processing the first non-blank line.
     */
    private final int _initialSize;

    /**
     * The characters to treat as separators when reading the file. This is never treated as a whole, but, rather, each
     * character within the string is a valid column separator.
     */
    private String _columnSeparators = FILE_COLUMN_SEPARATORS;

    /**
     * Parser used for each number. It is built with the no-argument {@link DecimalFormat} constructor and therefore
     * follows the JVM's default locale (grouping and decimal characters). NOTE(review): confirm whether data files
     * should instead be parsed with a fixed locale.
     */
    private final DecimalFormat _numberParser = new DecimalFormat();

    /**
     * From the file specified by filename, read in the columns specified by the int[] (start counting of columns at 0).
     * Furthermore, setup the initial maximum sample size for the DataSet to be sampleSize. It can grow beyond this size
     * in steps of {@link #GROWTH_SIZE}. <br>
     * <br>
     * Pass in null for the columns array if all data is to be read in.
     * 
     * @param fileName The String file name. Used to initialize the _dataFile attribute.
     * @param columnSeparators The characters used to demarcate columns, in a string. Each character is a separator; the
     *            string is NOT treated as a whole. Pass in "," to read in a CSV file. If null,
     *            {@link #FILE_COLUMN_SEPARATORS} is used.
     * @param columns An array of column indices within the file (counting starts at 0), or null for all columns. The
     *            array is copied.
     * @param sampleSize The initial maximum sample size to allow for.
     * @throws FileDataException thrown by {@link #readData(String)}.
     */
    public FileData(final String fileName, final String columnSeparators, final int[] columns, final int sampleSize) throws FileDataException
    {
        _initialSize = sampleSize;

        //Treat a null separator string as a request for the default separators.
        _columnSeparators = (columnSeparators == null) ? FILE_COLUMN_SEPARATORS : columnSeparators;

        if(columns == null)
        {
            //The DataSet is created lazily once the first line tells us how many columns exist.
            _data = null;
            _columns = null;
        }
        else
        {
            //Copy so that later changes to the caller's array cannot alter which columns are read.
            _columns = columns.clone();
            _data = new DataSet(_initialSize, _columns.length);
        }

        //Read in the data.
        readData(fileName);
    }

    /**
     * Calls {@link #FileData(String, String, int[], int)} passing in {@link #FILE_COLUMN_SEPARATORS} for the column
     * separators.
     */
    public FileData(final String fileName, final int[] columns, final int sampleSize) throws FileDataException
    {
        this(fileName, FILE_COLUMN_SEPARATORS, columns, sampleSize);
    }

    /**
     * Calls {@link #FileData(String, int[], int)} passing in {@link #GROWTH_SIZE} for the sample size.
     */
    public FileData(final String fileName, final int[] columns) throws FileDataException
    {
        this(fileName, columns, GROWTH_SIZE);
    }

    /**
     * Calls {@link #FileData(String, int[])} passing in null for the columns to read so that all columns are read.
     */
    public FileData(final String fileName) throws FileDataException
    {
        this(fileName, null);
    }

    /**
     * Initialize this FileData object based on the passed in line of values, and the given initial size. All columns
     * of the line are read and {@link #FILE_COLUMN_SEPARATORS} is used to split it. No file is associated with the
     * object.
     * 
     * @param initialSize The initial size for this data.
     * @param aline One line of text which is a single row of this data.
     * @throws FileDataException thrown by {@link #readDataLine(String)}.
     */
    public FileData(final int initialSize, final String aline) throws FileDataException
    {
        _initialSize = initialSize;
        _data = null;
        _columns = null;
        readDataLine(aline);
    }

    /**
     * Read in data from the filename given. This method initializes the _dataFile attribute, which stores the filename
     * as a File object. Every line of the file is passed to {@link #readDataLine(String)}; samples are appended to any
     * data already held.
     * 
     * @param filename The file from which to read the data.
     * @throws FileDataException if the file name is null, the file does not exist or cannot be read, an I/O error
     *             occurs, {@link #readDataLine(String)} fails, or no data has been found once the file is exhausted.
     */
    public void readData(final String filename) throws FileDataException
    {
        if(filename == null)
        {
            throw new FileDataException("No file name was provided.");
        }

        //Check for existence and readability.
        _dataFile = new File(filename);
        if(!_dataFile.exists() || !_dataFile.canRead())
        {
            throw new FileDataException("File \"" + filename + "\" either does not exist or is not readable.");
        }

        //Read each line one at a time.  Segment the line and add it to the DataSet.
        BufferedReader bufReader = null;
        try
        {
            bufReader = new BufferedReader(new FileReader(_dataFile));

            //A null line marks the end of the file.
            String aline;
            while((aline = bufReader.readLine()) != null)
            {
                readDataLine(aline);
            }
        }
        catch(final FileNotFoundException e1)
        {
            throw new FileDataException("Failed to open requested file " + filename);
        }
        catch(final IOException ioe)
        {
            throw new FileDataException("I/O Exception occurred while reading file " + filename + ": "
                + ioe.getMessage());
        }
        finally
        {
            //The reader is still null here if the file could not be opened.
            StreamTools.closeStream(bufReader);
        }

        //If _data is still null, return an error.
        if(_data == null)
        {
            throw new FileDataException("No data found in file " + filename);
        }

    }

    /**
     * Process a single line of text. If the DataSet attribute _data is null when this routine is called, it gets
     * initialized here: to the columns chosen through {@link #setColumns(int[])} if any, otherwise to one variable per
     * segment of the current line, in which case all columns are read. Blank (or null) lines are ignored. Every
     * occurrence of the text "nan" is replaced with the missing value "-999" before the line is split.
     * 
     * @param aline The line to process.
     * @throws FileDataException Thrown if a desired column doesn't exist in the line or if a value cannot be read as a
     *             number in its entirety.
     */
    public void readDataLine(final String aline) throws FileDataException
    {
        //Nothing to read from a null line; handle it like a blank line.
        if(aline == null)
        {
            return;
        }

        //First, replace any "nan" strings with missings... "-999".  This is a literal replacement, so no regular
        //expression is needed.
        final String working = aline.replace("nan", "-999");

        //Segment the line.
        final SegmentedLine segline = new SegmentedLine(working, _columnSeparators, SegmentedLine.MODE_NO_EMPTY_SEGS);
        final int segmentCount = segline.getNumberOfSegments();

        //If the line is blank, do nothing.
        if(segmentCount == 0)
        {
            return;
        }

        //Build the DataSet on first use.
        if(_data == null)
        {
            if(_columns == null)
            {
                //No selection was made: use every column found in this line.
                _data = new DataSet(_initialSize, segmentCount);
                initializeColumnsToAll();
            }
            else
            {
                //Columns were selected through setColumns() before any data existed; honor that selection.
                _data = new DataSet(_initialSize, _columns.length);
            }
        }

        //asample will serve as storage for the sample.
        final double[] asample = new double[_data.getNumberOfVariables()];

        //Check on the sample size.  Increase it if needed.
        if(_data.getSampleSize() == _data.getMaximumSampleSize())
        {
            _data.changeMaximumNumberOfSamples(_data.getSampleSize() + FileData.GROWTH_SIZE);
        }

        //Loop through the requested columns.
        for(int i = 0; i < _columns.length; i++)
        {
            final int column = _columns[i];
            if((column < 0) || (column >= segmentCount))
            {
                throw new FileDataException("Column number " + column + " cannot be acquired from line \"" + aline
                    + "\".");
            }

            try
            {
                asample[i] = parseNumber(segline.getSegment(column));
            }
            catch(final Exception e)
            {
                //Kept broad on purpose: the exceptions SegmentedLine may raise are not visible from this file, and
                //any failure here means the value could not be read.
                throw new FileDataException("Badly formatted number in line \"" + aline + "\".");
            }
        }

        _data.addSample(asample);
    }

    /**
     * Convert one segment of a line to a double. The segment is trimmed and handed to {@link #_numberParser}; the
     * result is only accepted if the parser consumed the whole segment, because DecimalFormat otherwise stops quietly
     * at the first character it does not understand (e.g. "12abc" would yield 12 and "1.5e-3" would yield 1.5). When
     * the parser cannot consume everything, {@link Double#parseDouble(String)} is tried as a fallback.
     * 
     * @param token The text of a single column.
     * @return The numeric value of the token.
     * @throws NumberFormatException if neither parser accepts the complete token.
     */
    private double parseNumber(final String token)
    {
        final String text = token.trim();

        final ParsePosition position = new ParsePosition(0);
        final Number parsed = _numberParser.parse(text, position);
        if((parsed != null) && (position.getIndex() == text.length()))
        {
            return parsed.doubleValue();
        }

        return Double.parseDouble(text);
    }

    /**
     * Setup columns for reading all data. This will initialize the _columns attribute to include all columns based on
     * the number of variables within the _data DataSet. If _data is null, _columns is set to null.
     */
    public void initializeColumnsToAll()
    {
        if(_data == null)
        {
            _columns = null;
            return;
        }

        _columns = new int[_data.getNumberOfVariables()];
        for(int i = 0; i < _columns.length; i++)
        {
            _columns[i] = i;
        }
    }

    /**
     * Output the table of data to a file. The file will contain the numbers within each table, with each value
     * followed by two spaces, "  ", and one sample per line in the file.
     * 
     * @param filename The name of the file to write to.
     * @throws FileDataException Thrown if the file name is null, there is no data to write, the file cannot be opened,
     *             or an I/O exception occurs while writing.
     */
    public void outputData(final String filename) throws FileDataException
    {
        if(filename == null)
        {
            throw new FileDataException("No file name was provided.");
        }
        if(_data == null)
        {
            throw new FileDataException("No data is available to write to file " + filename + ".");
        }

        //Try to open the data file for writing.
        BufferedWriter localfile;
        try
        {
            localfile = new BufferedWriter(new FileWriter(filename));
        }
        catch(final IOException e1)
        {
            throw new FileDataException("Failed to open requested file " + filename + ".");
        }

        try
        {
            //Output the data in (member, sample) table format.  One builder is reused for every row.
            final StringBuilder aline = new StringBuilder();
            for(int i = 0; i < _data.getSampleSize(); i++)
            {
                aline.setLength(0);
                for(int j = 0; j < _data.getNumberOfVariables(); j++)
                {
                    aline.append(_data.getValue(i, j)).append("  ");
                }
                localfile.write(aline.toString());
                localfile.newLine();
            }

            //Flush once at the end so that any buffered write failure is reported to the caller.
            localfile.flush();
        }
        catch(final IOException e1)
        {
            throw new FileDataException("Failed to write data to file " + filename + ": " + e1.getMessage());
        }
        finally
        {
            try
            {
                localfile.close();
            }
            catch(final IOException ignored)
            {
                //Either the buffer was already flushed successfully above, or a write failure is already being
                //reported; a failure to close adds nothing the caller can act upon.
            }
        }
    }

    ////////////////////////////////////////////////////
    //Sets
    ////////////////////////////////////////////////////

    /**
     * Change the columns that will be read by subsequent calls to {@link #readDataLine(String)}.
     * 
     * @param columns Array of ints specifying column indices and the order in which to read them. Pass in null to read
     *            all columns. The array is copied.
     * @return false, leaving the current columns untouched, if any index is negative or if more columns are requested
     *         than the existing DataSet has variables; true otherwise.
     */
    public boolean setColumns(final int[] columns)
    {
        //Null means "all columns", matching the meaning of a null column array in the constructors.
        if(columns == null)
        {
            initializeColumnsToAll();
            return true;
        }

        for(final int column: columns)
        {
            if(column < 0)
            {
                return false;
            }
        }

        //A sample only has room for as many values as the DataSet has variables.
        if((_data != null) && (columns.length > _data.getNumberOfVariables()))
        {
            return false;
        }

        _columns = columns.clone();
        return true;
    }

    ////////////////////////////////////////////////////
    //Gets
    ////////////////////////////////////////////////////

    /**
     * @return the DataSet as read in from the file, or null if no data has been read yet.
     */
    public DataSet getData()
    {
        return _data;
    }

    /**
     * @return The File object associated with the file that was read, or null if {@link #readData(String)} has never
     *         been called with a file name.
     */
    public File getDataFile()
    {
        return _dataFile;
    }

    ////////////////////////////////////////////////////
    //Interface/Listeners
    ////////////////////////////////////////////////////

    ////////////////////////////////////////////////////
    //Static
    ////////////////////////////////////////////////////

    /**
     * Command line driver: reads the file named by the first argument, restricted to the column indices given by any
     * remaining arguments (all columns if none are given), and prints the resulting matrix.
     * 
     * @param args The data file followed by zero or more column indices.
     */
    public static void main(final String args[])
    {

        if(args.length < 1)
        {
            System.out.println("Improper arguments.  Proper command: ");
            System.out.println("");
            System.out.println("  <command> <data file> <column1> <column2> ...");
            System.out.println("");
            System.out.println("");
            return;
        }

        //Leave columns null, meaning all columns, unless indices were supplied.
        int[] columns = null;
        if(args.length > 1)
        {
            columns = new int[args.length - 1];
            for(int i = 0; i < columns.length; i++)
            {
                try
                {
                    columns[i] = Integer.parseInt(args[i + 1]);
                }
                catch(final NumberFormatException nfe)
                {
                    System.out.println("Column index #" + i + ", \"" + args[i + 1]
                        + "\", is not an integer!  Aborting!");
                    System.exit(1);
                }
            }
        }

        try
        {
            final FileData data = new FileData(args[0], columns);
            MatrixMath.printMatrix(data.getData().getData());
        }
        catch(final FileDataException fde)
        {
            //Include the reason so the user can tell a missing file from a malformed one.
            System.out.println("Failed to read in data!  " + fde.getMessage());
        }
    }

}
