
// modul: TAsciiData.h    ver 1.0b (english)                                  //
//                                                                            //
//    multi-threaded "load ASCII data-matrix from file" class                 //
//                                                                            //
//    written by lore at newty dot de   -   home:                  //
//    copyright 1998-2000 by lore                                             //
//                                                                            //
//    FREE for all kinds of non-commercial use including modifying and        //
//    redistribution as long, as this header is not removed!                  //
//    disclaimer: nothing is guaranteed. you use it on your own risk.         //
//                                                                            //
//    please report bugs immediately to bugs at newty dot de !!               //
//                                                                            //

// I. important functions:
//    void load(const char* filename)  loading the data-matrix. this function can only be called once in the
//                                     lifetime of an instance. This is easier to implement and costs only a
//                                     minimum of additional resources.
//    void save(const char* filename)  saving data-matrix to file
//    inline int nDS()                 query #rows and #columns. both are inline functions, i.e. always use
//    inline int dim()                 them and do not copy the values to a variable of your own!
//    const float* getRow(const int i) query of the i'th row
//    void calc()                      column-wise calculate statistical values which can be queried using
//                                     getMeanVec(), getMinVec(), getMaxVec(), getRangeVec() and getDevVec().
//    void randomize(const int _n)     interchanging two random rows n-times
//    void sort(const int col)         sort rows according to the values of the specified column 'col'
// II. annotations:
//    *  whitespace and comments are always ignored when reading a file. comments must start at the beginning
//       of a line. the comment characters are '%', '#' and '*'. they can be changed by editing the
//       function 'bool IsComment(const char c)' in fileUtil.cpp
//    *  load/save and calc run in a thread when a parent, font and resource id are supplied in the constructor.
//       the resource id must correspond to a dialog resource definition in the resource file. you may say
//       that it is no "real" multithreading, because the main thread is hanging in a modal dialog while the
//       additional thread is loading/saving or calculating. you can easily change it by executing the
//       thread-control dialog non-modal. but beware: you'll have to do synchronization, because when loading,
//       for example, you have to ensure that no functionality which needs the loaded data-object is called
//       from the main thread. have fun and enjoy multithreading side-effects :-))
//    *  important: the destructor is private, i.e. you have to call release() instead! (see IV. to know more)
//    *  important: it is the philosophy to throw an exception in case of errors or non-permissibility of
//       an action even if it would be possible to perform a default or neutral action!
//    *  exceptions are of type 'TErrText'. this is a struct only containing a character-array. see its
//       typedef below.
// III. time-series functionality: (if you don't want to use this, you should ignore it)
// a) what it is:
//    imagine you've got a vector (matrix with one single column) and the values represent a variable y
//    measured over time. then, at every time step k, you can try to forecast the value y(k) regarding only
//    the past 'depth' ones y(k-1), y(k-2) ... y(k-depth).
//    for example a vector is given as:
//    time  |  value
//    -----------------          y   |
//      0   |    1.2                 |
//      1   |    3                 7-|              *
//      2   |    4.1               6-|
//      3   |    4.67              5-|          *     *         *
//      4   |    7.2               4-|       *           *  *
//      5   |    4.8               3-|    *
//      6   |    4.0               2-|
//      7   |    4.5               1-| *
//      8   |    5.02              --|------------------------------>
//                                   | 0  1  2  3  4  5  6  7  8         t
//    now we decide to use a depth of 2, i.e. we always want to predict the current value y(k) using the two
//    past ones y(k-1) and y(k-2). thus we work with the matrix:
//                                                                      4.1   3     1.2
//                                                                      4.67  4.1   3
//                                                                      7.2   4.67  4.1
//                                                                      4.8   7.2   4.67
//                                                                      4.0   4.8   7.2
//                                                                      4.5   4.0   4.8
//                                                                      5.02  4.5   4.0
//    the time-series functionality now just keeps the vector in memory but provides you access to
//    the data as if you had the matrix. you use the same access functions, thus you don't need to bother
//    whether the matrix is really or only virtually stored in memory.
// b)    how it is done
//       you always access the data-matrix using a row-map, i.e. a one-dimensional field which contains the
//       pointers to the different rows. now, when a vector is converted to a time-series, it is copied in
//       inverse order to a float-field called 'series'. then the row-pointers are cross-filled with the addresses
//       of the entries of the float-field 'series'.
//                         "float** data"           "float* series"
//                         --------------           ---------------
//                                _
//       data            ------> |_| ----         --->   5.02     (not all pointers are plotted)
//                               |_| ----|--      |      4.5
//       1. row          ------> |_| ----|--|--   |      4.0
//       2. row          ------> |_|   __|__|__|__|      4.8
//         .                     |_|  |  |  |  |         7.2
//         .                     |_|  |  |  |  |         4.67
//         .                     |_|  |  |  |  ------>   4.1
//         .                     |_|  |  |  --------->   3
//       nDS-depth´s row ------> |_| _|  ------------>   1.2
//       annotations and implications:
//       * when you only want to keep a vector in memory there is a memory OVERHEAD of factor 2!!
//       * you access a row 'row' by just taking 'data[row+depth]'. thus, once copied to the series-vector, you
//         can change the depth by just changing a variable's value.
//         note: you may have to recalculate the statistical values using calc().
//       * you've got 'depth' rows less than the vector has. therefore a virtual number of rows is used.
//       * sorting and randomizing must not alter more than the virtual number of rows
//       * normalization is only permitted when you have got the vector, i.e. depth is zero
// IV. why is the destructor private? what's the sense of the reference-counter 'ref'? what can i use Lock() for?
//       well, actually there is no sense in it. not in the implementation you've got. i took my implementation,
//       reduced it to the understandable basics and translated the comments to english. now i'll try to give you
//       an idea of what you can do with these mechanisms: imagine you want to do some calculations with the data in
//       the background. in the meantime some other (main-thread) processing wants to change the order of the rows or
//       even deletes the object. i solved this problem by adding a member-function which provides the pointers to all rows
//       in the current order. at the beginning of the background-processing this function is called. then the
//       main-thread processing can change the order as it likes. however, standardization or changing the depth of
//       the time-series is forbidden, and therefore the object can be locked with the function Lock().
//       now to the reference-counter: when the background-process gets the pointers it needs from the member-function,
//       this counter is incremented automatically. when the background-process is finished it calls release(). thus you
//       can delete the data-object in the main thread by calling release() without considering whether some other processing
//       still needs the data. the destruction of the data-object is prevented as long as it is needed. this mechanism is
//       called 'counted objects'. windows uses it for its handles, for example :-)
//  V. the viv's ('viv' = very important variable)
//    int     _nDS      - real number of rows
//    int     _dim      - real number of columns
//    int     vir_nDS   - virtual number of rows    (these values are like the real ones if you don't use time-series)
//    int     vir_dim   - virtual number of columns
//    float** data      - pointer to data-matrix
//    float*  series    - the vector in inverse order when used as time-series
//    int*    order     - original order of rows
//    int     ref       - object reference counter

#ifndef _DATA_H
#define _DATA_H

#include <owl\owlpch.h>

#include "threadCtrlDlg.h"

// Exception type used by this module. A thrown object is copied, and the
// default copy of this struct is a byte-wise copy of the whole character
// array, which is exactly what is wanted here. Throwing a character array
// directly does not work, because only the pointer would be copied.
typedef struct
{
   char szErrText[256];   // error message text
} TErrText;

// definition of class TAsciiData
// Holds a data-matrix loaded from an ASCII file and gives row/entry access to it.
// A single vector can also be presented as a time-series matrix of a chosen depth.
// Instances are not destroyed directly: call release() instead of the destructor.
// NOTE(review): the opening brace, the access specifiers and the closing brace of
// this class are not visible in this listing - confirm them against the complete file.
class TAsciiData

   // -> parent window and font (optional) - if supplied, the load/save/calc routines run multi-threaded
   TWindow* pParent;
   TFont*   pFont;
   TResId   resId; // identifier must correspond to a dialog resource definition in the resource file

   mutable int ref;                                // object reference counter
   mutable TThreadCtrlDlg*    pCtrlDlg;            // -> thread-control dialog

   // thread functions for load/save of data and calculation of statistical values.
   // they receive the instance as an untyped pointer and are friends so they can reach its members.
   friend void calc (void* _pMyself);
   friend void load (void* _pMyself);
   friend void save (void* _pMyself);

   // error object: if an exception occurs and is caught in the thread functions, err.szErrText is
   // used to signal it when execution goes on in the main thread. the problem is that you should
   // catch an exception in the thread where you threw it.
   mutable TErrText  err;

   mutable ofstream     outfile;    // output stream for save()
   ifstream             infile;     // input  stream for load()

   // state enumeration: the processing phase the object is currently in
   mutable enum TStateEnum {startup, detectCols, detectRows, reading, conv2timeSeries, calcI, calcII, saving} state;

   void        setState(TStateEnum _state);// wrapper for state changes which calls actState()
   friend void actState(void* _pMyself);   // update the displayed state, used directly or as a callback function

   bool  updated;                          // flag: thread-control dialog has been updated after a state change

 // data-matrix
   void allocateMemory();                  // allocate memory for data-matrix

   float*   series;                        // time-series values; IsTimeSeries() tests this pointer
   float**  data;                          //-> data-matrix (row pointers; accessors index it with row+_depth)
   int      _dim;                          // #columns
   int      _nDS;                          // #rows
   int      vir_nDS;                       // # virtual rows    (returned by nDS())
   int      vir_dim;                       // # virtual columns (returned by dim())
   int      _depth;                        // depth when interpreted as time-series
   int      allocated_nDS;                 // #rows for which memory was allocated (only needed for destruction)

   mutable int*   order;                   // original order of the rows; IsRandom() tests this pointer

   // flags
   mutable bool   _IsSorted;               // reported by IsSorted()
   mutable bool   _IsLocked;               // set by Lock(), cleared by Unlock(), reported by IsLocked()
   bool           _loaded;                 // used to prevent a second call of load()
   mutable bool   _consistentStatValues;   // reported (together with mean != NULL) by consistentStatValues()

   // -> calculated statistical values
   mutable float*    mean;
   mutable float*    min;
   mutable float*    max;
   mutable float*    range;
   mutable float*    invRange;            // inverse of range, calculated once for better performance
   mutable float*    dev;
   mutable bool*     maxEqualsMin;        // NOTE(review): presumably one flag per column, see maxEqual2Min() - confirm

   ~TAsciiData(); // private destructor! call release() to delete an instance!

   // constructor and wrapper to delete an instance.
   // all constructor arguments are optional; see the note on pParent/pFont/resId above.
   TAsciiData(TWindow* _pParent =NULL, TFont* _pFont =NULL, TResId _resId =0 );
   void release() const;

   void load(const char* fileName);                // loading data-matrix from file
   void save(const char* fileName) const;          // saving data-matrix to file
   void calc() const;                              // calculate statistical values

   void randomize(const int _n)  const;            // exchange '_n' times two random rows
   void reorder()                const;            // restore original order of rows
   void sort(const int col)      const;            // sort all rows regarding column 'col'

   // standardize column 'col' or all columns (default, col = -1)
   // NOTE(review): the meaning of 'zero2One' is not visible in this header - confirm in the implementation
   void standardize(const bool zero2One, const int col =-1);

  // functions to access the data-matrix. 'row' is offset by _depth; indices are not range-checked here.
   inline         int      dim()                                     const { return vir_dim;                }; // # columns
   inline         int      nDS()                                     const { return vir_nDS;                }; // # rows
   inline const   float*   getRow   (const int row)                  const { return data[row+_depth];       }; // -> row
   inline         float    getEntry (const int row, const int col)   const { return data[row+_depth][col];  };

  // functions to access the vectors with the statistical, column-wise calculated, values.
  // calc() must be called before. permissibility is not checked!
   inline const float* getMeanVec()    const {  return mean;      };
   inline const float* getMinVec()     const {  return min;       };
   inline const float* getMaxVec()     const {  return max;       };
   inline const float* getRangeVec()   const {  return range;     };
   inline const float* getInvRangeVec()const {  return invRange;  };    // inverse of range
   inline const float* getDevVec()     const {  return dev;       };    // deviation

  // query flags/states ...
   inline bool maxEqual2Min(const int column) const;  // declared inline; the definition is not in this listing
   inline bool consistentStatValues()  const { return (_consistentStatValues && mean != NULL);  };
   inline bool IsSorted()              const { return _IsSorted;  };
   inline bool IsRandom()              const { return (order);    };  // true when 'order' is non-NULL

   // functions to lock/unlock and query the current lock state (they only set/read the _IsLocked flag)
   void Lock()       const { _IsLocked=true;    };
   void Unlock()     const { _IsLocked=false;   };
   bool IsLocked()   const { return _IsLocked;  };

  // functions for time-series functionality
   void        set_time_series(const int __depth);
   inline bool IsTimeSeries()const { return (series); };   // true when 'series' is non-NULL
   inline int  depth()       const { return _depth;   }; // depth when interpreted as time-series
   inline int  getMaxDepth() const { return _nDS-2;   }; // biggest possible depth of time-series

   // NOTE(review): which members this critical section protects is not visible in this header - confirm
   mutable CRITICAL_SECTION synchro;

