back
/******************************************************************************/
// modul: TAsciiData.h ver 1.0b (english) //
// //
// multi-threaded "load ASCII data-matrix from file" class //
// //
// written by lore at newty dot de - home: www.newty.de //
// copyright 1998-2000 by lore //
// //
// FREE for all kinds of non-commercial use including modifying and //
// redistribution as long, as this header is not removed! //
// disclaimer: nothing is guaranteed. you use it on your own risk. //
// //
// please report bugs immediately to bugs at newty dot de !! //
// //
/******************************************************************************/
/***********************************************************************************************************/
//
// I. important functions:
// void load(const char* filename) loading the data-matrix. this function can only be called once in the
// lifetime of an instance. This is easier to implement and costs only a
// minimum of additional resources.
//
// void save(const char* filename) saving data-matrix to file
//
// inline int nDS() query #rows and #columns. both are inline functions, i.e. always use
// inline int dim() them and do not copy the values to a variable of your own!
//
// const float* getRow(const int i) query of the i´th row
//
// void calc() column-wise calculate statistical values which can be queried using
// getMeanVec(), getMinVec(), getMaxVec(), getRangeVec() and getDevVec().
//
// void randomize(const int _n) interchanging two random rows n-times
// void sort(const int col) sort rows according to the values of the specified column 'col'
//
//
// II. annotations:
// * whitespaces and comments are always ignored when reading a file. comments must start at the begin
// of a line. the comment characters are '%', '#' and '*'. they can be changed by editing the
// function 'bool IsComment(const char c)' in fileUtil.cpp
// * load/save and calc run in a thread when a parent, font and resource id are supplied in the constructor.
// the resource id must correspond to a dialog resource definition in the resource file. you may say
// that it is no "real" multithreading, cause the main-thread is hanging in a modal dialog while the
// additional thread is loading/saving or calculating. you can easily change it by executing the
// thread-control dialog non-modal. but beware: you´ll have to do synchronization because when loading
// for example you have got to ensure, that no functionality is called from the main-thread which needs
// the loaded data-object. have fun and enjoy multithreading side-effects :-))
// * important: the destructor is private, i.e. you have to call release() instead! (see IV. to know more)
// * important: it is the philosophy to throw an exception in case of errors or non-permissibility of
// an action even if it would be possible to perform a default or neutral action!
// * exceptions are of type 'TErrText'. this is a struct only containing a character-array. see its
// typedef below.
//
//
//
// III. time-series functionality: (if you don´t want to use this, you should ignore it)
//
// a) what it is:
// imagine you´ve got a vector (matrix with one single row) and the values represent a variable y
// measured over time. then, at every time step k, you can try to forecast the value y(k) regarding only
// the past 'depth' ones y(k-1), y(k-2) ... y(k-depth).
//
// for example a vector is given as:
//
// time | value
// ----------------- y |
// 0 | 1.2 |
// 1 | 3 7-| *
// 2 | 4.1 6-|
// 3 | 4.67 5-| * * *
// 4 | 7.2 4-| * * *
// 5 | 4.8 3-| *
// 6 | 4.0 2-|
// 7 | 4.5 1-| *
// 8 | 5.02 --|------------------------------>
// | 0 1 2 3 4 5 6 7 8 t
//
//
// now we decide to use a depth of 2, i.e. we always want to predict the current value y(k) using the two
// past ones y(k-1) and y(k-2). thus we work with the matrix:
// 4.1 3 1.2
// 4.67 4.1 3
// 7.2 4.67 4.1
// 4.8 7.2 4.67
// 4.0 4.8 7.2
// 4.5 4.0 4.8
// 5.02 4.5 4.0
//
// the time-series functionality now just keeps the vector in memory but provides you access to
// the data as if you´d have the matrix. you use the same access-functions, thus you don´t need to bother
// if the matrix is really or virtually stored in your memory.
//
// b) how it is done
// you always access the data-matrix using a row-map, i.e. a one dimensional field which contains the
// pointers to the different rows. now, when a vector is converted to a time-series, it is copied in
// inverse order to a float-field called 'series'. then the row-pointers are cross-filled with the addresses
// of the entries of the float-field 'series'.
//
//
// "float** data" "float* series"
// -------------- ---------------
//
// _
// data ------> |_| ---- ---> 5.02 (not all pointers are plotted)
// |_| ----|-- | 4.5
// 1. row ------> |_| ----|--|-- | 4.0
// 2. row ------> |_| __|__|__|__| 4.8
// . |_| | | | | 7.2
// . |_| | | | | 4.67
// . |_| | | | ------> 4.1
// . |_| | | ---------> 3
// nDS-depth´s row ------> |_| _| ------------> 1.2
//
//
// annotations and implications:
// * when you only want to keep a vector in memory there is a memory OVERHEAD of factor 2!!
// * you access a row 'row' by just taking 'data[row+depth]'. thus once copied to the series-vector you
// can change the depth by just changing a variable's value.
// note: you may have to recalculate the statistical values using calc().
// * you´ve got 'depth' rows less than the vector has. therefore a virtual number of rows is used.
// * sorting and randomizing must not alter more than the virtual number of rows
// * normalization is only permitted when you have got the vector, i.e. depth is zero
//
//
// IV. why is the destructor private? what´s the sense of the reference-counter 'ref'?? what can i use Lock() for???
//
// well, actually there is no sense in it. not in the implementation you´ve got. i took my implementation,
// reduced it to the understandable basics and translated the comments to english. now i´ll try to give you
// an idea of what you can do with these mechanisms: imagine you want to do some calculations with the data in
// the background. in the meanwhile some other (main-thread-processing) wants to change the order of the rows or
// even deletes the object. i solved this problem by adding a member-function which provides the pointers to all rows
// in the actual order. at the begin of the background-processing this function is called. then the
// main-thread-processing can change the order like it wants. however, standardization or changing the depth of
// time-series is forbidden and therefore the object can be locked with the function Lock().
//
// now to the reference-counter: when the background-process gets the pointers he needs from the member-function,
// this counter is incremented automatically. when the background-process is finished he calls release(). thus you
// can delete the data-object in the mainthread by calling release() without considering if some other processing
// still needs the data. the destruction of the data-object is prevented as long as it is needed. this mechanism is
// called 'counted objects'. windows uses it for its handles for example :-)
//
//
//
//
// V. the viv´s ('viv' = very important variable)
//
// int _nDS - real number of rows
// int _dim - real number of columns
// int vir_nDS - virtual number of rows (these values are like the real ones if you don´t use time-series)
// int vir_dim - virtual number of columns
//
// float** data - pointer to data-matrix
// float* series - values of the vector in inverse order (only used for time-series)
// int* order - original order of rows
// int ref - object reference counter
//
#ifndef _DATA_H
#define _DATA_H
#include <owl\owlpch.h>
#include "threadCtrlDlg.h"
// Exception type thrown by this module. The message lives in a fixed-size
// character array inside a struct, so copying the struct copies the text
// itself; a bare character pointer would only have its address copied.
typedef struct
{
    char szErrText[256];   // error message text
} TErrText;
/***********************************************************************************************************/
// definition of class TAsciiData
// Holds a float data-matrix and gives row-wise access to it, either as a real
// matrix or as a time-series view (rows are offset by '_depth').
// Instances cannot be destroyed directly: the destructor is private, call
// release() instead.
class TAsciiData
{
private:
   // -> parent window and font (optional); all three constructor arguments default to NULL/0
   TWindow* pParent;
   TFont*   pFont;
   TResId   resId;                     // identifier of a dialog resource in the resource file

   mutable int             ref;        // object reference counter
   mutable TThreadCtrlDlg* pCtrlDlg;   // -> thread-control dialog

   // thread functions for load/save of data and calculation of statistical values;
   // each receives the instance as an untyped pointer
   friend void calc (void* _pMyself);
   friend void load (void* _pMyself);
   friend void save (void* _pMyself);

   // error object: text of an exception caught inside a thread function, kept
   // here so it can be reported once execution continues in the main thread
   mutable TErrText err;

   mutable ofstream outfile;           // output stream for save()
   ifstream         infile;            // input stream for load()

   // processing state
   mutable enum TStateEnum {startup, detectCols, detectRows, reading, conv2timeSeries, calcI, calcII, saving} state;
   void setState(TStateEnum _state);       // wrapper for state changes
   friend void actState(void* _pMyself);   // refresh displayed state; usable directly or as a callback
   bool updated;                           // flag: thread-control dialog was updated after a state change

   // data-matrix
   void allocateMemory();              // allocate memory for the data-matrix
   float*  series;                     // non-NULL when the object is a time-series (see IsTimeSeries())
   float** data;                       // -> row pointers of the data-matrix
   int _dim;                           // # columns
   int _nDS;                           // # rows
   int vir_nDS;                        // # virtual rows    (returned by nDS())
   int vir_dim;                        // # virtual columns (returned by dim())
   int _depth;                         // depth when interpreted as time-series; offset added to every row index
   int allocated_nDS;                  // # rows for which memory was allocated (only needed for destruction)
   mutable int* order;                 // original order of the rows; non-NULL means "randomized" (see IsRandom())

   // flags
   mutable bool _IsSorted;
   mutable bool _IsLocked;
   bool         _loaded;               // used to prevent a second call of load()
   mutable bool _consistentStatValues;

   // -> column-wise statistical values
   mutable float* mean;
   mutable float* min;
   mutable float* max;
   mutable float* range;
   mutable float* invRange;            // inverse of range, calculated once for better performance
   mutable float* dev;
   mutable bool*  maxEqualsMin;

   ~TAsciiData();                      // private destructor! call release() to delete an instance

public:
   // constructor and wrapper to delete an instance
   TAsciiData(TWindow* _pParent =NULL, TFont* _pFont =NULL, TResId _resId =0 );
   void release() const;

   void load(const char* fileName);          // load data-matrix from file
   void save(const char* fileName) const;    // save data-matrix to file
   void calc() const;                        // calculate statistical values
   void randomize(const int _n) const;       // exchange two random rows, '_n' times
   void reorder() const;                     // restore original order of rows
   void sort(const int col) const;           // sort all rows by column 'col'

   // standardize column 'col', or all columns when 'col' is left at its default of -1
   void standardize(const bool zero2One, const int col =-1);

   // access to the data-matrix; indices are not range-checked
   inline int dim() const { return vir_dim; }   // # columns
   inline int nDS() const { return vir_nDS; }   // # rows
   inline const float* getRow (const int row) const { return data[row+_depth]; }   // -> row
   inline float getEntry (const int row, const int col) const { return data[row+_depth][col]; }

   // access to the vectors with the column-wise statistical values.
   // calc() must be called before. permissibility is not checked!
   inline const float* getMeanVec()     const { return mean; }
   inline const float* getMinVec()      const { return min; }
   inline const float* getMaxVec()      const { return max; }
   inline const float* getRangeVec()    const { return range; }
   inline const float* getInvRangeVec() const { return invRange; }   // inverse of range
   inline const float* getDevVec()      const { return dev; }        // deviation

   // query flags/states ...
   // Not declared 'inline': the body is not part of this header, and an inline
   // function has to be defined in every translation unit that calls it.
   // NOTE(review): presumably reports whether min and max of 'column' are
   // equal -- confirm against the implementation file.
   bool maxEqual2Min(const int column) const;
   inline bool consistentStatValues() const { return (_consistentStatValues && mean != NULL); }
   inline bool IsSorted() const { return _IsSorted; }
   inline bool IsRandom() const { return order != NULL; }

   // lock/unlock and query the lock flag (these only set/read '_IsLocked')
   void Lock()     const { _IsLocked = true; }
   void Unlock()   const { _IsLocked = false; }
   bool IsLocked() const { return _IsLocked; }

   // time-series functionality
   void set_time_series(const int __depth);
   inline bool IsTimeSeries() const { return series != NULL; }
   inline int depth()         const { return _depth; }     // depth when interpreted as time-series
   inline int getMaxDepth()   const { return _nDS-2; }     // biggest possible depth of time-series

   // NOTE(review): public critical section; what it guards is not visible in this header
   mutable CRITICAL_SECTION synchro;
};
#endif
back