cwDataSets.h/cpp : Added wtr,rdr,adapter.
This commit is contained in:
parent
0f86c8de20
commit
721d9e79a1
2840
cwDataSets.cpp
2840
cwDataSets.cpp
File diff suppressed because it is too large
Load Diff
265
cwDataSets.h
265
cwDataSets.h
@ -43,59 +43,246 @@ between fold selection passes.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
namespace cw
|
namespace cw
|
||||||
{
|
{
|
||||||
namespace dataset
|
namespace dataset
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
wtr: Writes columnar numeric files one row at a time. The data in a column
|
||||||
|
may be multidimensional. In other words, the data in a column may be a matrix.
|
||||||
|
Furthermore the data in a column may have a variable shape.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
1. Use define_columns() to name and describe the shape of the data in each column.
|
||||||
|
If a column's data has a variable size then set each variable dimension to 0.
|
||||||
|
2. For each row in the source dataset
|
||||||
|
3. For each column in the source dataset
|
||||||
|
4. Call wtr::write() to cache the column contents
|
||||||
|
5. Call write_record() to write the record to disk.
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
a. The data type of a column is determined by the data type of the column in the first row.
|
||||||
|
b. The data type of a column may not change after the first row.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
File Format:
|
||||||
|
Offset | Field | Label
|
||||||
|
-------|-------|------------------------
|
||||||
|
4 | 0 | record_count
|
||||||
|
4 | 1 | column_count
|
||||||
|
|
||||||
|
v | 0 2 | label [ cnt, c0, c1, c2 ...]
|
||||||
|
4 | 1 3 | id
|
||||||
|
4 | 2 4 | varDimN
|
||||||
|
4 | 3 5 | rankN
|
||||||
|
4 | 4 6 | maxEleN
|
||||||
|
4 | 5 7 | max typeflags
|
||||||
|
v | 6 8 | max value
|
||||||
|
4 | 7 9 | min typeflags
|
||||||
|
4 | 8 10 | min value
|
||||||
|
4 | 9 11 | dimV[0]
|
||||||
|
4 | 10 12 | maxDimV[0]
|
||||||
|
4 | . | dimV[1]
|
||||||
|
4 | . | maxDimV[1]
|
||||||
|
|
||||||
|
|
||||||
|
column 0 column 1 column N
|
||||||
|
---------------------- --------------------- ---------------------
|
||||||
|
Row Format: { <row_byte_count> { <varDimV0> <data0> } { <varDimV1> <data1> } ... { <varDimVN> <dataN> } }
|
||||||
|
|
||||||
|
Note that if a column's data has a fixed size then the <varDimV> is empty.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace wtr
|
||||||
|
{
|
||||||
|
typedef handle<struct wtr_str> handle_t;
|
||||||
|
|
||||||
|
rc_t create( handle_t& h, const char* fn );
|
||||||
|
rc_t destroy( handle_t& h );
|
||||||
|
|
||||||
|
// Define the shape of each column. Set variable length dimensions to 0.
|
||||||
|
rc_t define_columns( handle_t h, const char* label, unsigned columnId, unsigned rankN, const unsigned* dimV );
|
||||||
|
|
||||||
|
// Cache one column of data which will then be written on the call to write_record().
|
||||||
|
// If all the dimensions are defined in the column configuration then set dimV to nullptr.
|
||||||
|
rc_t write( handle_t h, unsigned columnId, const int* dV, unsigned dN, const unsigned* dimV=nullptr, unsigned dimN=0 );
|
||||||
|
rc_t write( handle_t h, unsigned columnId, const float* dV, unsigned dN, const unsigned* dimV=nullptr, unsigned dimN=0 );
|
||||||
|
rc_t write( handle_t h, unsigned columnId, const double* dV, unsigned dN, const unsigned* dimV=nullptr, unsigned dimN=0 );
|
||||||
|
|
||||||
|
// Write the column values cached by write() to disk as one record.
|
||||||
|
rc_t write_record( handle_t h );
|
||||||
|
|
||||||
|
rc_t test( const object_t* cfg );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace rdr
|
||||||
|
{
|
||||||
|
typedef handle<struct rdr_str> handle_t;
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
kIntRdrFl = 0x01,
|
||||||
|
kFloatRdrFl = 0x02,
|
||||||
|
kDoubleRdrFl = 0x04
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct col_str
|
||||||
|
{
|
||||||
|
const char* label; // Unique column label
|
||||||
|
unsigned id; // Unique column id
|
||||||
|
unsigned typeId; // See k???RdrFl type flags
|
||||||
|
unsigned varDimN; // Count of variable sized dimensions. 0 if this is a fixed size column.
|
||||||
|
unsigned rankN; // Count of elements in dimV[]
|
||||||
|
unsigned* dimV; // dimV[rankN]. Dimensions with value zero are undefined and set per field.
|
||||||
|
unsigned eleN; // Size of current column value
|
||||||
|
unsigned* maxDimV; // maxDimV[rankN]. Maximum value for each dimension. Same as dimV[]
|
||||||
|
|
||||||
|
variant::value_t max; // Max value of all data elements in this field
|
||||||
|
variant::value_t min; // Min value of all data elements in this field
|
||||||
|
|
||||||
|
unsigned maxEleN; // Max. count of elements in any one field.
|
||||||
|
unsigned maxByteN; // Max. size of this field in bytes
|
||||||
|
|
||||||
|
unsigned byteOffset; // Byte offset of the value of this field in the current record buffer.
|
||||||
|
unsigned byteN; // Size of this field in bytes.
|
||||||
|
} col_t;
|
||||||
|
|
||||||
|
rc_t create( handle_t& h, const char* fn );
|
||||||
|
rc_t destroy( handle_t& h );
|
||||||
|
|
||||||
|
unsigned column_count( handle_t h );
|
||||||
|
const col_t* column_cfg( handle_t h, unsigned colIdx );
|
||||||
|
const col_t* column_cfg( handle_t h, const char* colLabel );
|
||||||
|
|
||||||
|
unsigned record_count( handle_t h);
|
||||||
|
|
||||||
|
unsigned cur_record_index( handle_t h );
|
||||||
|
unsigned next_record_index( handle_t h );
|
||||||
|
|
||||||
|
enum {
|
||||||
|
kOkState, // Normal state
|
||||||
|
kErrorState, // An error has occurred which renders the rdr unusable.
|
||||||
|
kEofState // The end of the file has been encountered.
|
||||||
|
};
|
||||||
|
|
||||||
|
unsigned state( handle_t h );
|
||||||
|
|
||||||
|
rc_t seek( handle_t h, unsigned recordIdx );
|
||||||
|
|
||||||
|
// Read the next record.
|
||||||
|
rc_t read( handle_t h, unsigned recordIdx=kInvalidIdx );
|
||||||
|
|
||||||
|
// Read a column value.
|
||||||
|
//
|
||||||
|
// vRef = Pointer to the value vector.
|
||||||
|
// nRef = Count of elements in value vector.
|
||||||
|
// dimVRef = Dimension vector. nRef = prod(dimVRef), the product of all dimensions.
|
||||||
|
rc_t get( handle_t h, unsigned columnId, const int*& vRef, unsigned& nRef, const unsigned*& dimVRef );
|
||||||
|
rc_t get( handle_t h, unsigned columnId, const float*& vRef, unsigned& nRef, const unsigned*& dimVRef );
|
||||||
|
rc_t get( handle_t h, unsigned columnId, const double*& vRef, unsigned& nRef, const unsigned*& dimVRef );
|
||||||
|
|
||||||
|
rc_t report( handle_t h );
|
||||||
|
|
||||||
|
rc_t test( const object_t* cfg );
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace adapter
|
||||||
|
{
|
||||||
|
typedef handle<struct adapter_str> handle_t;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
kPreInitState,
|
||||||
|
kInitState,
|
||||||
|
kEofState,
|
||||||
|
kErrorState
|
||||||
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
|
||||||
|
kTrackColDimFl = 0x01,
|
||||||
|
|
||||||
|
kIntFl = 0x10, // Field Type Flags: int
|
||||||
|
kFloatFl = 0x20, // float
|
||||||
|
kDoubleFl = 0x40, // double
|
||||||
|
kTypeMask = 0x70 // (int | float | double)
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct colMap_str
|
||||||
|
{
|
||||||
|
unsigned colId; // Column identifier from the rdr
|
||||||
|
unsigned fieldEleOffset; // Offset into field record of this column
|
||||||
|
unsigned eleN; // Count of elements in this column
|
||||||
|
const unsigned* dimV; // Shape of this column
|
||||||
|
unsigned rankN; // dimV[ rankN ] Rank of this column
|
||||||
|
} colMap_t;
|
||||||
|
|
||||||
|
|
||||||
|
rc_t create( handle_t& hRef, const char* fn, unsigned maxBatchN );
|
||||||
|
rc_t destroy( handle_t& hRef );
|
||||||
|
|
||||||
|
// Create a field and assign it a column.
|
||||||
|
rc_t create_field( handle_t h, unsigned fieldId, unsigned flags, const char* colLabel=nullptr, bool oneHotFl=false );
|
||||||
|
|
||||||
|
// Assign an additional column to a field
|
||||||
|
rc_t assign_column( handle_t h, unsigned fieldId, const char* colLabel, bool oneHotFl=false );
|
||||||
|
|
||||||
|
// Total count of records in the dataset.
|
||||||
|
unsigned record_count( handle_t h );
|
||||||
|
|
||||||
|
// Field element count for fixed size fields.
|
||||||
|
unsigned field_fixed_ele_count( handle_t h, unsigned fieldId );
|
||||||
|
|
||||||
|
// Read and cache batchN records.
|
||||||
|
// recordIdxV[ batchN ] is an optional array of record indexes
|
||||||
|
rc_t read( handle_t h, unsigned batchN, const unsigned* recordIdxV=nullptr );
|
||||||
|
|
||||||
|
// Return field vectors formed on the previous call to read().
|
||||||
|
// fV[ eleN, batchN ]
|
||||||
|
// fNV[ batchN ] = eleN for each column of fV[]
|
||||||
|
rc_t get( handle_t h, unsigned fieldId, const int*& fV_Ref, const unsigned*& fNV_Ref );
|
||||||
|
rc_t get( handle_t h, unsigned fieldId, const float*& fV_Ref, const unsigned*& fNV_Ref );
|
||||||
|
rc_t get( handle_t h, unsigned fieldId, const double*& fV_Ref, const unsigned*& fNV_Ref );
|
||||||
|
|
||||||
|
// Returns col position and geometry data from each record returned by the last
|
||||||
|
// call to read().
|
||||||
|
// Returns colMapV_Ref[batchN][columnN].
|
||||||
|
rc_t column_map( handle_t h, unsigned fieldId, colMap_t const * const *& colMapV_Ref );
|
||||||
|
|
||||||
|
// See k???State above for return values.
|
||||||
|
unsigned state( handle_t h );
|
||||||
|
|
||||||
|
// Print a field to stdout. If fmt==nullptr then a format is automatically set based on the data type.
|
||||||
|
rc_t print_field( handle_t h, unsigned fieldId, const char* fmt=nullptr );
|
||||||
|
|
||||||
|
rc_t test( const object_t* cfg );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
namespace mnist
|
namespace mnist
|
||||||
{
|
{
|
||||||
typedef handle<struct mnist_str> handle_t;
|
typedef handle<struct mnist_str> handle_t;
|
||||||
|
|
||||||
rc_t create( handle_t& h, const char* dir );
|
rc_t create( handle_t& h, const char* inDir );
|
||||||
rc_t destroy( handle_t& h );
|
rc_t destroy( handle_t& h );
|
||||||
|
|
||||||
// Each column has one example image.
|
unsigned record_count( handle_t h );
|
||||||
// The top row contains the example label.
|
|
||||||
const mtx::f_t* train( handle_t h );
|
|
||||||
const mtx::f_t* validate( handle_t h );
|
|
||||||
const mtx::f_t* test( handle_t h );
|
|
||||||
|
|
||||||
rc_t test(const char* dir, const char* imageFn );
|
rc_t seek( handle_t h, unsigned exampleIdx );
|
||||||
|
rc_t dataM( handle_t h, const float*& dataM, const unsigned*& labelV, unsigned exampleN, unsigned& actualExampleN_Ref, unsigned exampleIdx=kInvalidIdx );
|
||||||
|
|
||||||
|
rc_t write( handle_t h, const char* fn );
|
||||||
|
|
||||||
|
rc_t test( const object_t* cfg );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
typedef handle<struct datasetMgr_str> handle_t;
|
|
||||||
|
|
||||||
// Data subset flags
|
|
||||||
enum { kTrainSsFl=0x10, kValidSsFl=0x20, kTestSsFl=0x40 };
|
|
||||||
|
|
||||||
|
|
||||||
enum { kFloatFl=0x02, kDoubleFl=0x04 };
|
|
||||||
rc_t create( handle_t& h, const object_t* cfg, unsigned flags );
|
|
||||||
rc_t destroy( handle_t& h );
|
|
||||||
|
|
||||||
|
|
||||||
// Load a dataset, divide it into train,validate, and test subsets
|
|
||||||
rc_t load( handle_t h, const char* dsLabel, unsigned batchN, unsigned validPct, unsigned testPct, unsigned flags );
|
|
||||||
|
|
||||||
// Shuffle the subset.
|
|
||||||
rc_t shuffle( handle_t h, unsigned subsetFl );
|
|
||||||
|
|
||||||
// Get the dimensions of all the examples from a subset.
|
|
||||||
// dimN=1: dimV[0]=batchN
|
|
||||||
// dimN=2: dimV[0]=realN dimV[1]=batchN
|
|
||||||
// dimN=3: dimV[0,1]=realN dimV[2]=batchN
|
|
||||||
rc_t subset_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref );
|
|
||||||
rc_t label_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref );
|
|
||||||
|
|
||||||
|
|
||||||
// get the next batch. Returns nullptr at the end of an epoch.
|
|
||||||
rc_t batch_f( handle_t h, unsigned subsetFl, const float*& dataM_Ref, const float*& labelM_Ref );
|
|
||||||
rc_t batch_d( handle_t h, unsigned subsetFl, const double*& dataM_Ref, const double*& labelM_Ref );
|
|
||||||
|
|
||||||
rc_t test( const object_t* cfg );
|
rc_t test( const object_t* cfg );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user