2020-08-20 00:10:35 +00:00
# include "cwCommon.h"
# include "cwLog.h"
# include "cwCommonImpl.h"
# include "cwMem.h"
2020-10-30 13:40:39 +00:00
# include "cwObject.h"
2020-08-20 00:10:35 +00:00
# include "cwFile.h"
# include "cwFileSys.h"
2020-10-30 13:40:39 +00:00
# include "cwVectOps.h"
2020-08-20 00:10:35 +00:00
# include "cwMtx.h"
2020-12-15 20:32:22 +00:00
# include "cwVariant.h"
2020-08-20 00:10:35 +00:00
# include "cwDataSets.h"
# include "cwSvg.h"
2020-10-30 13:40:39 +00:00
# include "cwTime.h"
2020-12-15 20:32:22 +00:00
# include "cwText.h"
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
2020-08-20 00:10:35 +00:00
namespace cw
{
namespace dataset
{
2020-12-15 20:32:22 +00:00
namespace wtr
{
typedef struct col_str
2020-08-20 00:10:35 +00:00
{
2020-12-15 20:32:22 +00:00
rdr : : col_t col ; // Public fields.
unsigned char * cur ; // Cache of the current column data contents.
unsigned curByteN ; // Count of bytes in cur[].
unsigned * curDimV ; // Cache of the current column dimensions.
struct col_str * link ; // Link to next col_t record.
} col_t ;
// State owned by one dataset writer handle.
typedef struct wtr_str
{
  file::handle_t fH;            // Handle of the output file.
  unsigned       record_count;  // Number of records written so far.
  col_t*         colL;          // Head of the linked list of column records.
  unsigned       totalVarDimN;  // Sum of col.varDimN over all columns (recomputed by _write_hdr()).
} wtr_t;
// Convert a public writer handle into a pointer to the writer state.
inline wtr_t* _handleToPtr( handle_t h )
{
  wtr_t* p = handleToPtr<handle_t,wtr_t>(h);
  return p;
}
// Release every column record, close the output file and free the writer record.
// Always returns kOkRC.
rc_t _destroy( wtr_t* p )
{
  for(col_t* col = p->colL; col != nullptr; )
  {
    // Remember the successor before this record is released.
    col_t* nextCol = col->link;

    mem::free( const_cast<char*>(col->col.label) );
    mem::release( col->col.dimV );
    mem::release( col->col.maxDimV );
    mem::release( col->cur );
    mem::release( col->curDimV );
    mem::release( col );

    col = nextCol;
  }

  file::close( p->fH );
  mem::release( p );

  return kOkRC;
}
2020-08-20 00:10:35 +00:00
2020-12-15 20:32:22 +00:00
// Locate the column record whose id equals 'columnId'.
// Logs an error and returns nullptr when no such column exists.
col_t* _columnIdToPtr( wtr_t* p, unsigned columnId )
{
  col_t* col = p->colL;

  while( col != nullptr && col->col.id != columnId )
    col = col->link;

  if( col == nullptr )
    cwLogError( kInvalidArgRC, " The dataset column id %i was not found. ", columnId );

  return col;
}
2020-12-15 20:32:22 +00:00
// Validate one column value and cache it in the column record so that a later
// write_record() can emit it.
//
// columnId      = id of the destination column
// eleN          = count of elements in dV[]
// dimV[ dimN ]  = dimensions for variable sized columns. At least col.rankN values must be
//                 given and their product must equal eleN. Ignored (with a warning)
//                 for fixed size columns.
// dV            = source data: eleN elements of the type named by typeFlags
// typeFlags     = variant type flag describing the elements of dV[]
// colPtrRef     = set to the column record on success; left untouched on failure.
//
// All arguments are validated before the column record is modified, so a rejected
// call leaves the previously cached value and the tracked maximum dimensions intact.
rc_t _write_column_to_buf( wtr_t* p, unsigned columnId, unsigned eleN, const unsigned* dimV, unsigned dimN, const void* dV, unsigned typeFlags, col_t*& colPtrRef )
{
  col_t* c = _columnIdToPtr(p,columnId);

  if( c == nullptr )
    return cwLogError(kInvalidArgRC," Unable to locate the column description associated with id: %i. ",columnId);

  const bool isVarSizeFl = c->col.varDimN != 0;

  if( dV == nullptr && eleN > 0 )
    return cwLogError(kInvalidArgRC," The data vector for column '%s' is null. ",cwStringNullGuard(c->col.label));

  // if this is a fixed size column
  if( !isVarSizeFl )
  {
    // verify that the element count matches the fixed element count
    if( eleN != c->col.maxEleN )
      return cwLogError(kInvalidArgRC," Data vector in column '%s' has %i elements but should have %i elements. ",cwStringNullGuard(c->col.label),eleN,c->col.maxEleN);

    if( dimV != nullptr || dimN != 0 )
      cwLogWarning(" The dimension vector for the fixed sized column '%s' is ignored in the write() function. ",cwStringNullGuard(c->col.label));
  }
  else // this is a variable sized column
  {
    // dimV[] is indexed col.rankN times below, so it must exist and be long enough.
    if( dimV == nullptr || dimN < c->col.rankN )
      return cwLogError(kInvalidArgRC," The variable sized column '%s' requires a dimension vector with %i values. ",cwStringNullGuard(c->col.label),c->col.rankN);

    unsigned tmpEleN = 1;
    for(unsigned i=0; i<c->col.rankN; ++i)
      tmpEleN *= dimV[i];

    // verify that the size of the data matches the size given in the dimensions
    if( tmpEleN != eleN )
      return cwLogError(kInvalidArgRC," The product of the dimension vector does not equal the count of elements in column '%s'. ",cwStringNullGuard(c->col.label));
  }

  const unsigned bytesPerEle = variant::flagsToBytes(typeFlags);

  if( bytesPerEle == 0 )
    return cwLogError(kInvalidArgRC," Invalid type identifier in column '%s'. ",cwStringNullGuard(c->col.label));

  // After the first record the column type is fixed: every later value must match it.
  if( p->record_count != 0 && c->col.max.flags != typeFlags )
    return cwLogError(kInvalidArgRC," The data vector type '%s' does not match the column type '%s'. ",variant::flagsToLabel(typeFlags),variant::flagsToLabel(c->col.max.flags));

  // --- validation complete: update the column record ---

  if( p->record_count == 0 )
  {
    // the first record establishes the column data type
    c->col.max.flags = typeFlags;
    c->col.min.flags = typeFlags;
  }

  if( isVarSizeFl )
  {
    for(unsigned i=0; i<c->col.rankN; ++i)
    {
      c->col.maxDimV[i] = std::max(c->col.maxDimV[i],dimV[i]);  // track the max. dimension
      c->curDimV[i]     = dimV[i];                              // store this value's dimensions
    }
  }

  // store the bytes associated with this column for the pending record
  c->curByteN = bytesPerEle * eleN;
  c->cur      = mem::resize<unsigned char>(c->cur,c->curByteN);

  if( c->curByteN > 0 )
    memcpy(c->cur,dV,c->curByteN);

  colPtrRef = c;

  return kOkRC;
}
2020-08-20 00:10:35 +00:00
2020-12-15 20:32:22 +00:00
// Write the file header at the current file position: the record count, the
// column count and then one description block per column.
// As a side effect p->totalVarDimN is recomputed from the column list.
// Returns the result code of the first write that fails, or kOkRC.
rc_t _write_hdr( wtr_t* p )
{
  rc_t     rc   = kOkRC;
  unsigned colN = 0;

  p->totalVarDimN = 0;

  // count the defined columns
  for(const col_t* col = p->colL; col != nullptr; col = col->link)
    colN += 1;

  if((rc = file::write(p->fH,p->record_count)) != kOkRC )
    return rc;

  if((rc = file::write(p->fH,colN)) != kOkRC )
    return rc;

  for(col_t* col = p->colL; col != nullptr; col = col->link)
  {
    // scalar fields of the column description
    if((rc = file::writeStr(p->fH,col->col.label))  != kOkRC ) return rc;
    if((rc = file::write(p->fH,col->col.id))        != kOkRC ) return rc;
    if((rc = file::write(p->fH,col->col.varDimN))   != kOkRC ) return rc;
    if((rc = file::write(p->fH,col->col.rankN))     != kOkRC ) return rc;
    if((rc = file::write(p->fH,col->col.maxEleN))   != kOkRC ) return rc;
    if((rc = variant::write(p->fH,col->col.max))    != kOkRC ) return rc;
    if((rc = variant::write(p->fH,col->col.min))    != kOkRC ) return rc;

    // interleaved (dimV[i], maxDimV[i]) pairs
    for(unsigned d=0; d<col->col.rankN; ++d)
    {
      if((rc = file::write(p->fH,col->col.dimV[d]))    != kOkRC ) return rc;
      if((rc = file::write(p->fH,col->col.maxDimV[d])) != kOkRC ) return rc;
    }

    p->totalVarDimN += col->col.varDimN;
  }

  return rc;
}
// Seek back to the start of the file and write the header again using the
// current writer state.
rc_t _re_write_hdr( wtr_t* p )
{
  rc_t rc = file::seek(p->fH,file::kBeginFl,0);

  if( rc != kOkRC )
    return cwLogError(kSeekFailRC," Data file Header seek failed. ");

  rc = _write_hdr(p);

  if( rc != kOkRC )
    return cwLogError(rc," Header re-write failed. ");

  return rc;
}
}
2020-12-15 20:32:22 +00:00
}
2020-08-20 00:10:35 +00:00
}
2020-12-15 20:32:22 +00:00
// Create a dataset writer on the file 'fn'.
// Any writer already held by 'h' is destroyed first. On success 'h' refers to
// the new writer; on failure the partially built writer is released.
cw::rc_t cw::dataset::wtr::create( handle_t& h, const char* fn )
{
  rc_t rc = destroy(h);

  if( rc != kOkRC )
    return rc;

  wtr_t* p = mem::allocZ<wtr_t>(1);

  if((rc = file::open(p->fH,fn,file::kWriteFl)) == kOkRC )
    h.set(p);
  else
    rc = cwLogError(rc," Data file creation failed. ");

  if( rc != kOkRC )
    _destroy(p);

  return rc;
}
2020-12-15 20:32:22 +00:00
// Finish and release the writer held by 'h'.
// The header is re-written first so that it reflects the final writer state.
// An invalid handle is a no-op. If the header re-write or the release fails
// the error is returned and 'h' is left unchanged.
cw::rc_t cw::dataset::wtr::destroy( handle_t& h )
{
  if( h.isValid() )
  {
    rc_t   rc;
    wtr_t* p = _handleToPtr(h);

    if((rc = _re_write_hdr(p)) != kOkRC )
      return rc;

    if((rc = _destroy(p)) != kOkRC )
      return rc;

    h.clear();
  }

  return kOkRC;
}
2020-12-15 20:32:22 +00:00
// Append a new column description to the writer.
//
// label         = column label (duplicated internally)
// columnId      = id later passed to write() to select this column
// dimV[ rankN ] = column dimensions; a dimension of 0 marks a variable sized dimension.
//
// Note that col.maxEleN is the product of dimV[], so it is 0 for any column
// with a variable dimension.
cw::rc_t cw::dataset::wtr::define_columns( handle_t h, const char* label, unsigned columnId, unsigned rankN, const unsigned* dimV )
{
  wtr_t* p      = _handleToPtr(h);
  col_t* newCol = mem::allocZ<col_t>(1);

  newCol->col.label   = mem::duplStr(label);
  newCol->col.id      = columnId;
  newCol->col.rankN   = rankN;
  newCol->col.varDimN = 0;
  newCol->col.dimV    = mem::allocDupl<unsigned>(dimV,rankN);
  newCol->col.maxDimV = mem::allocDupl<unsigned>(dimV,rankN);
  newCol->curDimV     = mem::allocDupl<unsigned>(dimV,rankN);
  newCol->col.maxEleN = 1;

  for(unsigned d=0; d<rankN; ++d)
  {
    newCol->col.maxEleN *= dimV[d];

    // a zero dimension is a variable sized dimension
    if( dimV[d] == 0 )
      newCol->col.varDimN += 1;
  }

  // Walk to the terminating link of the list and hang the new record there.
  col_t** tailRef = &p->colL;
  while( *tailRef != nullptr )
    tailRef = &(*tailRef)->link;

  *tailRef = newCol;

  return kOkRC;
}
2020-12-15 20:32:22 +00:00
// Cache an int vector as the pending value of column 'columnId' and fold its
// extremes into the column's running min/max.
cw::rc_t cw::dataset::wtr::write( handle_t h, unsigned columnId, const int* dV, unsigned eleN, const unsigned* dimV, unsigned rankN )
{
  wtr_t* p   = _handleToPtr(h);
  col_t* col = nullptr;
  rc_t   rc  = _write_column_to_buf(p,columnId,eleN,dimV,rankN,dV,variant::kInt32VFl,col);

  if( rc != kOkRC )
    return rc;

  const auto lo = vop::min(dV,eleN);
  const auto hi = vop::max(dV,eleN);

  // The first record seeds the range; later records can only widen it.
  const bool firstRecdFl = p->record_count == 0;

  col->col.min.u.i32 = firstRecdFl ? lo : std::min(col->col.min.u.i32,lo);
  col->col.max.u.i32 = firstRecdFl ? hi : std::max(col->col.max.u.i32,hi);

  return rc;
}
2020-08-20 00:10:35 +00:00
2020-12-15 20:32:22 +00:00
// Cache a float vector as the pending value of column 'columnId' and fold its
// extremes into the column's running min/max.
cw::rc_t cw::dataset::wtr::write( handle_t h, unsigned columnId, const float* dV, unsigned eleN, const unsigned* dimV, unsigned rankN )
{
  wtr_t* p   = _handleToPtr(h);
  col_t* col = nullptr;
  rc_t   rc  = _write_column_to_buf(p,columnId,eleN,dimV,rankN,dV,variant::kFloatVFl,col);

  if( rc != kOkRC )
    return rc;

  const auto lo = vop::min(dV,eleN);
  const auto hi = vop::max(dV,eleN);

  // The first record seeds the range; later records can only widen it.
  const bool firstRecdFl = p->record_count == 0;

  col->col.min.u.f = firstRecdFl ? lo : std::min(col->col.min.u.f,lo);
  col->col.max.u.f = firstRecdFl ? hi : std::max(col->col.max.u.f,hi);

  return rc;
}
2020-08-20 00:10:35 +00:00
2020-12-15 20:32:22 +00:00
// Cache a double vector as the pending value of column 'columnId' and fold its
// extremes into the column's running min/max.
cw::rc_t cw::dataset::wtr::write( handle_t h, unsigned columnId, const double* dV, unsigned eleN, const unsigned* dimV, unsigned rankN )
{
  wtr_t* p   = _handleToPtr(h);
  col_t* col = nullptr;
  rc_t   rc  = _write_column_to_buf(p,columnId,eleN,dimV,rankN,dV,variant::kDoubleVFl,col);

  if( rc != kOkRC )
    return rc;

  const auto lo = vop::min(dV,eleN);
  const auto hi = vop::max(dV,eleN);

  // The first record seeds the range; later records can only widen it.
  const bool firstRecdFl = p->record_count == 0;

  col->col.min.u.d = firstRecdFl ? lo : std::min(col->col.min.u.d,lo);
  col->col.max.u.d = firstRecdFl ? hi : std::max(col->col.max.u.d,hi);

  return rc;
}
2020-12-15 20:32:22 +00:00
// Emit one record built from the value currently cached in each column.
//
// Record layout: an unsigned byte count followed, for every column in list
// order, by the sizes of its variable dimensions (if any) and then its data bytes.
// The file header is written ahead of the first record.
cw::rc_t cw::dataset::wtr::write_record( handle_t h )
{
  rc_t   rc;
  wtr_t* p = _handleToPtr(h);

  // the first record in the file is preceded by the file header
  if( p->record_count == 0 && (rc = _write_hdr(p)) != kOkRC )
    return rc;

  // total the bytes occupied by the record body
  unsigned rowByteN = 0;
  for(const col_t* col = p->colL; col != nullptr; col = col->link)
    rowByteN += col->col.varDimN * sizeof(unsigned) + col->curByteN;

  // the record begins with its own size
  rc = file::write(p->fH,rowByteN);

  // then each column follows; stop at the first failed write
  for(col_t* col = p->colL; rc == kOkRC && col != nullptr; col = col->link)
  {
    // variable sized columns store the actual size of each variable dimension first
    if( col->col.varDimN > 0 )
      for(unsigned d=0; rc == kOkRC && d<col->col.rankN; ++d)
        if( col->col.dimV[d] == 0 )
          rc = file::write(p->fH,col->curDimV[d]);

    // followed by the column data
    if( rc == kOkRC )
      rc = file::write(p->fH,col->cur,col->curByteN);
  }

  if( rc == kOkRC )
    p->record_count += 1;
  else
    rc = cwLogError(rc," Example index %i write failed ",p->record_count);

  return rc;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
/*
File format for the following data,
where the data record itself is written 3 times.
unsigned dim0V [ ] = { 1 } ;
unsigned dim1V [ ] = { 3 } ;
unsigned dim2V [ ] = { 2 , 0 } ;
unsigned dim3V [ ] = { 2 , 2 } ;
int val0 [ ] = { 0 } ;
int val1 [ ] = { 1 , 2 , 3 } ;
int val2 [ ] = { 4 , 5 , 6 , 7 , 8 , 9 } ;
int val3 [ ] = { 10 , 11 , 13 , 14 } ;
0300 0000 3 recd_count
0400 0000 4 col_count
0400 0000 label size - col0
636f 6 c30 label
0000 0000 id
0000 0000 varDimN
0100 0000 rankN
0100 0000 maxEleN
4000 0000 max type
0000 0000 max value
0000 0000
4000 0000 min type
0000 0000 min value
0000 0000
0100 0000 dimV [ 0 ]
0100 0000 maxDimV [ 0 ]
0400 0000 label size - col 1
636f 6 c31 label
0100 0000 id
0000 0000 varDimN
0100 0000 rankN
0300 0000 maxEleN
4000 0000 max type
0300 0000 max value
0000 0000
4000 0000 min type
0100 0000 min value
0000 0000
0300 0000 dimV [ 0 ]
0300 0000 maxDimV [ 0 ]
0400 0000 label size - col 2
636f 6 c32 label
0200 0000 id
0100 0000 varDimN
0200 0000 rankN
0000 0000 maxEleN
4000 0000 max type
0900 0000 max value
0000 0000
4000 0000 min type
0400 0000 min value
0000 0000
0200 0000 dimV [ 0 ]
0200 0000 maxDimV [ 0 ]
0000 0000 dimV [ 1 ]
0300 0000 maxDimV [ 1 ]
0400 0000 label size - col 3
636f 6 c33 label
0300 0000 id
0000 0000 varDimN
0200 0000 rankN
0400 0000 maxEleN
4000 0000 max type
0e00 0000 max value
0000 0000
4000 0000 min type
0 a00 0000 min value
0000 0000
0200 0000 dimV [ 0 ]
0200 0000 maxDimV [ 0 ]
0200 0000 dimV [ 1 ]
0200 0000 maxDimV [ 1 ]
3 c00 0000 recd0 size ( 60 bytes )
0000 0000 0 col0
0100 0000 1 col1 [ 0 ]
0200 0000 2 col1 [ 1 ]
0300 0000 3 col1 [ 2 ]
0300 0000 dimV [ 1 ] col2 < - variable dimension
0400 0000 4 col2 [ 0 ]
0500 0000 5
0600 0000 6
0700 0000 7
0800 0000 8
0900 0000 9
0a00 0000 10 col3
0b00 0000 11
0d00 0000 13
0e00 0000 14
3 c00 0000 recd1 size ( 60 bytes )
0100 0000 1 col0
0100 0000
0200 0000
0300 0000
0300 0000
0400 0000
0500 0000
0600 0000
0700 0000
0800 0000
0900 0000
0 a00 0000
0b00 0000
0 d00 0000
0e00 0000
3 c00 0000 recd2 size ( 60 bytes )
0200 0000 2 col0
0100 0000
0200 0000
0300 0000
0300 0000
0400 0000
0500 0000
0600 0000
0700 0000
0800 0000
0900 0000
0 a00 0000
0b00 0000
0 d00 0000
0e00 0000
*/
// Writer self test.
//
// Reads the output file name from the 'outFn' field of 'cfg', defines four int
// columns (col2 has one variable dimension) and writes three records.
// The first failure encountered is logged and returned; the writer is always
// destroyed and the expanded file name released before returning.
cw::rc_t cw::dataset::wtr::test( const object_t* cfg )
{
  rc_t     rc    = kOkRC;
  char*    outFn = nullptr;
  handle_t h;

  if((rc = cfg->getv(" outFn ",outFn)) != kOkRC )
    return cwLogError(rc," wtr test failed. Argument parse failed. ");

  // expandPath() returns a new string which is released at errLabel
  outFn = filesys::expandPath(outFn);

  if((rc = create(h,outFn)) != kOkRC )
  {
    rc = cwLogError(rc," wtr create failed. ");
    goto errLabel;
  }
  else
  {
    enum { kId0, kId1, kId2, kId3 };

    unsigned dim0V[] = { 1 };
    unsigned dim1V[] = { 3 };
    unsigned dim2V[] = { 2, 0 };   // second dimension is variable
    unsigned dim3V[] = { 2, 2 };

    unsigned dim0N = cwCountOf(dim0V);
    unsigned dim1N = cwCountOf(dim1V);
    unsigned dim2N = cwCountOf(dim2V);
    unsigned dim3N = cwCountOf(dim3V);

    int val0[] = { 0 };
    int val1[] = { 1, 2, 3 };
    int val2[] = { 4, 5, 6, 7, 8, 9 };
    int val3[] = { 10, 11, 13, 14 };

    if((rc = define_columns(h," col0 ",kId0,dim0N,dim0V)) != kOkRC )
    {
      rc = cwLogError(rc," Define column 0 failed. ");
      goto errLabel;
    }

    if((rc = define_columns(h," col1 ",kId1,dim1N,dim1V)) != kOkRC )
    {
      rc = cwLogError(rc," Define column 1 failed. ");
      goto errLabel;
    }

    if((rc = define_columns(h," col2 ",kId2,dim2N,dim2V)) != kOkRC )
    {
      rc = cwLogError(rc," Define column 2 failed. ");
      goto errLabel;
    }

    if((rc = define_columns(h," col3 ",kId3,dim3N,dim3V)) != kOkRC )
    {
      rc = cwLogError(rc," Define column 3 failed. ");
      goto errLabel;
    }

    for(unsigned i=0; i<3; ++i)
    {
      val0[0] = i;

      // give the variable dimension of col2 its actual size for this record
      dim2V[1] = 3;

      // '||' short-circuits, so the first failing call determines rc
      if( (rc = write(h,kId0,val0,dim0V[0]))                           != kOkRC ||
          (rc = write(h,kId1,val1,dim1V[0]))                           != kOkRC ||
          (rc = write(h,kId2,val2,dim2V[0]*dim2V[1],dim2V,dim2N))      != kOkRC ||
          (rc = write(h,kId3,val3,dim3V[0]*dim3V[1]))                  != kOkRC ||
          (rc = write_record(h))                                       != kOkRC )
      {
        rc = cwLogError(rc," Record %i write failed. ",i);
        goto errLabel;
      }
    }
  }

 errLabel:
  {
    // destroy() re-writes the header, so its failure must not be lost.
    rc_t destroyRC = destroy(h);
    if( rc == kOkRC )
      rc = destroyRC;
  }

  mem::release(outFn);

  return rc;
}
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
namespace cw
{
namespace dataset
{
namespace rdr
2020-10-30 13:40:39 +00:00
{
2020-12-15 20:32:22 +00:00
enum
{
  // Byte size of the unsigned record-length prefix that precedes each record's data.
  kSizeofRecordHeader = sizeof(unsigned)
};
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// Reader-side column record.
typedef struct
{
  col_t     col;         // Public column description.
  unsigned* varDimIdxV;  // Indexes of the dimensions of this column which are variable sized.
  unsigned  varDimIdxN;  // Number of valid entries in varDimIdxV[].
} c_t;
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// State owned by one dataset reader handle.
typedef struct rdr_str
{
  c_t*           colA;              // colA[ column_count ] per column records.
  unsigned       column_count;      // Number of records in colA[].
  unsigned       record_count;      // Number of data records reported by the file header.
  file::handle_t fH;                // Handle of the data file.
  std::uint8_t*  buf;               // buf[ bufMaxByteN ] holds the data of one record.
  unsigned       bufMaxByteN;       // Allocated byte size of buf[] (the record size when all columns are fixed size).
  unsigned       bufCurByteN;       // Number of bytes of buf[] currently in use.
  bool           isFixedSizeFl;     // True when no column has a variable dimension.
  unsigned       curRecordIdx;      // Index of the record held in buf[].
  unsigned       nextRecordIdx;     // Index of the record the next read will return.
  long           baseFileByteOffs;  // File offset of the first data record.
  unsigned       state;             // See k???State enum
} rdr_t;
// One row of the data type reference table.
typedef struct type_str
{
  const char* label;      // Type label (nullptr in the terminating row).
  unsigned    typeId;     // Reader type id (k???RdrFl).
  unsigned    variantFl;  // Matching variant type flag.
} type_t;
// Table relating each reader type id to its label and variant type flag.
// The row with a null label terminates the table.
type_t _typeRefA[] =
{
  { " int ",    kIntRdrFl,    variant::kInt32VFl  },
  { " float ",  kFloatRdrFl,  variant::kFloatVFl  },
  { " double ", kDoubleRdrFl, variant::kDoubleVFl },
  { nullptr,    0,            0                   }
};
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// Convert a public reader handle into a pointer to the reader state.
rdr_t* _handleToPtr( handle_t h )
{
  rdr_t* p = handleToPtr<handle_t,rdr_t>(h);
  return p;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// Return the type table row whose typeId equals 'typeId'.
// Logs an error and returns nullptr when the id is not in the table.
const type_t* _typeIdToDesc( unsigned typeId )
{
  const type_t* desc = _typeRefA;

  while( desc->label != nullptr )
  {
    if( desc->typeId == typeId )
      return desc;

    ++desc;
  }

  cwLogError(kInvalidArgRC," The dataset rdr typeId %i is not valid. ",typeId);
  return nullptr;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// Return the type table row whose variant flag equals 'variantFl',
// or nullptr (without logging) when there is none.
const type_t* _varTypeFlagsToDesc( unsigned variantFl )
{
  const type_t* desc = _typeRefA;

  while( desc->label != nullptr )
  {
    if( desc->variantFl == variantFl )
      return desc;

    ++desc;
  }

  return nullptr;
}
2020-12-15 20:32:22 +00:00
const char * _typeIdToLabel ( unsigned typeId )
{
const type_t * t ;
if ( ( t = _typeIdToDesc ( typeId ) ) = = nullptr )
return nullptr ;
return t - > label ;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
bool _typeIdMatch ( unsigned typeId , unsigned variantTypeFl )
{
const type_t * t ;
if ( ( t = _typeIdToDesc ( typeId ) ) = = nullptr )
return false ;
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
return t - > typeId = = typeId & & t - > variantFl = = variantTypeFl ;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// Find the column record with id 'columnId'.
// Logs an error and returns nullptr when the id is unknown.
const c_t* _colFromId( rdr_t* p, unsigned columnId )
{
  const c_t* const endCol = p->colA + p->column_count;

  for(const c_t* col = p->colA; col != endCol; ++col)
    if( col->col.id == columnId )
      return col;

  cwLogError(kInvalidArgRC," Invalid columnId (%i). ",columnId);
  return nullptr;
}
// Find the column record whose label compares equal to 'colLabel'.
// Logs an error and returns nullptr when no column has that label.
const c_t* _colFromLabel( rdr_t* p, const char* colLabel )
{
  const c_t* const endCol = p->colA + p->column_count;

  for(const c_t* col = p->colA; col != endCol; ++col)
    if( textCompare(col->col.label,colLabel) == 0 )
      return col;

  cwLogError(kInvalidArgRC," Invalid column label:%s. ",colLabel);
  return nullptr;
}
2020-12-15 20:32:22 +00:00
// Release the per column memory, close the data file and free the reader record.
// Always returns kOkRC.
rc_t _destroy( rdr_t* p )
{
  unsigned colIdx = 0;

  while( colIdx < p->column_count )
  {
    c_t& c = p->colA[colIdx];

    mem::release(c.col.dimV);
    mem::release(c.col.maxDimV);
    mem::release(c.varDimIdxV);
    mem::free(const_cast<char*>(c.col.label));

    ++colIdx;
  }

  file::close(p->fH);

  mem::release(p->colA);
  mem::release(p->buf);
  mem::release(p);

  return kOkRC;
}
// Read the file header and build the per column records.
//
// On return p->colA[], p->column_count, p->record_count, p->isFixedSizeFl and
// p->baseFileByteOffs are set and p->buf[] is allocated large enough to hold
// the largest possible record.
//
// On any failure, including a column with an unsupported data type, the
// error is logged, p->state is set to kErrorState and the error code is returned.
rc_t _readHdr( rdr_t* p )
{
  rc_t     rc           = kOkRC;
  unsigned bufOffsByteN = 0;

  p->bufMaxByteN   = 0;
  p->isFixedSizeFl = true;

  if((rc = read(p->fH,p->record_count)) != kOkRC ) goto errLabel;
  if((rc = read(p->fH,p->column_count)) != kOkRC ) goto errLabel;

  p->colA = mem::allocZ<c_t>(p->column_count);

  // for each column
  for(unsigned i=0; i<p->column_count; ++i)
  {
    c_t* c = p->colA + i;

    if((rc = readStr(p->fH,(char**)&c->col.label,255)) != kOkRC ) goto errLabel;
    if((rc = read(p->fH,c->col.id))                    != kOkRC ) goto errLabel;
    if((rc = read(p->fH,c->col.varDimN))               != kOkRC ) goto errLabel;
    if((rc = read(p->fH,c->col.rankN))                 != kOkRC ) goto errLabel;
    if((rc = read(p->fH,c->col.maxEleN))               != kOkRC ) goto errLabel;
    if((rc = variant::read(p->fH,c->col.max))          != kOkRC ) goto errLabel;
    if((rc = variant::read(p->fH,c->col.min))          != kOkRC ) goto errLabel;

    c->col.dimV    = mem::allocZ<unsigned>(c->col.rankN);
    c->col.maxDimV = mem::allocZ<unsigned>(c->col.rankN);
    c->varDimIdxV  = mem::allocZ<unsigned>(c->col.rankN);
    c->varDimIdxN  = 0;

    // maxEleN is recomputed below as the product of maxDimV[]
    c->col.maxEleN = c->col.rankN == 0 ? 0 : 1;

    for(unsigned j=0; j<c->col.rankN; ++j)
    {
      if((rc = file::read(p->fH,c->col.dimV[j]))    != kOkRC ) goto errLabel;
      if((rc = file::read(p->fH,c->col.maxDimV[j])) != kOkRC ) goto errLabel;

      // a zero dimension marks a variable sized dimension
      if( c->col.dimV[j] == 0 )
        c->varDimIdxV[ c->varDimIdxN++ ] = j;

      c->col.maxEleN *= c->col.maxDimV[j];
    }

    const type_t* t = _varTypeFlagsToDesc(c->col.max.flags);

    if( t == nullptr )
    {
      // Stop here: continuing would overwrite 'rc' and silently accept the column.
      rc = cwLogError(kInvalidDataTypeRC," The column %s is not a valid data type (e.g. int, float double). ",cwStringNullGuard(c->col.label));
      goto errLabel;
    }

    c->col.typeId = t->typeId;

    unsigned bytesPerEle = variant::flagsToBytes(c->col.max.flags);

    // TODO: why maintain both eleN and maxEleN and byteN and maxByteN?
    c->col.eleN       = c->col.maxEleN;
    c->col.maxByteN   = bytesPerEle * c->col.maxEleN;
    c->col.byteOffset = bufOffsByteN;
    c->col.byteN      = c->col.maxByteN;

    // Track the max file buffer size
    p->bufMaxByteN += c->col.maxByteN + c->varDimIdxN * sizeof(unsigned);

    if( c->col.varDimN != 0 && p->isFixedSizeFl )
      p->isFixedSizeFl = false;

    bufOffsByteN = p->bufMaxByteN;
  }

  p->buf = mem::alloc<std::uint8_t>(p->bufMaxByteN);

  // store the file offset to the first data record
  rc = tell(p->fH,&p->baseFileByteOffs);

 errLabel:
  if( rc != kOkRC )
  {
    rc = cwLogError(rc," Data set file header read failed. ");
    p->state = kErrorState;
  }

  return rc;
}
// Position the file at the first data record and reset the record indexes.
// A failed seek puts the reader into kErrorState.
rc_t _rewind( rdr_t* p )
{
  const rc_t rc = file::seek(p->fH,file::kBeginFl,p->baseFileByteOffs);

  if( rc == kOkRC )
  {
    p->curRecordIdx  = kInvalidIdx;
    p->nextRecordIdx = 0;
  }
  else
  {
    p->state = kErrorState;
  }

  return rc;
}
// Move the file position to the start of record 'recdIdx' by stepping over
// records one at a time, using the byte count stored ahead of each record.
//
// Records can only be skipped in the forward direction, so a request for a
// record before p->nextRecordIdx first rewinds to the first record.
// p->nextRecordIdx is not advanced here; that is left to the caller.
// A failed read or seek puts the reader into kErrorState.
rc_t _var_seek( rdr_t* p, unsigned recdIdx )
{
  rc_t rc = kOkRC;

  if( recdIdx < p->nextRecordIdx )
    if((rc = _rewind(p)) != kOkRC )
      goto errLabel;

  // step over every record between the current position and the target
  for(unsigned i = p->nextRecordIdx; i < recdIdx; ++i)
  {
    unsigned recdByteN = 0;

    // read the size of the record ...
    if((rc = file::read(p->fH,recdByteN)) != kOkRC )
    {
      p->state = kErrorState;
      goto errLabel;
    }

    // ... and skip its data
    if((rc = file::seek(p->fH,file::kCurFl,recdByteN)) != kOkRC )
    {
      p->state = kErrorState;
      goto errLabel;
    }
  }

 errLabel:
  return rc;
}
// Seek to a record, but don't actually read it.
//
// On success p->nextRecordIdx is set to 'recdIdx'. Seeking to the record that
// is already next is a no-op. An index at or beyond the record count is
// rejected with kInvalidArgRC.
rc_t _seek( rdr_t* p, unsigned recdIdx )
{
  rc_t rc = kOkRC;

  if( p->nextRecordIdx == recdIdx )
    return rc;

  if( recdIdx >= p->record_count )
    return cwLogError(kInvalidArgRC," The seek index %i is invalid. Record Count=%i ",recdIdx,p->record_count);

  if( p->isFixedSizeFl )
  {
    // fixed size recds offset = baseOffset + (recd_index * (sizeof(recd_byte_cnt) + sizeof(data_record)))
    rc = file::seek(p->fH,file::kBeginFl,p->baseFileByteOffs + recdIdx * (kSizeofRecordHeader + p->bufMaxByteN));
  }
  else
  {
    // variable sized records must be stepped over one at a time
    rc = _var_seek(p,recdIdx);
  }

  if( rc == kOkRC )
    p->nextRecordIdx = recdIdx;

  return rc;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// Walk the record held in p->buf[] and update each column's dimensions,
// element count, byte count and byte offset to describe that record.
// For a variable sized column the sizes of its variable dimensions are stored
// in the buffer immediately ahead of the column data.
// p->bufCurByteN is left holding the total number of bytes consumed.
// Always returns kOkRC.
rc_t _parse_var_record( rdr_t* p )
{
  unsigned offs = 0;  // running byte offset into p->buf[]

  for(unsigned colIdx=0; colIdx<p->column_count; ++colIdx)
  {
    c_t* c = p->colA + colIdx;

    // if this is a variable sized column
    if( c->col.varDimN != 0 )
    {
      const unsigned* dimSrcV = reinterpret_cast<unsigned*>(p->buf + offs);
      unsigned        eleN    = c->col.rankN == 0 ? 0 : 1;
      unsigned        k       = 0;  // index of the next variable dimension

      for(unsigned j=0; j<c->col.rankN; ++j)
      {
        // a variable dimension takes its actual size from the record
        if( k < c->varDimIdxN && c->varDimIdxV[k] == j )
        {
          c->col.dimV[j] = dimSrcV[k];
          k    += 1;
          offs += sizeof(unsigned);
        }

        // accumulate the element count of this field
        eleN *= c->col.dimV[j];
      }

      c->col.eleN  = eleN;
      c->col.byteN = variant::flagsToBytes(c->col.max.flags) * eleN;
    }

    // the column data begins after any dimension values
    c->col.byteOffset = offs;
    offs += c->col.byteN;
  }

  p->bufCurByteN = offs;

  return kOkRC;
}
2020-12-15 20:32:22 +00:00
// Read the next record from the file into p->buf[].
//
// Returns kEofRC (and sets kEofState) when the record length cannot be read
// because the end of the file was reached. A record length larger than
// p->buf[] is rejected at run time rather than with an assert, so that a
// damaged file cannot overflow the buffer in a build where asserts are disabled.
// On success p->curRecordIdx and p->nextRecordIdx are advanced.
rc_t _read_record( rdr_t* p )
{
  rc_t     rc          = kOkRC;
  unsigned recordByteN = 0;

  // Read the byte length of this record
  if((rc = file::read(p->fH,recordByteN)) != kOkRC )
  {
    if( file::eof(p->fH) )
    {
      p->state = kEofState;
      return kEofRC;
    }
    goto errLabel;
  }

  // The length comes from the file: never trust it beyond the buffer size.
  if( recordByteN > p->bufMaxByteN )
  {
    p->state = kErrorState;
    rc = cwLogError(kInvalidArgRC," The record at index %i has %i bytes but the read buffer holds only %i bytes. ",p->nextRecordIdx,recordByteN,p->bufMaxByteN);
    goto errLabel;
  }

  // read the record data into p->buf[]
  if((rc = file::read(p->fH,p->buf,recordByteN)) != kOkRC )
    goto errLabel;

  // if all columns in the record do not have a fixed size then update
  // the column pointers into the data record
  if( !p->isFixedSizeFl )
    if((rc = _parse_var_record(p)) != kOkRC )
      goto errLabel;

  p->curRecordIdx   = p->nextRecordIdx;
  p->nextRecordIdx += 1;

 errLabel:
  return rc;
}
2020-12-15 20:32:22 +00:00
// Return a view of the value of column 'columnId' in the record held in p->buf[].
//
// vpRef     = set to the first byte of the column data inside p->buf[]
// nRef      = set to the count of elements in the column value
// dimVRef   = set to the column's dimension vector
// reqTypeId = the type the caller expects; it must equal the column's type id.
//
// Returns kInvalidArgRC if the column id is unknown or the types differ.
rc_t _get( rdr_t* p, unsigned columnId, void*& vpRef, unsigned& nRef, const unsigned*& dimVRef, unsigned reqTypeId )
{
  const c_t* c;

  if((c = _colFromId(p,columnId)) == nullptr )
    return kInvalidArgRC;

  if( c->col.typeId != reqTypeId )
  {
    // Look the labels up once: _typeIdToLabel() logs, and may return nullptr, for an invalid id.
    const char* colTypeLabel = _typeIdToLabel(c->col.typeId);
    const char* reqTypeLabel = _typeIdToLabel(reqTypeId);

    // The format has three %s fields: column label, column type, requested type.
    return cwLogError(kInvalidArgRC," Cannot convert the column '%s' from type:%s to type:%s. ",cwStringNullGuard(c->col.label),cwStringNullGuard(colTypeLabel),cwStringNullGuard(reqTypeLabel));
  }

  nRef    = c->col.eleN;
  dimVRef = c->col.dimV;
  vpRef   = p->buf + c->col.byteOffset;

  return kOkRC;
}
}
}
2020-10-30 13:40:39 +00:00
}
2020-12-15 20:32:22 +00:00
// Create a dataset reader on the file 'fn' and read its header.
//
// Any reader already held by 'h' is destroyed first. 'h' is set only when both
// the file open and the header read succeed; on any failure the partially
// built reader is released and 'h' is left without a reader.
cw::rc_t cw::dataset::rdr::create( handle_t& h, const char* fn )
{
  rc_t rc;

  if((rc = destroy(h)) != kOkRC )
    return rc;

  auto p = mem::allocZ<rdr_t>(1);

  // A failed open must not fall through to h.set(): 'p' is released below.
  if((rc = file::open(p->fH,fn,file::kReadFl)) != kOkRC )
    goto errLabel;

  if((rc = _readHdr(p)) != kOkRC )
    goto errLabel;

  p->state        = kOkState;
  p->curRecordIdx = kInvalidIdx;

  h.set(p);

 errLabel:
  if( rc != kOkRC )
    _destroy(p);

  return rc;
}
2020-12-15 20:32:22 +00:00
// Release the reader referenced by 'h' and clear the handle.
// An invalid handle is not an error; the call simply returns kOkRC.
cw::rc_t cw::dataset::rdr::destroy( handle_t& h )
{
  if( h.isValid() )
  {
    const rc_t destroyRC = _destroy(_handleToPtr(h));

    // leave the handle untouched if the release failed
    if( destroyRC != kOkRC )
      return destroyRC;

    h.clear();
  }

  return kOkRC;
}
2020-12-15 20:32:22 +00:00
unsigned cw : : dataset : : rdr : : column_count ( handle_t h )
2020-10-30 13:40:39 +00:00
{
2020-12-15 20:32:22 +00:00
rdr_t * p = _handleToPtr ( h ) ;
return p - > column_count ;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// Return the public description of the column at index 'colIdx',
// or nullptr if 'colIdx' is not less than the column count.
const cw::dataset::rdr::col_t* cw::dataset::rdr::column_cfg( handle_t h, unsigned colIdx )
{
  rdr_t* rdr = _handleToPtr(h);

  if( colIdx < rdr->column_count )
    return &rdr->colA[ colIdx ].col;

  return nullptr;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// Return the public description of the column located by _colFromLabel()
// for 'colLabel', or nullptr if no column was found.
const cw::dataset::rdr::col_t* cw::dataset::rdr::column_cfg( handle_t h, const char* colLabel )
{
  const c_t* match = _colFromLabel(_handleToPtr(h),colLabel);

  if( match != nullptr )
    return &match->col;

  return nullptr;
}
2020-12-15 20:32:22 +00:00
unsigned cw : : dataset : : rdr : : record_count ( handle_t h )
{
rdr_t * p = _handleToPtr ( h ) ;
return p - > record_count ;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
unsigned cw : : dataset : : rdr : : cur_record_index ( handle_t h )
2020-10-30 13:40:39 +00:00
{
2020-12-15 20:32:22 +00:00
rdr_t * p = _handleToPtr ( h ) ;
return p - > curRecordIdx ;
2020-10-30 13:40:39 +00:00
}
2020-12-15 20:32:22 +00:00
unsigned cw : : dataset : : rdr : : next_record_index ( handle_t h )
2020-10-30 13:40:39 +00:00
{
2020-12-15 20:32:22 +00:00
rdr_t * p = _handleToPtr ( h ) ;
return p - > nextRecordIdx ;
2020-10-30 13:40:39 +00:00
}
2020-12-15 20:32:22 +00:00
unsigned cw : : dataset : : rdr : : state ( handle_t h )
2020-10-30 13:40:39 +00:00
{
2020-12-15 20:32:22 +00:00
rdr_t * p = _handleToPtr ( h ) ;
return p - > state ;
}
// Position the reader at record 'recordIdx' by forwarding to _seek().
cw::rc_t cw::dataset::rdr::seek( handle_t h, unsigned recordIdx )
{
  return _seek(_handleToPtr(h),recordIdx);
}
2020-12-15 20:32:22 +00:00
// Read one record into the reader's record buffer.
// If 'record_index' is not kInvalidIdx the reader first seeks to that record,
// otherwise the record at the current file position is read.
cw::rc_t cw::dataset::rdr::read( handle_t h, unsigned record_index )
{
  rdr_t*     rdr    = _handleToPtr(h);
  const bool seekFl = record_index != kInvalidIdx;

  if( seekFl )
  {
    const rc_t seekRC = _seek(rdr,record_index);

    if( seekRC != kOkRC )
      return seekRC;
  }

  return _read_record(rdr);
}
2020-12-15 20:32:22 +00:00
// Get the 'int' data of column 'columnId' from the current record.
// vRef is set to nullptr when the underlying _get() call fails.
cw::rc_t cw::dataset::rdr::get( handle_t h, unsigned columnId, const int*& vRef, unsigned& nRef, const unsigned*& dimVRef )
{
  void*      raw   = nullptr;
  const rc_t getRC = _get(_handleToPtr(h),columnId,raw,nRef,dimVRef,kIntRdrFl);

  if( getRC == kOkRC )
    vRef = static_cast<const int*>(raw);
  else
    vRef = nullptr;

  return getRC;
}
// Get the 'float' data of column 'columnId' from the current record.
// vRef is set to nullptr when the underlying _get() call fails.
cw::rc_t cw::dataset::rdr::get( handle_t h, unsigned columnId, const float*& vRef, unsigned& nRef, const unsigned*& dimVRef )
{
  void*      raw   = nullptr;
  const rc_t getRC = _get(_handleToPtr(h),columnId,raw,nRef,dimVRef,kFloatRdrFl);

  if( getRC == kOkRC )
    vRef = static_cast<const float*>(raw);
  else
    vRef = nullptr;

  return getRC;
}
// Get the 'double' data of column 'columnId' from the current record.
// vRef is set to nullptr when the underlying _get() call fails.
cw::rc_t cw::dataset::rdr::get( handle_t h, unsigned columnId, const double*& vRef, unsigned& nRef, const unsigned*& dimVRef )
{
  void*      raw   = nullptr;
  const rc_t getRC = _get(_handleToPtr(h),columnId,raw,nRef,dimVRef,kDoubleRdrFl);

  if( getRC == kOkRC )
    vRef = static_cast<const double*>(raw);
  else
    vRef = nullptr;

  return getRC;
}
// Print a one line summary of every column to stdout: id, variable dim count,
// max element count, rank, type label, min/max values, followed by the
// current dimensions and the max dimensions of the column.
cw::rc_t cw::dataset::rdr::report( handle_t h )
{
  rdr_t*         rdr  = _handleToPtr(h);
  const unsigned colN = rdr->column_count;

  for(unsigned colIdx=0; colIdx<colN; ++colIdx)
  {
    const c_t& column = rdr->colA[ colIdx ];

    printf(" id:%5i vdN:%5i mxEleN:%5i rank:%3i %8s ",column.col.id,column.col.varDimN,column.col.maxEleN,column.col.rankN,_typeIdToLabel(column.col.typeId));

    printf(" min: ");
    variant::print(column.col.min);

    printf(" max: ");
    variant::print(column.col.max);

    // current dimensions
    printf(" | ");
    for(unsigned d=0; d<column.col.rankN; ++d)
      printf(" %i ",column.col.dimV[d]);

    // maximum dimensions
    printf(" | ");
    for(unsigned d=0; d<column.col.rankN; ++d)
      printf(" %i ",column.col.maxDimV[d]);

    printf(" \n ");
  }

  return kOkRC;
}
2020-10-30 13:40:39 +00:00
2020-12-15 20:32:22 +00:00
// Reader test driver.
//
// Reads the 'inFn' argument from 'cfg', opens the file with a reader, prints
// the column report and then prints columns 0 through 3 of every record as
// 'int' vectors until the reader stops returning kOkRC.
//
// NOTE(review): when the loop ends normally 'rc' holds kEofRC and that value is
// returned unchanged - confirm that callers treat kEofRC as success here.
cw::rc_t cw::dataset::rdr::test( const object_t* cfg )
{
  rc_t     rc   = kOkRC;
  char*    inFn = nullptr;
  handle_t h;

  if((rc = cfg->getv(" inFn ",inFn)) != kOkRC )
    return cwLogError(rc," rdr test failed. Argument parse failed. ");

  // The expanded path is released before returning, in the same way
  // that adapter::test() releases the result of expandPath().
  inFn = filesys::expandPath(inFn);

  if((rc = create(h,inFn)) != kOkRC )
  {
    rc = cwLogError(rc," rdr create failed. ");
  }
  else
  {
    const int*      v    = nullptr;
    unsigned        vN   = 0;
    const unsigned* dimV = nullptr;

    report(h);

    while((rc = read(h)) == kOkRC )
    {
      get(h,0,v,vN,dimV); vop::print(v,vN," %i "," c0: ");
      get(h,1,v,vN,dimV); vop::print(v,vN," %i "," c1: ");
      get(h,2,v,vN,dimV); vop::print(v,vN," %i "," c2: ");
      get(h,3,v,vN,dimV); vop::print(v,vN," %i "," c3: ");
    }

    // anything other than an end-of-file is a read failure
    if( rc != kEofRC )
      rc = cwLogError(kOpFailRC," The read operation failed. ");

    destroy(h);
  }

  mem::release(inFn);

  return rc;
}
2020-12-15 20:32:22 +00:00
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
namespace cw {
namespace dataset {
namespace adapter {
// Adapter column record: one reader column assigned to a field.
// Records are allocated in _assign_column() and chained on field_t.colL
// in the order in which they were assigned.
typedef struct col_str
{
const rdr : : col_t * col ; // Column description obtained from rdr::column_cfg()
bool oneHotFl ; // Convert this column to a one-hot vector
unsigned maxEleN ; // Max count of elements in the field buffer from this column (one-hot vector length when oneHotFl is set)
int oneHotMax ; // Max value in this column (only set for one-hot columns)
int oneHotMin ; // Min value in this column (only set for one-hot columns)
unsigned * batchDimV ; // batchDimV[ maxBatchN * col.rankN ] dim's of each record in the batch, or nullptr for fixed size columns or when dim tracking is disabled
struct col_str * link ; // Next column assigned to the same field or nullptr
} col_t ;
// Adapter field record: a group of reader columns which are translated to a
// common data type and concatenated into one buffer for each batch.
// Records are created by create_field() and chained on adapter_t.fieldL.
typedef struct field_str
{
unsigned id ; // Field Id
unsigned flags ; // Field flags
bool isFixedSizeFl ; // Do all columns in this field have a fixed size.
unsigned bytesPerEle ; // Size of each element in buf[] (determined by flags | k<DataType>fl)
unsigned bufMaxEleN ; // Max count of elements in one field record (sum of the column max element counts)
unsigned bufEleN ; // Current count of elements in buf[] for the entire batch.
unsigned bufMaxFieldByteN ; // Max. size in bytes of one field record.
unsigned bufByteN ; // Current count of bytes in buf.
std : : uint8_t * buf ; // buf[ bufMaxFieldByteN*maxBatchN ] allocated on the first call to _read_field()
unsigned * batchEleNV ; // batchEleN[ maxBatchN ] Count of ele's in each record of a batch.
col_t * colL ; // List of columns assigned to this field
colMap_t * * colMapM ; // colMapM[ maxBatchN ] row pointers into colMapA[] (only allocated when kTrackColDimFl is set)
colMap_t * colMapA ; // colMapA[ maxBatchN*columnN ] Storage for colMapM[]
struct field_str * link ; // Next field in the adapter field list or nullptr
} field_t ;
// Adapter object record: owns the source reader and the list of fields.
typedef struct adapter_str
{
unsigned maxBatchN ; // Max. possible value of batchN in a call to read().
unsigned batchN ; // Count of records returned in the last call to read().
rdr : : handle_t rdrH ; // Source data file
field_t * fieldL ; // List of field descriptions
unsigned state ; // Exception state (kPreInitState after create(), kInitState after the first read(), kEofState/kErrorState on failure)
} adapter_t ;
// Convert an adapter handle to a pointer to the underlying adapter record.
inline adapter_t* _handleToPtr( handle_t h )
{
  adapter_t* adapterPtr = handleToPtr<handle_t,adapter_t>(h);
  return adapterPtr;
}
// Release every field and column record owned by the adapter, destroy the
// source reader and finally release the adapter record itself.
rc_t _destroy( adapter_t* p )
{
  for(field_t* fld = p->fieldL; fld != nullptr; )
  {
    // capture the links before the records are released
    field_t* nextFld = fld->link;

    for(col_t* cur = fld->colL; cur != nullptr; )
    {
      col_t* nextCol = cur->link;

      // only var width columns may own a dim. tracking array
      if( cur->col->varDimN > 0 )
        mem::release(cur->batchDimV);

      mem::release(cur);
      cur = nextCol;
    }

    mem::release(fld->batchEleNV);
    mem::release(fld->buf);
    mem::release(fld->colMapM);
    mem::release(fld->colMapA);
    mem::release(fld);

    fld = nextFld;
  }

  rdr::destroy(p->rdrH);
  mem::release(p);

  return kOkRC;
}
// Locate the field record with id 'fieldId'.
// Logs an error and returns nullptr if no such field exists.
field_t* _fieldIdToRecd( adapter_t* p, unsigned fieldId )
{
  field_t* cur = p->fieldL;

  while( cur != nullptr )
  {
    if( cur->id == fieldId )
      return cur;

    cur = cur->link;
  }

  cwLogError(kInvalidArgRC," Invalid field id '%i'. ",fieldId);

  return nullptr;
}
// Validate that column 'c' can be one-hot encoded (integer valued min/max,
// rank 1 with a maximum dimension of 1), store the min and max values in the
// column record and return the one-hot vector length (max-min+1) in eleN_Ref.
rc_t _calc_one_hot_ele_count( col_t* c, unsigned& eleN_Ref )
{
  const rdr::col_t* src = c->col;

  if( !(variant::isInt(src->min) && variant::isInt(src->max)) )
    return cwLogError(kInvalidArgRC," One-hot columns must be integer valued. ");

  if( !(src->rankN == 1 && src->maxDimV[0] == 1) )
    return cwLogError(kInvalidArgRC," One-hot columns must be scalar integers. ");

  rc_t rc = variant::get(src->min,c->oneHotMin);
  if( rc != kOkRC )
    return cwLogError(rc," Unable to obtain the one-hot minimum value. ");

  rc = variant::get(src->max,c->oneHotMax);
  if( rc != kOkRC )
    return cwLogError(rc," Unable to obtain the maximum value. ");

  eleN_Ref = (c->oneHotMax - c->oneHotMin) + 1;

  return rc;
}
// Append the reader column labelled 'colLabel' to the column list of field 'f'.
//
// oneHotFl - request that the column be converted to a one-hot vector.
//
// The column record is fully validated before it is linked to the field.  If
// the column label is unknown, or the one-hot validation fails, the record is
// released, nothing is added to the field and an error is returned.
rc_t _assign_column( adapter_t* p, field_t* f, const char* colLabel, bool oneHotFl )
{
  rc_t   rc = kOkRC;
  col_t* c  = mem::allocZ<col_t>(1);

  if((c->col = rdr::column_cfg(p->rdrH,colLabel)) == nullptr )
    rc = kInvalidArgRC;
  else
  {
    c->oneHotFl = oneHotFl;

    // if one-hot encoding was requested then the column width is the one-hot vector length
    if( oneHotFl )
      rc = _calc_one_hot_ele_count(c,c->maxEleN);
    else
      c->maxEleN = c->col->maxEleN;
  }

  if( rc != kOkRC )
  {
    // the record was never linked to the field so it must be released here
    mem::release(c);
    return cwLogError(rc," '%s' Column assignment failed. ",cwStringNullGuard(colLabel));
  }

  // locate the last link in the column list
  col_t* c0 = f->colL;
  while( c0 != nullptr && c0->link != nullptr )
    c0 = c0->link;

  // add the new record to the end of the list
  if( c0 == nullptr )
    f->colL = c;
  else
    c0->link = c;

  // update the size of the field buffer to account for the column size
  // (_allocate_field_buffer() recomputes this sum from c->maxEleN)
  f->bufMaxEleN += c->maxEleN;

  // if this is a variable length column
  if( c->col->varDimN > 0 )
    f->isFixedSizeFl = false;

  // Fixed size columns keep a null batchDimV (the record was zero allocated).
  // Variable size columns get a [maxBatchN,rankN] matrix used to hold the
  // dim's of each returned data ele from this column.
  if( cwIsFlag(f->flags,kTrackColDimFl) && c->col->varDimN != 0 )
    c->batchDimV = mem::allocZ<unsigned>(p->maxBatchN * c->col->rankN);

  return rc;
}
// Allocate the data buffer of field 'f' and, when kTrackColDimFl is set, the
// per-batch column map.
//
// The buffer holds maxBatchN field records where the width of one field record
// is the sum of the max element counts of the columns assigned to the field.
rc_t _allocate_field_buffer( adapter_t* p, field_t* f )
{
  rc_t rc = kOkRC;

  f->bufMaxEleN = 0;

  // calc the field width as the sum of the max column widths
  unsigned colN = 0;
  for(col_t* c=f->colL; c!=nullptr; c=c->link)
  {
    f->bufMaxEleN += c->maxEleN;
    colN          += 1;
  }

  f->bufMaxFieldByteN = f->bufMaxEleN * f->bytesPerEle;
  f->buf              = mem::alloc<std::uint8_t>(p->maxBatchN * f->bufMaxFieldByteN);

  // if col. dim tracking is enabled for this field
  if( cwIsFlag(f->flags,kTrackColDimFl) )
  {
    // allocate the column dim tracking data structures
    f->colMapM = mem::allocZ<colMap_t*>(p->maxBatchN);
    f->colMapA = mem::allocZ<colMap_t>(p->maxBatchN * colN);

    // initialize the fixed portion of the col. tracking records
    for(unsigned i=0; i<p->maxBatchN; ++i)
    {
      f->colMapM[i] = f->colMapA + i*colN;

      // for batch index i for each column
      unsigned j = 0, eleOffs = 0;
      for(col_t* c=f->colL; c!=nullptr; c=c->link,++j)
      {
        colMap_t& cm = f->colMapM[i][j];

        cm.colId = c->col->id;
        cm.rankN = c->col->rankN;

        // if this is a fixed size field then the col. map can be completely populated in advance of reading the data
        // TODO: don't allocate the complete colMapA[] array because every colN records are duplicates anyway.
        // just point colMapM[] to a single row of colMapA[].
        //
        // NOTE(review): the condition below tests '!isFixedSizeFl', which is the
        // opposite of what the comment above describes.  No code visible in this
        // part of the file ever sets isFixedSizeFl to true, so this branch is the
        // one taken.  Confirm the intended sense together with create_field()
        // before changing it.
        if( !f->isFixedSizeFl )
        {
          // a one-hot column occupies maxEleN elements in the field record
          const unsigned eleN = c->oneHotFl ? c->maxEleN : c->col->eleN;

          cm.eleN           = eleN;
          cm.fieldEleOffset = eleOffs;
          cm.dimV           = c->col->dimV;

          // advance by the width actually occupied by this column so that the
          // offset of the following column is correct after a one-hot column
          eleOffs += eleN;
        }
        else
        {
          cm.dimV = c->batchDimV + (i * c->col->rankN);
        }
      }
    }
  }

  return rc;
}
// Write the one-hot encoding of the scalar src[0] into buf[] as a vector of
// type D.  The vector has (oneHotMax-oneHotMin+1) elements, all zero except
// for the element at index (src[0]-oneHotMin) which is set to 1.
// dstByteNRef is set to the count of bytes written, or 0 on failure.
template< typename S, typename D >
rc_t _translate_one_hot( std::uint8_t* buf, unsigned bufByteN, const S* src, unsigned srcEleN, const col_t* c, unsigned& dstByteNRef )
{
  dstByteNRef = 0;

  const unsigned vecEleN  = (c->oneHotMax - c->oneHotMin) + 1;
  const unsigned vecByteN = vecEleN * sizeof(D);

  // the one-hot vector must fit in the available buffer space
  if( vecByteN > bufByteN )
    return cwLogError(kBufTooSmallRC," The field buffer is too small (src:%i > buf:%i) during one - hot conversion . ",vecByteN,bufByteN);

  // only scalars can be one-hot encoded
  if( srcEleN != 1 )
    return cwLogError(kInvalidArgRC," One-hot encoded fields must be scalars. (srcEleN:%i) ",srcEleN);

  const unsigned hotIdx = src[0] - c->oneHotMin;

  if( hotIdx >= vecEleN )
    return cwLogError(kInvalidArgRC," The one-hot index (%i) is out of the one - hot vector size : % i . ",hotIdx,vecEleN);

  // clear the vector and then set the single 'hot' element
  D* vec = reinterpret_cast<D*>(buf);
  memset(buf,0,vecByteN);
  vec[ hotIdx ] = 1;

  dstByteNRef = vecByteN;

  return kOkRC;
}
// Copy srcEleN elements of type S from src[] into buf[] converting each one to
// type D.  One-hot columns are forwarded to _translate_one_hot().
// dstByteNRef is set to the count of bytes written, or 0 on failure.
template< typename S, typename D >
rc_t _translate_datatype( const col_t* c, std::uint8_t* buf, unsigned bufByteN, const S* src, unsigned srcEleN, unsigned& dstByteNRef )
{
  if( c->oneHotFl )
    return _translate_one_hot<S,D>(buf,bufByteN,src,srcEleN,c,dstByteNRef);

  dstByteNRef = 0;

  const unsigned needByteN = srcEleN * sizeof(D);

  if( needByteN > bufByteN )
    return cwLogError(kBufTooSmallRC," The field buffer is too small (src:%i > buf:%i) . ",needByteN,bufByteN);

  // copy, and translate, the rdr::col into the field->buf[]
  D*       out = reinterpret_cast<D*>(buf);
  const S* in  = src;
  for(unsigned remainN=srcEleN; remainN>0; --remainN)
    *out++ = *in++;

  dstByteNRef = needByteN;

  return kOkRC;
}
// Get the data of column 'c' from the reader as type T and translate it into
// buf[] using the data type selected by the type bits of the field flags.
template< typename T >
rc_t _translate_column_tpl( adapter_t* p, field_t* f, col_t* c, std::uint8_t* buf, unsigned bufN, unsigned& dstByteNRef )
{
  const T*        srcV   = nullptr;
  unsigned        srcN   = 0;
  const unsigned* srcDim = nullptr;

  // read the column
  rc_t rc = rdr::get(p->rdrH,c->col->id,srcV,srcN,srcDim);
  if( rc != kOkRC )
    return rc;

  // dispatch on the destination (field) data type
  const unsigned dstTypeFl = f->flags & kTypeMask;

  if( dstTypeFl == kIntFl )
    rc = _translate_datatype<T,int>(c,buf,bufN,srcV,srcN,dstByteNRef);
  else if( dstTypeFl == kFloatFl )
    rc = _translate_datatype<T,float>(c,buf,bufN,srcV,srcN,dstByteNRef);
  else if( dstTypeFl == kDoubleFl )
    rc = _translate_datatype<T,double>(c,buf,bufN,srcV,srcN,dstByteNRef);
  else
    assert(0);

  return rc;
}
// Translate column 'c' of the current reader record into buf[] by dispatching
// on the source (reader) column data type.
rc_t _translate_column( adapter_t* p, field_t* f, col_t* c, std::uint8_t* buf, unsigned bufN, unsigned& dstByteNRef )
{
  rc_t       rc        = kOkRC;
  const auto srcTypeId = c->col->typeId;

  if( srcTypeId == rdr::kIntRdrFl )
    rc = _translate_column_tpl<int>(p,f,c,buf,bufN,dstByteNRef);
  else if( srcTypeId == rdr::kFloatRdrFl )
    rc = _translate_column_tpl<float>(p,f,c,buf,bufN,dstByteNRef);
  else if( srcTypeId == rdr::kDoubleRdrFl )
    rc = _translate_column_tpl<double>(p,f,c,buf,bufN,dstByteNRef);
  else
    assert(0);

  return rc;
}
// Translate every column of field 'f' from the current reader record and
// append the result to f->buf[] starting at byte offset f->bufByteN.
// byteNRef is set to the count of bytes written for this field record.
// The caller is responsible for advancing f->bufByteN.
rc_t _read_field( adapter_t* p, unsigned batchIdx, field_t* f, unsigned& byteNRef )
{
  byteNRef = 0;

  // on the first use the buffer will not yet be allocated
  if( f->buf == nullptr )
  {
    const rc_t allocRC = _allocate_field_buffer(p,f);
    if( allocRC != kOkRC )
      return allocRC;
  }

  unsigned remainByteN = f->bufMaxFieldByteN;  // space left in this field record
  unsigned usedByteN   = 0;                    // bytes written to this field record

  // for each column of this field
  for(col_t* c=f->colL; c!=nullptr; c=c->link)
  {
    unsigned      colByteN = 0;
    std::uint8_t* dst      = f->buf + f->bufByteN + usedByteN;

    // translate each source column into the field buffer
    const rc_t xlateRC = _translate_column(p,f,c,dst,remainByteN,colByteN);
    if( xlateRC != kOkRC )
      return xlateRC;

    assert( remainByteN >= colByteN );

    remainByteN -= colByteN;
    usedByteN   += colByteN;

    // if column dim. tracking is enabled and this is a variable width column ...
    if( cwIsFlag(f->flags,kTrackColDimFl) && c->col->varDimN > 0 )
    {
      // ... get the dim's of this column
      const unsigned rowOffs = batchIdx * c->col->rankN;
      for(unsigned d=0; d<c->col->rankN; ++d)
        c->batchDimV[ rowOffs + d ] = c->col->dimV[d];
    }
  }

  byteNRef = usedByteN;

  return kOkRC;
}
// Return the batch buffer of field 'fieldId' as a vector of type T.
//
// vV - set to the start of the field buffer.
// nV - set to the vector holding the element count of each record in the batch.
//
// Fails if the adapter is not in kInitState, if the field id is unknown or if
// the field buffer has not yet been allocated by a call to read().
// The output references are only assigned on success.
template< typename T >
cw::rc_t _get( handle_t h, unsigned fieldId, const T*& vV, const unsigned*& nV )
{
  rc_t       rc = kOkRC;
  adapter_t* p  = _handleToPtr(h);
  field_t*   f;

  if( p->state != kInitState )
    return cwLogError(kInvalidStateRC," get() failed The adapter is in an invalid state ( % i ! = % i ) . ",p->state,kInitState);

  if((f = _fieldIdToRecd(p,fieldId)) == nullptr )
    return kInvalidArgRC;

  // the field buffer is allocated the first time the field is read
  if( f->buf == nullptr )
    return cwLogError(kInvalidStateRC," read() must be called before get ( ) . ");

  vV = reinterpret_cast<const T*>(f->buf);
  nV = f->batchEleNV;

  return rc;
}
// Print one field record of a batch column by column.  Each column is
// prefixed with its label and the element count taken from the column map of
// batch record 'batchIdx'.  The values are consumed sequentially from v[].
template< typename T >
cw::rc_t _print_field( adapter_t* p, field_t* f, const char* fmt, unsigned batchIdx, const T* v, unsigned vN )
{
  const T*        nextVal = v;
  const colMap_t* colMap  = f->colMapM[ batchIdx ];

  for(col_t* c=f->colL; c!=nullptr; c=c->link)
  {
    printf(" | %s %i : ",c->col->label,colMap->eleN);

    for(unsigned eleIdx=0; eleIdx<colMap->eleN; ++eleIdx)
      printf(fmt,*nextVal++);

    ++colMap;
  }

  return kOkRC;
}
// Print every record of the most recently read batch of field 'f'.
// When column dim tracking is enabled the record is printed column by column,
// otherwise the record is printed as a flat list of values.
template< typename T >
cw::rc_t _print_field( adapter_t* p, field_t* f, const char* fmt )
{
  rc_t rc = kOkRC;

  printf(" Field:%3i \n ",f->id);

  T*       base    = reinterpret_cast<T*>(f->buf);
  unsigned eleOffs = 0;   // offset, in elements, of the current record in buf[]

  for(unsigned recIdx=0; recIdx<p->batchN; ++recIdx)
  {
    printf(" %i : ",recIdx);

    T*       recV = base + eleOffs;
    unsigned recN = f->batchEleNV[ recIdx ];

    if( cwIsFlag(f->flags,kTrackColDimFl) )
    {
      rc = _print_field(p,f,fmt,recIdx,recV,recN);
    }
    else
    {
      for(unsigned eleIdx=0; eleIdx<recN; ++eleIdx)
        printf(fmt,recV[eleIdx]);
    }

    eleOffs += recN;

    printf(" \n ");
  }

  return rc;
}
}
}
}
// Create an adapter on the dataset file 'fn'.
// Any adapter already referenced by 'hRef' is destroyed first.  'maxBatchN' is
// stored as the largest batch size which may be passed to read().
cw::rc_t cw::dataset::adapter::create( handle_t& hRef, const char* fn, unsigned maxBatchN )
{
  rc_t rc = destroy(hRef);

  if( rc != kOkRC )
    return rc;

  adapter_t* adapterPtr = mem::allocZ<adapter_t>(1);

  rc = rdr::create(adapterPtr->rdrH,fn);

  if( rc == kOkRC )
  {
    adapterPtr->maxBatchN = maxBatchN;
    adapterPtr->state     = kPreInitState;
    hRef.set(adapterPtr);
  }
  else
  {
    // the reader could not be created - release the partial adapter
    _destroy(adapterPtr);
  }

  return rc;
}
// Release the adapter referenced by 'hRef' and clear the handle.
// An invalid handle is not an error; the call simply returns kOkRC.
cw::rc_t cw::dataset::adapter::destroy( handle_t& hRef )
{
  if( hRef.isValid() )
  {
    const rc_t destroyRC = _destroy(_handleToPtr(hRef));

    // leave the handle untouched if the release failed
    if( destroyRC != kOkRC )
      return destroyRC;

    hRef.clear();
  }

  return kOkRC;
}
// Create a new field with id 'fieldId' and add it to the adapter.
//
// flags    - field flags; the bits selected by kTypeMask must be one of
//            kIntFl, kFloatFl or kDoubleFl and set the field element size.
// colLabel - optional label of the first column to assign to the field.
// oneHotFl - request one-hot encoding of the column named by 'colLabel'.
//
// The data type is validated before the field is allocated so that an invalid
// type is reported to the caller and no field with a zero element size is
// added to the adapter.
cw::rc_t cw::dataset::adapter::create_field( handle_t h, unsigned fieldId, unsigned flags, const char* colLabel, bool oneHotFl )
{
  adapter_t*     p           = _handleToPtr(h);
  const unsigned typeFlags   = flags & kTypeMask;
  unsigned       bytesPerEle = 0;

  switch( typeFlags )
  {
    case kIntFl:    bytesPerEle = sizeof(int);    break;
    case kFloatFl:  bytesPerEle = sizeof(float);  break;
    case kDoubleFl: bytesPerEle = sizeof(double); break;
    default:
      return cwLogError(kInvalidArgRC," The field data type value 0x%x is not valid. ",typeFlags);
  }

  field_t* f = mem::allocZ<field_t>(1);

  f->id          = fieldId;
  f->flags       = flags;
  f->bytesPerEle = bytesPerEle;
  f->batchEleNV  = mem::alloc<unsigned>(p->maxBatchN);

  // insert the new field at the head of the field list
  f->link   = p->fieldL;
  p->fieldL = f;

  if( colLabel != nullptr )
    return _assign_column(p,f,colLabel,oneHotFl);

  return kOkRC;
}
// Assign the reader column labelled 'colLabel' to the existing field 'fieldId'.
// Returns kInvalidArgRC if either the column or the field cannot be found.
cw::rc_t cw::dataset::adapter::assign_column( handle_t h, unsigned fieldId, const char* colLabel, bool oneHotFl )
{
  adapter_t* adapterPtr = _handleToPtr(h);

  // the column must exist in the source file
  const rdr::col_t* srcCol = rdr::column_cfg(adapterPtr->rdrH,colLabel);
  if( srcCol == nullptr )
    return kInvalidArgRC;

  // the field must already have been created
  field_t* fld = _fieldIdToRecd(adapterPtr,fieldId);
  if( fld == nullptr )
    return kInvalidArgRC;

  return _assign_column(adapterPtr,fld,colLabel,oneHotFl);
}
unsigned cw : : dataset : : adapter : : record_count ( handle_t h )
{
adapter_t * p = _handleToPtr ( h ) ;
return rdr : : record_count ( p - > rdrH ) ;
}
unsigned cw : : dataset : : adapter : : field_fixed_ele_count ( handle_t h , unsigned fieldId )
{
adapter_t * p = _handleToPtr ( h ) ;
field_t * f ;
if ( ( f = _fieldIdToRecd ( p , fieldId ) ) = = nullptr )
return 0 ;
return f - > bufEleN ; ;
}
// Read a batch of 'batchN' records and translate them into the field buffers.
//
// batchN     - count of records to read; must not exceed the max batch count.
// recordIdxV - optional vector of batchN record indexes.  When it is nullptr
//              the records are read sequentially from the current position.
//
// p->batchN holds the count of records which were completely translated.
// If the source reader reports the end of the file then kEofRC is returned and
// the adapter state is left as kEofState.  Any other failure sets the adapter
// state to kErrorState.
cw::rc_t cw::dataset::adapter::read( handle_t h, unsigned batchN, const unsigned* recordIdxV )
{
  rc_t       rc = kOkRC;
  adapter_t* p  = _handleToPtr(h);

  switch( p->state )
  {
    case kInitState:
      break;

    case kPreInitState:
      p->state = kInitState;
      break;

    default:
      return cwLogError(kInvalidStateRC," Invalid adapter state (%i != %i) . ",p->state,kInitState);
  }

  if( batchN > p->maxBatchN )
    return cwLogError(kInvalidArgRC," The batch count:%i is greater than the max batch count:%i. ",batchN,p->maxBatchN);

  p->batchN = 0;

  // for each record in this batch
  for(unsigned i=0; i<batchN; ++i)
  {
    // read the data record
    if((rc = rdr::read(p->rdrH, recordIdxV==nullptr ? kInvalidIdx : recordIdxV[i])) != kOkRC )
    {
      if( rc == kEofRC )
        p->state = kEofState;

      goto errLabel;
    }

    // translate each field
    for(field_t* f=p->fieldL; f!=nullptr; f=f->link)
    {
      unsigned fieldByteN = 0;

      // the field buffers are refilled from the start on the first record of each batch
      if( i == 0 )
      {
        f->bufEleN  = 0;
        f->bufByteN = 0;
      }

      // read the field into f->buf[]
      if((rc = _read_field(p,i,f,fieldByteN)) != kOkRC )
      {
        rc = cwLogError(rc," Field (id:%i) read failed. ",f->id);
        goto errLabel;
      }

      assert( fieldByteN % f->bytesPerEle == 0 );

      // update the buffer state
      unsigned fieldEleN = fieldByteN / f->bytesPerEle;
      f->bufEleN       += fieldEleN;
      f->bufByteN      += fieldByteN;
      f->batchEleNV[i]  = fieldEleN;
    }

    p->batchN += 1;
  }

 errLabel:
  // An end-of-file has already been recorded as kEofState and must not be
  // overwritten; only genuine failures move the adapter to the error state.
  if( rc != kOkRC && rc != kEofRC )
    p->state = kErrorState;

  return rc;
}
// Get the batch buffer of field 'fieldId' as a vector of 'int'.
cw::rc_t cw::dataset::adapter::get( handle_t h, unsigned fieldId, const int*& vV, const unsigned*& nV )
{
  const rc_t getRC = _get<int>(h,fieldId,vV,nV);
  return getRC;
}
// Get the batch buffer of field 'fieldId' as a vector of 'float'.
cw::rc_t cw::dataset::adapter::get( handle_t h, unsigned fieldId, const float*& vV, const unsigned*& nV )
{
  const rc_t getRC = _get<float>(h,fieldId,vV,nV);
  return getRC;
}
// Get the batch buffer of field 'fieldId' as a vector of 'double'.
cw::rc_t cw::dataset::adapter::get( handle_t h, unsigned fieldId, const double*& vV, const unsigned*& nV )
{
  const rc_t getRC = _get<double>(h,fieldId,vV,nV);
  return getRC;
}
// Return the column map of field 'fieldId' in colMapV_Ref.
// The adapter must be in kInitState and the field must exist.
cw::rc_t cw::dataset::adapter::column_map( handle_t h, unsigned fieldId, colMap_t const* const*& colMapV_Ref )
{
  adapter_t* adapterPtr = _handleToPtr(h);

  if( adapterPtr->state != kInitState )
    return cwLogError(kInvalidStateRC," Invalid adapter state (%i != %i) . ",adapterPtr->state,kInitState);

  field_t* fld = _fieldIdToRecd(adapterPtr,fieldId);

  if( fld == nullptr )
    return kInvalidArgRC;

  colMapV_Ref = fld->colMapM;

  return kOkRC;
}
unsigned cw : : dataset : : adapter : : state ( handle_t h )
{
adapter_t * p = _handleToPtr ( h ) ;
return p - > state ;
}
// Print the most recently read batch of field 'fieldId'.
// 'fmt' is the printf() format used for each value; when it is nullptr a
// default format is selected according to the field data type.
cw::rc_t cw::dataset::adapter::print_field( handle_t h, unsigned fieldId, const char* fmt )
{
  adapter_t* adapterPtr = _handleToPtr(h);
  field_t*   fld        = _fieldIdToRecd(adapterPtr,fieldId);

  if( fld == nullptr )
    return cwLogError(kInvalidArgRC," Invalid field id (%i) . ",fieldId);

  // dispatch on the field data type
  const unsigned typeFl = fld->flags & kTypeMask;

  if( typeFl == kIntFl )
    return _print_field<int>(adapterPtr,fld, fmt != nullptr ? fmt : " %i ");

  if( typeFl == kFloatFl )
    return _print_field<float>(adapterPtr,fld, fmt != nullptr ? fmt : " %f ");

  if( typeFl == kDoubleFl )
    return _print_field<double>(adapterPtr,fld, fmt != nullptr ? fmt : " %f ");

  return cwLogError(kInvalidArgRC," Unknown type flag: 0x%x. ",typeFl);
}
// Adapter test driver.
//
// Reads the 'inFn' and 'batchN' arguments from 'cfg', creates an adapter with
// an 'int' one-hot field (col0) and a 'float' field (col1,col2,col3), reads
// one batch using a fixed list of record indexes and prints the field data.
cw::rc_t cw::dataset::adapter::test( const object_t* cfg )
{
  rc_t     rc     = kOkRC;
  char*    inFn   = nullptr;
  unsigned batchN = 0;
  handle_t h;

  enum {
    kField0Id = 0,
    kField1Id = 1
  };

  // read the cfg args
  if((rc = cfg->getv(" inFn ",inFn," batchN ",batchN)) != kOkRC )
    return cwLogError(rc," adapter test failed. Argument parse failed. ");

  inFn = filesys::expandPath(inFn);

  // create the adapter
  if((rc = create(h,inFn,batchN)) != kOkRC )
  {
    rc = cwLogError(rc," Unable to create dataset adapter for '%s'. ",inFn);
    goto errLabel;
  }
  else
  {
    const int*      xV         = nullptr;
    const float*    yV         = nullptr;
    const unsigned* xNV        = nullptr;
    const unsigned* yNV        = nullptr;
    unsigned        recdIdxV[] = { 2, 1, 0 };

    if((rc = create_field(h,kField0Id,kIntFl | kTrackColDimFl," col0 ",true)) != kOkRC )
      goto errLabel;

    if((rc = create_field(h,kField1Id,kFloatFl | kTrackColDimFl," col1 ")) != kOkRC )
      goto errLabel;

    if((rc = assign_column(h,kField1Id," col2 ")) != kOkRC )
      goto errLabel;

    if((rc = assign_column(h,kField1Id," col3 ")) != kOkRC )
      goto errLabel;

    // read() indexes recdIdxV[] with every batch index, so the batch count
    // must be validated at run time (an assert() is compiled out under NDEBUG).
    if( batchN != cwCountOf(recdIdxV) )
    {
      rc = cwLogError(kInvalidArgRC,"The 'batchN' argument (%i) must equal the count of test record indexes (%i).",batchN,static_cast<unsigned>(cwCountOf(recdIdxV)));
      goto errLabel;
    }

    if((rc = read(h,batchN,recdIdxV)) != kOkRC )
      goto errLabel;

    if((rc = get(h,kField0Id,xV,xNV)) != kOkRC )
      goto errLabel;

    if((rc = get(h,kField1Id,yV,yNV)) != kOkRC )
      goto errLabel;

    // n0 and n1 are the running element offsets of record i in xV[] and yV[]
    for(unsigned i=0,n0=0,n1=0; i<batchN; ++i)
    {
      for(unsigned j=0; j<xNV[i]; ++j)
        printf(" %i ",xV[n0+j]);
      n0 += xNV[i];

      printf(" : ");

      for(unsigned j=0; j<yNV[i]; ++j)
        printf(" %f ",yV[n1+j]);
      n1 += yNV[i];   // accumulate the offset, as is done for n0

      printf(" \n ");
    }

    print_field(h,kField0Id);
    print_field(h,kField1Id);
  }

 errLabel:
  destroy(h);
  mem::release(inFn);

  return rc;
}
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
namespace cw
{
namespace dataset
{
namespace mnist
{
// MNIST dataset object record.  The training, validation and test files are
// loaded by create() into a single data matrix and a single label vector.
typedef struct mnist_str
{
char * trainFn ; // Training file name (built by create(), released by _destroy())
char * testFn ; // Test file name
char * validFn ; // Validation file name
unsigned * data_dimV ; // NOTE(review): not referenced by the code visible in this part of the file
unsigned * label_dimV ; // NOTE(review): not referenced by the code visible in this part of the file
unsigned exampleN ; // Total count of examples (train + valid + test)
float * dataM ; // dataM[ kPixN * exampleN ] pixel data, kPixN values per example
unsigned * labelV ; // labelV[ exampleN ] one label per example
unsigned kPixN ; // Count of pixels per example (set to 784 in create())
unsigned curIdx ; // Current example index, set by seek()
} mnist_t ;
// Convert an MNIST handle to a pointer to the underlying object record.
inline mnist_t* _handleToPtr( handle_t h )
{
  mnist_t* mnistPtr = handleToPtr<handle_t,mnist_t>(h);
  return mnistPtr;
}
// Release the file names, the data matrix, the label vector and finally the
// object record itself.
rc_t _destroy( mnist_t* p )
{
  // file names
  mem::release(p->trainFn);
  mem::release(p->validFn);
  mem::release(p->testFn);

  // example data
  mem::release(p->dataM);
  mem::release(p->labelV);

  mem::release(p);

  return kOkRC;
}
// Read the example count stored at the start of the MNIST file 'fn'.
//
// nRef - set to the example count read from the file.
//
// Once the file has been opened it is closed on every path, including when the
// read fails.  A close failure is only reported when no earlier error occurred.
rc_t _read_file_record_count( const char* fn, unsigned& nRef )
{
  rc_t           rc;
  file::handle_t fH;

  // open the file
  if((rc = file::open(fH,fn,file::kReadFl | file::kBinaryFl)) != kOkRC )
    return cwLogError(rc," MNIST file open failed on '%s'. ",cwStringNullGuard(fn));

  // read the count of examples
  if((rc = read(fH,nRef)) != kOkRC )
    rc = cwLogError(rc," Unable to read MNIST example count. ");

  // close file
  const rc_t closeRC = file::close(fH);

  if( closeRC != kOkRC && rc == kOkRC )
    rc = cwLogError(closeRC," MNIST file close failed on '%s'. ",cwStringNullGuard(fn));

  return rc;
}
// Load the MNIST file 'fn' into dataM[] and labelV[].
//
// n      - count of examples the caller allocated space for.
// dataM  - destination for n * p->kPixN pixel values.
// labelV - destination for n labels.
//
// The example count stored in the file must equal 'n'.  This is checked at run
// time (an assert() is compiled out when NDEBUG is defined) because a larger
// count in the file would write past the end of the destination arrays.
rc_t _read_file( mnist_t* p, const char* fn, unsigned n, float* dataM, unsigned* labelV )
{
  file::handle_t fH;
  rc_t           rc       = kOkRC;
  unsigned       exampleN = 0;

  // open the file
  if((rc = file::open(fH,fn,file::kReadFl | file::kBinaryFl)) != kOkRC )
  {
    rc = cwLogError(rc," MNIST file open failed on '%s'. ",cwStringNullGuard(fn));
    goto errLabel;
  }

  // read the count of examples
  if((rc = read(fH,exampleN)) != kOkRC )
  {
    rc = cwLogError(rc," Unable to read MNIST example count. ");
    goto errLabel;
  }

  // the file must hold exactly as many examples as the caller expects
  if( exampleN != n )
  {
    rc = cwLogError(kOpFailRC,"The MNIST example count (%i) does not match the expected example count (%i).",exampleN,n);
    goto errLabel;
  }

  // read each example
  for(unsigned i=0; i<exampleN; ++i)
  {
    // read the digit image label
    if((rc = read(fH,labelV[i])) != kOkRC )
    {
      rc = cwLogError(rc," Unable to read MNIST label on example %i. ",i);
      goto errLabel;
    }

    // read the image pixels
    if((rc = readFloat(fH,dataM + i*p->kPixN,p->kPixN)) != kOkRC )
    {
      rc = cwLogError(rc," Unable to read MNIST data vector on example %i. ",i);
      goto errLabel;
    }
  }

 errLabel:
  if( rc != kOkRC )
    rc = cwLogError(rc," Load failed on MNIST file %s. ",cwStringNullGuard(fn));

  file::close(fH);

  return rc;
}
}
}
}
// Create an MNIST dataset object from the files 'mnist_train.bin',
// 'mnist_valid.bin' and 'mnist_test.bin' located in the directory 'dir'.
//
// Any object already referenced by 'h' is destroyed first.  The example count
// of each file is read to size the data matrix and label vector, then the
// three files are loaded back to back in the order: train, valid, test.
// 'h' is only assigned on success; on failure the partial object is released.
cw::rc_t cw::dataset::mnist::create( handle_t& h, const char* dir )
{
  rc_t     rc;
  mnist_t* p      = nullptr;
  unsigned trainN = 0;
  unsigned validN = 0;
  unsigned testN  = 0;

  if((rc = destroy(h)) != kOkRC )
    return rc;

  char* inDir = filesys::expandPath(dir);

  // allocate the object
  p = mem::allocZ<mnist_t>(1);

  p->kPixN   = 784;
  p->trainFn = filesys::makeFn(inDir," mnist_train "," .bin ",NULL);
  p->validFn = filesys::makeFn(inDir," mnist_valid "," .bin ",NULL);
  p->testFn  = filesys::makeFn(inDir," mnist_test "," .bin ",NULL);

  // the expanded directory is only needed to form the file names
  mem::release(inDir);

  // Read the example count of each file.  A failure here means the total
  // example count is unknown so the buffers cannot be sized correctly.
  if((rc = _read_file_record_count(p->trainFn,trainN)) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST training set example count read failed.");
    goto errLabel;
  }
  p->exampleN += trainN;

  if((rc = _read_file_record_count(p->validFn,validN)) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST validation set example count read failed.");
    goto errLabel;
  }
  p->exampleN += validN;

  if((rc = _read_file_record_count(p->testFn,testN)) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST test set example count read failed.");
    goto errLabel;
  }
  p->exampleN += testN;

  // allocate the data memory
  p->dataM  = mem::alloc<float>(p->kPixN * p->exampleN);
  p->labelV = mem::alloc<unsigned>(p->exampleN);

  // read the training data
  if((rc = _read_file(p,p->trainFn,trainN,p->dataM,p->labelV)) != kOkRC )
  {
    rc = cwLogError(rc," MNIST training set load failed. ");
    goto errLabel;
  }

  // read the validation data
  if((rc = _read_file(p,p->validFn,validN,p->dataM + p->kPixN * trainN,p->labelV + trainN)) != kOkRC )
  {
    rc = cwLogError(rc," MNIST validation set load failed. ");
    goto errLabel;
  }

  // read the testing data
  if((rc = _read_file(p,p->testFn,testN,p->dataM + p->kPixN * (trainN + validN),p->labelV + (trainN + validN))) != kOkRC )
  {
    rc = cwLogError(rc," MNIST test set load failed. ");
    goto errLabel;
  }

  h.set(p);

 errLabel:
  if( rc != kOkRC )
    _destroy(p);

  return rc;
}
// Release the MNIST object attached to 'h'.
//
// An invalid handle is accepted and reported as success. The handle is only cleared
// when _destroy() succeeds; when it fails its result code is returned and the handle
// is left unchanged.
cw::rc_t cw::dataset::mnist::destroy( handle_t& h )
{
  // nothing is attached - nothing to release
  if( !h.isValid() )
    return kOkRC;

  rc_t result = _destroy( _handleToPtr(h) );

  if( result == kOkRC )
    h.clear();

  return result;
}
unsigned cw : : dataset : : mnist : : record_count ( handle_t h )
{
mnist_t * p = _handleToPtr ( h ) ;
return p - > exampleN ;
}
// Set the current example index used by cursor based reads.
//
// Any index up to and including the example count is accepted. An index beyond that
// logs a kSeekFailRC error and leaves the current index unchanged.
cw::rc_t cw::dataset::mnist::seek( handle_t h, unsigned exampleIdx )
{
  mnist_t* p = _handleToPtr(h);

  // reject positions past the end of the data
  if( exampleIdx > p->exampleN )
    return cwLogError(kSeekFailRC," Illegal seek index. Seek failed. ");

  p->curIdx = exampleIdx;

  return kOkRC;
}
// Get read-only access to a run of consecutive examples.
//
// No data is copied: the returned pointers refer directly to the object's own
// storage.
//
//  h                  : MNIST dataset handle.
//  dataM_Ref          : Set to the first requested example. Each example occupies
//                       kPixN consecutive floats.
//  labelV_Ref         : Set to the label of the first requested example.
//  exampleN           : Count of examples requested.
//  actualExampleN_Ref : Count of examples actually available through the returned
//                       pointers. It is less than 'exampleN' when the request runs
//                       past the end of the data, and 0 when kEofRC is returned.
//  exampleIdx         : Index of the first example, or kInvalidIdx to begin at the
//                       current index.
//
// Returns kEofRC, leaving 'dataM_Ref' and 'labelV_Ref' unmodified, when the starting
// index is at or beyond the end of the data. Otherwise returns kOkRC.
cw::rc_t cw::dataset::mnist::dataM( handle_t h, const float*& dataM_Ref, const unsigned*& labelV_Ref, unsigned exampleN, unsigned& actualExampleN_Ref, unsigned exampleIdx )
{
  rc_t     rc = kOkRC;
  mnist_t* p  = _handleToPtr(h);

  if( exampleIdx == kInvalidIdx )
    exampleIdx = p->curIdx;

  if( exampleIdx >= p->exampleN )
  {
    // give the caller a defined count even if the result code is not examined
    actualExampleN_Ref = 0;
    return kEofRC;
  }

  // Clamp the request to the examples remaining. The comparison is made against the
  // remaining count, rather than testing 'exampleIdx + exampleN', because that sum
  // wraps for a very large 'exampleN' and the clamp would then be skipped.
  const unsigned availN = p->exampleN - exampleIdx;

  if( exampleN > availN )
    exampleN = availN;

  dataM_Ref          = p->dataM  + exampleIdx * p->kPixN;
  labelV_Ref         = p->labelV + exampleIdx;
  actualExampleN_Ref = exampleN;

  // NOTE(review): the current index is advanced by the count returned even when an
  // explicit 'exampleIdx' was given - confirm whether the intent is to reposition it
  // to 'exampleIdx + exampleN' instead.
  p->curIdx += exampleN;

  return rc;
}
// Write every example of the MNIST dataset to the dataset file 'fn'.
//
// Two columns are defined on the writer: one holding the digit label as a single
// 'int' and one holding the image as a 28x28 block of floats. The examples are
// fetched from the dataset in runs of at most 100 and written one record at a time.
//
//  h  : MNIST dataset handle. The examples are fetched with dataM(), which advances
//       the dataset's current index.
//  fn : Name of the output file.
//
// Returns kOkRC on success. A failure reported while the writer is being destroyed
// is returned when no earlier error occurred, so that an output file which could not
// be finalized is not reported as a success.
cw::rc_t cw::dataset::mnist::write( handle_t h, const char* fn )
{
  rc_t          rc        = kOkRC;
  rc_t          destroyRC = kOkRC;
  unsigned      recdN     = record_count(h);
  wtr::handle_t wtrH;

  if((rc = wtr::create(wtrH,fn)) != kOkRC )
    return cwLogError(rc," Dataset wtr create failed. ");

  enum { kImagId, kNumbId };

  unsigned numbDimV[] = { 1 };
  unsigned imagDimV[] = { 28, 28 };
  unsigned imagEleN   = imagDimV[0] * imagDimV[1];

  if((rc = define_columns( wtrH, " numb ", kNumbId, cwCountOf(numbDimV), numbDimV )) != kOkRC )
    goto errLabel;

  if((rc = define_columns( wtrH, " imag ", kImagId, cwCountOf(imagDimV), imagDimV )) != kOkRC )
    goto errLabel;

  printf(" recdN: %i \n ",recdN);

  for(unsigned i=0; i<recdN; )
  {
    const float*    imagM      = nullptr;
    const unsigned* numbV      = nullptr;
    unsigned        cacheRecdN = std::min(100u,recdN-i);
    unsigned        actRecdN   = 0;

    // get access to the next run of examples
    if((rc = dataM(h, imagM, numbV, cacheRecdN, actRecdN, i )) != kOkRC )
    {
      cwLogError(rc," Extract image data failed. ");
      goto errLabel;
    }

    for(unsigned j=0; j<actRecdN; ++j)
    {
      // write the digit this imag represents as an 'int'.
      if((rc = wtr::write( wtrH, kNumbId, ((int*)numbV) + j, 1 )) != kOkRC )
        goto errLabel;

      // write the image data as 'floats'
      if((rc = wtr::write( wtrH, kImagId, imagM + j*imagEleN, imagEleN )) != kOkRC )
        goto errLabel;

      if((rc = wtr::write_record( wtrH )) != kOkRC )
        goto errLabel;
    }

    i += actRecdN;
  }

 errLabel:
  if( rc != kOkRC )
    cwLogError(rc," MNIST data file write failed. ");

  // Always release the writer. Its result is only promoted to the return value when
  // no earlier error exists, so the first failure is the one reported.
  // NOTE(review): assumes wtr::destroy() returns rc_t - confirm against its declaration.
  if((destroyRC = wtr::destroy(wtrH)) != kOkRC && rc == kOkRC )
    rc = cwLogError(destroyRC,"MNIST data file writer release failed.");

  return rc;
}
// Test driver for the native MNIST object.
//
// Reads the input directory and the output file name from 'cfg', loads the dataset,
// prints the labels of up to the first 10 examples, and renders those images to an
// SVG based output file.
//
//  cfg : Configuration object holding the input directory and output file name.
//
// Returns the first error encountered. The result of destroying the dataset is only
// returned when every earlier step succeeded.
cw::rc_t cw::dataset::mnist::test( const object_t* cfg )
{
  handle_t h;
  rc_t     rc        = kOkRC;
  char*    inDir     = nullptr;
  char*    outHtmlFn = nullptr;

  if((rc = cfg->getv(" inDir ",inDir," outHtmlFn ",outHtmlFn)) != kOkRC )
    return cwLogError(rc," MNIST test failed. Argument parse failed. ");

  // expandPath() returns new strings which are released at the end of this function
  inDir     = filesys::expandPath(inDir);
  outHtmlFn = filesys::expandPath(outHtmlFn);

  if((rc = create(h,inDir)) == kOkRC )
  {
    svg::handle_t svgH;

    if((rc = svg::create(svgH)) != kOkRC )
      rc = cwLogError(rc," SVG Test failed on create. ");
    else
    {
      //const mtx::f_t* m = train(h);
      /*
      unsigned zn = 0;
      unsigned i = 1;
      for(; i<m->dimV[1]; ++i)
      {
        const float* v0 = m->base + (28*28+1) * (i-1) + 1;
        const float* v1 = m->base + (28*28+1) * (i-0) + 1;
        float d = 0;

        for(unsigned j=0; j<28*28; ++j)
          d += fabs(v0[j]-v1[j]);

        if( d==0 )
          ++zn;
        else
        {
          printf(" %i %i %f \n ",i,zn,d);
          zn = 0;
        }
      }
      printf(" i:%i n:%i zn:%i \n ",i,m->dimV[1],zn);
      */

      const float*    dataM          = nullptr;
      const unsigned* labelV         = nullptr;
      unsigned        exampleN       = 10;
      unsigned        actualExampleN = 0;

      //mnist::seek( h, 10 );

      // The result code is not examined: 'actualExampleN' starts at 0, so the loop
      // below only runs over the count of examples that dataM() reports.
      mnist::dataM(h, dataM, labelV, exampleN, actualExampleN );

      for(unsigned i=0; i<actualExampleN; ++i)
      {
        printf(" label: %i \n ", labelV[i] );

        // stack the images vertically
        svg::offset(svgH, 0, i*30*5 );
        svg::image(svgH, dataM + (28*28)*i, 28, 28, 5, svg::kInvGrayScaleColorMapId);
      }

      svg::write(svgH, outHtmlFn, nullptr, svg::kStandAloneFl | svg::kGenInlineStyleFl, 10,10,10,10);

      svg::destroy(svgH);
    }

    // Release the dataset without discarding an error recorded above: the result
    // of destroy() is only used when nothing has failed so far.
    rc_t destroyRC = destroy(h);

    if( rc == kOkRC )
      rc = destroyRC;
  }

  mem::release(outHtmlFn);
  mem::release(inDir);

  return rc;
}
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------------------
// Test driver for the dataset writer and reader.
//
// The native MNIST object is loaded and written to the dataset file named in 'cfg'.
// That file is then opened through a dataset adapter, one batch is read, its labels
// are printed, and its images are rendered to an SVG based output file.
//
//  cfg : Configuration object holding the input directory, dataset file name,
//        output file name and batch size.
//
// Returns kOkRC on success or the result code of the first step that failed. Every
// resource created here, including the MNIST object, is released on all exit paths
// which follow the argument parse.
cw::rc_t cw::dataset::test( const object_t* cfg )
{
  rc_t              rc        = kOkRC;
  char*             inDir     = nullptr;
  char*             dsFn      = nullptr;
  char*             outHtmlFn = nullptr;
  mnist::handle_t   mniH;
  adapter::handle_t rdrH;
  svg::handle_t     svgH;
  unsigned          batchN    = 10;

  if((rc = cfg->getv(" inDir ",inDir," dsFn ",dsFn," outHtmlFn ",outHtmlFn," batchN ",batchN)) != kOkRC )
    return cwLogError(rc," MNIST test failed. Argument parse failed. ");

  // expandPath() returns new strings which are released at 'errLabel'
  inDir     = filesys::expandPath(inDir);
  dsFn      = filesys::expandPath(dsFn);
  outHtmlFn = filesys::expandPath(outHtmlFn);

  // open the native MNIST object
  if((rc = mnist::create(mniH, inDir )) != kOkRC )
  {
    cwLogError(rc," Unable to open the native MNIST object. ");
    goto errLabel;
  }
  else
  {
    // write the MNIST data to a dataset file
    if((rc = mnist::write(mniH, dsFn )) != kOkRC )
    {
      cwLogError(rc," MNIST dataset write failed ");
      goto errLabel;
    }

    // the native object is no longer needed - release it before reading the file back
    mnist::destroy(mniH);
  }

  // open a dataset adapter
  if((rc = adapter::create(rdrH,dsFn,batchN)) != kOkRC )
  {
    cwLogError(rc," Dataset reader create failed. ");
    goto errLabel;
  }
  else
  {
    // create an SVG file
    if((rc = svg::create(svgH)) != kOkRC )
      rc = cwLogError(rc," SVG writer create failed. ");
    else
    {
      enum { kImagId, kNumbId };

      // create dataset fields
      if((rc = create_field(rdrH, kImagId, adapter::kFloatFl, " imag " )) != kOkRC )
      {
        cwLogError(rc," Dataset rdr column define failed. ");
        goto errLabel;
      }

      if((rc = create_field(rdrH, kNumbId, adapter::kIntFl, " numb " )) != kOkRC )
      {
        cwLogError(rc," Dataset rdr column define failed. ");
        goto errLabel;
      }

      // read a batch of data
      if((rc = adapter::read(rdrH,batchN)) != kOkRC )
      {
        cwLogError(rc," Batch read failed. ");
        goto errLabel;
      }
      else
      {
        const int*      numbV  = nullptr;
        const unsigned* numbNV = nullptr;
        const float*    imagV  = nullptr;
        const unsigned* imagNV = nullptr;

        // NOTE(review): the results of adapter::get() are not examined and the loop
        // below assumes that 'batchN' records were delivered - confirm that read()
        // guarantees a full batch.
        adapter::get(rdrH, kNumbId, numbV, numbNV ); // get the labels
        adapter::get(rdrH, kImagId, imagV, imagNV ); // get the image data

        for(unsigned i=0; i<batchN; ++i)
        {
          printf(" label: %i \n ", numbV[i] );

          // stack the images vertically
          svg::offset(svgH, 0, i*30*5 );
          svg::image(svgH, imagV + (28*28)*i, 28, 28, 5, svg::kInvGrayScaleColorMapId);
        }

        svg::write(svgH, outHtmlFn, nullptr, svg::kStandAloneFl | svg::kGenInlineStyleFl, 10,10,10,10);
      }
    }
  }

 errLabel:
  // The MNIST object is still allocated when mnist::write() fails, so it is released
  // here as well. mnist::destroy() returns immediately for a handle that is not valid,
  // which makes this call harmless on the paths where it was already released.
  mnist::destroy(mniH);
  adapter::destroy(rdrH);
  svg::destroy(svgH);
  mem::release(inDir);
  mem::release(dsFn);
  mem::release(outHtmlFn);

  return rc;
}