2487 lines
68 KiB
C++
2487 lines
68 KiB
C++
#include "cwCommon.h"
|
|
#include "cwLog.h"
|
|
#include "cwCommonImpl.h"
|
|
#include "cwMem.h"
|
|
#include "cwObject.h"
|
|
#include "cwFile.h"
|
|
#include "cwFileSys.h"
|
|
#include "cwVectOps.h"
|
|
#include "cwMtx.h"
|
|
#include "cwVariant.h"
|
|
#include "cwDataSets.h"
|
|
#include "cwSvg.h"
|
|
#include "cwTime.h"
|
|
#include "cwText.h"
|
|
|
|
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
|
|
namespace cw
|
|
{
|
|
namespace dataset
|
|
{
|
|
namespace wtr
|
|
{
|
|
typedef struct col_str
|
|
{
|
|
rdr::col_t col; // Public fields.
|
|
unsigned char* cur; // Cache of the current column data contents.
|
|
unsigned curByteN; // Count of bytes in cur[].
|
|
unsigned* curDimV; // Cache of the current column dimensions.
|
|
struct col_str* link; // Link to next col_t record.
|
|
} col_t;
|
|
|
|
typedef struct wtr_str
|
|
{
|
|
file::handle_t fH; // Output file handle
|
|
unsigned record_count; // Total count of rows.
|
|
col_t* colL; // Linked list of column descriptions
|
|
unsigned totalVarDimN; // Total count of unknown dim's among all columns
|
|
} wtr_t;
|
|
|
|
// Convert a writer handle into a pointer to its wtr_t state record.
inline wtr_t* _handleToPtr( handle_t h )
{
  wtr_t* p = handleToPtr<handle_t,wtr_t>(h);
  return p;
}
|
|
|
|
// Release every column record, close the output file and release the writer
// record itself. Always returns kOkRC.
rc_t _destroy( wtr_t* p )
{
  for(col_t* col = p->colL; col != nullptr; )
  {
    // Read the link first: the record is released below.
    col_t* next = col->link;

    mem::free(const_cast<char*>(col->col.label));
    mem::release(col->col.dimV);
    mem::release(col->col.maxDimV);
    mem::release(col->cur);
    mem::release(col->curDimV);
    mem::release(col);

    col = next;
  }

  file::close(p->fH);
  mem::release(p);

  return kOkRC;
}
|
|
|
|
// Return the column record whose id is 'columnId'.
// Logs a kInvalidArgRC error and returns nullptr if no column has that id.
col_t* _columnIdToPtr( wtr_t* p, unsigned columnId )
{
  col_t* col = p->colL;

  // Walk the list until the id matches or the list is exhausted.
  while( col != nullptr && col->col.id != columnId )
    col = col->link;

  if( col == nullptr )
    cwLogError(kInvalidArgRC,"The dataset column id %i was not found.",columnId);

  return col;
}
|
|
|
|
// eleN = count of elements in dV[]
|
|
// dimV[ dimN ] = dimensions for variable sized data elements. cumprod(dimV) must equal eleN
|
|
rc_t _write_column_to_buf( wtr_t* p, unsigned columnId, unsigned eleN, const unsigned* dimV, unsigned dimN, const void* dV, unsigned typeFlags, col_t*& colPtrRef )
|
|
{
|
|
col_t* c = _columnIdToPtr(p,columnId);
|
|
|
|
if( c == nullptr )
|
|
return cwLogError(kInvalidArgRC,"Unable to locate the column description associated with id: %i.",columnId);
|
|
|
|
// if this is a fixed size column
|
|
if( c->col.varDimN == 0 )
|
|
{
|
|
// verify that the element count matches the fixed element count
|
|
if( eleN != c->col.maxEleN )
|
|
return cwLogError(kInvalidArgRC,"Data vector in column '%s' has %i elements but should have %i elements.", cwStringNullGuard(c->col.label), eleN, c->col.maxEleN );
|
|
|
|
if( dimV != nullptr || dimN != 0 )
|
|
cwLogWarning("The dimension vector for the fixed sized column '%s' is ignored in the write() function.",cwStringNullGuard(c->col.label));
|
|
|
|
}
|
|
else // this is a variable sized column
|
|
{
|
|
unsigned tmpEleN = 1;
|
|
for(unsigned i=0; i<c->col.rankN; ++i)
|
|
{
|
|
tmpEleN *= dimV[i]; // track the count of elements
|
|
c->col.maxDimV[i] = std::max(c->col.maxDimV[i], dimV[i] ); // track the max. dimension
|
|
c->curDimV[i] = dimV[i]; // store the this columns dimensions
|
|
}
|
|
|
|
// verify that the sizeof the data matches the size given in the dimensions
|
|
if( tmpEleN != eleN )
|
|
return cwLogError(kInvalidArgRC,"The product of the dimension vector does not equal the count of elements in column '%s'.",c->col.label);
|
|
|
|
}
|
|
|
|
if( p->record_count == 0)
|
|
{
|
|
// set data type
|
|
c->col.max.flags = typeFlags;
|
|
c->col.min.flags = typeFlags;
|
|
}
|
|
else
|
|
{
|
|
// verify data type is the same for all elements
|
|
if( c->col.max.flags != typeFlags )
|
|
return cwLogError(kInvalidArgRC,"The data vector type '%s' does not match the column type '%s'.", variant::flagsToLabel(typeFlags), variant::flagsToLabel(c->col.max.flags));
|
|
|
|
}
|
|
|
|
// store the bytes associated with col/row
|
|
unsigned bytesPerEle = variant::flagsToBytes(typeFlags);
|
|
|
|
|
|
if( bytesPerEle == 0 )
|
|
return cwLogError(kInvalidArgRC,"Invalid type identifier in column '%s'.", cwStringNullGuard(c->col.label));
|
|
else
|
|
{
|
|
c->curByteN = bytesPerEle * eleN;
|
|
c->cur = mem::resize<unsigned char>(c->cur,c->curByteN);
|
|
memcpy(c->cur,dV,c->curByteN);
|
|
}
|
|
|
|
colPtrRef = c;
|
|
|
|
return kOkRC;
|
|
}
|
|
|
|
|
|
// Write the file header at the current file position.
//
// Layout: record count, column count, then for each column: label, id,
// varDimN, rankN, maxEleN, max variant, min variant and rankN pairs of
// (dimV[i], maxDimV[i]).
//
// p->totalVarDimN is recomputed as a side effect. The first failing write
// ends the function and its result code is returned.
rc_t _write_hdr( wtr_t* p )
{
  rc_t     rc      = kOkRC;
  unsigned columnN = 0;

  p->totalVarDimN = 0;

  // count the columns
  for(col_t* col=p->colL; col!=nullptr; col=col->link)
    columnN += 1;

  if((rc = file::write( p->fH, p->record_count )) != kOkRC )
    return rc;

  if((rc = file::write( p->fH, columnN )) != kOkRC )
    return rc;

  for(col_t* col=p->colL; col!=nullptr; col=col->link)
  {
    if((rc = file::writeStr( p->fH, col->col.label )) != kOkRC )
      return rc;

    if((rc = file::write( p->fH, col->col.id )) != kOkRC )
      return rc;

    if((rc = file::write( p->fH, col->col.varDimN )) != kOkRC )
      return rc;

    if((rc = file::write( p->fH, col->col.rankN )) != kOkRC )
      return rc;

    if((rc = file::write( p->fH, col->col.maxEleN )) != kOkRC )
      return rc;

    if((rc = variant::write( p->fH, col->col.max)) != kOkRC )
      return rc;

    if((rc = variant::write( p->fH, col->col.min )) != kOkRC )
      return rc;

    for(unsigned i=0; i<col->col.rankN; ++i)
    {
      if((rc = file::write( p->fH, col->col.dimV[i] )) != kOkRC )
        return rc;

      if((rc = file::write( p->fH, col->col.maxDimV[i])) != kOkRC )
        return rc;
    }

    p->totalVarDimN += col->col.varDimN;
  }

  return rc;
}
|
|
|
|
// Seek to the start of the file and write the header again, so that it
// holds the current record count and column statistics.
rc_t _re_write_hdr( wtr_t* p )
{
  if( file::seek( p->fH, file::kBeginFl, 0) != kOkRC )
    return cwLogError( kSeekFailRC, "Data file Header seek failed.");

  rc_t rc = _write_hdr( p );

  if( rc != kOkRC )
    return cwLogError( rc, "Header re-write failed.");

  return rc;
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Create a dataset writer which writes to the file 'fn'.
// Any writer already attached to 'h' is destroyed first. On success 'h'
// refers to the new writer; on failure the new writer is released.
cw::rc_t cw::dataset::wtr::create( handle_t& h, const char* fn )
{
  rc_t rc = destroy(h);

  if( rc != kOkRC )
    return rc;

  wtr_t* p = mem::allocZ<wtr_t>(1);

  rc = file::open(p->fH,fn,file::kWriteFl);

  if( rc != kOkRC )
    rc = cwLogError(rc,"Data file creation failed.");
  else
    h.set(p);

  if( rc != kOkRC )
    _destroy(p);

  return rc;
}
|
|
|
|
// Finalize and release a dataset writer.
//
// The file header is re-written so that it holds the final record count and
// column statistics, then the file is closed and all memory is released.
// An invalid handle is accepted and is a no-op.
//
// The writer is always released and 'h' is always cleared, even if the
// header re-write fails; otherwise a failed re-write would leave the file
// open and the writer record allocated. In that case the result code of the
// header re-write is returned.
cw::rc_t cw::dataset::wtr::destroy( handle_t& h )
{
  if( !h.isValid() )
    return kOkRC;

  wtr_t* p = _handleToPtr(h);

  // remember a header failure but keep going so the resources are released
  const rc_t hdrRC     = _re_write_hdr( p );
  const rc_t releaseRC = _destroy(p);

  h.clear();

  return hdrRC != kOkRC ? hdrRC : releaseRC;
}
|
|
|
|
// Append a column description to the writer.
//
// h         Writer handle.
// label     Column label (duplicated internally).
// columnId  Id used to refer to this column in write(). Must be unique.
// rankN     Count of dimensions in dimV[].
// dimV      dimV[ rankN ] size of each dimension. A value of 0 marks a
//           variable sized dimension whose size is given per record.
//
// Returns kInvalidArgRC if dimV is missing, if the id is already in use, or
// if records have already been written. The header is written with the
// first record, so a column added afterwards would enlarge the re-written
// header and overwrite record data.
cw::rc_t cw::dataset::wtr::define_columns( handle_t h, const char* label, unsigned columnId, unsigned rankN, const unsigned* dimV )
{
  wtr_t* p = _handleToPtr(h);

  if( rankN > 0 && dimV == nullptr )
    return cwLogError(kInvalidArgRC,"The column '%s' has rank %i but no dimension vector was given.", cwStringNullGuard(label), rankN );

  if( p->record_count > 0 )
    return cwLogError(kInvalidArgRC,"The column '%s' cannot be defined after records have been written.", cwStringNullGuard(label) );

  // locate the end of the column list and reject duplicate column id's
  col_t* tail = nullptr;
  for(col_t* c0=p->colL; c0!=nullptr; c0=c0->link)
  {
    if( c0->col.id == columnId )
      return cwLogError(kInvalidArgRC,"The column id %i given for column '%s' is already in use by column '%s'.", columnId, cwStringNullGuard(label), cwStringNullGuard(c0->col.label) );

    tail = c0;
  }

  col_t* c = mem::allocZ<col_t>(1);

  c->col.label   = mem::duplStr(label);
  c->col.id      = columnId;
  c->col.rankN   = rankN;
  c->col.varDimN = 0;
  c->col.dimV    = mem::allocDupl<unsigned>( dimV, rankN );
  c->col.maxDimV = mem::allocDupl<unsigned>( dimV, rankN );
  c->curDimV     = mem::allocDupl<unsigned>( dimV, rankN );
  c->col.maxEleN = 1;

  for(unsigned i=0; i<rankN; ++i)
  {
    // note: maxEleN becomes 0 when any dimension is variable
    c->col.maxEleN *= dimV[i];

    if( dimV[i] == 0 )
      c->col.varDimN += 1;
  }

  // link the new col recd to the end of the column list
  if( tail == nullptr )
    p->colL = c;
  else
    tail->link = c;

  return kOkRC;
}
|
|
|
|
// Cache an 'int' column value for the next call to write_record() and fold
// its values into the column's running min/max.
cw::rc_t cw::dataset::wtr::write( handle_t h, unsigned columnId, const int* dV, unsigned eleN, const unsigned* dimV, unsigned rankN )
{
  wtr_t* p   = _handleToPtr(h);
  col_t* col = nullptr;

  rc_t rc = _write_column_to_buf( p, columnId, eleN, dimV, rankN, dV, variant::kInt32VFl, col);

  if( rc != kOkRC )
    return rc;

  const auto lo = vop::min( dV, eleN );
  const auto hi = vop::max( dV, eleN );

  // Before the first record is written the range is taken from this value alone.
  const bool firstRecordFl = p->record_count == 0;

  col->col.min.u.i32 = firstRecordFl ? lo : std::min(col->col.min.u.i32, lo);
  col->col.max.u.i32 = firstRecordFl ? hi : std::max(col->col.max.u.i32, hi);

  return rc;
}
|
|
|
|
// Cache a 'float' column value for the next call to write_record() and fold
// its values into the column's running min/max.
cw::rc_t cw::dataset::wtr::write( handle_t h, unsigned columnId, const float* dV, unsigned eleN, const unsigned* dimV, unsigned rankN )
{
  wtr_t* p   = _handleToPtr(h);
  col_t* col = nullptr;

  rc_t rc = _write_column_to_buf( p, columnId, eleN, dimV, rankN, dV, variant::kFloatVFl, col);

  if( rc != kOkRC )
    return rc;

  const auto lo = vop::min( dV, eleN );
  const auto hi = vop::max( dV, eleN );

  // Before the first record is written the range is taken from this value alone.
  const bool firstRecordFl = p->record_count == 0;

  col->col.min.u.f = firstRecordFl ? lo : std::min(col->col.min.u.f, lo);
  col->col.max.u.f = firstRecordFl ? hi : std::max(col->col.max.u.f, hi);

  return rc;
}
|
|
|
|
// Cache a 'double' column value for the next call to write_record() and fold
// its values into the column's running min/max.
cw::rc_t cw::dataset::wtr::write( handle_t h, unsigned columnId, const double* dV, unsigned eleN, const unsigned* dimV, unsigned rankN )
{
  wtr_t* p   = _handleToPtr(h);
  col_t* col = nullptr;

  rc_t rc = _write_column_to_buf( p, columnId, eleN, dimV, rankN, dV, variant::kDoubleVFl, col);

  if( rc != kOkRC )
    return rc;

  const auto lo = vop::min( dV, eleN );
  const auto hi = vop::max( dV, eleN );

  // Before the first record is written the range is taken from this value alone.
  const bool firstRecordFl = p->record_count == 0;

  col->col.min.u.d = firstRecordFl ? lo : std::min(col->col.min.u.d, lo);
  col->col.max.u.d = firstRecordFl ? hi : std::max(col->col.max.u.d, hi);

  return rc;
}
|
|
|
|
// Write the cached column values to the file as one record.
//
// The file header is written before the first record. A record is: the byte
// count of the record data, followed, for each column, by the sizes of its
// variable dimensions (if any) and the column data.
cw::rc_t cw::dataset::wtr::write_record( handle_t h )
{
  wtr_t* p  = _handleToPtr(h);
  rc_t   rc = kOkRC;

  // the header precedes the first record
  if( p->record_count == 0 )
  {
    rc = _write_hdr(p);
    if( rc != kOkRC )
      return rc;
  }

  // total the bytes this record occupies after its size field
  unsigned rowByteN = 0;
  for(col_t* col=p->colL; col!=nullptr; col=col->link)
    rowByteN += col->col.varDimN * sizeof(unsigned) + col->curByteN;

  rc = file::write(p->fH, rowByteN);

  // the loops below stop at the first failed write
  for(col_t* col=p->colL; rc==kOkRC && col!=nullptr; col=col->link)
  {
    // variable sized columns store the size of each variable dimension first
    if( col->col.varDimN > 0 )
    {
      for(unsigned i=0; rc==kOkRC && i<col->col.rankN; ++i)
        if( col->col.dimV[i] == 0 )
          rc = file::write( p->fH, col->curDimV[i] );
    }

    // then the column data itself
    if( rc == kOkRC )
      rc = file::write( p->fH, col->cur, col->curByteN);
  }

  if( rc == kOkRC )
    p->record_count += 1;
  else
    rc = cwLogError(rc,"Example index %i write failed", p->record_count);

  return rc;
}
|
|
|
|
/*
|
|
|
|
File Format for the following data.
|
|
where the data record itself is repeated 3 times.
|
|
|
|
unsigned dim0V[] = {1};
|
|
unsigned dim1V[] = {3};
|
|
unsigned dim2V[] = {2,0};
|
|
unsigned dim3V[] = {2,2};
|
|
|
|
int val0[] = {0};
|
|
int val1[] = {1,2,3};
|
|
int val2[] = {4,5,6,7,8,9};
|
|
int val3[] = {10,11,13,14};
|
|
|
|
0300 0000 3 recd_count
|
|
0400 0000 4 col_count
|
|
|
|
0400 0000 label size - col0
|
|
636f 6c30 label
|
|
0000 0000 id
|
|
0000 0000 varDimN
|
|
0100 0000 rankN
|
|
0100 0000 maxEleN
|
|
4000 0000 max type
|
|
0000 0000 max value
|
|
0000 0000
|
|
4000 0000 min type
|
|
0000 0000 min value
|
|
0000 0000
|
|
0100 0000 dimV[0]
|
|
0100 0000 maxDimV[0]
|
|
|
|
0400 0000 label size - col 1
|
|
636f 6c31 label
|
|
0100 0000 id
|
|
0000 0000 varDimN
|
|
0100 0000 rankN
|
|
0300 0000 maxEleN
|
|
4000 0000 max type
|
|
0300 0000 max value
|
|
0000 0000
|
|
4000 0000 min type
|
|
0100 0000 min value
|
|
0000 0000
|
|
0300 0000 dimV[0]
|
|
0300 0000 maxDimV[0]
|
|
|
|
0400 0000 label size - col 2
|
|
636f 6c32 label
|
|
0200 0000 id
|
|
0100 0000 varDimN
|
|
0200 0000 rankN
|
|
0000 0000 maxEleN
|
|
4000 0000 max type
|
|
0900 0000 max value
|
|
0000 0000
|
|
4000 0000 min type
|
|
0400 0000 min value
|
|
0000 0000
|
|
0200 0000 dimV[0]
|
|
0200 0000 maxDimV[0]
|
|
|
|
0000 0000 dimV[1]
|
|
0300 0000 maxDimV[1]
|
|
|
|
0400 0000 label size - col 3
|
|
636f 6c33 label
|
|
0300 0000 id
|
|
0000 0000 varDimN
|
|
0200 0000 rankN
|
|
0400 0000 maxEleN
|
|
4000 0000 max type
|
|
0e00 0000 max value
|
|
0000 0000
|
|
4000 0000 min type
|
|
0a00 0000 min value
|
|
0000 0000
|
|
0200 0000 dimV[0]
|
|
0200 0000 maxDimV[0]
|
|
0200 0000 dimV[1]
|
|
0200 0000 maxDimV[1]
|
|
|
|
3c00 0000 recd0 size (60 bytes)
|
|
0000 0000 0 col0
|
|
|
|
0100 0000 1 col1[0]
|
|
0200 0000 2 col1[1]
|
|
0300 0000 3 col1[2]
|
|
|
|
0300 0000 dimV[1] col2 <- variable dimension
|
|
0400 0000 4 col2[0]
|
|
0500 0000 5
|
|
0600 0000 6
|
|
0700 0000 7
|
|
0800 0000 8
|
|
0900 0000 9
|
|
|
|
0a00 0000 10 col3
|
|
0b00 0000 11
|
|
0d00 0000 13
|
|
0e00 0000 14
|
|
|
|
3c00 0000 recd1 size (60 bytes)
|
|
0100 0000 1 col0
|
|
|
|
0100 0000
|
|
0200 0000
|
|
0300 0000
|
|
|
|
0300 0000
|
|
0400 0000
|
|
0500 0000
|
|
0600 0000
|
|
0700 0000
|
|
0800 0000
|
|
0900 0000
|
|
|
|
0a00 0000
|
|
0b00 0000
|
|
0d00 0000
|
|
0e00 0000
|
|
|
|
3c00 0000 recd2 size (60 bytes)
|
|
0200 0000 2 col0
|
|
|
|
0100 0000
|
|
0200 0000
|
|
0300 0000
|
|
|
|
0300 0000
|
|
0400 0000
|
|
0500 0000
|
|
0600 0000
|
|
0700 0000
|
|
0800 0000
|
|
0900 0000
|
|
|
|
0a00 0000
|
|
0b00 0000
|
|
0d00 0000
|
|
0e00 0000
|
|
*/
|
|
// Writer self test: create the file named by the 'outFn' field of 'cfg',
// define four 'int' columns (the third has one variable dimension) and write
// three records.
//
// Every write is checked; the first failure ends the test and its result
// code is returned. The writer is always destroyed and the expanded file
// name is always released.
cw::rc_t cw::dataset::wtr::test( const object_t* cfg )
{
  rc_t     rc    = kOkRC;
  char*    outFn = nullptr;
  handle_t h;

  if((rc = cfg->getv("outFn",outFn)) != kOkRC )
    return cwLogError(rc,"wtr test failed. Argument parse failed.");

  outFn = filesys::expandPath(outFn);

  if((rc = create(h,outFn)) != kOkRC )
  {
    rc = cwLogError(rc,"wtr create failed.");
    goto errLabel;
  }
  else
  {
    enum { kId0, kId1, kId2, kId3 };

    unsigned dim0V[] = {1};
    unsigned dim1V[] = {3};
    unsigned dim2V[] = {2,0};   // second dimension is variable
    unsigned dim3V[] = {2,2};

    unsigned dim0N = cwCountOf(dim0V);
    unsigned dim1N = cwCountOf(dim1V);
    unsigned dim2N = cwCountOf(dim2V);
    unsigned dim3N = cwCountOf(dim3V);

    int val0[] = {0};
    int val1[] = {1,2,3};
    int val2[] = {4,5,6,7,8,9};
    int val3[] = {10,11,13,14};

    if((rc = define_columns(h, "col0", kId0, dim0N, dim0V )) != kOkRC )
    {
      rc = cwLogError(rc,"Define column 0 failed.");
      goto errLabel;
    }

    if((rc = define_columns(h, "col1", kId1, dim1N, dim1V )) != kOkRC )
    {
      rc = cwLogError(rc,"Define column 1 failed.");
      goto errLabel;
    }

    if((rc = define_columns(h, "col2", kId2, dim2N, dim2V )) != kOkRC )
    {
      rc = cwLogError(rc,"Define column 2 failed.");
      goto errLabel;
    }

    if((rc = define_columns(h, "col3", kId3, dim3N, dim3V )) != kOkRC )
    {
      rc = cwLogError(rc,"Define column 3 failed.");
      goto errLabel;
    }

    for(unsigned i=0; i<3; ++i)
    {
      val0[0] = i;

      // actual size of the variable dimension of column 2 for this record
      dim2V[1] = 3;

      if((rc = write( h, kId0, val0, dim0V[0] )) != kOkRC )
        goto errLabel;

      if((rc = write( h, kId1, val1, dim1V[0] )) != kOkRC )
        goto errLabel;

      if((rc = write( h, kId2, val2, dim2V[0]*dim2V[1], dim2V, dim2N )) != kOkRC )
        goto errLabel;

      if((rc = write( h, kId3, val3, dim3V[0]*dim3V[1] )) != kOkRC )
        goto errLabel;

      if((rc = write_record(h)) != kOkRC )
        goto errLabel;
    }
  }

 errLabel:
  {
    // destroy() re-writes the file header, so its result is part of the test
    rc_t destroyRC = destroy(h);
    if( rc == kOkRC )
      rc = destroyRC;
  }

  mem::release(outFn);

  return rc;
}
|
|
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
namespace cw
|
|
{
|
|
namespace dataset
|
|
{
|
|
namespace rdr
|
|
{
|
|
// Size in bytes of the 'unsigned' byte count which precedes the data of
// each record in the file.
enum { kSizeofRecordHeader = sizeof(unsigned) };
|
|
|
|
typedef struct
|
|
{
|
|
col_t col; // Public record
|
|
unsigned* varDimIdxV; // varDimIdxV[] Dimension indexes that are variable in this column.
|
|
unsigned varDimIdxN; // Count of values in varDimIdxV[].
|
|
} c_t;
|
|
|
|
typedef struct rdr_str
|
|
{
|
|
c_t* colA; // colA[ column_count ] Per column data.
|
|
unsigned column_count; // Count of elements in colA[].
|
|
unsigned record_count; // Count of total examples.
|
|
file::handle_t fH; // Backing data file handle.
|
|
std::uint8_t* buf; // buf[ bufMaxByteN ] File read buffer
|
|
unsigned bufMaxByteN; // Allocated size of buf[] in bytes. (also sizeof fixed size records)
|
|
unsigned bufCurByteN; // Current count of bytes used in buf[].
|
|
bool isFixedSizeFl; // True if all fields are fixed size
|
|
|
|
unsigned curRecordIdx; // Index of record in buf[].
|
|
unsigned nextRecordIdx; // Index of the next record to read.
|
|
long baseFileByteOffs; // File byte offset of the first data record
|
|
|
|
unsigned state; // See k???State enum
|
|
|
|
|
|
} rdr_t;
|
|
|
|
// One row of the data type table: relates a reader type id to a variant
// type flag and a printable label.
typedef struct type_str
{
  const char* label;      // Printable type name.
  unsigned    typeId;     // Reader type id (k???RdrFl).
  unsigned    variantFl;  // Matching variant type flag (variant::k???VFl).
} type_t;
|
|
|
|
// Table of the supported column data types.
// The record with a null label marks the end of the table.
type_t _typeRefA[] =
{
  { "int",    kIntRdrFl,    variant::kInt32VFl  },
  { "float",  kFloatRdrFl,  variant::kFloatVFl  },
  { "double", kDoubleRdrFl, variant::kDoubleVFl },
  { nullptr,  0,            0                   }
};
|
|
|
|
// Convert a reader handle into a pointer to its rdr_t state record.
rdr_t* _handleToPtr(handle_t h )
{
  rdr_t* p = handleToPtr<handle_t,rdr_t>(h);
  return p;
}
|
|
|
|
|
|
// Return the type table record for the reader type id 'typeId'.
// Logs a kInvalidArgRC error and returns nullptr if the id is not in the table.
const type_t* _typeIdToDesc( unsigned typeId )
{
  const type_t* desc = _typeRefA;

  // stop at the matching record or at the end-of-table record
  while( desc->label != nullptr && desc->typeId != typeId )
    ++desc;

  if( desc->label != nullptr )
    return desc;

  cwLogError(kInvalidArgRC,"The dataset rdr typeId %i is not valid.", typeId);
  return nullptr;
}
|
|
|
|
// Return the type table record for the variant type flag 'variantFl', or
// nullptr (without logging) if the flag is not in the table.
const type_t* _varTypeFlagsToDesc( unsigned variantFl )
{
  const type_t* desc = _typeRefA;

  // stop at the matching record or at the end-of-table record
  while( desc->label != nullptr && desc->variantFl != variantFl )
    ++desc;

  return desc->label != nullptr ? desc : nullptr;
}
|
|
|
|
const char* _typeIdToLabel( unsigned typeId )
|
|
{
|
|
const type_t* t;
|
|
if((t = _typeIdToDesc(typeId)) == nullptr )
|
|
return nullptr;
|
|
return t->label;
|
|
}
|
|
|
|
bool _typeIdMatch( unsigned typeId, unsigned variantTypeFl )
|
|
{
|
|
const type_t* t;
|
|
|
|
if((t = _typeIdToDesc(typeId)) == nullptr )
|
|
return false;
|
|
|
|
return t->typeId==typeId && t->variantFl==variantTypeFl;
|
|
}
|
|
|
|
// Return the column record whose id is 'columnId'.
// Logs a kInvalidArgRC error and returns nullptr if there is no such column.
const c_t* _colFromId( rdr_t* p, unsigned columnId )
{
  unsigned colIdx = 0;

  while( colIdx < p->column_count && p->colA[colIdx].col.id != columnId )
    ++colIdx;

  if( colIdx < p->column_count )
    return &p->colA[colIdx];

  cwLogError(kInvalidArgRC,"Invalid columnId (%i).", columnId );
  return nullptr;
}
|
|
|
|
// Return the column record whose label compares equal to 'colLabel'.
// Logs a kInvalidArgRC error and returns nullptr if there is no such column.
const c_t* _colFromLabel( rdr_t* p, const char* colLabel )
{
  unsigned colIdx = 0;

  while( colIdx < p->column_count && textCompare(p->colA[colIdx].col.label, colLabel) != 0 )
    ++colIdx;

  if( colIdx < p->column_count )
    return &p->colA[colIdx];

  cwLogError(kInvalidArgRC,"Invalid column label:%s.", colLabel );
  return nullptr;
}
|
|
|
|
// Release the per column memory, close the data file and release the
// reader record itself. Always returns kOkRC.
rc_t _destroy( rdr_t* p )
{
  for(unsigned colIdx=0; colIdx<p->column_count; ++colIdx)
  {
    c_t& col = p->colA[colIdx];

    mem::release( col.col.dimV );
    mem::release( col.col.maxDimV );
    mem::release( col.varDimIdxV );
    mem::free( const_cast<char*>(col.col.label) );
  }

  file::close(p->fH);

  mem::release(p->colA);
  mem::release(p->buf);
  mem::release(p);

  return kOkRC;
}
|
|
|
|
// Read the file header and build the per column records.
//
// Reads the record and column counts, then for each column: label, id,
// varDimN, rankN, maxEleN, max and min variants and rankN pairs of
// (dimV[j], maxDimV[j]). From these it derives the column type id, the max.
// element and byte counts and the byte offset of the column in the record
// buffer, allocates the record buffer and stores the file offset of the
// first record.
//
// On any failure, including a column whose data type is not int, float or
// double, the reader state is set to kErrorState and the error is returned.
rc_t _readHdr( rdr_t* p )
{
  rc_t     rc           = kOkRC;
  unsigned bufOffsByteN = 0;

  p->bufMaxByteN   = 0;
  p->isFixedSizeFl = true;

  if((rc = read(p->fH,p->record_count)) != kOkRC ) goto errLabel;
  if((rc = read(p->fH,p->column_count)) != kOkRC ) goto errLabel;

  p->colA = mem::allocZ<c_t>( p->column_count);

  // for each column
  for(unsigned i=0; i<p->column_count; ++i)
  {
    c_t* c = p->colA + i;

    if((rc = readStr( p->fH,(char**)&c->col.label,255)) != kOkRC ) goto errLabel;
    if((rc = read(p->fH,c->col.id))                     != kOkRC ) goto errLabel;
    if((rc = read(p->fH,c->col.varDimN))                != kOkRC ) goto errLabel;
    if((rc = read(p->fH,c->col.rankN ))                 != kOkRC ) goto errLabel;
    if((rc = read(p->fH,c->col.maxEleN ))               != kOkRC ) goto errLabel;
    if((rc = variant::read( p->fH, c->col.max))         != kOkRC ) goto errLabel;
    if((rc = variant::read( p->fH, c->col.min ))        != kOkRC ) goto errLabel;

    c->col.dimV    = mem::allocZ<unsigned>( c->col.rankN );
    c->col.maxDimV = mem::allocZ<unsigned>( c->col.rankN );
    c->varDimIdxV  = mem::allocZ<unsigned>( c->col.rankN );
    c->varDimIdxN  = 0;

    // maxEleN is recomputed from maxDimV[] rather than taken from the file
    c->col.maxEleN = c->col.rankN==0 ? 0 : 1;

    for(unsigned j=0; j<c->col.rankN; ++j)
    {
      if((rc = file::read( p->fH, c->col.dimV[j] ))   != kOkRC ) goto errLabel;
      if((rc = file::read( p->fH, c->col.maxDimV[j])) != kOkRC ) goto errLabel;

      // a dimension of size 0 is variable: remember its index
      if( c->col.dimV[j] == 0 )
        c->varDimIdxV[c->varDimIdxN++] = j;

      c->col.maxEleN *= c->col.maxDimV[j];
    }

    unsigned bytesPerEle = variant::flagsToBytes(c->col.max.flags);

    {
      const type_t* t = _varTypeFlagsToDesc(c->col.max.flags);

      // Stop here: if the loop continued this error would be overwritten
      // by the result of the next read.
      if( t == nullptr )
      {
        rc = cwLogError(kInvalidDataTypeRC,"The column %s is not a valid data type (e.g. int, float double).",cwStringNullGuard(c->col.label));
        goto errLabel;
      }

      c->col.typeId = t->typeId;
    }

    // TODO: why maintain both eleN and maxEleN and byteN and maxByteN?
    c->col.eleN       = c->col.maxEleN;
    c->col.maxByteN   = bytesPerEle * c->col.maxEleN;
    c->col.byteOffset = bufOffsByteN;
    c->col.byteN      = c->col.maxByteN;

    // Track the max file buffer size
    p->bufMaxByteN += c->col.maxByteN + c->varDimIdxN * sizeof(unsigned);

    if( c->col.varDimN != 0 && p->isFixedSizeFl )
      p->isFixedSizeFl = false;

    bufOffsByteN = p->bufMaxByteN;
  }

  p->buf = mem::alloc<std::uint8_t>(p->bufMaxByteN);

  // store the file offset to the first data record
  rc = tell(p->fH,&p->baseFileByteOffs);

 errLabel:
  if( rc != kOkRC )
  {
    rc = cwLogError(rc,"Data set file header read failed.");
    p->state = kErrorState;
  }

  return rc;
}
|
|
|
|
// Move the file position back to the first record.
// On success the next record index is 0 and there is no current record; on
// failure the reader state is set to kErrorState.
rc_t _rewind( rdr_t* p )
{
  const rc_t rc = file::seek( p->fH, file::kBeginFl, p->baseFileByteOffs);

  if( rc == kOkRC )
  {
    p->curRecordIdx  = kInvalidIdx;
    p->nextRecordIdx = 0;
  }
  else
  {
    p->state = kErrorState;
  }

  return rc;
}
|
|
|
|
// Move the file position to the start of record 'recdIdx' by stepping over
// records one at a time. This works whatever the record sizes are, because
// every record begins with its own byte count.
//
// Records can only be stepped over in the forward direction, so a seek to a
// record before the current position first rewinds to the first record.
// p->nextRecordIdx is advanced as records are skipped. On a read or seek
// failure the reader state is set to kErrorState.
rc_t _var_seek( rdr_t* p, unsigned recdIdx )
{
  rc_t rc = kOkRC;

  if( recdIdx < p->nextRecordIdx )
    if((rc = _rewind(p)) != kOkRC )
      return rc;

  // step over every record between the current position and the target
  while( p->nextRecordIdx < recdIdx )
  {
    unsigned recdByteN = 0;

    // read the size of the record ...
    if((rc = file::read(p->fH,recdByteN)) != kOkRC )
    {
      p->state = kErrorState;
      break;
    }

    // ... and skip over its data
    if((rc = file::seek(p->fH, file::kCurFl, recdByteN )) != kOkRC )
    {
      p->state = kErrorState;
      break;
    }

    p->nextRecordIdx += 1;
  }

  return rc;
}
|
|
|
|
// Seek to the a record, but don't actually read it.
|
|
rc_t _seek( rdr_t* p, unsigned recdIdx )
|
|
{
|
|
rc_t rc = kOkRC;
|
|
|
|
if( p->nextRecordIdx == recdIdx )
|
|
return rc;
|
|
|
|
if( recdIdx >= p->record_count )
|
|
{
|
|
rc = cwLogError(kInvalidArgRC,"The seek index %i is invalid. Record Count=%i", recdIdx, p->record_count);
|
|
goto errLabel;
|
|
}
|
|
|
|
if( p->isFixedSizeFl )
|
|
rc = _var_seek(p,recdIdx);
|
|
else
|
|
{
|
|
// fixed size recds offset = baseOffset + (recd_index * (sizeof(recd_byte_cnt) + sizeof(data_record)))
|
|
rc = file::seek( p->fH, file::kBeginFl, p->baseFileByteOffs + recdIdx * (kSizeofRecordHeader + p->bufMaxByteN));
|
|
}
|
|
|
|
if( rc == kOkRC )
|
|
p->nextRecordIdx = recdIdx;
|
|
|
|
errLabel:
|
|
return rc;
|
|
|
|
}
|
|
|
|
// Update the column records to describe the record currently held in
// p->buf[], for files in which at least one column has a variable size.
//
// The data of a variable sized column is preceded in the buffer by the
// size of each of its variable dimensions. These are copied into
// col.dimV[] and used to set col.eleN and col.byteN. col.byteOffset of
// every column is set to the offset of its data in p->buf[].
// Always returns kOkRC.
rc_t _parse_var_record( rdr_t* p )
{
  p->bufCurByteN = 0;

  for(unsigned colIdx=0; colIdx<p->column_count; ++colIdx)
  {
    c_t* col = &p->colA[colIdx];

    // fixed size columns keep the element and byte counts they already have
    if( col->col.varDimN != 0 )
    {
      // the variable dimension sizes begin at the current buffer offset
      const unsigned* recdDimV  = reinterpret_cast<const unsigned*>(p->buf + p->bufCurByteN );
      unsigned        fieldEleN = col->col.rankN==0 ? 0 : 1;
      unsigned        varIdx    = 0;

      for(unsigned dimIdx=0; dimIdx<col->col.rankN; ++dimIdx)
      {
        const bool isVarDimFl = varIdx<col->varDimIdxN && col->varDimIdxV[varIdx] == dimIdx;

        // a variable dimension takes its size from the record
        if( isVarDimFl )
        {
          col->col.dimV[dimIdx] = recdDimV[varIdx];
          varIdx               += 1;
          p->bufCurByteN       += sizeof(unsigned);
        }

        fieldEleN *= col->col.dimV[dimIdx];
      }

      col->col.eleN  = fieldEleN;
      col->col.byteN = variant::flagsToBytes( col->col.max.flags ) * fieldEleN;
    }

    // the column data follows the dimension sizes (if any)
    col->col.byteOffset = p->bufCurByteN;
    p->bufCurByteN     += col->col.byteN;
  }

  return kOkRC;
}
|
|
|
|
|
|
// Read the record at the current file position into p->buf[] and make it
// the current record.
//
// Returns kEofRC, and sets the reader state to kEofState, if the end of the
// file is reached while reading the record size.
//
// The record size read from the file is checked against the size of the
// record buffer at run time. An assert() is not enough here: it is compiled
// out when NDEBUG is defined, and a bad size would then overrun p->buf[].
rc_t _read_record( rdr_t* p )
{
  rc_t     rc          = kOkRC;
  unsigned recordByteN = 0;

  // Read the byte length of this record
  if((rc = file::read(p->fH, recordByteN )) != kOkRC )
  {
    if( file::eof(p->fH) )
    {
      p->state = kEofState;
      return kEofRC;
    }

    return rc;
  }

  // the record must fit into the buffer allocated from the file header
  if( recordByteN > p->bufMaxByteN )
  {
    p->state = kErrorState;
    return cwLogError(kOpFailRC,"The record at index %i has %i bytes which exceeds the record buffer size of %i bytes.", p->nextRecordIdx, recordByteN, p->bufMaxByteN );
  }

  // read the record data into p->buf[]
  if((rc = file::read( p->fH, p->buf, recordByteN )) != kOkRC )
    return rc;

  // if all columns in the record do not have a fixed size then update
  // the column pointers into the data record
  if( !p->isFixedSizeFl )
    if((rc = _parse_var_record( p )) != kOkRC )
      return rc;

  p->curRecordIdx   = p->nextRecordIdx;
  p->nextRecordIdx += 1;

  return rc;
}
|
|
|
|
// Return a pointer to the data of column 'columnId' in the current record.
//
// p          Reader state.
// columnId   Id of the requested column.
// vpRef      Set to the address of the column data inside p->buf[].
// nRef       Set to the count of elements in the column.
// dimVRef    Set to the dimension vector of the column.
// reqTypeId  Reader type id (k???RdrFl) the caller expects.
//
// Returns kInvalidArgRC, leaving the output arguments unmodified, if the
// column does not exist or does not have the requested type.
rc_t _get( rdr_t* p, unsigned columnId, void*& vpRef, unsigned& nRef, const unsigned*& dimVRef, unsigned reqTypeId )
{
  const c_t* c = _colFromId(p,columnId);

  if( c == nullptr )
    return kInvalidArgRC;

  if( c->col.typeId != reqTypeId )
  {
    // The labels are looked up once and null-guarded: _typeIdToLabel()
    // returns nullptr for an unknown type id.
    const char* colTypeLabel = _typeIdToLabel(c->col.typeId);
    const char* reqTypeLabel = _typeIdToLabel(reqTypeId);

    return cwLogError(kInvalidArgRC,"Cannot convert the column '%s' from type:%s to type:%s.", cwStringNullGuard(c->col.label), cwStringNullGuard(colTypeLabel), cwStringNullGuard(reqTypeLabel));
  }

  nRef    = c->col.eleN;
  dimVRef = c->col.dimV;
  vpRef   = p->buf + c->col.byteOffset;

  return kOkRC;
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
// Create a dataset reader on the file 'fn' and read the file header.
//
// Any reader already attached to 'h' is destroyed first. 'h' is only set
// once the file is open and the header has been read. On failure the new
// reader is released and 'h' is left invalid.
cw::rc_t cw::dataset::rdr::create( handle_t& h, const char* fn )
{
  rc_t rc;

  if((rc = destroy(h)) != kOkRC )
    return rc;

  rdr_t* p = mem::allocZ<rdr_t>(1);

  if((rc = file::open(p->fH, fn,file::kReadFl)) != kOkRC )
  {
    rc = cwLogError(rc,"The data file '%s' could not be opened.", cwStringNullGuard(fn));
  }
  else if((rc = _readHdr(p)) == kOkRC )
  {
    p->state        = kOkState;
    p->curRecordIdx = kInvalidIdx;
    h.set(p);
  }

  // release the partially constructed reader; 'h' was never set to it
  if( rc != kOkRC )
    _destroy(p);

  return rc;
}
|
|
|
|
// Release a dataset reader and clear its handle.
// An invalid handle is accepted and is a no-op.
cw::rc_t cw::dataset::rdr::destroy( handle_t& h )
{
  if( !h.isValid() )
    return kOkRC;

  rc_t rc = _destroy( _handleToPtr(h) );

  // the handle is only cleared once the reader has been released
  if( rc == kOkRC )
    h.clear();

  return rc;
}
|
|
|
|
// Return the count of columns in the data file.
unsigned cw::dataset::rdr::column_count( handle_t h )
{
  return _handleToPtr(h)->column_count;
}
|
|
|
|
// Return the description of the column at index 'colIdx', or nullptr if
// the index is out of range.
const cw::dataset::rdr::col_t* cw::dataset::rdr::column_cfg( handle_t h, unsigned colIdx )
{
  rdr_t* p = _handleToPtr(h);

  const bool inRangeFl = colIdx < p->column_count;

  return inRangeFl ? &p->colA[colIdx].col : nullptr;
}
|
|
|
|
// Return the description of the column labelled 'colLabel', or nullptr if
// there is no such column.
const cw::dataset::rdr::col_t* cw::dataset::rdr::column_cfg( handle_t h, const char* colLabel )
{
  const c_t* col = _colFromLabel( _handleToPtr(h), colLabel );

  return col == nullptr ? nullptr : &col->col;
}
|
|
|
|
// Return the record count stored in the file header.
unsigned cw::dataset::rdr::record_count( handle_t h)
{
  return _handleToPtr(h)->record_count;
}
|
|
|
|
// Return the index of the current record (kInvalidIdx after create() and
// after a rewind).
unsigned cw::dataset::rdr::cur_record_index( handle_t h )
{
  return _handleToPtr(h)->curRecordIdx;
}
|
|
|
|
// Return the index of the record the next read() will return.
unsigned cw::dataset::rdr::next_record_index( handle_t h )
{
  return _handleToPtr(h)->nextRecordIdx;
}
|
|
|
|
// Return the reader state (see k???State enum).
unsigned cw::dataset::rdr::state( handle_t h )
{
  return _handleToPtr(h)->state;
}
|
|
|
|
// Position the reader so that the next read() returns record 'recordIdx'.
cw::rc_t cw::dataset::rdr::seek( handle_t h, unsigned recordIdx )
{
  return _seek( _handleToPtr(h), recordIdx );
}
|
|
|
|
|
|
// Read one record and make it the current record.
// If 'record_index' is kInvalidIdx the next record in the file is read,
// otherwise the reader first seeks to 'record_index'.
cw::rc_t cw::dataset::rdr::read( handle_t h, unsigned record_index )
{
  rdr_t* p = _handleToPtr(h);

  if( record_index != kInvalidIdx )
  {
    const rc_t seekRC = _seek(p,record_index);

    if( seekRC != kOkRC )
      return seekRC;
  }

  return _read_record(p);
}
|
|
|
|
// Get the data of an 'int' column from the current record.
// On failure vRef is set to nullptr.
cw::rc_t cw::dataset::rdr::get( handle_t h, unsigned columnId, const int*& vRef, unsigned& nRef, const unsigned*& dimVRef )
{
  void*      data = nullptr;
  const rc_t rc   = _get( _handleToPtr(h), columnId, data, nRef, dimVRef, kIntRdrFl );

  if( rc == kOkRC )
    vRef = static_cast<const int*>(data);
  else
    vRef = nullptr;

  return rc;
}
|
|
|
|
// Get the data of a 'float' column from the current record.
// On failure vRef is set to nullptr.
cw::rc_t cw::dataset::rdr::get( handle_t h, unsigned columnId, const float*& vRef, unsigned& nRef, const unsigned*& dimVRef )
{
  void*      data = nullptr;
  const rc_t rc   = _get( _handleToPtr(h), columnId, data, nRef, dimVRef, kFloatRdrFl );

  if( rc == kOkRC )
    vRef = static_cast<const float*>(data);
  else
    vRef = nullptr;

  return rc;
}
|
|
|
|
// Get the data of a 'double' column from the current record.
// On failure vRef is set to nullptr.
cw::rc_t cw::dataset::rdr::get( handle_t h, unsigned columnId, const double*& vRef, unsigned& nRef, const unsigned*& dimVRef )
{
  void*      data = nullptr;
  const rc_t rc   = _get( _handleToPtr(h), columnId, data, nRef, dimVRef, kDoubleRdrFl );

  if( rc == kOkRC )
    vRef = static_cast<const double*>(data);
  else
    vRef = nullptr;

  return rc;
}
|
|
|
|
// Print one line per column to stdout: id, varDimN, maxEleN, rank, type
// label, min and max values, followed by dimV[] and maxDimV[].
// Always returns kOkRC.
cw::rc_t cw::dataset::rdr::report( handle_t h )
{
  rc_t   rc = kOkRC;
  rdr_t* p  = _handleToPtr(h);

  for(unsigned i=0; i<p->column_count; ++i)
  {
    const c_t* c = p->colA + i;

    // _typeIdToLabel() returns nullptr for an unknown type id and a null
    // pointer must not be passed to a '%s' conversion.
    const char* typeLabel = _typeIdToLabel(c->col.typeId);

    printf("id:%5i vdN:%5i mxEleN:%5i rank:%3i %8s", c->col.id, c->col.varDimN, c->col.maxEleN, c->col.rankN, cwStringNullGuard(typeLabel) );

    printf(" min:"); variant::print(c->col.min);
    printf(" max:"); variant::print(c->col.max);

    printf(" | ");

    for(unsigned j=0; j<c->col.rankN; ++j)
      printf("%i ",c->col.dimV[j]);

    printf(" | ");

    for(unsigned j=0; j<c->col.rankN; ++j)
      printf("%i ",c->col.maxDimV[j]);

    printf("\n");
  }

  return rc;
}
|
|
|
|
// Reader self test: open the file named by the 'inFn' field of 'cfg',
// print the column report and then print the 'int' columns with id's 0-3
// of every record.
//
// Reaching the end of the file is the expected way for the read loop to
// end and is reported as kOkRC; any other read result is reported as
// kOpFailRC. The expanded file name is released before returning.
cw::rc_t cw::dataset::rdr::test( const object_t* cfg )
{
  rc_t     rc   = kOkRC;
  char*    inFn = nullptr;
  handle_t h;

  if((rc = cfg->getv("inFn",inFn)) != kOkRC )
    return cwLogError(rc,"rdr test failed. Argument parse failed.");

  inFn = filesys::expandPath(inFn);

  if((rc = create(h,inFn)) != kOkRC )
  {
    rc = cwLogError(rc,"rdr create failed.");
  }
  else
  {
    const int*      v    = nullptr;
    unsigned        vN   = 0;
    const unsigned* dimV = nullptr;

    report(h);

    while( (rc=read(h)) == kOkRC )
    {
      // only print a column if it was successfully retrieved
      if( get(h,0,v,vN,dimV) == kOkRC ) vop::print(v,vN,"%i ","c0:");
      if( get(h,1,v,vN,dimV) == kOkRC ) vop::print(v,vN,"%i ","c1:");
      if( get(h,2,v,vN,dimV) == kOkRC ) vop::print(v,vN,"%i ","c2:");
      if( get(h,3,v,vN,dimV) == kOkRC ) vop::print(v,vN,"%i ","c3:");
    }

    if( rc == kEofRC )
      rc = kOkRC;
    else
      rc = cwLogError(kOpFailRC,"The read operation failed.");

    destroy(h);
  }

  mem::release(inFn);

  return rc;
}
|
|
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
namespace cw {
|
|
namespace dataset {
|
|
namespace adapter {
|
|
|
|
typedef struct col_str
|
|
{
|
|
const rdr::col_t* col; // Column description
|
|
bool oneHotFl; // Convert this column to a one-hot vector
|
|
unsigned maxEleN; // Max count of elements in the buffer from this column
|
|
int oneHotMax; // Max value in this column
|
|
int oneHotMin; // Min value in this column
|
|
unsigned* batchDimV; // batchDivV[ col.rankN, batchN ] or nullptr for fixed size columns
|
|
struct col_str* link; //
|
|
} col_t;
|
|
|
|
typedef struct field_str
|
|
{
|
|
unsigned id; // Field Id
|
|
unsigned flags; // Field flags
|
|
bool isFixedSizeFl; // Do all columns in this field have a fixed size.
|
|
unsigned bytesPerEle; // Size of each element in buf[] (determined by flags | k<DataType>fl)
|
|
unsigned bufMaxEleN; // Allocated size of buf[] for a batch size of maxBatchN
|
|
unsigned bufEleN; // Current count of elements in buf[] for the entire batch.
|
|
unsigned bufMaxFieldByteN; // Max. size in bytes of one field record.
|
|
unsigned bufByteN; // Current count of bytes in buf.
|
|
std::uint8_t* buf; // buf[ bufMaxFieldByteN*batchN ]
|
|
unsigned* batchEleNV; // batchEleN[ maxBatchN ] Count of ele's in each record of a batch.
|
|
col_t* colL; // List of columns assigned to this field
|
|
colMap_t** colMapM; // colMapM[ batchN ]
|
|
colMap_t* colMapA; // colMapA[ batchN*columnN ] Storage for colMapM[]
|
|
struct field_str* link; //
|
|
} field_t;
|
|
|
|
typedef struct adapter_str
|
|
{
|
|
unsigned maxBatchN; // Max. possible value of batchN in a call to read().
|
|
unsigned batchN; // Count of records returned in the last call to read().
|
|
rdr::handle_t rdrH; // Source data file
|
|
field_t* fieldL; // List of field descriptions
|
|
unsigned state; // Exception state
|
|
} adapter_t;
|
|
|
|
// Convert an adapter handle to a pointer to its adapter_t record.
inline adapter_t* _handleToPtr(handle_t h )
{
  adapter_t* p = handleToPtr<handle_t,adapter_t>(h);
  return p;
}
|
|
|
|
// Release everything owned by an adapter: the column records of every field, the
// field buffers and column maps, the field records, the source reader and finally
// the adapter record itself.  Always returns kOkRC.
rc_t _destroy( adapter_t* p )
{
  for(field_t* fld = p->fieldL; fld != nullptr; )
  {
    field_t* nextFld = fld->link;

    for(col_t* col = fld->colL; col != nullptr; )
    {
      col_t* nextCol = col->link;

      // only variable width columns can own a dim. tracking array
      if( col->col->varDimN > 0 )
        mem::release(col->batchDimV);

      mem::release(col);
      col = nextCol;
    }

    mem::release(fld->batchEleNV);
    mem::release(fld->buf);
    mem::release(fld->colMapM);
    mem::release(fld->colMapA);
    mem::release(fld);

    fld = nextFld;
  }

  rdr::destroy(p->rdrH);

  mem::release(p);

  return kOkRC;
}
|
|
|
|
// Locate the field record with the given id.
// Logs a kInvalidArgRC error and returns nullptr if no field has this id.
field_t* _fieldIdToRecd( adapter_t* p, unsigned fieldId )
{
  field_t* fld = p->fieldL;

  while( fld != nullptr && fld->id != fieldId )
    fld = fld->link;

  if( fld == nullptr )
    cwLogError(kInvalidArgRC,"Invalid field id '%i'.",fieldId);

  return fld;
}
|
|
|
|
// Validate a column for one-hot encoding and calculate the width of its one-hot vector.
// The column min and max must be integers and the column must have rank 1 with a max
// dimension of 1.  On success c->oneHotMin and c->oneHotMax are set from the column
// min/max and eleN_Ref is set to (max - min) + 1.
rc_t _calc_one_hot_ele_count( col_t* c, unsigned& eleN_Ref )
{
  const rdr::col_t* srcCol = c->col;
  rc_t              rc     = kOkRC;

  if( !(variant::isInt(srcCol->min) && variant::isInt(srcCol->max)) )
    return cwLogError(kInvalidArgRC,"One-hot columns must be integer valued.");

  const bool scalarFl = srcCol->rankN == 1 && srcCol->maxDimV[0] == 1;

  if( !scalarFl )
    return cwLogError(kInvalidArgRC,"One-hot columns must be scalar integers.");

  rc = variant::get(srcCol->min,c->oneHotMin);
  if( rc != kOkRC )
    return cwLogError(rc,"Unable to obtain the one-hot minimum value.");

  rc = variant::get(srcCol->max,c->oneHotMax);
  if( rc != kOkRC )
    return cwLogError(rc,"Unable to obtain the maximum value.");

  eleN_Ref = (c->oneHotMax - c->oneHotMin) + 1;

  return rc;
}
|
|
|
|
// Append the source column named 'colLabel' to the column list of field 'f'.
//
// p         Adapter which owns the field and the source reader.
// f         Field to which the column is appended.
// colLabel  Label of the column in the source data file.
// oneHotFl  True to expand the (scalar integer) column into a one-hot vector.
//
// Returns kOkRC on success.  On failure an error is logged and the field is left
// unchanged: the new column record is released rather than leaked or left
// half-configured in the field's column list.
rc_t _assign_column( adapter_t* p, field_t* f, const char* colLabel, bool oneHotFl )
{
  rc_t   rc = kOkRC;
  col_t* c  = mem::allocZ<col_t>(1);

  c->oneHotFl = oneHotFl;

  // Fully validate the column before it is linked into the field.
  if((c->col = rdr::column_cfg(p->rdrH, colLabel)) == nullptr )
    rc = kInvalidArgRC;
  else
    if( oneHotFl )
      rc = _calc_one_hot_ele_count(c,c->maxEleN);
    else
      c->maxEleN = c->col->maxEleN;

  if( rc != kOkRC )
  {
    mem::release(c);
    return cwLogError(rc,"'%s' Column assignment failed.", cwStringNullGuard(colLabel));
  }

  // locate the last link in the column list
  col_t* c0 = f->colL;
  while( c0!=nullptr && c0->link != nullptr )
    c0=c0->link;

  // add the new record to the end of the list
  if( c0 == nullptr )
    f->colL = c;
  else
    c0->link = c;

  // Update the size of the field buffer to account for the column size.
  // The translated width (c->maxEleN) is used so that one-hot columns are
  // counted the same way as in _allocate_field_buffer().
  f->bufMaxEleN += c->maxEleN;

  // if this is a variable length column
  if( c->col->varDimN > 0 )
    f->isFixedSizeFl = false;

  // When dim. tracking is enabled variable size columns get a
  // [maxBatchN,rankN] array to hold the dim's of each record in a batch.
  // Fixed size columns keep the nullptr set by allocZ().
  if( cwIsFlag(f->flags,kTrackColDimFl) && c->col->varDimN > 0 )
    c->batchDimV = mem::allocZ<unsigned>(p->maxBatchN*c->col->rankN);

  return rc;
}
|
|
|
|
// Allocate the data buffer of a field and, when kTrackColDimFl is set on the field,
// the column map arrays which describe where each column is located in a field record.
//
// The field record width is the sum of the maxEleN values of the assigned columns
// and the buffer is sized to hold p->maxBatchN such records.
// Always returns kOkRC.
rc_t _allocate_field_buffer( adapter_t* p, field_t* f )
{
  rc_t rc = kOkRC;

  f->bufMaxEleN = 0;

  // calc the field width as the sum of the max column widths
  unsigned colN = 0;
  for(col_t* c=f->colL; c!=nullptr; c=c->link)
  {
    f->bufMaxEleN += c->maxEleN;
    colN          += 1;
  }

  f->bufMaxFieldByteN = f->bufMaxEleN * f->bytesPerEle;
  f->buf              = mem::alloc<std::uint8_t>(p->maxBatchN * f->bufMaxFieldByteN);

  // if col. dim tracking is enabled for this field
  if( cwIsFlag(f->flags,kTrackColDimFl) )
  {
    // allocate the column dim tracking data structures
    f->colMapM = mem::allocZ<colMap_t*>( p->maxBatchN );
    f->colMapA = mem::allocZ<colMap_t>( p->maxBatchN * colN );

    // initialize the fixed portion of the col. tracking records
    for(unsigned i=0; i<p->maxBatchN; ++i)
    {
      f->colMapM[i] = f->colMapA + i*colN;

      // for batch index i for each column
      unsigned j=0, eleOffs=0;
      for(col_t* c=f->colL; c!=nullptr; c=c->link,++j)
      {
        colMap_t& cm = f->colMapM[i][j];

        cm.colId = c->col->id;
        cm.rankN = c->col->rankN;

        // if this is a fixed size field then the col. map can be completely populated in advance of reading the data
        // TODO: don't allocate the complete colMapA[] array because every colN records are duplicates anyway.
        // just point colMapM[] to a single row of colMapA[].
        //
        // NOTE(review): the sense of this test looks inverted relative to the comment
        // above, and isFixedSizeFl is only ever assigned 'false' in the code visible
        // here, so this first branch is the one that runs.  Confirm the intended
        // handling of variable size fields before changing it.
        if( !f->isFixedSizeFl )
        {
          cm.eleN           = c->oneHotFl ? c->maxEleN : c->col->eleN;
          cm.fieldEleOffset = eleOffs;
          cm.dimV           = c->col->dimV;

          // Advance by the count of elements this column occupies in the field
          // record.  A one-hot column occupies c->maxEleN elements, not
          // c->col->eleN, so the columns which follow it must be offset accordingly.
          eleOffs += cm.eleN;
        }
        else
        {
          cm.dimV = c->batchDimV + (i*c->col->rankN);
        }
      }
    }
  }

  return rc;
}
|
|
|
|
template< typename S, typename D >
|
|
rc_t _translate_one_hot( std::uint8_t* buf, unsigned bufByteN, const S* src, unsigned srcEleN, const col_t* c, unsigned& dstByteNRef )
|
|
{
|
|
rc_t rc = kOkRC;
|
|
|
|
dstByteNRef = 0;
|
|
|
|
unsigned dstEleN = (c->oneHotMax - c->oneHotMin) + 1;
|
|
unsigned dstByteN = dstEleN * sizeof(D);
|
|
|
|
if( dstByteN > bufByteN )
|
|
return cwLogError(kBufTooSmallRC,"The field buffer is too small (src:%i > buf:%i) during one-hot conversion.",dstByteN,bufByteN);
|
|
|
|
if( srcEleN != 1 )
|
|
return cwLogError(kInvalidArgRC,"One-hot encoded fields must be scalars. (srcEleN:%i)",srcEleN);
|
|
|
|
unsigned oneHotIdx = src[0] - c->oneHotMin;
|
|
|
|
if( oneHotIdx >= dstEleN )
|
|
return cwLogError(kInvalidArgRC,"The one-hot index (%i) is out of the one-hot vector size:%i.",oneHotIdx,dstEleN);
|
|
|
|
memset(buf,0,dstByteN);
|
|
|
|
D* dst = reinterpret_cast<D*>(buf);
|
|
dst[ oneHotIdx ] = 1;
|
|
|
|
dstByteNRef = dstByteN;
|
|
|
|
return rc;
|
|
}
|
|
|
|
template< typename S, typename D >
|
|
rc_t _translate_datatype( const col_t* c, std::uint8_t* buf, unsigned bufByteN, const S* src, unsigned srcEleN, unsigned& dstByteNRef )
|
|
{
|
|
if( c->oneHotFl )
|
|
return _translate_one_hot<S,D>( buf, bufByteN, src, srcEleN, c, dstByteNRef );
|
|
|
|
|
|
unsigned dstByteN = srcEleN * sizeof(D);
|
|
D* dst = reinterpret_cast<D*>(buf);
|
|
|
|
dstByteNRef = 0;
|
|
|
|
if( dstByteN > bufByteN )
|
|
return cwLogError(kBufTooSmallRC,"The field buffer is too small (src:%i > buf:%i).",dstByteN,bufByteN);
|
|
|
|
// copy, and translate, the rdr::col into the field->buf[]
|
|
for(unsigned i=0; i<srcEleN; ++i)
|
|
dst[i] = src[i];
|
|
|
|
dstByteNRef = dstByteN;
|
|
|
|
return kOkRC;
|
|
|
|
}
|
|
|
|
template< typename T >
|
|
rc_t _translate_column_tpl(adapter_t* p, field_t* f, col_t* c, std::uint8_t* buf, unsigned bufN, unsigned& dstByteNRef)
|
|
{
|
|
rc_t rc = kOkRC;
|
|
const T* v = nullptr;
|
|
unsigned vN = 0;
|
|
const unsigned* dimV = nullptr;
|
|
|
|
// read the column
|
|
if((rc = rdr::get(p->rdrH, c->col->id, v, vN, dimV )) != kOkRC )
|
|
return rc;
|
|
|
|
switch( f->flags & kTypeMask )
|
|
{
|
|
case kIntFl: rc = _translate_datatype<T,int>( c, buf, bufN, v, vN, dstByteNRef ); break;
|
|
case kFloatFl: rc = _translate_datatype<T,float>( c, buf, bufN, v, vN, dstByteNRef ); break;
|
|
case kDoubleFl: rc = _translate_datatype<T,double>( c, buf, bufN, v, vN, dstByteNRef ); break;
|
|
default:
|
|
assert(0);
|
|
}
|
|
|
|
|
|
return rc;
|
|
}
|
|
|
|
// Translate the current value of column 'c' into buf[], selecting the source element
// type from the column's reader type id.
rc_t _translate_column( adapter_t* p, field_t* f, col_t* c, std::uint8_t* buf, unsigned bufN, unsigned& dstByteNRef )
{
  rc_t rc = kOkRC;

  if( c->col->typeId == rdr::kIntRdrFl )
    rc = _translate_column_tpl<int>( p,f,c,buf,bufN,dstByteNRef);
  else if( c->col->typeId == rdr::kFloatRdrFl )
    rc = _translate_column_tpl<float>( p,f,c,buf,bufN,dstByteNRef);
  else if( c->col->typeId == rdr::kDoubleRdrFl )
    rc = _translate_column_tpl<double>(p,f,c,buf,bufN,dstByteNRef);
  else
    assert(0);

  return rc;
}
|
|
|
|
// Translate the current reader record into the next free slot of the field buffer.
// The columns of the field are packed one after another starting at
// f->buf + f->bufByteN.  When dim. tracking is enabled the dim's of each variable
// size column are stored for batch index 'batchIdx'.  byteNRef receives the count
// of bytes written for this record.
rc_t _read_field( adapter_t* p, unsigned batchIdx, field_t* f, unsigned& byteNRef )
{
  rc_t rc = kOkRC;

  byteNRef = 0;

  // the buffer is allocated lazily on the first read
  if( f->buf == nullptr && (rc = _allocate_field_buffer(p,f)) != kOkRC )
    return rc;

  const bool trackDimFl  = cwIsFlag(f->flags,kTrackColDimFl);
  unsigned   remainByteN = f->bufMaxFieldByteN;  // space left in this field record
  unsigned   usedByteN   = 0;                    // bytes written to this field record

  for(col_t* c=f->colL; c!=nullptr; c=c->link)
  {
    unsigned      colByteN = 0;
    std::uint8_t* dst      = f->buf + f->bufByteN + usedByteN;

    // translate the source column into the field buffer
    rc = _translate_column( p, f, c, dst, remainByteN, colByteN );
    if( rc != kOkRC )
      return rc;

    assert( remainByteN >= colByteN );

    remainByteN -= colByteN;
    usedByteN   += colByteN;

    // store the dim's of variable size columns when dim. tracking is enabled
    if( trackDimFl && c->col->varDimN>0 )
    {
      unsigned* recdDimV = c->batchDimV + batchIdx * c->col->rankN;

      for( unsigned i=0; i<c->col->rankN; ++i)
        recdDimV[i] = c->col->dimV[i];
    }
  }

  byteNRef = usedByteN;

  return rc;
}
|
|
|
|
template< typename T >
|
|
cw::rc_t _get( handle_t h, unsigned fieldId, const T*& vV, const unsigned*& nV )
|
|
{
|
|
rc_t rc = kOkRC;
|
|
adapter_t* p = _handleToPtr(h);
|
|
field_t* f;
|
|
|
|
if( p->state != kInitState )
|
|
return cwLogError(kInvalidStateRC,"get() failed The adapter is in an invalid state (%i != %i).",p->state,kInitState);
|
|
|
|
if((f = _fieldIdToRecd(p,fieldId)) == nullptr )
|
|
return kInvalidArgRC;
|
|
|
|
if(f->buf == nullptr )
|
|
return cwLogError( kInvalidStateRC, "read() must be called begore get().");
|
|
|
|
vV = reinterpret_cast<const T*>(f->buf);
|
|
nV = f->batchEleNV;
|
|
|
|
return rc;
|
|
}
|
|
|
|
template< typename T >
|
|
cw::rc_t _print_field( adapter_t* p, field_t* f, const char* fmt, unsigned batchIdx, const T* v, unsigned vN )
|
|
{
|
|
rc_t rc = kOkRC;
|
|
unsigned i = 0,k = 0;
|
|
for(col_t* c=f->colL; c!=nullptr; c=c->link,++i)
|
|
{
|
|
colMap_t* cm = f->colMapM[batchIdx] + i;
|
|
|
|
printf("| %s %i : ", c->col->label, cm->eleN );
|
|
for(unsigned j=0; j<cm->eleN; ++j)
|
|
printf(fmt,v[k++]);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
template< typename T >
|
|
cw::rc_t _print_field( adapter_t* p, field_t* f, const char* fmt )
|
|
{
|
|
rc_t rc = kOkRC;
|
|
|
|
printf("Field:%3i \n",f->id);
|
|
for(unsigned i=0,k=0; i<p->batchN; ++i)
|
|
{
|
|
printf("%i : ",i);
|
|
|
|
T* v = reinterpret_cast<T*>(f->buf) + k;
|
|
unsigned vN = f->batchEleNV[i];
|
|
|
|
if( cwIsFlag(f->flags,kTrackColDimFl) )
|
|
rc = _print_field(p,f,fmt,i,v,vN);
|
|
else
|
|
for(unsigned j=0; j<vN; ++j)
|
|
printf(fmt,v[j]);
|
|
|
|
k += vN;
|
|
printf("\n");
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
// Create an adapter on the dataset file 'fn'.
// Any adapter already held by hRef is destroyed first.  'maxBatchN' is stored as the
// largest batch size accepted by read().  On success hRef refers to the new adapter
// and the adapter state is kPreInitState.
cw::rc_t cw::dataset::adapter::create( handle_t& hRef, const char* fn, unsigned maxBatchN )
{
  rc_t rc = destroy(hRef);

  if( rc != kOkRC )
    return rc;

  adapter_t* p = mem::allocZ<adapter_t>(1);

  // open the source data file
  rc = rdr::create(p->rdrH,fn);

  if( rc != kOkRC )
  {
    _destroy(p);
    return rc;
  }

  p->maxBatchN = maxBatchN;
  p->state     = kPreInitState;

  hRef.set(p);

  return rc;
}
|
|
|
|
// Destroy the adapter referred to by hRef and clear the handle.
// An invalid handle is ignored and reported as success.
cw::rc_t cw::dataset::adapter::destroy( handle_t& hRef )
{
  rc_t rc = kOkRC;

  if( hRef.isValid() )
  {
    adapter_t* p = _handleToPtr(hRef);

    // the handle is only cleared if the adapter was released
    if((rc = _destroy(p)) == kOkRC )
      hRef.clear();
  }

  return rc;
}
|
|
|
|
// Create a new field on the adapter and optionally assign a first column to it.
//
// h         Adapter handle.
// fieldId   Id by which the client refers to the field.
// flags     One of kIntFl, kFloatFl, kDoubleFl, optionally combined with other field flags.
// colLabel  Label of the first column to assign to the field, or nullptr for none.
// oneHotFl  True to one-hot encode the column named by colLabel.
//
// Returns kInvalidArgRC, without creating the field, if 'flags' does not name a
// valid data type.  Otherwise returns the result of the column assignment.
cw::rc_t cw::dataset::adapter::create_field( handle_t h, unsigned fieldId, unsigned flags, const char* colLabel, bool oneHotFl )
{
  rc_t           rc          = kOkRC;
  adapter_t*     p           = _handleToPtr(h);
  const unsigned typeFlags   = flags & kTypeMask;
  unsigned       bytesPerEle = 0;

  // Validate the data type before anything is allocated.  A field with an
  // invalid type would have a bytesPerEle of 0, which read() divides by.
  switch( typeFlags )
  {
    case kIntFl:    bytesPerEle = sizeof(int);    break;
    case kFloatFl:  bytesPerEle = sizeof(float);  break;
    case kDoubleFl: bytesPerEle = sizeof(double); break;
    default:
      return cwLogError(kInvalidArgRC,"The field data type value 0x%x is not valid.", typeFlags );
  }

  field_t* f = mem::allocZ<field_t>(1);

  f->id          = fieldId;
  f->flags       = flags;
  f->bytesPerEle = bytesPerEle;
  f->batchEleNV  = mem::alloc<unsigned>(p->maxBatchN);

  // prepend the field to the adapter's field list
  f->link   = p->fieldL;
  p->fieldL = f;

  // NOTE(review): f->isFixedSizeFl is left 'false' by allocZ() and is never set
  // to 'true' in the code visible here - confirm the intended initial value.

  if( colLabel != nullptr )
    rc = _assign_column( p, f, colLabel, oneHotFl );

  return rc;
}
|
|
|
|
// Assign the source column named 'colLabel' to the existing field 'fieldId'.
// Returns kInvalidArgRC if the column or the field does not exist.
cw::rc_t cw::dataset::adapter::assign_column( handle_t h, unsigned fieldId, const char* colLabel, bool oneHotFl )
{
  adapter_t* p = _handleToPtr(h);

  // the column must exist in the source data file
  if( rdr::column_cfg(p->rdrH,colLabel) == nullptr )
    return kInvalidArgRC;

  // the field must already have been created
  field_t* fld = _fieldIdToRecd(p,fieldId);

  if( fld == nullptr )
    return kInvalidArgRC;

  return _assign_column( p, fld, colLabel, oneHotFl );
}
|
|
|
|
// Return the record count reported by the adapter's source reader.
unsigned cw::dataset::adapter::record_count( handle_t h )
{
  return rdr::record_count( _handleToPtr(h)->rdrH );
}
|
|
|
|
|
|
// Return the bufEleN value of field 'fieldId' or 0 if the field does not exist.
// NOTE(review): bufEleN is the element count of the whole batch from the last
// read(), not a per record count - confirm this is what callers expect.
unsigned cw::dataset::adapter::field_fixed_ele_count( handle_t h, unsigned fieldId )
{
  const field_t* fld = _fieldIdToRecd( _handleToPtr(h), fieldId );

  return fld == nullptr ? 0 : fld->bufEleN;
}
|
|
|
|
|
|
// Read and translate a batch of records into the buffers of every field.
//
// h           Adapter handle.
// batchN      Count of records to read.  Must not exceed the adapter's maxBatchN.
// recordIdxV  recordIdxV[batchN] index of each record to read, or nullptr to pass
//             kInvalidIdx to the reader for every record.
//
// The first call moves the adapter from kPreInitState to kInitState.  When the
// reader reports the end of the file the adapter is left in kEofState and kEofRC
// is returned.  Any other failure leaves the adapter in kErrorState.
// p->batchN holds the count of records which were completely translated.
cw::rc_t cw::dataset::adapter::read( handle_t h, unsigned batchN, const unsigned* recordIdxV )
{
  rc_t       rc = kOkRC;
  adapter_t* p  = _handleToPtr(h);

  switch( p->state )
  {
    case kInitState:
      break;

    case kPreInitState:
      p->state = kInitState;
      break;

    default:
      return cwLogError(kInvalidStateRC,"Invalid adapter state (%i != %i).",p->state,kInitState);
  }

  if( batchN > p->maxBatchN )
    return cwLogError(kInvalidArgRC,"The batch count:%i is greater than the max batch count:%i.",batchN,p->maxBatchN);

  p->batchN = 0;

  // for each record in this batch
  for(unsigned i=0; i<batchN; ++i)
  {
    // read the data record
    if((rc = rdr::read(p->rdrH, recordIdxV==nullptr ? kInvalidIdx : recordIdxV[i] )) != kOkRC )
    {
      // The end of the file is not an error.  Return directly so that the
      // EOF state is not replaced by the error state set at errLabel.
      if( rc == kEofRC )
      {
        p->state = kEofState;
        return rc;
      }

      goto errLabel;
    }

    // translate each field
    for(field_t* f=p->fieldL; f!=nullptr; f=f->link)
    {
      unsigned fieldByteN = 0;

      // the field buffers are refilled from the start on the first record of each batch
      if( i == 0 )
      {
        f->bufEleN  = 0;
        f->bufByteN = 0;
      }

      // read the field into f->buf[]
      if((rc = _read_field(p,i,f,fieldByteN)) != kOkRC )
      {
        rc = cwLogError(rc,"Field (id:%i) read failed.",f->id);
        goto errLabel;
      }

      assert( fieldByteN % f->bytesPerEle == 0 );

      // update the buffer state
      unsigned fieldEleN = fieldByteN / f->bytesPerEle;
      f->bufEleN       += fieldEleN;
      f->bufByteN      += fieldByteN;
      f->batchEleNV[i]  = fieldEleN;
    }

    p->batchN += 1;
  }

 errLabel:
  if( rc != kOkRC )
    p->state = kErrorState;

  return rc;
}
|
|
|
|
// Typed access to the buffer of a field.  Each overload forwards to the _get<>()
// template, which views the field buffer as an array of the requested type.

// Access the field buffer as 'int' values.
cw::rc_t cw::dataset::adapter::get( handle_t h, unsigned fieldId, const int*& vV, const unsigned*& nV )
{
  return _get<int>(h,fieldId,vV,nV);
}

// Access the field buffer as 'float' values.
cw::rc_t cw::dataset::adapter::get( handle_t h, unsigned fieldId, const float*& vV, const unsigned*& nV )
{
  return _get<float>(h,fieldId,vV,nV);
}

// Access the field buffer as 'double' values.
cw::rc_t cw::dataset::adapter::get( handle_t h, unsigned fieldId, const double*& vV, const unsigned*& nV )
{
  return _get<double>(h,fieldId,vV,nV);
}
|
|
|
|
|
|
// Return the column map matrix (colMapM[]) of field 'fieldId'.
// Fails if the adapter is not in kInitState or if the field id is unknown.
// The returned pointer is whatever the field currently holds; the column maps are
// only allocated for fields created with kTrackColDimFl.
cw::rc_t cw::dataset::adapter::column_map( handle_t h, unsigned fieldId, colMap_t const * const *& colMapV_Ref )
{
  adapter_t* p = _handleToPtr(h);

  if( p->state != kInitState )
    return cwLogError(kInvalidStateRC,"Invalid adapter state (%i != %i).",p->state,kInitState);

  field_t* fld = _fieldIdToRecd(p,fieldId);

  if( fld == nullptr )
    return kInvalidArgRC;

  colMapV_Ref = fld->colMapM;

  return kOkRC;
}
|
|
|
|
|
|
// Return the current state value of the adapter.
unsigned cw::dataset::adapter::state( handle_t h )
{
  return _handleToPtr(h)->state;
}
|
|
|
|
// Print the contents of field 'fieldId' from the last batch to stdout.
// 'fmt' is the printf() format used for each value.  When it is nullptr "%i " is
// used for integer fields and "%f " for float and double fields.
cw::rc_t cw::dataset::adapter::print_field( handle_t h, unsigned fieldId, const char* fmt )
{
  adapter_t* p   = _handleToPtr(h);
  field_t*   fld = _fieldIdToRecd(p,fieldId);

  if( fld == nullptr )
    return cwLogError(kInvalidArgRC,"Invalid field id (%i).",fieldId);

  const unsigned typeFl = fld->flags & kTypeMask;

  if( typeFl == kIntFl )
    return _print_field<int>( p, fld, fmt==nullptr ? "%i " : fmt );

  if( typeFl == kFloatFl )
    return _print_field<float>( p, fld, fmt==nullptr ? "%f " : fmt );

  if( typeFl == kDoubleFl )
    return _print_field<double>(p, fld, fmt==nullptr ? "%f " : fmt );

  return cwLogError(kInvalidArgRC,"Unknown type flag: 0x%x.",fld->flags & kTypeMask);
}
|
|
|
|
|
|
// Adapter self-test.
// Opens the dataset file named by the 'inFn' cfg. argument with a max batch size of
// 'batchN', creates an integer one-hot field on "col0" and a float field on
// "col1","col2","col3", reads records 2,1,0 as a single batch and prints the
// contents of both fields.
cw::rc_t cw::dataset::adapter::test( const object_t* cfg )
{
  rc_t     rc     = kOkRC;
  char*    inFn   = nullptr;
  unsigned batchN = 0;
  handle_t h;

  enum {
    kField0Id = 0,
    kField1Id = 1
  };

  // read the cfg args
  if((rc = cfg->getv("inFn",inFn,"batchN",batchN)) != kOkRC )
    return cwLogError(rc,"adapter test failed. Argument parse failed.");

  inFn = filesys::expandPath(inFn);

  // create the adapter
  if((rc = create(h, inFn, batchN)) != kOkRC )
  {
    rc = cwLogError(rc,"Unable to create dataset adapter for '%s'.",inFn);
    goto errLabel;
  }
  else
  {
    const int*      xV         = nullptr;
    const float*    yV         = nullptr;
    const unsigned* xNV        = nullptr;
    const unsigned* yNV        = nullptr;
    unsigned        recdIdxV[] = { 2,1,0 };

    if((rc = create_field( h, kField0Id, kIntFl | kTrackColDimFl, "col0", true )) != kOkRC )
      goto errLabel;

    if((rc = create_field( h, kField1Id, kFloatFl | kTrackColDimFl, "col1" )) != kOkRC )
      goto errLabel;

    if((rc = assign_column( h, kField1Id, "col2" )) != kOkRC )
      goto errLabel;

    if((rc = assign_column( h, kField1Id, "col3" )) != kOkRC )
      goto errLabel;

    // read() indexes recdIdxV[0..batchN-1].  This is checked at runtime, rather
    // than with an assert(), so that release builds cannot read past the array.
    if( batchN != cwCountOf(recdIdxV) )
    {
      rc = cwLogError(kInvalidArgRC,"The adapter test requires a 'batchN' of %i.",static_cast<int>(cwCountOf(recdIdxV)));
      goto errLabel;
    }

    if((rc = read(h, batchN, recdIdxV )) != kOkRC )
      goto errLabel;

    if((rc = get(h, kField0Id, xV, xNV )) != kOkRC )
      goto errLabel;

    if((rc = get(h, kField1Id, yV, yNV )) != kOkRC )
      goto errLabel;

    // n0 and n1 are the offsets of record i in xV[] and yV[]
    for(unsigned i=0,n0=0,n1=0; i<batchN; ++i)
    {
      for(unsigned j=0; j<xNV[i]; ++j)
        printf("%i ", xV[n0+j]);
      n0 += xNV[i];

      printf(": ");

      for(unsigned j=0; j<yNV[i]; ++j)
        printf("%f ", yV[n1+j] );
      n1 += yNV[i];  // accumulate, as for n0, so record i+1 starts after record i

      printf("\n");
    }

    print_field(h, kField0Id);
    print_field(h, kField1Id);
  }

 errLabel:
  destroy(h);

  mem::release(inFn);

  return rc;
}
|
|
|
|
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
|
|
namespace cw
|
|
{
|
|
namespace dataset
|
|
{
|
|
namespace mnist
|
|
{
|
|
// In-memory copy of the MNIST training, validation and test files.
struct mnist_str
{
  char*     trainFn;     // Path of the training data file
  char*     testFn;      // Path of the test data file
  char*     validFn;     // Path of the validation data file

  unsigned* data_dimV;   // (not referenced in the code visible in this section)
  unsigned* label_dimV;  // (not referenced in the code visible in this section)

  unsigned  exampleN;    // Total count of examples from all three files
  float*    dataM;       // dataM[ kPixN * exampleN ] image data, kPixN values per example
  unsigned* labelV;      // labelV[ exampleN ] label of each example

  unsigned  kPixN;       // Count of values per image (set to 784 by create())

  unsigned  curIdx;      // Index of the next example returned by dataM()
};

typedef struct mnist_str mnist_t;
|
|
|
|
// Convert an MNIST handle to a pointer to its mnist_t record.
inline mnist_t* _handleToPtr(handle_t h )
{
  mnist_t* p = handleToPtr<handle_t,mnist_t>(h);
  return p;
}
|
|
|
|
// Release the file names, the example data and the mnist_t record itself.
// Always returns kOkRC.
rc_t _destroy( mnist_t* p )
{
  // file names
  mem::release(p->trainFn);
  mem::release(p->validFn);
  mem::release(p->testFn);

  // example data
  mem::release(p->dataM);
  mem::release(p->labelV);

  mem::release(p);

  return kOkRC;
}
|
|
|
|
// Read the example count stored at the start of the MNIST file 'fn' into nRef.
// The file is always closed before returning, including when the read fails.
// Returns the first error encountered, or kOkRC.
rc_t _read_file_record_count( const char* fn, unsigned& nRef )
{
  rc_t           rc;
  rc_t           closeRC;
  file::handle_t fH;

  // open the file - there is nothing to close if this fails
  if((rc = file::open(fH, fn, file::kReadFl | file::kBinaryFl )) != kOkRC )
    return cwLogError(rc,"MNIST file open failed on '%s'.",cwStringNullGuard(fn));

  // read the count of examples
  if((rc = read(fH,nRef)) != kOkRC )
    rc = cwLogError(rc,"Unable to read MNIST example count.");

  // close the file whether or not the read succeeded
  if((closeRC = file::close(fH)) != kOkRC )
  {
    closeRC = cwLogError(closeRC,"MNIST file close failed on '%s'.",cwStringNullGuard(fn));

    // a read failure takes precedence over a close failure
    if( rc == kOkRC )
      rc = closeRC;
  }

  return rc;
}
|
|
|
|
// Load the MNIST file 'fn' into caller supplied buffers.
//
// p       MNIST object (supplies kPixN, the count of values per image).
// fn      File to read.
// n       Count of examples the caller expects, and has allocated space for.
// dataM   dataM[ n * kPixN ] receives the image data.
// labelV  labelV[ n ] receives the labels.
//
// Fails, without writing to the buffers, if the example count stored in the file
// does not match 'n'.
rc_t _read_file( mnist_t* p, const char* fn, unsigned n, float* dataM, unsigned* labelV )
{
  file::handle_t fH;
  rc_t           rc       = kOkRC;
  unsigned       exampleN = 0;

  // open the file
  if((rc = file::open(fH, fn, file::kReadFl | file::kBinaryFl )) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST file open failed on '%s'.",cwStringNullGuard(fn));
    goto errLabel;
  }

  // read the count of examples
  if((rc = read(fH,exampleN)) != kOkRC )
  {
    rc = cwLogError(rc,"Unable to read MNIST example count.");
    goto errLabel;
  }

  // The buffers were sized for 'n' examples.  This is checked at runtime, rather
  // than with an assert(), so that release builds cannot write past them.
  if( exampleN != n )
  {
    rc = cwLogError(kOpFailRC,"The MNIST example count (%i) does not match the expected count (%i).",exampleN,n);
    goto errLabel;
  }

  // read each example
  for(unsigned i=0; i<exampleN; ++i)
  {
    // read the digit image label
    if((rc = read(fH, labelV[i])) != kOkRC )
    {
      rc = cwLogError(rc,"Unable to read MNIST label on example %i.",i);
      goto errLabel;
    }

    // read the image pixels
    if((rc = readFloat(fH, dataM + i*p->kPixN, p->kPixN)) != kOkRC )
    {
      rc = cwLogError(rc,"Unable to read MNIST data vector on example %i.",i);
      goto errLabel;
    }
  }

 errLabel:
  if( rc != kOkRC)
    rc = cwLogError(rc,"Load failed on MNIST file %s.",cwStringNullGuard(fn));

  file::close(fH);

  return rc;
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// Create an MNIST object by loading the files "mnist_train.bin", "mnist_valid.bin"
// and "mnist_test.bin" from the directory 'dir' into memory.
// The examples are stored in the order: training, validation, test.
// Any object already held by 'h' is destroyed first.  On failure an error is
// returned and 'h' is left unset.
cw::rc_t cw::dataset::mnist::create( handle_t& h, const char* dir )
{
  rc_t     rc;
  mnist_t* p      = nullptr;
  char*    inDir  = nullptr;
  unsigned trainN = 0;
  unsigned validN = 0;
  unsigned testN  = 0;

  if((rc = destroy(h)) != kOkRC )
    return rc;

  inDir = filesys::expandPath(dir);

  // allocate the object
  p        = mem::allocZ<mnist_t>(1);
  p->kPixN = 784;

  p->trainFn = filesys::makeFn(inDir, "mnist_train", ".bin", nullptr );
  p->validFn = filesys::makeFn(inDir, "mnist_valid", ".bin", nullptr );
  p->testFn  = filesys::makeFn(inDir, "mnist_test",  ".bin", nullptr );

  // the expanded directory is not needed once the file names have been formed
  mem::release(inDir);

  // Get the count of examples in each file.  A failure here is reported
  // immediately rather than being left for the load stage to trip over.
  if((rc = _read_file_record_count( p->trainFn, trainN )) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST training set example count read failed.");
    goto errLabel;
  }

  if((rc = _read_file_record_count( p->validFn, validN )) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST validation set example count read failed.");
    goto errLabel;
  }

  if((rc = _read_file_record_count( p->testFn, testN )) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST test set example count read failed.");
    goto errLabel;
  }

  p->exampleN = trainN + validN + testN;

  // allocate the data memory
  p->dataM  = mem::alloc<float>( p->kPixN * p->exampleN );
  p->labelV = mem::alloc<unsigned>( p->exampleN );

  // read the training data
  if((rc = _read_file( p, p->trainFn, trainN, p->dataM, p->labelV )) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST training set load failed.");
    goto errLabel;
  }

  // read the validation data
  if((rc = _read_file( p, p->validFn, validN, p->dataM + p->kPixN*trainN, p->labelV + trainN )) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST validation set load failed.");
    goto errLabel;
  }

  // read the testing data
  if((rc = _read_file( p, p->testFn, testN, p->dataM + p->kPixN*(trainN +validN), p->labelV + (trainN + validN) )) != kOkRC )
  {
    rc = cwLogError(rc,"MNIST test set load failed.");
    goto errLabel;
  }

  h.set(p);

 errLabel:
  if( rc != kOkRC )
    _destroy(p);

  return rc;
}
|
|
|
|
// Destroy the MNIST object referred to by 'h' and clear the handle.
// An invalid handle is ignored and reported as success.
cw::rc_t cw::dataset::mnist::destroy( handle_t& h )
{
  rc_t rc = kOkRC;

  if( h.isValid() )
  {
    mnist_t* p = _handleToPtr(h);

    // the handle is only cleared if the object was released
    if((rc = _destroy(p)) == kOkRC )
      h.clear();
  }

  return rc;
}
|
|
|
|
|
|
// Return the total count of examples held by the MNIST object.
unsigned cw::dataset::mnist::record_count( handle_t h )
{
  return _handleToPtr(h)->exampleN;
}
|
|
|
|
// Set the index of the next example returned by dataM().
// Any index up to and including the example count is accepted; a larger index
// fails with kSeekFailRC and leaves the current index unchanged.
cw::rc_t cw::dataset::mnist::seek( handle_t h, unsigned exampleIdx )
{
  mnist_t* p = _handleToPtr(h);

  if( exampleIdx > p->exampleN )
    return cwLogError(kSeekFailRC,"Illegal seek index. Seek failed.");

  p->curIdx = exampleIdx;

  return kOkRC;
}
|
|
|
|
// Get access to a run of consecutive examples.
//
// h                   MNIST handle.
// dataM_Ref           Set to point to the image data of the first returned example.
// labelV_Ref          Set to point to the label of the first returned example.
// exampleN            Count of examples requested.
// actualExampleN_Ref  Count of examples actually available at the returned
//                     pointers.  This is less than exampleN when the request runs
//                     past the last example and is 0 when kEofRC is returned.
// exampleIdx          Index of the first example, or kInvalidIdx to continue from
//                     the current index.
//
// The returned pointers refer to memory owned by the MNIST object.  On success
// the current index is left just past the last example returned.
// Returns kEofRC if the first example index is at or past the end of the data.
cw::rc_t cw::dataset::mnist::dataM( handle_t h, const float*& dataM_Ref, const unsigned*& labelV_Ref, unsigned exampleN, unsigned& actualExampleN_Ref, unsigned exampleIdx )
{
  rc_t     rc = kOkRC;
  mnist_t* p  = _handleToPtr(h);

  if( exampleIdx == kInvalidIdx )
    exampleIdx = p->curIdx;

  if( exampleIdx >= p->exampleN )
  {
    actualExampleN_Ref = 0;
    return kEofRC;
  }

  // Clip the request to the available examples.  The test is written as a
  // subtraction so that a very large exampleN cannot wrap exampleIdx + exampleN.
  if( exampleN > p->exampleN - exampleIdx )
    exampleN = p->exampleN - exampleIdx;

  dataM_Ref  = p->dataM + exampleIdx * p->kPixN;
  labelV_Ref = p->labelV + exampleIdx;

  actualExampleN_Ref = exampleN;

  // Advance relative to the index which was actually read so that the cursor is
  // also correct when the caller supplied an explicit example index.
  p->curIdx = exampleIdx + exampleN;

  return rc;
}
|
|
|
|
|
|
// Write all examples held by the MNIST object to the dataset file 'fn'.
// Two columns are defined: "numb", a single value holding the label of each
// example, and "imag", a 28x28 array holding the image.  The examples are fetched
// from the MNIST object in groups of up to 100 and written one record at a time.
cw::rc_t cw::dataset::mnist::write( handle_t h, const char* fn )
{
  enum { kImagId, kNumbId };

  rc_t          rc         = kOkRC;
  unsigned      recdN      = record_count(h);
  unsigned      recdIdx    = 0;
  unsigned      numbDimV[] = {1};
  unsigned      imagDimV[] = {28,28};
  unsigned      imagEleN   = imagDimV[0]*imagDimV[1];
  wtr::handle_t wtrH;

  if((rc = wtr::create(wtrH,fn)) != kOkRC )
    return cwLogError(rc,"Dataset wtr create failed.");

  rc = define_columns( wtrH, "numb", kNumbId, cwCountOf(numbDimV), numbDimV );
  if( rc != kOkRC )
    goto errLabel;

  rc = define_columns( wtrH, "imag", kImagId, cwCountOf(imagDimV), imagDimV );
  if( rc != kOkRC )
    goto errLabel;

  printf("recdN: %i\n",recdN);

  while( recdIdx < recdN )
  {
    const float*    imagM      = nullptr;
    const unsigned* numbV      = nullptr;
    unsigned        remainN    = recdN - recdIdx;
    unsigned        cacheRecdN = remainN < 100u ? remainN : 100u;
    unsigned        actRecdN   = 0;

    // get access to the next group of examples
    rc = dataM(h, imagM, numbV, cacheRecdN, actRecdN, recdIdx );
    if( rc != kOkRC )
    {
      cwLogError(rc,"Extract image data failed.");
      goto errLabel;
    }

    for(unsigned j=0; j<actRecdN; ++j)
    {
      // write the digit this imag represents as an 'int'.
      rc = wtr::write( wtrH, kNumbId, ((int*)numbV) + j, 1 );
      if( rc != kOkRC )
        goto errLabel;

      // write the image data as 'floats'
      rc = wtr::write( wtrH, kImagId, imagM + j*imagEleN, imagEleN );
      if( rc != kOkRC )
        goto errLabel;

      // complete the record
      rc = wtr::write_record( wtrH );
      if( rc != kOkRC )
        goto errLabel;
    }

    recdIdx += actRecdN;
  }

 errLabel:
  if(rc != kOkRC )
    cwLogError(rc, "MNIST data file write failed.");

  wtr::destroy(wtrH);

  return rc;
}
|
|
|
|
|
|
// MNIST self-test.
// Loads the MNIST files from the 'inDir' cfg. argument, prints the labels of the
// first 10 examples and writes their images to the file named by 'outHtmlFn'.
// Returns the first error encountered; a later successful destroy() does not
// replace it.
cw::rc_t cw::dataset::mnist::test( const object_t* cfg )
{
  handle_t h;
  rc_t     rc        = kOkRC;
  char*    inDir     = nullptr;
  char*    outHtmlFn = nullptr;

  if((rc = cfg->getv("inDir",inDir,"outHtmlFn",outHtmlFn)) != kOkRC )
    return cwLogError(rc,"MNIST test failed. Argument parse failed.");

  inDir     = filesys::expandPath(inDir);
  outHtmlFn = filesys::expandPath(outHtmlFn);

  if((rc = create(h, inDir )) == kOkRC )
  {
    svg::handle_t svgH;

    if((rc = svg::create(svgH)) != kOkRC )
      rc = cwLogError(rc,"SVG Test failed on create.");
    else
    {
      //const mtx::f_t* m = train(h);

      /*
        unsigned zn = 0;
        unsigned i = 1;
        for(; i<m->dimV[1]; ++i)
        {
          const float* v0 = m->base + (28*28+1) * (i-1) + 1;
          const float* v1 = m->base + (28*28+1) * (i-0) + 1;
          float d = 0;

          for(unsigned j=0; j<28*28; ++j)
            d += fabs(v0[j]-v1[j]);

          if( d==0 )
            ++zn;
          else
          {
            printf("%i %i %f\n",i,zn,d);
            zn = 0;
          }
        }

        printf("i:%i n:%i zn:%i\n",i,m->dimV[1],zn);
      */

      const float*    dataM          = nullptr;
      const unsigned* labelV         = nullptr;
      unsigned        exampleN       = 10;
      unsigned        actualExampleN = 0;

      //mnist::seek( h, 10 );
      mnist::dataM( h, dataM, labelV, exampleN, actualExampleN );

      // stack the images vertically
      for(unsigned i=0; i<actualExampleN; ++i)
      {
        printf("label: %i\n", labelV[i] );

        svg::offset(svgH, 0, i*30*5 );
        svg::image(svgH, dataM + (28*28)*i, 28, 28, 5, svg::kInvGrayScaleColorMapId);
      }

      svg::write(svgH, outHtmlFn, nullptr, svg::kStandAloneFl | svg::kGenInlineStyleFl, 10,10,10,10);

      svg::destroy(svgH);
    }

    // Keep an earlier error: only report the destroy() result if nothing
    // has failed so far.
    rc_t destroyRC = destroy(h);

    if( rc == kOkRC )
      rc = destroyRC;
  }

  mem::release(outHtmlFn);
  mem::release(inDir);

  return rc;
}
|
|
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------------------------------------------------------
|
|
|
|
// Dataset round-trip test.
// Loads the native MNIST files from 'inDir', writes them to the dataset file 'dsFn',
// reads the first 'batchN' records back through a dataset adapter, prints their
// labels and writes their images to 'outHtmlFn'.
// Every object created here is released on all exit paths after the argument parse.
cw::rc_t cw::dataset::test( const object_t* cfg )
{
  rc_t              rc        = kOkRC;
  char*             inDir     = nullptr;
  char*             dsFn      = nullptr;
  char*             outHtmlFn = nullptr;
  mnist::handle_t   mniH;
  adapter::handle_t rdrH;
  svg::handle_t     svgH;
  unsigned          batchN    = 10;

  if((rc = cfg->getv("inDir",inDir,"dsFn",dsFn,"outHtmlFn",outHtmlFn,"batchN",batchN)) != kOkRC )
    return cwLogError(rc,"MNIST test failed. Argument parse failed.");

  inDir     = filesys::expandPath(inDir);
  dsFn      = filesys::expandPath(dsFn);
  outHtmlFn = filesys::expandPath(outHtmlFn);

  // open the native MNIST object
  if((rc = mnist::create(mniH, inDir )) != kOkRC )
  {
    cwLogError(rc,"Unable to open the native MNIST object.");
    goto errLabel;
  }

  // write the MNIST data to a dataset file
  if((rc = mnist::write(mniH, dsFn)) != kOkRC )
  {
    cwLogError(rc,"MNIST dataset write failed");
    goto errLabel;
  }

  // the native data is no longer needed
  mnist::destroy(mniH);

  // open a dataset adapter
  if((rc = adapter::create(rdrH,dsFn,batchN)) != kOkRC )
  {
    cwLogError(rc,"Dataset reader create failed.");
    goto errLabel;
  }

  // create an SVG file
  if((rc = svg::create(svgH)) != kOkRC )
  {
    rc = cwLogError(rc,"SVG writer create failed.");
    goto errLabel;
  }

  {
    enum { kImagId, kNumbId };

    const int*      numbV  = nullptr;
    const unsigned* numbNV = nullptr;
    const float*    imagV  = nullptr;
    const unsigned* imagNV = nullptr;

    // create dataset fields
    if((rc = create_field( rdrH, kImagId, adapter::kFloatFl, "imag" )) != kOkRC )
    {
      cwLogError(rc,"Dataset rdr column define failed.");
      goto errLabel;
    }

    if((rc = create_field( rdrH, kNumbId, adapter::kIntFl, "numb" )) != kOkRC )
    {
      cwLogError(rc,"Dataset rdr column define failed.");
      goto errLabel;
    }

    // read a batch of data
    if((rc = adapter::read( rdrH, batchN)) != kOkRC )
    {
      cwLogError(rc,"Batch read failed.");
      goto errLabel;
    }

    // get the labels
    if((rc = adapter::get(rdrH, kNumbId, numbV, numbNV )) != kOkRC )
    {
      cwLogError(rc,"Label field access failed.");
      goto errLabel;
    }

    // get the image data
    if((rc = adapter::get(rdrH, kImagId, imagV, imagNV )) != kOkRC )
    {
      cwLogError(rc,"Image field access failed.");
      goto errLabel;
    }

    // stack the images vertically
    for(unsigned i=0; i<batchN; ++i)
    {
      printf("label: %i\n", numbV[i] );

      svg::offset(svgH, 0, i*30*5 );
      svg::image(svgH, imagV + (28*28)*i, 28, 28, 5, svg::kInvGrayScaleColorMapId);
    }

    svg::write(svgH, outHtmlFn, nullptr, svg::kStandAloneFl | svg::kGenInlineStyleFl, 10,10,10,10);
  }

 errLabel:
  // mnist::destroy() ignores a handle which was never set or was already
  // destroyed, so this only has an effect when mnist::write() failed.
  mnist::destroy(mniH);
  adapter::destroy(rdrH);
  svg::destroy(svgH);
  mem::release(inDir);
  mem::release(dsFn);
  mem::release(outHtmlFn);

  return rc;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|