cwDataSet.h/cpp, cwNN.h/cpp : Updates.
This commit is contained in:
parent
4355c78f0d
commit
6dd882312d
@ -75,8 +75,8 @@ libcwSRC += src/libcw/cwMdns.cpp src/libcw/cwEuCon.cpp src/libcw/cwDnsSd.cpp src
|
||||
if cwWEB
|
||||
|
||||
else
|
||||
# libcwHDR += src/libcw/cwSvg.h src/libcw/cwDataSets.h
|
||||
# libcwSRC += src/libcw/cwSvg.cpp src/libcw/cwDataSets.cpp
|
||||
libcwHDR += src/libcw/cwSvg.h src/libcw/cwDataSets.h
|
||||
libcwSRC += src/libcw/cwSvg.cpp src/libcw/cwDataSets.cpp
|
||||
endif
|
||||
|
||||
if cwFFTW
|
||||
|
544
cwDataSets.cpp
544
cwDataSets.cpp
@ -2,12 +2,14 @@
|
||||
#include "cwLog.h"
|
||||
#include "cwCommonImpl.h"
|
||||
#include "cwMem.h"
|
||||
#include "cwObject.h"
|
||||
#include "cwFile.h"
|
||||
#include "cwFileSys.h"
|
||||
#include "cwVectOps.h"
|
||||
#include "cwMtx.h"
|
||||
#include "cwDataSets.h"
|
||||
#include "cwSvg.h"
|
||||
|
||||
#include "cwTime.h"
|
||||
|
||||
namespace cw
|
||||
{
|
||||
@ -17,9 +19,9 @@ namespace cw
|
||||
{
|
||||
typedef struct mnist_str
|
||||
{
|
||||
mtx::fmtx_t* train = nullptr;
|
||||
mtx::fmtx_t* valid = nullptr;
|
||||
mtx::fmtx_t* test = nullptr;
|
||||
mtx::f_t* train = nullptr;
|
||||
mtx::f_t* valid = nullptr;
|
||||
mtx::f_t* test = nullptr;
|
||||
|
||||
} mnist_t;
|
||||
|
||||
@ -37,7 +39,7 @@ namespace cw
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc_t _read_file( const char* dir, const char* fn, mtx::fmtx_t*& m )
|
||||
rc_t _read_file( const char* dir, const char* fn, mtx::f_t*& m )
|
||||
{
|
||||
rc_t rc = kOkRC;
|
||||
file::handle_t fH;
|
||||
@ -90,7 +92,7 @@ namespace cw
|
||||
}
|
||||
|
||||
dimV[1] = exampleN;
|
||||
m = mtx::alloc<float>( dimN, dimV, v, mtx::kAliasReleaseFl );
|
||||
m = mtx::alloc<float>( dimV, dimN, v, mtx::kAliasReleaseFl );
|
||||
|
||||
errLabel:
|
||||
file::close(fH);
|
||||
@ -110,24 +112,26 @@ cw::rc_t cw::dataset::mnist::create( handle_t& h, const char* dir )
|
||||
if((rc = destroy(h)) != kOkRC )
|
||||
return rc;
|
||||
|
||||
char* inDir = filesys::expandPath(dir);
|
||||
|
||||
p = mem::allocZ<mnist_t>(1);
|
||||
|
||||
// read the training data
|
||||
if((rc = _read_file( dir, "mnist_train", p->train )) != kOkRC )
|
||||
if((rc = _read_file( inDir, "mnist_train", p->train )) != kOkRC )
|
||||
{
|
||||
rc = cwLogError(rc,"MNIST training set load failed.");
|
||||
goto errLabel;
|
||||
}
|
||||
|
||||
// read the validation data
|
||||
if((rc = _read_file( dir, "mnist_valid", p->valid )) != kOkRC )
|
||||
if((rc = _read_file( inDir, "mnist_valid", p->valid )) != kOkRC )
|
||||
{
|
||||
rc = cwLogError(rc,"MNIST validation set load failed.");
|
||||
goto errLabel;
|
||||
}
|
||||
|
||||
// read the testing data
|
||||
if((rc = _read_file( dir, "mnist_test", p->test )) != kOkRC )
|
||||
if((rc = _read_file( inDir, "mnist_test", p->test )) != kOkRC )
|
||||
{
|
||||
rc = cwLogError(rc,"MNIST test set load failed.");
|
||||
goto errLabel;
|
||||
@ -138,6 +142,8 @@ cw::rc_t cw::dataset::mnist::create( handle_t& h, const char* dir )
|
||||
errLabel:
|
||||
if( rc != kOkRC )
|
||||
_destroy(p);
|
||||
|
||||
mem::release(inDir);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -158,19 +164,19 @@ cw::rc_t cw::dataset::mnist::destroy( handle_t& h )
|
||||
return rc;
|
||||
}
|
||||
|
||||
const cw::mtx::fmtx_t* cw::dataset::mnist::train( handle_t h )
|
||||
const cw::mtx::f_t* cw::dataset::mnist::train( handle_t h )
|
||||
{
|
||||
mnist_t* p = _handleToPtr(h);
|
||||
return p->train;
|
||||
}
|
||||
|
||||
const cw::mtx::fmtx_t* cw::dataset::mnist::validate( handle_t h )
|
||||
const cw::mtx::f_t* cw::dataset::mnist::validate( handle_t h )
|
||||
{
|
||||
mnist_t* p = _handleToPtr(h);
|
||||
return p->valid;
|
||||
}
|
||||
|
||||
const cw::mtx::fmtx_t* cw::dataset::mnist::test( handle_t h )
|
||||
const cw::mtx::f_t* cw::dataset::mnist::test( handle_t h )
|
||||
{
|
||||
mnist_t* p = _handleToPtr(h);
|
||||
return p->test;
|
||||
@ -190,7 +196,7 @@ cw::rc_t cw::dataset::mnist::test( const char* dir, const char* imageFn )
|
||||
rc = cwLogError(rc,"SVG Test failed on create.");
|
||||
else
|
||||
{
|
||||
const mtx::fmtx_t* m = train(h);
|
||||
const mtx::f_t* m = train(h);
|
||||
/*
|
||||
unsigned zn = 0;
|
||||
unsigned i = 1;
|
||||
@ -234,3 +240,515 @@ cw::rc_t cw::dataset::mnist::test( const char* dir, const char* imageFn )
|
||||
}
|
||||
|
||||
|
||||
namespace cw
|
||||
{
|
||||
namespace dataset
|
||||
{
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
// struct matrix_str<T>
|
||||
//
|
||||
|
||||
template< typename T >
|
||||
struct matrix_str
|
||||
{
|
||||
struct mtx::mtx_str<T>* dataM;
|
||||
struct mtx::mtx_str<T>* labelM;
|
||||
};
|
||||
|
||||
template< typename T0, typename T1 >
|
||||
void _matrix_load( struct matrix_str<T0>& m, const struct mtx::mtx_str<T1>& dataM, const struct mtx::mtx_str<T1>& labelM )
|
||||
{
|
||||
m.dataM = mtx::alloc<T0,T1>(dataM,nullptr,nullptr);
|
||||
m.labelM = mtx::alloc<T0,T1>(labelM,nullptr,nullptr);
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
void _matrix_release( struct matrix_str<T>& m )
|
||||
{
|
||||
mtx::release(m.dataM);
|
||||
mtx::release(m.labelM);
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
// example_t
|
||||
//
|
||||
|
||||
typedef struct examples_str
|
||||
{
|
||||
unsigned type;
|
||||
union
|
||||
{
|
||||
struct matrix_str<float> f;
|
||||
struct matrix_str<double> d;
|
||||
} u;
|
||||
} examples_t;
|
||||
|
||||
template< typename T >
|
||||
rc_t _examples_load( examples_t& ex, unsigned dstTypeFlag, const struct mtx::mtx_str<T>& dataM, const struct mtx::mtx_str<T>& labelM )
|
||||
{
|
||||
rc_t rc = kOkRC;
|
||||
|
||||
switch( dstTypeFlag )
|
||||
{
|
||||
case kFloatFl:
|
||||
_matrix_load<float,T>(ex.u.f,dataM,labelM);
|
||||
ex.type = dstTypeFlag;
|
||||
break;
|
||||
|
||||
case kDoubleFl:
|
||||
_matrix_load<double,T>(ex.u.d,dataM,labelM);
|
||||
ex.type = dstTypeFlag;
|
||||
break;
|
||||
|
||||
default:
|
||||
rc = cwLogError(kInvalidArgRC,"An invalid example type (%i) was encountered.", dstTypeFlag);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
// Release the matrices held by the active union member of 'ex'.
// Nothing is released when 'ex.type' is neither kFloatFl nor kDoubleFl.
void _examples_destroy( examples_t& ex )
{
  if( ex.type == kFloatFl )
    _matrix_release( ex.u.f );
  else if( ex.type == kDoubleFl )
    _matrix_release( ex.u.d );
}
|
||||
|
||||
// Return the dimension vector (dimV) and rank (dimN) of the data matrix held by 'ex'.
//
// On success kOkRC is returned and dimV/dimN refer to the matrix's own dimension record.
// If 'ex' holds no valid data matrix the outputs are set to nullptr/0 and
// kInvalidArgRC is returned (previously a release build returned kOkRC with
// the outputs left untouched).
rc_t _examples_data_dimV( const examples_t& ex, const unsigned*& dimV, unsigned& dimN )
{
  dimV = nullptr;
  dimN = 0;

  switch( ex.type )
  {
    case kFloatFl:
      if( ex.u.f.dataM != nullptr )
      {
        dimV = ex.u.f.dataM->dimV;
        dimN = ex.u.f.dataM->dimN;
        return kOkRC;
      }
      break;

    case kDoubleFl:
      if( ex.u.d.dataM != nullptr )
      {
        dimV = ex.u.d.dataM->dimV;
        dimN = ex.u.d.dataM->dimN;
        return kOkRC;
      }
      break;

    default:
      // An unknown type is a programming error: still trap it in debug builds.
      assert(0);
  }

  return cwLogError(kInvalidArgRC,"The data dimensions are not available because the example set is not loaded.");
}
|
||||
|
||||
// Return the dimension vector (dimV) and rank (dimN) of the label matrix held by 'ex'.
//
// On success kOkRC is returned and dimV/dimN refer to the matrix's own dimension record.
// If 'ex' holds no valid label matrix the outputs are set to nullptr/0 and
// kInvalidArgRC is returned (previously a release build returned kOkRC with
// the outputs left untouched).
rc_t _examples_label_dimV( const examples_t& ex, const unsigned*& dimV, unsigned& dimN )
{
  dimV = nullptr;
  dimN = 0;

  switch( ex.type )
  {
    case kFloatFl:
      if( ex.u.f.labelM != nullptr )
      {
        dimV = ex.u.f.labelM->dimV;
        dimN = ex.u.f.labelM->dimN;
        return kOkRC;
      }
      break;

    case kDoubleFl:
      if( ex.u.d.labelM != nullptr )
      {
        dimV = ex.u.d.labelM->dimV;
        dimN = ex.u.d.labelM->dimN;
        return kOkRC;
      }
      break;

    default:
      // An unknown type is a programming error: still trap it in debug builds.
      assert(0);
  }

  return cwLogError(kInvalidArgRC,"The label dimensions are not available because the example set is not loaded.");
}
|
||||
|
||||
// Return pointers into the float data and label matrices of 'ex'.
//
// dataOffsetN / labelOffsetN are element offsets from the base of the
// respective matrices. The float member of the union is only read when
// 'ex.type' says it is the active one; otherwise kInvalidArgRC is returned
// and the output pointers are left unchanged.
rc_t _examples_batch_f( const examples_t& ex, unsigned dataOffsetN, unsigned labelOffsetN, const float*& dataM, const float*& labelM )
{
  if( ex.type != kFloatFl || ex.u.f.dataM == nullptr || ex.u.f.labelM == nullptr )
    return cwLogError(kInvalidArgRC,"A float batch was requested from an example set which does not hold float data.");

  dataM  = ex.u.f.dataM->base  + dataOffsetN;
  labelM = ex.u.f.labelM->base + labelOffsetN;

  return kOkRC;
}
|
||||
|
||||
// Return pointers into the double data and label matrices of 'ex'.
//
// dataOffsetN / labelOffsetN are element offsets from the base of the
// respective matrices. The double member of the union is only read when
// 'ex.type' says it is the active one; otherwise kInvalidArgRC is returned
// and the output pointers are left unchanged.
rc_t _examples_batch_d( const examples_t& ex, unsigned dataOffsetN, unsigned labelOffsetN, const double*& dataM, const double*& labelM )
{
  if( ex.type != kDoubleFl || ex.u.d.dataM == nullptr || ex.u.d.labelM == nullptr )
    return cwLogError(kInvalidArgRC,"A double batch was requested from an example set which does not hold double data.");

  dataM  = ex.u.d.dataM->base  + dataOffsetN;
  labelM = ex.u.d.labelM->base + labelOffsetN;

  return kOkRC;
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
// datasubset_t
|
||||
//
|
||||
|
||||
typedef struct datasubset_str
|
||||
{
|
||||
examples_t examples;
|
||||
unsigned batchN;
|
||||
unsigned iterIdx;
|
||||
unsigned iterN;
|
||||
} datasubset_t;
|
||||
|
||||
// Release the examples owned by 'ss' and rewind its batch iterator.
// Note that 'ss.batchN' is left as it was.
void _datasubset_destroy( datasubset_str& ss )
{
  _examples_destroy( ss.examples );

  ss.iterN   = 0;
  ss.iterIdx = 0;
}
|
||||
|
||||
template< typename T >
|
||||
rc_t _datasetsubset_load( datasubset_t& ss, unsigned dstTypeFlag, unsigned batchN, const struct mtx::mtx_str<T>& dataM, const struct mtx::mtx_str<T>& labelM )
|
||||
{
|
||||
unsigned exampleN = 0;
|
||||
switch( dataM.dimN )
|
||||
{
|
||||
case 2: exampleN = dataM.dimV[1]; break;
|
||||
case 3: exampleN = dataM.dimV[2]; break;
|
||||
default:
|
||||
cwLogError(kInvalidArgRC,"The dataset must be contained in a matrix of 2 or 3 dimensions.");
|
||||
}
|
||||
|
||||
ss.batchN = batchN;
|
||||
ss.iterN = exampleN/batchN;
|
||||
return _examples_load( ss.examples, dstTypeFlag, dataM, labelM );
|
||||
}
|
||||
|
||||
// Report the dimensions of the data matrix of subset 'ss'.
rc_t _datasubset_data_dimV( const datasubset_t& ss, const unsigned*& dimV, unsigned& dimN )
{
  const examples_t& ex = ss.examples;
  return _examples_data_dimV( ex, dimV, dimN );
}
|
||||
|
||||
// Report the dimensions of the label matrix of subset 'ss'.
rc_t _datasubset_label_dimV( const datasubset_t& ss, const unsigned*& dimV, unsigned& dimN )
{
  const examples_t& ex = ss.examples;
  return _examples_label_dimV( ex, dimV, dimN );
}
|
||||
|
||||
// Get the next float batch from 'ss' and advance the batch iterator.
//
//  dataOffsetN   count of values in one data example  (the caller passes datasetMgr_t.dataRealN)
//  labelOffsetN  count of values in one label example (the caller passes datasetMgr_t.labelRealN)
//
// Returns kEofRC once all 'iterN' batches have been handed out.
//
// Batch i begins at example (i * batchN). The original code advanced by
// only one example per iteration, which produced overlapping batches and
// never reached most of the subset.
rc_t _datasubset_batch_f( datasubset_t& ss, unsigned dataOffsetN, unsigned labelOffsetN, const float*& dataM, const float*& labelM )
{
  if( ss.iterIdx >= ss.iterN )
    return kEofRC;

  // index of the first example of this batch
  const unsigned exampleIdx = ss.iterIdx * ss.batchN;

  rc_t rc = _examples_batch_f( ss.examples, dataOffsetN * exampleIdx, labelOffsetN * exampleIdx, dataM, labelM );

  ++ss.iterIdx;
  return rc;
}
|
||||
|
||||
// Get the next double batch from 'ss' and advance the batch iterator.
//
//  dataOffsetN   count of values in one data example  (the caller passes datasetMgr_t.dataRealN)
//  labelOffsetN  count of values in one label example (the caller passes datasetMgr_t.labelRealN)
//
// Returns kEofRC once all 'iterN' batches have been handed out.
//
// Batch i begins at example (i * batchN). The original code advanced by
// only one example per iteration, which produced overlapping batches and
// never reached most of the subset.
rc_t _datasubset_batch_d( datasubset_t& ss, unsigned dataOffsetN, unsigned labelOffsetN, const double*& dataM, const double*& labelM )
{
  if( ss.iterIdx >= ss.iterN )
    return kEofRC;

  // index of the first example of this batch
  const unsigned exampleIdx = ss.iterIdx * ss.batchN;

  rc_t rc = _examples_batch_d( ss.examples, dataOffsetN * exampleIdx, labelOffsetN * exampleIdx, dataM, labelM );

  ++ss.iterIdx;
  return rc;
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
// datasetMgr_t
|
||||
//
|
||||
|
||||
// Indexes into datasetMgr_t.ssA[], one per data subset.
enum
{
  kTrainSsIdx  = 0,  // training subset
  kValidSsIdx  = 1,  // validation subset
  kTestSsIdx   = 2,  // test subset
  kDataSubSetN = 3   // count of subsets
};
|
||||
|
||||
typedef struct datasetMgr_str
|
||||
{
|
||||
const object_t* cfg;
|
||||
unsigned typeFlag;
|
||||
datasubset_t ssA[ kDataSubSetN ];
|
||||
unsigned dataRealN;
|
||||
unsigned labelRealN;
|
||||
} datasetMgr_t;
|
||||
|
||||
// Convert a dataset manager handle into a pointer to the manager state.
datasetMgr_t* _handleToPtr( handle_t h )
{
  datasetMgr_t* mgr = handleToPtr< handle_t, datasetMgr_t >(h);
  return mgr;
}
|
||||
|
||||
unsigned _ssFlagToIndex( unsigned flags )
|
||||
{
|
||||
flags &= (kTrainSsFl | kValidSsFl | kTestSsFl );
|
||||
|
||||
switch( flags )
|
||||
{
|
||||
case kTrainSsFl: return kTrainSsIdx;
|
||||
case kValidSsFl: return kValidSsIdx;
|
||||
case kTestSsFl: return kTestSsIdx;
|
||||
}
|
||||
|
||||
cwLogError(kInvalidArgRC,"Invalid subset flags (0x%x).", flags );
|
||||
return kInvalidIdx;
|
||||
}
|
||||
|
||||
// Release the examples of every subset held by the manager.
void _unload( datasetMgr_t* p )
{
  for( datasubset_t& ss : p->ssA )
    _datasubset_destroy( ss );
}
|
||||
|
||||
// Release all loaded subsets and then the manager record itself.
// Always returns kOkRC.
rc_t _destroy( datasetMgr_t* p )
{
  rc_t rc = kOkRC;

  _unload(p);       // the subsets first ...
  mem::release(p);  // ... then the record which holds them

  return rc;
}
|
||||
|
||||
|
||||
// Return the count of values in one example of 'm', where the last
// dimension of 'm' indexes the examples:
//   rank 1 -> 1
//   rank 2 -> dimV[0]
//   rank 3 -> dimV[0] * dimV[1]
// Any other rank is logged as an error and 0 is returned.
unsigned _mtx_to_realN( const mtx::f_t& m )
{
  if( m.dimN == 1 )
    return 1;

  if( m.dimN == 2 )
    return m.dimV[0];

  if( m.dimN == 3 )
    return m.dimV[0] * m.dimV[1];

  cwLogError(kInvalidArgRC,"%i invalid matrix rank.",m.dimN);
  return 0;
}
|
||||
|
||||
//rc_t _load( datasetMgr_t* p, unsigned ssFlags, unsigned batchN, const mtx::f_t& dataM, const mtx::f_t& labelM )
|
||||
|
||||
template< typename T >
|
||||
rc_t _load( datasetMgr_t* p, unsigned ssFlags, unsigned batchN, const struct mtx::mtx_str<T>& dataM, const struct mtx::mtx_str<T>& labelM )
|
||||
{
|
||||
rc_t rc = kOkRC;
|
||||
unsigned ssIdx;
|
||||
if(( ssIdx = _ssFlagToIndex(ssFlags)) != kInvalidIdx )
|
||||
if((rc = _datasetsubset_load( p->ssA[ssIdx], p->typeFlag, batchN, dataM, labelM )) != kOkRC )
|
||||
{
|
||||
p->dataRealN = _mtx_to_realN(dataM);
|
||||
p->labelRealN = _mtx_to_realN(labelM);
|
||||
return kOkRC;
|
||||
}
|
||||
|
||||
return kInvalidArgRC;
|
||||
}
|
||||
|
||||
rc_t _mnist_load_subset( datasetMgr_t* p, unsigned ssFlags, unsigned batchN, const mtx::f_t& m )
|
||||
{
|
||||
rc_t rc = kOkRC;
|
||||
mtx::f_t* labelM = mtx::slice_alias(m,0,0,1); // the first row contains the labels
|
||||
mtx::f_t* dsM = mtx::slice_alias(m,1,0); // all successive rows contain the data
|
||||
mtx::f_t* oneHotM = mtx::alloc_one_hot<float>(*labelM); // convert the labels to a one hot encoding
|
||||
|
||||
//unsigned dsExampleN = mtx::ele_count<float>(*labelM); // total count of examples in this dataset
|
||||
|
||||
rc = _load<float>( p, ssFlags, batchN, *dsM, *oneHotM );
|
||||
|
||||
// Inform the matrix objects that the ownership
|
||||
// of the data and dimV memory from 'dsM' and 'oneHotM'
|
||||
// has been taken over by the dataset object.
|
||||
//clear_memory_release_flag( *oneHotM );
|
||||
//clear_memory_release_flag( *dsM );
|
||||
|
||||
mtx::release(labelM);
|
||||
mtx::release(oneHotM);
|
||||
mtx::release(dsM);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc_t _mnist_load( datasetMgr_t* p, const object_t* ele, unsigned batchN, unsigned flags )
|
||||
{
|
||||
rc_t rc = kOkRC;
|
||||
const char* inDir = nullptr;
|
||||
mnist::handle_t mnistH;
|
||||
|
||||
// locate
|
||||
if( ele->get("inDir",inDir) != kOkRC )
|
||||
return cwLogError(kSyntaxErrorRC,"MNIST 'indir' cfg. label not found.");
|
||||
|
||||
if( (rc = mnist::create(mnistH, inDir )) != kOkRC )
|
||||
{
|
||||
return cwLogError(rc,"MNIST dataset instantiation failed.");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
const mtx::f_t* rM = mnist::train(mnistH);
|
||||
const mtx::f_t* vM = mnist::validate(mnistH);
|
||||
const mtx::f_t* tM = mnist::test(mnistH);
|
||||
|
||||
|
||||
_mnist_load_subset( p, kTrainSsFl, batchN, *rM );
|
||||
_mnist_load_subset( p, kValidSsFl, batchN, *vM );
|
||||
_mnist_load_subset( p, kTestSsFl, batchN, *tM );
|
||||
|
||||
mnist::destroy(mnistH);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a dataset manager and attach it to 'h'.
// Any manager already attached to 'h' is destroyed first.
// 'cfg' is stored by pointer, not copied; 'flags' becomes the manager's storage type flag.
cw::rc_t cw::dataset::create( handle_t& h, const object_t* cfg, unsigned flags )
{
  const rc_t rc = destroy(h);

  if( rc != kOkRC )
    return rc;

  datasetMgr_t* mgr = mem::allocZ<datasetMgr_t>(1);

  mgr->typeFlag = flags;
  mgr->cfg      = cfg;

  h.set(mgr);

  return rc;
}
|
||||
|
||||
// Destroy the dataset manager attached to 'h' and clear the handle.
// Calling this with an invalid (unset) handle is a no-op which returns kOkRC.
cw::rc_t cw::dataset::destroy( handle_t& h )
{
  if( h.isValid() )
  {
    const rc_t rc = _destroy( _handleToPtr(h) );

    // leave the handle untouched if the release failed
    if( rc != kOkRC )
      return rc;

    h.clear();
  }

  return kOkRC;
}
|
||||
|
||||
// Load the dataset named 'dsLabel' from the 'dataL' list of the manager cfg.
//
//  dsLabel   name of the dataset to load; compared to the 'name' field of each 'dataL' element
//  batchN    count of examples per batch
//  validPct  currently unused
//  testPct   currently unused
//  flags     forwarded to the dataset specific loader
//
// Any previously loaded subsets are released before the new dataset is loaded.
//
// Returns:
//  kInvalidArgRC     'dsLabel' is null, or the named dataset has no loader ("mnist" is the only one)
//  kLabelNotFoundRC  the cfg. has no 'dataL' list, an element has no 'name' field,
//                    or no element is named 'dsLabel'
//  otherwise the result of the dataset specific loader.
//
// The original returned kOkRC in all of these failure cases and
// dereferenced a missing 'dataL' list.
cw::rc_t cw::dataset::load( handle_t h, const char* dsLabel, unsigned batchN, unsigned validPct, unsigned testPct, unsigned flags )
{
  datasetMgr_t*   p     = _handleToPtr(h);
  const object_t* dataL = nullptr;

  (void)validPct;
  (void)testPct;

  if( dsLabel == nullptr )
    return cwLogError(kInvalidArgRC,"A dataset label is required to load a dataset.");

  if( p->cfg == nullptr || (dataL = p->cfg->find("dataL")) == nullptr )
    return cwLogError(kLabelNotFoundRC,"The dataset cfg. does not contain a 'dataL' list.");

  // empty the data mgr. subsets before loading the next dataset
  _unload(p);

  // for each possible dataset
  for(unsigned i=0; i<dataL->child_count(); ++i)
  {
    const object_t* ele   = dataL->child_ele(i);
    const char*     label = nullptr;

    // all ele's must have a 'name' field
    if( ele == nullptr || ele->get("name", label ) != kOkRC || label == nullptr )
      return cwLogError(kLabelNotFoundRC,"Dataset cfg. element at index %i does not have a 'name' field.",i);

    // skip elements which are not the target dataset
    if( strcmp(dsLabel,label) != 0 )
      continue;

    if( strcmp(label,"mnist") == 0 )
      return _mnist_load(p, ele, batchN,flags);

    return cwLogError(kInvalidArgRC,"There is no loader for the dataset '%s'.",label);
  }

  return cwLogError(kLabelNotFoundRC,"The dataset '%s' was not found in the 'dataL' list.",dsLabel);
}
|
||||
|
||||
|
||||
|
||||
// Get the dimensions of the data matrix of the subset selected by 'subsetFl'.
// Returns kInvalidArgRC if 'subsetFl' does not select a subset.
cw::rc_t cw::dataset::subset_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref )
{
  datasetMgr_t*  p     = _handleToPtr(h);
  const unsigned ssIdx = _ssFlagToIndex(subsetFl);

  if( ssIdx != kInvalidIdx )
    return _datasubset_data_dimV( p->ssA[ssIdx], dimV_Ref, dimN_Ref );

  return kInvalidArgRC;
}
|
||||
|
||||
// Get the dimensions of the label matrix of the subset selected by 'subsetFl'.
// Returns kInvalidArgRC if 'subsetFl' does not select a subset.
cw::rc_t cw::dataset::label_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref )
{
  datasetMgr_t*  p     = _handleToPtr(h);
  const unsigned ssIdx = _ssFlagToIndex(subsetFl);

  if( ssIdx != kInvalidIdx )
    return _datasubset_label_dimV( p->ssA[ssIdx], dimV_Ref, dimN_Ref );

  return kInvalidArgRC;
}
|
||||
|
||||
// Get the next float batch of the subset selected by 'subsetFl'.
// Returns kInvalidArgRC if 'subsetFl' does not select a subset, otherwise
// the result of _datasubset_batch_f() (kEofRC once the subset is exhausted).
cw::rc_t cw::dataset::batch_f( handle_t h, unsigned subsetFl, const float*& dataM_Ref, const float*& labelM_Ref )
{
  datasetMgr_t*  p     = _handleToPtr(h);
  const unsigned ssIdx = _ssFlagToIndex(subsetFl);

  if( ssIdx != kInvalidIdx )
    return _datasubset_batch_f( p->ssA[ssIdx], p->dataRealN, p->labelRealN, dataM_Ref, labelM_Ref );

  return kInvalidArgRC;
}
|
||||
|
||||
// Get the next double batch of the subset selected by 'subsetFl'.
// Returns kInvalidArgRC if 'subsetFl' does not select a subset, otherwise
// the result of _datasubset_batch_d() (kEofRC once the subset is exhausted).
cw::rc_t cw::dataset::batch_d( handle_t h, unsigned subsetFl, const double*& dataM_Ref, const double*& labelM_Ref )
{
  datasetMgr_t*  p     = _handleToPtr(h);
  const unsigned ssIdx = _ssFlagToIndex(subsetFl);

  if( ssIdx != kInvalidIdx )
    return _datasubset_batch_d( p->ssA[ssIdx], p->dataRealN, p->labelRealN, dataM_Ref, labelM_Ref );

  return kInvalidArgRC;
}
|
||||
|
||||
|
||||
|
||||
// Exercise the dataset manager: load the dataset named by the 'dsLabel'
// cfg. field, print the training data/label dimensions and then iterate
// over every training batch.
//
// 'cfg' must contain the fields 'dsLabel', 'batchN', 'validPct' and 'testPct'
// as well as the 'dataL' list searched by load().
//
// Reaching the end of the training subset (kEofRC) is the normal way for
// the batch loop to end and is reported as kOkRC; the original returned
// kEofRC to the caller as though the test had failed.
cw::rc_t cw::dataset::test( const object_t* cfg )
{
  handle_t        h;
  rc_t            rc        = kOkRC;
  const char*     dsLabel   = nullptr;
  unsigned        batchN    = 64;
  unsigned        validPct  = 10;
  unsigned        testPct   = 10;
  unsigned        typeFlag  = kFloatFl;
  time::spec_t    t0;
  const float*    dataM     = nullptr;
  const float*    labelM    = nullptr;
  const unsigned* dataDimV  = nullptr;
  const unsigned* labelDimV = nullptr;
  unsigned        dataDimN  = 0;
  unsigned        labelDimN = 0;
  unsigned        batchCnt  = 0;

  time::get(t0);

  if((rc = cfg->getv("dsLabel",dsLabel,"batchN",batchN,"validPct",validPct,"testPct",testPct)) != kOkRC )
    return cwLogError(rc,"Dataset test failed. Argument parse failed.");

  // batchN is used as a divisor below
  if( batchN == 0 )
    return cwLogError(kInvalidArgRC,"Dataset test failed. 'batchN' must be greater than zero.");

  if((rc = create(h,cfg,typeFlag)) != kOkRC )
    return cwLogError(rc,"Dataset manager create failed.");

  // NOTE(review): the manager stores examples as floats (typeFlag) but kDoubleFl
  // is passed here; the flag is not used by the MNIST loader - confirm the intent.
  if((rc = load(h, dsLabel, batchN, validPct, testPct, kDoubleFl )) != kOkRC )
  {
    cwLogError(rc,"'%s' dataset load failed.", cwStringNullGuard(dsLabel));
    goto errLabel;
  }

  if((rc = subset_dims(h,kTrainSsFl,dataDimV, dataDimN )) != kOkRC )
    goto errLabel;

  if((rc = label_dims(h,kTrainSsFl,labelDimV, labelDimN )) != kOkRC )
    goto errLabel;

  if( dataDimV == nullptr || dataDimN == 0 )
  {
    rc = cwLogError(kInvalidArgRC,"Dataset test failed. The training data has no dimensions.");
    goto errLabel;
  }

  vop::print(dataDimV,dataDimN,"%i ","data: ");
  vop::print(labelDimV,labelDimN,"%i ","label: ");

  // the last dimension holds the example count
  batchCnt = dataDimV[ dataDimN-1 ]/batchN;
  printf("batchCnt:%i\n",batchCnt);

  for(unsigned i=0; true; ++i )
  {
    if((rc = batch_f(h,kTrainSsFl,dataM,labelM)) != kOkRC )
    {
      printf("rc:%i : %i %i\n",rc,batchCnt,i);
      break;
    }

    // show the first few values of the first batch
    if( i==0 )
      vop::print(dataM,3,"%f ");
  }

  // running out of batches is the expected way to leave the loop
  if( rc == kEofRC )
    rc = kOkRC;

  printf("elapsed %i ms\n",time::elapsedMs( t0 ) );

 errLabel:
  destroy(h);

  return rc;
}
|
||||
|
87
cwDataSets.h
87
cwDataSets.h
@ -1,6 +1,47 @@
|
||||
#ifndef cwDataSets_h
|
||||
#define cwDataSets_h
|
||||
/*
|
||||
|
||||
Select a dataset and provide batched data/label pairs.
|
||||
|
||||
1. In-memory datasets, stream from disk.
|
||||
2. Train/valid/test set marking.
|
||||
3. K-fold rotation.
|
||||
4. Conversion from source data type to batch data type.
|
||||
5. One-hot encoding.
|
||||
6. Shuffling.
|
||||
|
||||
Options:
|
||||
1. Read all data into memory (otherwise stream from disk, which requires asynchronous reading).
|
||||
2. data type conversion on-load vs on-batch.
|
||||
3. one-hot encoding on-load vs on-batch.
|
||||
4. shuffle
|
||||
a. from streaming input buffer.
|
||||
b. in memory
|
||||
c. on batch
|
||||
|
||||
|
||||
Source Driver:
|
||||
label() // string label of this source
|
||||
open(cfg) // open the source
|
||||
close() // close the source
|
||||
get_info() // get the source dim and type info
|
||||
read(N,dst_t,dataBuf,labelBuf);// read a block of N examples and cvt to type dst_t
|
||||
|
||||
Implementation:
|
||||
The only difference between streaming from disk and initial load to memory is that
|
||||
stream-from-disk fills a second copy of the in-memory data structure.
|
||||
|
||||
All set marking, both RVT and K-Fold, happen on the in-memory data structure after it is populated.
|
||||
|
||||
Shuffling happens on the in-memory data structure after it is populated.
|
||||
If there is no data conversion or one-hot conversion on batch output then shuffling moves elements in-memory otherwise
|
||||
the shuffle index vector is used as a lookup during the output step.
|
||||
|
||||
If K-Fold segmentation is used with a streaming dataset then the k-fold index must persist
|
||||
between fold selection passes.
|
||||
|
||||
*/
|
||||
|
||||
namespace cw
|
||||
{
|
||||
@ -13,16 +54,50 @@ namespace cw
|
||||
rc_t create( handle_t& h, const char* dir );
|
||||
rc_t destroy( handle_t& h );
|
||||
|
||||
// Each column has one example.
|
||||
// The top row contains the labels.
|
||||
const mtx::fmtx_t* train( handle_t h );
|
||||
const mtx::fmtx_t* validate( handle_t h );
|
||||
const mtx::fmtx_t* test( handle_t h );
|
||||
// Each column has one example image.
|
||||
// The top row contains the example label.
|
||||
const mtx::f_t* train( handle_t h );
|
||||
const mtx::f_t* validate( handle_t h );
|
||||
const mtx::f_t* test( handle_t h );
|
||||
|
||||
rc_t test(const char* dir, const char* imageFn );
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
typedef handle<struct datasetMgr_str> handle_t;
|
||||
|
||||
// Data subset flags
|
||||
enum { kTrainSsFl=0x10, kValidSsFl=0x20, kTestSsFl=0x40 };
|
||||
|
||||
|
||||
enum { kFloatFl=0x02, kDoubleFl=0x04 };
|
||||
rc_t create( handle_t& h, const object_t* cfg, unsigned flags );
|
||||
rc_t destroy( handle_t& h );
|
||||
|
||||
|
||||
// Load a dataset, divide it into train,validate, and test subsets
|
||||
rc_t load( handle_t h, const char* dsLabel, unsigned batchN, unsigned validPct, unsigned testPct, unsigned flags );
|
||||
|
||||
// Shuffle the subset.
|
||||
rc_t shuffle( handle_t h, unsigned subsetFl );
|
||||
|
||||
// Get the dimensions of all the examples from a subset.
|
||||
// dimN=1: dimV[0]=batchN
|
||||
// dimN=2: dimV[0]=realN dimV[1]=batchN
|
||||
// dimN=3: dimV[0,1]=realN dimV[2]=batchN
|
||||
rc_t subset_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref );
|
||||
rc_t label_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref );
|
||||
|
||||
|
||||
// Get the next batch. Returns kEofRC at the end of an epoch.
|
||||
rc_t batch_f( handle_t h, unsigned subsetFl, const float*& dataM_Ref, const float*& labelM_Ref );
|
||||
rc_t batch_d( handle_t h, unsigned subsetFl, const double*& dataM_Ref, const double*& labelM_Ref );
|
||||
|
||||
rc_t test( const object_t* cfg );
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
111
cwNN.cpp
111
cwNN.cpp
@ -5,70 +5,115 @@
|
||||
#include "cwFile.h"
|
||||
#include "cwNN.h"
|
||||
|
||||
/*
|
||||
|
||||
|
||||
|
||||
|
||||
*/
|
||||
|
||||
namespace cw
|
||||
{
|
||||
namespace nn
|
||||
{
|
||||
|
||||
template< typename R >
|
||||
struct input_str
|
||||
|
||||
typedef struct layer_desc_str
|
||||
{
|
||||
R* x;
|
||||
unsigned dimN;
|
||||
unsigned* dimV;
|
||||
};
|
||||
|
||||
typedef struct dense_str
|
||||
unsigned layerTId;
|
||||
unsigned activationId;
|
||||
unsigned weightInitId;
|
||||
unsigned biasInitId;
|
||||
} layer_desc_t;
|
||||
|
||||
typedef struct network_desc_str
|
||||
{
|
||||
unsigned xN; // count of neurons in src layer
|
||||
unsigned yN; // count of neurons in this layer
|
||||
|
||||
real_t* wM; // wM[ xN, yN ] weight matrix
|
||||
real_t* bV; // bV[ yN ] bias vector
|
||||
|
||||
|
||||
real_t* yV; // scaled input + bias
|
||||
real_t* aV; // activation output
|
||||
real_t* dV; // contribution to cost for each neurode
|
||||
real_t* gV; // C gradient wrt weight at each neurode
|
||||
|
||||
|
||||
} dense_t;
|
||||
layer_desc_t* layers;
|
||||
unsigned layerN;
|
||||
} network_desc_t;
|
||||
|
||||
typedef struct layer_str
|
||||
{
|
||||
const layer_desc_t* desc;
|
||||
const mtx::d_t* iM;
|
||||
mtx::d_t wM;
|
||||
mtx::d_t aM;
|
||||
} layer_t;
|
||||
|
||||
typedef struct nn_str
|
||||
{
|
||||
|
||||
const network_desc_t* desc;
|
||||
layer_t* layerL;
|
||||
} nn_t;
|
||||
|
||||
|
||||
void _mtx_mul( R* z, R* m, R* x, unsigned mN, unsigned mM )
|
||||
nn_t* _allocNet( nn_t* nn, const object_t& nnCfg, unsigned inNodeN )
|
||||
{
|
||||
}
|
||||
|
||||
void _add( R* y, R* x, unsigned n )
|
||||
nn_t* _initNet( nn_t* nn )
|
||||
{
|
||||
}
|
||||
|
||||
void _activation( dense_t* l )
|
||||
rc_t _netForward( nn_t* p )
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
rc_t _netReverse( nn_t* )
|
||||
{
|
||||
}
|
||||
|
||||
void _dense_forward( dense_t* l0, dense_t* l1 )
|
||||
{
|
||||
assert( l1->wM.dimV[1] == l0->yN );
|
||||
assert( l1->wM.dimV[0] == l1->yN );
|
||||
_mtx_mult( l1->zV, l1->wM.base, l0->aV, l0->yN, l1->yN );
|
||||
_add( l1->zV, l1->bV, l1->yN );
|
||||
|
||||
_activation(l1)
|
||||
rc_t _batchUpdate( const mtx::d_t& ds, const train_args_t& args, unsigned ttlTrainExampleN )
|
||||
{
|
||||
}
|
||||
|
||||
rc_t train( handle_t h, dataset::handle_t dsH, const train_args_t& args )
|
||||
{
|
||||
mtx::d_t ds_mtx;
|
||||
mtx::d_t label_mtx;
|
||||
unsigned trainExampleN = dataset::example_count(dsH);
|
||||
unsigned batchPerEpoch = trainExampleN/args.batchN;
|
||||
|
||||
|
||||
for(unsigned i=0; i<epochN; ++i)
|
||||
{
|
||||
for(unsigned j=0; j<batchsPerEpoch; ++j)
|
||||
{
|
||||
dataset::batchd(dsH, j, ds_mtx, label_mtx,args.batchN, batchPerEpoch);
|
||||
|
||||
_batchUpdate(ds_mtx,args,ttlTrainExampleN);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
rc_t test( const char* cfgFn, const char* projLabel )
|
||||
{
|
||||
object_t* cfg = nullptr;
|
||||
rc_t rc = kOkRC;
|
||||
|
||||
if((rc = objectFromFile( cfgFn, cfg )) != kOkRC )
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
errLabel:
|
||||
if( cfg != nullptr )
|
||||
cfg->free();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
45
cwNN.h
45
cwNN.h
@ -15,9 +15,9 @@ namespace cw
|
||||
|
||||
enum
|
||||
{
|
||||
kInputLayerId,
|
||||
kDenseLayerId,
|
||||
kConv1DConvId
|
||||
kInputLayerTId,
|
||||
kDenseLayerTId,
|
||||
kConv1DConvTId
|
||||
};
|
||||
|
||||
enum
|
||||
@ -27,33 +27,28 @@ namespace cw
|
||||
kNormalInitId
|
||||
};
|
||||
|
||||
typedef struct layer_args_str
|
||||
{
|
||||
unsigned typeId;
|
||||
unsigned actId;
|
||||
unsigned weightInitId;
|
||||
unsigned biasInitId;
|
||||
unsigned dimN;
|
||||
const unsigned* dimV;
|
||||
} layer_args_t;
|
||||
|
||||
typedef struct network_args_str
|
||||
{
|
||||
layer_args_t* layers;
|
||||
unsigned layerN;
|
||||
} network_args_t;
|
||||
|
||||
|
||||
rc_t parse_args( const object_t& o, network_args_t& args );
|
||||
|
||||
rc_t create( handle_t& h, const network_args_t& args );
|
||||
typedef struct train_args_str
|
||||
{
|
||||
unsigned epochN;
|
||||
unsigned batchN;
|
||||
double eta;
|
||||
double lambda;
|
||||
|
||||
} train_args_t;
|
||||
|
||||
|
||||
|
||||
rc_t create( handle_t& h, const object_t& cfg );
|
||||
rc_t destroy( handle_t& h );
|
||||
|
||||
template< typename R >
|
||||
rc_t train( handle_t h, unsigned epochN, unsigned batchN, const dataset<R>& trainDs );
|
||||
rc_t train( handle_t h, dataset::handle_t dsH, const train_args_t& args );
|
||||
|
||||
template< typename R >
|
||||
rc_t infer( handle_t h, const dataset<R>& ds );
|
||||
rc_t test( handle_t h, dataset::handle_t dsH );
|
||||
|
||||
|
||||
rc_t test( const char* mnistDir );
|
||||
}
|
||||
}
|
||||
|
||||
|
16
study/nn/nn0.cfg
Normal file
16
study/nn/nn0.cfg
Normal file
@ -0,0 +1,16 @@
|
||||
{
|
||||
projL: [
|
||||
test0:{
|
||||
net: {
|
||||
|
||||
data: { type:mnist, indir:"~/src/datasets/mnist" },
|
||||
|
||||
layerL: [
|
||||
{ type: input, batchN: 32 },
|
||||
{ type: dense, shape: [30] },
|
||||
{ type: dense, shape: [10] },
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
Loading…
Reference in New Issue
Block a user