cwDataSet.h/cpp, cwNN.h/cpp : Updates.

kevin 2020-10-30 09:40:39 -04:00
parent 4355c78f0d
commit 6dd882312d
6 changed files with 728 additions and 79 deletions

Makefile.am

@ -75,8 +75,8 @@ libcwSRC += src/libcw/cwMdns.cpp src/libcw/cwEuCon.cpp src/libcw/cwDnsSd.cpp src
if cwWEB
else
-# libcwHDR += src/libcw/cwSvg.h src/libcw/cwDataSets.h
-# libcwSRC += src/libcw/cwSvg.cpp src/libcw/cwDataSets.cpp
+libcwHDR += src/libcw/cwSvg.h src/libcw/cwDataSets.h
+libcwSRC += src/libcw/cwSvg.cpp src/libcw/cwDataSets.cpp
endif
if cwFFTW

src/libcw/cwDataSets.cpp

@ -2,12 +2,14 @@
#include "cwLog.h" #include "cwLog.h"
#include "cwCommonImpl.h" #include "cwCommonImpl.h"
#include "cwMem.h" #include "cwMem.h"
#include "cwObject.h"
#include "cwFile.h" #include "cwFile.h"
#include "cwFileSys.h" #include "cwFileSys.h"
#include "cwVectOps.h"
#include "cwMtx.h" #include "cwMtx.h"
#include "cwDataSets.h" #include "cwDataSets.h"
#include "cwSvg.h" #include "cwSvg.h"
#include "cwTime.h"
namespace cw namespace cw
{ {
@ -17,9 +19,9 @@ namespace cw
{
typedef struct mnist_str
{
-mtx::fmtx_t* train = nullptr;
-mtx::fmtx_t* valid = nullptr;
-mtx::fmtx_t* test = nullptr;
+mtx::f_t* train = nullptr;
+mtx::f_t* valid = nullptr;
+mtx::f_t* test = nullptr;
} mnist_t;
@ -37,7 +39,7 @@ namespace cw
return rc;
}
-rc_t _read_file( const char* dir, const char* fn, mtx::fmtx_t*& m )
+rc_t _read_file( const char* dir, const char* fn, mtx::f_t*& m )
{
rc_t rc = kOkRC;
file::handle_t fH;
@ -90,7 +92,7 @@ namespace cw
}
dimV[1] = exampleN;
-m = mtx::alloc<float>( dimN, dimV, v, mtx::kAliasReleaseFl );
+m = mtx::alloc<float>( dimV, dimN, v, mtx::kAliasReleaseFl );
errLabel:
file::close(fH);
@ -110,24 +112,26 @@ cw::rc_t cw::dataset::mnist::create( handle_t& h, const char* dir )
if((rc = destroy(h)) != kOkRC )
return rc;
+char* inDir = filesys::expandPath(dir);
p = mem::allocZ<mnist_t>(1);
// read the training data
-if((rc = _read_file( dir, "mnist_train", p->train )) != kOkRC )
+if((rc = _read_file( inDir, "mnist_train", p->train )) != kOkRC )
{
rc = cwLogError(rc,"MNIST training set load failed.");
goto errLabel;
}
// read the validation data
-if((rc = _read_file( dir, "mnist_valid", p->valid )) != kOkRC )
+if((rc = _read_file( inDir, "mnist_valid", p->valid )) != kOkRC )
{
rc = cwLogError(rc,"MNIST validation set load failed.");
goto errLabel;
}
// read the testing data
-if((rc = _read_file( dir, "mnist_test", p->test )) != kOkRC )
+if((rc = _read_file( inDir, "mnist_test", p->test )) != kOkRC )
{
rc = cwLogError(rc,"MNIST test set load failed.");
goto errLabel;
@ -139,6 +143,8 @@ cw::rc_t cw::dataset::mnist::create( handle_t& h, const char* dir )
if( rc != kOkRC )
_destroy(p);
+mem::release(inDir);
return rc;
}
@ -158,19 +164,19 @@ cw::rc_t cw::dataset::mnist::destroy( handle_t& h )
return rc;
}
-const cw::mtx::fmtx_t* cw::dataset::mnist::train( handle_t h )
+const cw::mtx::f_t* cw::dataset::mnist::train( handle_t h )
{
mnist_t* p = _handleToPtr(h);
return p->train;
}
-const cw::mtx::fmtx_t* cw::dataset::mnist::validate( handle_t h )
+const cw::mtx::f_t* cw::dataset::mnist::validate( handle_t h )
{
mnist_t* p = _handleToPtr(h);
return p->valid;
}
-const cw::mtx::fmtx_t* cw::dataset::mnist::test( handle_t h )
+const cw::mtx::f_t* cw::dataset::mnist::test( handle_t h )
{
mnist_t* p = _handleToPtr(h);
return p->test;
@ -190,7 +196,7 @@ cw::rc_t cw::dataset::mnist::test( const char* dir, const char* imageFn )
rc = cwLogError(rc,"SVG Test failed on create."); rc = cwLogError(rc,"SVG Test failed on create.");
else else
{ {
const mtx::fmtx_t* m = train(h); const mtx::f_t* m = train(h);
/* /*
unsigned zn = 0; unsigned zn = 0;
unsigned i = 1; unsigned i = 1;
@ -234,3 +240,515 @@ cw::rc_t cw::dataset::mnist::test( const char* dir, const char* imageFn )
} }
namespace cw
{
namespace dataset
{
//---------------------------------------------------------------------------------------------------------------
// struct matrix_str<T>
//
template< typename T >
struct matrix_str
{
struct mtx::mtx_str<T>* dataM;
struct mtx::mtx_str<T>* labelM;
};
template< typename T0, typename T1 >
void _matrix_load( struct matrix_str<T0>& m, const struct mtx::mtx_str<T1>& dataM, const struct mtx::mtx_str<T1>& labelM )
{
m.dataM = mtx::alloc<T0,T1>(dataM,nullptr,nullptr);
m.labelM = mtx::alloc<T0,T1>(labelM,nullptr,nullptr);
}
template< typename T >
void _matrix_release( struct matrix_str<T>& m )
{
mtx::release(m.dataM);
mtx::release(m.labelM);
}
//---------------------------------------------------------------------------------------------------------------
// example_t
//
typedef struct examples_str
{
unsigned type;
union
{
struct matrix_str<float> f;
struct matrix_str<double> d;
} u;
} examples_t;
template< typename T >
rc_t _examples_load( examples_t& ex, unsigned dstTypeFlag, const struct mtx::mtx_str<T>& dataM, const struct mtx::mtx_str<T>& labelM )
{
rc_t rc = kOkRC;
switch( dstTypeFlag )
{
case kFloatFl:
_matrix_load<float,T>(ex.u.f,dataM,labelM);
ex.type = dstTypeFlag;
break;
case kDoubleFl:
_matrix_load<double,T>(ex.u.d,dataM,labelM);
ex.type = dstTypeFlag;
break;
default:
rc = cwLogError(kInvalidArgRC,"An invalid example type (%i) was encountered.", dstTypeFlag);
}
return rc;
}
void _examples_destroy( examples_t& ex )
{
switch( ex.type )
{
case kFloatFl: _matrix_release(ex.u.f); break;
case kDoubleFl: _matrix_release(ex.u.d); break;
}
}
rc_t _examples_data_dimV( const examples_t& ex, const unsigned*& dimV, unsigned& dimN )
{
switch( ex.type )
{
case kFloatFl: dimV=ex.u.f.dataM->dimV; dimN=ex.u.f.dataM->dimN; break;
case kDoubleFl: dimV=ex.u.d.dataM->dimV; dimN=ex.u.d.dataM->dimN; break;
default:
assert(0);
}
return kOkRC;
}
rc_t _examples_label_dimV( const examples_t& ex, const unsigned*& dimV, unsigned& dimN )
{
switch( ex.type )
{
case kFloatFl: dimV=ex.u.f.labelM->dimV; dimN=ex.u.f.labelM->dimN; break;
case kDoubleFl: dimV=ex.u.d.labelM->dimV; dimN=ex.u.d.labelM->dimN; break;
default:
assert(0);
}
return kOkRC;
}
rc_t _examples_batch_f( const examples_t& ex, unsigned dataOffsetN, unsigned labelOffsetN, const float*& dataM, const float*& labelM )
{
dataM = ex.u.f.dataM->base + dataOffsetN;
labelM = ex.u.f.labelM->base + labelOffsetN;
return kOkRC;
}
rc_t _examples_batch_d( const examples_t& ex, unsigned dataOffsetN, unsigned labelOffsetN, const double*& dataM, const double*& labelM )
{
dataM = ex.u.d.dataM->base + dataOffsetN;
labelM = ex.u.d.labelM->base + labelOffsetN;
return kOkRC;
}
//---------------------------------------------------------------------------------------------------------------
// datasubset_t
//
typedef struct datasubset_str
{
examples_t examples;
unsigned batchN;
unsigned iterIdx;
unsigned iterN;
} datasubset_t;
void _datasubset_destroy( datasubset_str& ss )
{
ss.iterIdx = 0;
ss.iterN = 0;
_examples_destroy(ss.examples);
}
template< typename T >
rc_t _datasetsubset_load( datasubset_t& ss, unsigned dstTypeFlag, unsigned batchN, const struct mtx::mtx_str<T>& dataM, const struct mtx::mtx_str<T>& labelM )
{
unsigned exampleN = 0;
switch( dataM.dimN )
{
case 2: exampleN = dataM.dimV[1]; break;
case 3: exampleN = dataM.dimV[2]; break;
default:
cwLogError(kInvalidArgRC,"The dataset must be contained in a matrix of 2 or 3 dimensions.");
}
ss.batchN = batchN;
ss.iterN = exampleN/batchN;
return _examples_load( ss.examples, dstTypeFlag, dataM, labelM );
}
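// Worked example of the bookkeeping above (illustrative numbers only): with a
// rank 2 data matrix holding 50000 example columns (dataM.dimV[1]==50000) and
// batchN==64, iterN = 50000/64 = 781 complete batches per epoch; the 16
// left-over examples are simply not visited during that epoch.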
rc_t _datasubset_data_dimV( const datasubset_t& ss, const unsigned*& dimV, unsigned& dimN )
{ return _examples_data_dimV( ss.examples, dimV, dimN ); }
rc_t _datasubset_label_dimV( const datasubset_t& ss, const unsigned*& dimV, unsigned& dimN )
{ return _examples_label_dimV( ss.examples, dimV, dimN ); }
rc_t _datasubset_batch_f( datasubset_t& ss, unsigned dataOffsetN, unsigned labelOffsetN, const float*& dataM, const float*& labelM )
{
rc_t rc;
if( ss.iterIdx >= ss.iterN )
return kEofRC;
rc = _examples_batch_f( ss.examples, dataOffsetN * ss.iterIdx, labelOffsetN * ss.iterIdx, dataM, labelM );
++ss.iterIdx;
return rc;
}
rc_t _datasubset_batch_d( datasubset_t& ss, unsigned dataOffsetN, unsigned labelOffsetN, const double*& dataM, const double*& labelM )
{
rc_t rc;
if( ss.iterIdx >= ss.iterN )
return kEofRC;
rc = _examples_batch_d( ss.examples, dataOffsetN * ss.iterIdx, labelOffsetN * ss.iterIdx, dataM, labelM );
++ss.iterIdx;
return rc;
}
//---------------------------------------------------------------------------------------------------------------
// datasetMgr_t
//
enum
{
kTrainSsIdx,
kValidSsIdx,
kTestSsIdx,
kDataSubSetN
};
typedef struct datasetMgr_str
{
const object_t* cfg;
unsigned typeFlag;
datasubset_t ssA[ kDataSubSetN ];
unsigned dataRealN;
unsigned labelRealN;
} datasetMgr_t;
datasetMgr_t* _handleToPtr( handle_t h )
{ return handleToPtr< handle_t, datasetMgr_t >(h); }
unsigned _ssFlagToIndex( unsigned flags )
{
flags &= (kTrainSsFl | kValidSsFl | kTestSsFl );
switch( flags )
{
case kTrainSsFl: return kTrainSsIdx;
case kValidSsFl: return kValidSsIdx;
case kTestSsFl: return kTestSsIdx;
}
cwLogError(kInvalidArgRC,"Invalid subset flags (0x%x).", flags );
return kInvalidIdx;
}
void _unload( datasetMgr_t* p )
{
for(unsigned i=0; i<kDataSubSetN; ++i)
_datasubset_destroy( p->ssA[i] );
}
rc_t _destroy( datasetMgr_t* p )
{
_unload(p);
mem::release(p);
return kOkRC;
}
unsigned _mtx_to_realN( const mtx::f_t& m )
{
switch( m.dimN )
{
case 1: return 1;
case 2: return m.dimV[0];
case 3: return m.dimV[0] * m.dimV[1];
}
cwLogError(kInvalidArgRC,"%i invalid matrix rank.",m.dimN);
return 0;
}
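// Example of the realN calculation above: MNIST style data stored one example
// per column as a rank 2 matrix of size [784,exampleN] gives realN = dimV[0] = 784,
// while the same images kept as a rank 3 matrix of size [28,28,exampleN] give
// realN = 28*28 = 784. (28x28/784 is the usual MNIST image size, used here only
// as an illustration of the rank cases handled above.)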
//rc_t _load( datasetMgr_t* p, unsigned ssFlags, unsigned batchN, const mtx::f_t& dataM, const mtx::f_t& labelM )
template< typename T >
rc_t _load( datasetMgr_t* p, unsigned ssFlags, unsigned batchN, const struct mtx::mtx_str<T>& dataM, const struct mtx::mtx_str<T>& labelM )
{
rc_t rc = kOkRC;
unsigned ssIdx;
if(( ssIdx = _ssFlagToIndex(ssFlags)) != kInvalidIdx )
if((rc = _datasetsubset_load( p->ssA[ssIdx], p->typeFlag, batchN, dataM, labelM )) != kOkRC )
{
p->dataRealN = _mtx_to_realN(dataM);
p->labelRealN = _mtx_to_realN(labelM);
return kOkRC;
}
return kInvalidArgRC;
}
rc_t _mnist_load_subset( datasetMgr_t* p, unsigned ssFlags, unsigned batchN, const mtx::f_t& m )
{
rc_t rc = kOkRC;
mtx::f_t* labelM = mtx::slice_alias(m,0,0,1); // the first row contains the labels
mtx::f_t* dsM = mtx::slice_alias(m,1,0); // all successive rows contain the data
mtx::f_t* oneHotM = mtx::alloc_one_hot<float>(*labelM); // convert the labels to a one hot encoding
//unsigned dsExampleN = mtx::ele_count<float>(*labelM); // total count of examples in this dataset
rc = _load<float>( p, ssFlags, batchN, *dsM, *oneHotM );
// Inform the matrix objects that the ownership
// of the data and dimV memory from 'dsM' and 'oneHotM'
// has been taken over by the dataset object.
//clear_memory_release_flag( *oneHotM );
//clear_memory_release_flag( *dsM );
mtx::release(labelM);
mtx::release(oneHotM);
mtx::release(dsM);
return rc;
}
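// Illustration of the label handling above (assumed layout, per cwDataSets.h):
// each column of the source MNIST matrix is one example, row 0 holds the digit
// label (0-9) and the remaining rows hold the pixel values. alloc_one_hot()
// expands the label row so that, for example, a label value of 3 becomes the
// ten element vector [0,0,0,1,0,0,0,0,0,0].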
rc_t _mnist_load( datasetMgr_t* p, const object_t* ele, unsigned batchN, unsigned flags )
{
rc_t rc = kOkRC;
const char* inDir = nullptr;
mnist::handle_t mnistH;
// locate
if( ele->get("inDir",inDir) != kOkRC )
return cwLogError(kSyntaxErrorRC,"MNIST 'indir' cfg. label not found.");
if( (rc = mnist::create(mnistH, inDir )) != kOkRC )
{
return cwLogError(rc,"MNIST dataset instantiation failed.");
}
else
{
const mtx::f_t* rM = mnist::train(mnistH);
const mtx::f_t* vM = mnist::validate(mnistH);
const mtx::f_t* tM = mnist::test(mnistH);
_mnist_load_subset( p, kTrainSsFl, batchN, *rM );
_mnist_load_subset( p, kValidSsFl, batchN, *vM );
_mnist_load_subset( p, kTestSsFl, batchN, *tM );
mnist::destroy(mnistH);
}
return rc;
}
}
}
cw::rc_t cw::dataset::create( handle_t& h, const object_t* cfg, unsigned flags )
{
rc_t rc;
if((rc = destroy(h)) != kOkRC )
return rc;
datasetMgr_t* p = mem::allocZ<datasetMgr_t>(1);
p->cfg = cfg;
p->typeFlag = flags;
h.set(p);
return rc;
}
cw::rc_t cw::dataset::destroy( handle_t& h )
{
rc_t rc = kOkRC;
if( !h.isValid() )
return kOkRC;
datasetMgr_t* p = _handleToPtr(h);
if((rc = _destroy(p)) != kOkRC )
return rc;
h.clear();
return rc;
}
cw::rc_t cw::dataset::load( handle_t h, const char* dsLabel, unsigned batchN, unsigned validPct, unsigned testPct, unsigned flags )
{
rc_t rc = kOkRC;
datasetMgr_t* p = _handleToPtr(h);
const object_t* dataL = p->cfg->find("dataL");
// empty the data mgr x_dsA[] before loading the next dataset
_unload(p);
// for each possible dataset
for(unsigned i=0; i<dataL->child_count(); ++i)
{
const object_t* ele = dataL->child_ele(i);
const char* label = nullptr;
// get the name of this dataset
if( ele->get("name", label ) != kOkRC )
{
// all ele's must have a 'name' field
cwLogError(kLabelNotFoundRC,"Dataset cfg. element at index %i does not have a 'name' field.",i);
goto errLabel;
}
// if this is the target dataset
if( strcmp(dsLabel,label) == 0 )
{
if( strcmp(label,"mnist") == 0 )
return _mnist_load(p, ele, batchN,flags);
}
}
errLabel:
return rc;
}
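// A cfg fragment consistent with the parsing above might look like the
// following (hypothetical example; only 'dataL', 'name' and the MNIST 'inDir'
// field are actually read by this code):
//
// dataL: [
//   { name:"mnist", inDir:"~/src/datasets/mnist" }
// ]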
cw::rc_t cw::dataset::subset_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref )
{
datasetMgr_t* p = _handleToPtr(h);
unsigned ssIdx;
if((ssIdx = _ssFlagToIndex(subsetFl)) == kInvalidIdx )
return kInvalidArgRC;
return _datasubset_data_dimV( p->ssA[ssIdx], dimV_Ref, dimN_Ref );
}
cw::rc_t cw::dataset::label_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref )
{
datasetMgr_t* p = _handleToPtr(h);
unsigned ssIdx;
if((ssIdx = _ssFlagToIndex(subsetFl)) == kInvalidIdx )
return kInvalidArgRC;
return _datasubset_label_dimV( p->ssA[ssIdx], dimV_Ref, dimN_Ref );
}
cw::rc_t cw::dataset::batch_f( handle_t h, unsigned subsetFl, const float*& dataM_Ref, const float*& labelM_Ref )
{
datasetMgr_t* p = _handleToPtr(h);
unsigned ssIdx;
if((ssIdx = _ssFlagToIndex(subsetFl)) == kInvalidIdx )
return kInvalidArgRC;
return _datasubset_batch_f( p->ssA[ssIdx], p->dataRealN, p->labelRealN, dataM_Ref, labelM_Ref );
}
cw::rc_t cw::dataset::batch_d( handle_t h, unsigned subsetFl, const double*& dataM_Ref, const double*& labelM_Ref )
{
datasetMgr_t* p = _handleToPtr(h);
unsigned ssIdx;
if((ssIdx = _ssFlagToIndex(subsetFl)) == kInvalidIdx )
return kInvalidArgRC;
return _datasubset_batch_d( p->ssA[ssIdx], p->dataRealN, p->labelRealN, dataM_Ref, labelM_Ref );
}
cw::rc_t cw::dataset::test( const object_t* cfg )
{
handle_t h;
rc_t rc = kOkRC;
const char* dsLabel = nullptr;
unsigned batchN = 64;
unsigned validPct = 10;
unsigned testPct = 10;
unsigned typeFlag = kFloatFl;
time::spec_t t0;
const float* dataM = nullptr;
const float* labelM = nullptr;
const unsigned *dataDimV = nullptr;
const unsigned *labelDimV = nullptr;
unsigned dataDimN = 0;
unsigned labelDimN = 0;
unsigned batchCnt = 0;
time::get(t0);
if((rc = cfg->getv("dsLabel",dsLabel,"batchN",batchN,"validPct",validPct,"testPct",testPct)) != kOkRC )
return cwLogError(rc,"Dataset test failed. Argument parse failed.");
if((rc = create(h,cfg,typeFlag)) != kOkRC )
return cwLogError(rc,"Dataset manager create failed.");
if((rc = load(h, dsLabel, batchN, validPct, testPct, kDoubleFl )) != kOkRC )
{
cwLogError(rc,"'%s' dataset load failed.", cwStringNullGuard(dsLabel));
goto errLabel;
}
if((rc = subset_dims(h,kTrainSsFl,dataDimV, dataDimN )) != kOkRC )
goto errLabel;
if((rc = label_dims(h,kTrainSsFl,labelDimV, labelDimN )) != kOkRC )
goto errLabel;
vop::print(dataDimV,dataDimN,"%i ","data: ");
vop::print(labelDimV,labelDimN,"%i ","label: ");
batchCnt = dataDimV[1]/batchN;
printf("batchCnt:%i\n",batchCnt);
for(unsigned i=0; true; ++i )
{
if((rc = batch_f(h,kTrainSsFl,dataM,labelM)) != kOkRC )
{
printf("rc:%i : %i %i\n",rc,batchCnt,i);
break;
}
if( i==0 )
{
vop::print(dataM,3,"%f ");
}
}
printf("elapsed %i ms\n",time::elapsedMs( t0 ) );
errLabel:
destroy(h);
return rc;
}

src/libcw/cwDataSets.h

@ -1,6 +1,47 @@
#ifndef cwDataSets_h
#define cwDataSets_h
/*
Select a dataset and provide batched data/label pairs.
1. In-memory datasets, stream from disk.
2. Train/valid/test set marking.
3. K-fold rotation.
4. Conversion from source data type to batch data type.
5. One-hot encoding.
6. Shuffling.
Options:
1. Read all data into memory (otherwise stream from disk - requires async reading)
2. data type conversion on-load vs on-batch.
3. one-hot encoding on-load vs on-batch.
4. shuffle
a. from streaming input buffer.
b. in memory
c. on batch
Source Driver:
label() // string label of this source
open(cfg) // open the source
close() // close the source
get_info() // get the source dim and type info
read(N,dst_t,dataBuf,labelBuf);// read a block of N examples and cvt to type dst_t
Implementation:
The only difference between streaming from disk and initial load to memory is that
stream-from-disk fills a second copy of the in-memory data structure.
All set marking, both train/valid/test (RVT) and K-fold, happens on the in-memory data structure after it is populated.
Shuffling happens on the in-memory data structure after it is populated.
If there is no data conversion or one-hot conversion on batch output then shuffling moves elements in memory;
otherwise the shuffle index vector is used as a lookup during the output step.
If K-Fold segmentation is used with a streaming dataset then the k-fold index must persist
between fold selection passes.
*/
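/*
A minimal sketch of the source driver interface listed above. The struct and
argument names are illustrative only and are not declared anywhere in this
commit:

  struct src_driver_t
  {
    const char* (*label)();                       // string label of this source
    rc_t        (*open)( const object_t* cfg );   // open the source
    rc_t        (*close)();                       // close the source
    rc_t        (*get_info)( unsigned& dimN, const unsigned*& dimV, unsigned& typeFl ); // get the source dim and type info
    rc_t        (*read)( unsigned N, unsigned dst_t, void* dataBuf, void* labelBuf );   // read N examples and cvt to type dst_t
  };
*/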
namespace cw
{
@ -13,16 +54,50 @@ namespace cw
rc_t create( handle_t& h, const char* dir );
rc_t destroy( handle_t& h );
-// Each column has one example.
-// The top row contains the labels.
-const mtx::fmtx_t* train( handle_t h );
-const mtx::fmtx_t* validate( handle_t h );
-const mtx::fmtx_t* test( handle_t h );
+// Each column has one example image.
+// The top row contains the example label.
+const mtx::f_t* train( handle_t h );
+const mtx::f_t* validate( handle_t h );
+const mtx::f_t* test( handle_t h );
rc_t test(const char* dir, const char* imageFn );
}
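// Illustration of the layout returned by train()/validate()/test(): using the
// usual MNIST sizes purely as an example, a training matrix would be
// [785,exampleN], where row 0 holds the digit label and rows 1..784 hold the
// 28x28 pixel values of one image per column.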
typedef handle<struct datasetMgr_str> handle_t;
// Data subset flags
enum { kTrainSsFl=0x10, kValidSsFl=0x20, kTestSsFl=0x40 };
enum { kFloatFl=0x02, kDoubleFl=0x04 };
rc_t create( handle_t& h, const object_t* cfg, unsigned flags );
rc_t destroy( handle_t& h );
// Load a dataset and divide it into train, validate, and test subsets.
rc_t load( handle_t h, const char* dsLabel, unsigned batchN, unsigned validPct, unsigned testPct, unsigned flags );
// Shuffle the subset.
rc_t shuffle( handle_t h, unsigned subsetFl );
// Get the dimensions of all the examples from a subset.
// dimN=1: dimV[0]=batchN
// dimN=2: dimV[0]=realN dimV[1]=batchN
// dimN=3: dimV[0,1]=realN dimV[2]=batchN
rc_t subset_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref );
rc_t label_dims( handle_t h, unsigned subsetFl, const unsigned*& dimV_Ref, unsigned& dimN_Ref );
// Get the next batch. Returns kEofRC at the end of an epoch.
rc_t batch_f( handle_t h, unsigned subsetFl, const float*& dataM_Ref, const float*& labelM_Ref );
rc_t batch_d( handle_t h, unsigned subsetFl, const double*& dataM_Ref, const double*& labelM_Ref );
rc_t test( const object_t* cfg );
}
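The dataset manager API above is exercised by cw::dataset::test() in cwDataSets.cpp. The condensed sketch below shows the intended call sequence; the batch size, percentages, and the "mnist" label are taken from that test code, and the surrounding function is hypothetical:

cw::rc_t example( const cw::object_t* cfg )
{
  using namespace cw;
  dataset::handle_t h;
  rc_t              rc;
  const float*      dataM  = nullptr;  // dataRealN x batchN data matrix
  const float*      labelM = nullptr;  // labelRealN x batchN one-hot label matrix

  // create the manager and request float batches
  if((rc = dataset::create(h, cfg, dataset::kFloatFl)) != kOkRC )
    return rc;

  // load the MNIST dataset in batches of 64 examples
  if((rc = dataset::load(h, "mnist", 64, 10, 10, dataset::kFloatFl)) == kOkRC )
  {
    // batch_f() returns kOkRC for each batch and kEofRC at the end of the epoch
    while( dataset::batch_f(h, dataset::kTrainSsFl, dataM, labelM) == kOkRC )
    {
      /* consume dataM / labelM here */
    }
  }

  dataset::destroy(h);
  return rc;
}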

cwNN.cpp

@ -5,70 +5,115 @@
#include "cwFile.h" #include "cwFile.h"
#include "cwNN.h" #include "cwNN.h"
/*
*/
namespace cw namespace cw
{ {
namespace nn namespace nn
{ {
template< typename R >
struct input_str typedef struct layer_desc_str
{ {
R* x; unsigned layerTId;
unsigned dimN; unsigned activationId;
unsigned* dimV; unsigned weightInitId;
}; unsigned biasInitId;
} layer_desc_t;
typedef struct dense_str typedef struct network_desc_str
{ {
unsigned xN; // count of neurons in src layer layer_desc_t* layers;
unsigned yN; // count of neurons in this layer unsigned layerN;
} network_desc_t;
real_t* wM; // wM[ xN, yN ] weight matrix
real_t* bV; // bV[ yN ] bias vector
real_t* yV; // scaled input + bias
real_t* aV; // activation output
real_t* dV; // contribution to cost for each neurode
real_t* gV; // C gradient wrt weight at each neurode
} dense_t;
typedef struct layer_str typedef struct layer_str
{ {
const layer_desc_t* desc;
const mtx::d_t* iM;
mtx::d_t wM;
mtx::d_t aM;
} layer_t; } layer_t;
typedef struct nn_str typedef struct nn_str
{ {
const network_desc_t* desc;
layer_t* layerL;
} nn_t; } nn_t;
void _mtx_mul( R* z, R* m, R* x, unsigned mN, unsigned mM ) nn_t* _allocNet( nn_t* nn, const object_t& nnCfg, unsigned inNodeN )
{ {
} }
void _add( R* y, R* x, unsigned n ) nn_t* _initNet( nn_t* nn )
{ {
} }
void _activation( dense_t* l ) rc_t _netForward( nn_t* p )
{
}
rc_t _netReverse( nn_t* )
{ {
} }
void _dense_forward( dense_t* l0, dense_t* l1 )
{
assert( l1->wM.dimV[1] == l0->yN );
assert( l1->wM.dimV[0] == l1->yN );
_mtx_mult( l1->zV, l1->wM.base, l0->aV, l0->yN, l1->yN );
_add( l1->zV, l1->bV, l1->yN );
_activation(l1) rc_t _batchUpdate( const mtx::d_t& ds, const train_args_t& args, unsigned ttlTrainExampleN )
{
} }
rc_t train( handle_t h, dataset::handle_t dsH, const train_args_t& args )
{
mtx::d_t ds_mtx;
mtx::d_t label_mtx;
unsigned trainExampleN = dataset::example_count(dsH);
unsigned batchPerEpoch = trainExampleN/args.batchN;
for(unsigned i=0; i<epochN; ++i)
{
for(unsigned j=0; j<batchsPerEpoch; ++j)
{
dataset::batchd(dsH, j, ds_mtx, label_mtx,args.batchN, batchPerEpoch);
_batchUpdate(ds_mtx,args,ttlTrainExampleN);
}
}
}
} }
rc_t test( const char* cfgFn, const char* projLabel )
{
object_t* cfg = nullptr;
rc_t rc = kOkRC;
if((rc = objectFromFile( cfgFn, cfg )) != kOkRC )
{
}
errLabel:
if( cfg != nullptr )
cfg->free();
return rc;
}
} }

cwNN.h

@ -15,9 +15,9 @@ namespace cw
enum
{
-kInputLayerId,
-kDenseLayerId,
-kConv1DConvId
+kInputLayerTId,
+kDenseLayerTId,
+kConv1DConvTId
};
enum
@ -27,33 +27,28 @@
kNormalInitId
};
-typedef struct layer_args_str
+typedef struct train_args_str
{
-unsigned typeId;
-unsigned actId;
-unsigned weightInitId;
-unsigned biasInitId;
-unsigned dimN;
-const unsigned* dimV;
-} layer_args_t;
-typedef struct network_args_str
-{
-layer_args_t* layers;
-unsigned layerN;
-} network_args_t;
-rc_t parse_args( const object_t& o, network_args_t& args );
-rc_t create( handle_t& h, const network_args_t& args );
+unsigned epochN;
+unsigned batchN;
+double eta;
+double lambda;
+} train_args_t;
+rc_t create( handle_t& h, const object_t& cfg );
rc_t destroy( handle_t& h );
-template< typename R >
-rc_t train( handle_t h, unsigned epochN, unsigned batchN, const dataset<R>& trainDs );
-template< typename R >
-rc_t infer( handle_t h, const dataset<R>& ds );
-rc_t test( const char* mnistDir );
+rc_t train( handle_t h, dataset::handle_t dsH, const train_args_t& args );
+rc_t test( handle_t h, dataset::handle_t dsH );
}
}
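Taken together with the dataset manager, the reworked cw::nn interface is evidently meant to be driven along these lines (a sketch only; the argument values are illustrative and the dataset handle wiring is an assumption based on the declarations above):

cw::rc_t run_training( const cw::object_t& nnCfg, cw::dataset::handle_t dsH )
{
  using namespace cw;
  nn::handle_t     nnH;
  nn::train_args_t args;
  rc_t             rc;

  args.epochN = 30;   // illustrative values only
  args.batchN = 32;
  args.eta    = 3.0;
  args.lambda = 0.0;

  // build the network from its cfg description
  if((rc = nn::create(nnH, nnCfg)) != kOkRC )
    return rc;

  // train against the previously loaded dataset, then evaluate it
  if((rc = nn::train(nnH, dsH, args)) == kOkRC )
    rc = nn::test(nnH, dsH);

  nn::destroy(nnH);
  return rc;
}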

study/nn/nn0.cfg (new file)

@ -0,0 +1,16 @@
{
projL: [
test0:{
net: {
data: { type:mnist, indir:"~/src/datasets/mnist" },
layerL: [
{ type: input, batchN: 32 },
{ type: dense, shape: [30] },
{ type: dense, shape: [10] },
]
}
}
]
}
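For reference, this configuration describes an MNIST run: the data block points the dataset manager at the MNIST files, and layerL sketches a small fully connected network, an input layer read in batches of 32, a 30 unit dense layer, and a 10 unit dense output layer matching the ten digit classes. How the 784 element input width (28x28 pixels) is derived from the data block is presumably left to the network construction code, which is still stubbed out in this commit.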