diff --git a/cwCsv.cpp b/cwCsv.cpp
new file mode 100644
index 0000000..a69cf0d
--- /dev/null
+++ b/cwCsv.cpp
@@ -0,0 +1,588 @@
+#include "cwCommon.h"
+#include "cwLog.h"
+#include "cwCommonImpl.h"
+#include "cwMem.h"
+#include "cwText.h"
+#include "cwFile.h"
+#include "cwObject.h"
+#include "cwCsv.h"
+#include "cwNumericConvert.h"
+
+namespace cw
+{
+  namespace csv
+  {
+    // Per column record.
+    typedef struct col_str
+    {
+      char*    title;     // copy of the column title taken from the title line
+      unsigned char_idx;  // offset of this column's field in lineBuf or kInvalidIdx
+    } col_t;
+
+    // State of one open CSV reader.
+    typedef struct csv_str
+    {
+      file::handle_t fH;
+
+      char*    lineBuf;      // current line; fields are zero terminated in place
+      unsigned lineCharCnt;  // count maintained by getLineAuto(); upper bound of the field scan
+
+      col_t*   colA;         // colA[ colN ]
+      unsigned colN;         // count of fields on the title line
+
+      unsigned curLineIdx;   // index of the current line (title line is 0)
+      unsigned curColCnt;    // count of fields on the current line
+    } csv_t;
+
+    // Called by _parse_line_buf() once per field.
+    typedef rc_t (*field_handler_t)( csv_t* p, unsigned col_idx, unsigned lineBufCharIdx );
+
+    csv_t* _handleToPtr( handle_t h )
+    { return handleToPtr<handle_t,csv_t>(h); }
+
+    // Close the file and release all memory owned by 'p', including 'p' itself.
+    rc_t _destroy( csv_t* p )
+    {
+      rc_t rc = kOkRC;
+
+      file::close(p->fH);
+
+      for(unsigned i=0; i<p->colN; ++i)
+        mem::release(p->colA[i].title);
+
+      mem::release(p->colA);
+      mem::release(p->lineBuf);
+      mem::release(p);
+      return rc;
+    }
+
+    // Debug field handler: print the field index and the field text.
+    rc_t _print_field_handler( csv_t* p, unsigned col_idx, unsigned lineBufCharIdx )
+    {
+      printf("%i '%s'\n",col_idx, p->lineBuf + lineBufCharIdx);
+      return kOkRC;
+    }
+
+    // Title line field handler: keep a copy of the field as the column title.
+    rc_t _fill_titles_field_handler( csv_t* p, unsigned col_idx, unsigned lineBufCharIdx )
+    {
+      assert( col_idx < p->colN );
+      p->colA[col_idx].title    = mem::duplStr(p->lineBuf + lineBufCharIdx);
+      p->colA[col_idx].char_idx = lineBufCharIdx;
+      return kOkRC;
+    }
+
+    // Data line field handler: record where the field of column 'col_idx' begins.
+    rc_t _update_col_array( csv_t* p, unsigned col_idx, unsigned lineBufCharIdx )
+    {
+      rc_t rc = kOkRC;
+
+      // colA[] holds colN records, so col_idx == colN is already out of range
+      if( col_idx >= p->colN )
+      {
+        rc = cwLogError(kSyntaxErrorRC,"Too many CSV columns on line index:%i",p->curLineIdx);
+        goto errLabel;
+      }
+
+      p->colA[col_idx].char_idx = lineBufCharIdx;
+
+    errLabel:
+      return rc;
+    }
+
+    // Split lineBuf into comma separated fields.
+    // If 'fieldHandlerCb' is not null then each separator is overwritten with a zero and
+    // every field is reported to the callback as (field index, offset of the field in lineBuf).
+    // If it is null then the fields are only counted and lineBuf is not modified.
+    // On success 'fieldN_Ref' is set to the count of fields on the line.
+    rc_t _parse_line_buf( csv_t* p, unsigned& fieldN_Ref, field_handler_t fieldHandlerCb )
+    {
+      enum {
+        kBeforeField,
+        kInField,
+        kInQuotedField,
+        kEscapeState
+      };
+
+      rc_t       rc                   = kOkRC;
+      unsigned   fieldN               = 0;             // index of the field being scanned
+      unsigned   state                = kBeforeField;
+      unsigned   bi                   = 0;             // offset of the first char. of the current field
+      const char field_seperator_char = ',';
+      const char dquote               = '"';
+
+      fieldN_Ref = 0;
+
+      for(unsigned i=0; i<p->lineCharCnt; ++i)
+      {
+        char c = p->lineBuf[i];
+
+        switch( state )
+        {
+          case kBeforeField:
+            // isspace() is undefined for negative values, so convert via unsigned char
+            if( isspace(static_cast<unsigned char>(c)) )
+              continue;
+
+            state = c==dquote ? kInQuotedField : kInField;
+            // fall through: this char. may itself be the separator of an empty field
+
+          case kInField:
+            if(c == field_seperator_char )
+            {
+              if( fieldHandlerCb != nullptr )
+              {
+                p->lineBuf[i] = 0;  // zero terminate the field
+
+                if((rc = fieldHandlerCb( p, fieldN, bi)) != kOkRC )
+                  goto errLabel;
+              }
+
+              fieldN += 1;
+              bi      = i+1;
+              state   = kBeforeField;
+            }
+            break;
+
+          case kInQuotedField:
+            if(c == dquote)
+            {
+              // a doubled quote inside a quoted field is an escaped quote
+              if( i+1 < p->lineCharCnt && p->lineBuf[i+1] == dquote )
+                state = kEscapeState;
+              else
+                state = kInField;
+            }
+            break;
+
+          case kEscapeState:
+            assert( c == dquote );
+            state = kInQuotedField;
+            break;
+        }
+      }
+
+      // report the last field on the line
+      if( fieldHandlerCb != nullptr )
+        if((rc = fieldHandlerCb( p, fieldN, bi )) != kOkRC )
+          goto errLabel;
+
+      fieldN_Ref = fieldN + 1;
+
+      // Invalidate any fields that were not populated
+      // This allows rows that have fewer than p->colN columns to be legal.
+      if( p->colA != nullptr && fieldN_Ref < p->colN )
+        for(unsigned i=fieldN_Ref; i<p->colN; ++i)
+          p->colA[i].char_idx = kInvalidIdx;
+
+    errLabel:
+      return rc;
+    }
+
+    // Read the next line of the file into lineBuf.
+    // kEofRC is passed through without being logged; any other failure is logged.
+    rc_t _fill_line_buffer( csv_t* p )
+    {
+      rc_t rc;
+
+      if((rc = getLineAuto( p->fH, &p->lineBuf, &p->lineCharCnt )) != kOkRC )
+      {
+        if( rc != kEofRC )
+          rc = cwLogError(rc,"Line buf alloc failed on line index:%i.",p->curLineIdx);
+      }
+
+      return rc;
+    }
+
+    // Return the index of the column whose title matches 'title' or kInvalidIdx if there is none.
+    unsigned _title_to_col_index( csv_t* p, const char* title )
+    {
+      for(unsigned i=0; i<p->colN; ++i)
+      {
+        if( textCompare(p->colA[i].title,title) == 0 )
+          return i;
+      }
+
+      return kInvalidIdx;
+    }
+
+    // Read the title line, allocate colA[] and fill in the column titles.
+    // titleA[titleN] is the caller supplied list of reference titles.
+    rc_t _create_col_ref_array( csv_t* p, const char** titleA=nullptr, unsigned titleN=0 )
+    {
+      rc_t     rc   = kOkRC;
+      unsigned colN = 0;
+
+      if((rc = _fill_line_buffer(p)) != kOkRC )
+        goto errLabel;
+
+      // get a column count
+      if((rc = _parse_line_buf(p, colN, nullptr)) != kOkRC )
+      {
+        rc = cwLogError(rc,"Error parsing the title line.");
+        goto errLabel;
+      }
+
+      // allocate the column reference array
+      p->colA = mem::allocZ<col_t>(colN);
+      p->colN = colN;
+
+      // set the column titles
+      if((rc = _parse_line_buf(p, colN, _fill_titles_field_handler)) != kOkRC )
+      {
+        rc = cwLogError(rc,"Error setting the column reference titles.");
+        goto errLabel;
+      }
+
+      // verify the reference titles
+      // NOTE(review): the body of this loop was unreadable in the reviewed patch and has
+      // been reconstructed - confirm the error code and message against the original.
+      for(unsigned i=0; i<titleN; ++i)
+        if( _title_to_col_index(p,titleA[i]) == kInvalidIdx )
+        {
+          rc = cwLogError(kSyntaxErrorRC,"The required column '%s' was not found in the CSV title line.",cwStringNullGuard(titleA[i]));
+          goto errLabel;
+        }
+
+    errLabel:
+      return rc;
+    }
+
+    // Set 'fieldStr_Ref' to the text of column 'colIdx' on the current line.
+    // NOTE(review): the opening lines of this function were unreadable in the reviewed
+    // patch and have been reconstructed from its callers - confirm against the original.
+    rc_t _get_field_str( csv_t* p, unsigned colIdx, const char*& fieldStr_Ref )
+    {
+      rc_t rc = kOkRC;
+
+      // valid column indexes on the current line are 0 through curColCnt-1
+      if( colIdx >= p->curColCnt || p->colA[colIdx].char_idx == kInvalidIdx )
+      {
+        rc = cwLogError(kInvalidArgRC,"The CSV column index '%i' is not valid on line index:%i. Field count:%i", colIdx,p->curLineIdx,p->curColCnt );
+        goto errLabel;
+      }
+
+      fieldStr_Ref = p->lineBuf + p->colA[colIdx].char_idx;
+
+    errLabel:
+      return rc;
+    }
+
+    // Convert the text of column 'colIdx' on the current line to a number of type T.
+    template < typename T >
+    rc_t _parse_number_field( csv_t* p, unsigned colIdx, T& valueRef )
+    {
+      rc_t        rc       = kOkRC;
+      const char* fieldStr = nullptr;
+
+      if((rc = _get_field_str(p,colIdx,fieldStr)) != kOkRC )
+        goto errLabel;
+
+      if((rc = string_to_number(fieldStr,valueRef)) != kOkRC )
+      {
+        rc = cwLogError(rc,"Numeric parse failed on column '%s' on line index:%i",cwStringNullGuard(p->colA[colIdx].title),p->curLineIdx);
+        goto errLabel;
+      }
+
+    errLabel:
+      return rc;
+    }
+
+    // Convert the text of the column titled 'colLabel' on the current line to a number of type T.
+    template < typename T >
+    rc_t _parse_number_field( csv_t* p, const char* colLabel, T& valueRef )
+    {
+      unsigned colIdx;
+      if((colIdx = _title_to_col_index(p, colLabel)) == kInvalidIdx )
+        return cwLogError(kInvalidArgRC,"The column label '%s' is not valid.",cwStringNullGuard(colLabel));
+
+      return _parse_number_field(p,colIdx,valueRef);
+    }
+
+    // Set 'valRef' to the text of column 'colIdx' on the current line (a pointer into lineBuf) or to nullptr on failure.
+    rc_t _parse_string_field( csv_t* p, unsigned colIdx, const char*& valRef )
+    {
+      valRef = nullptr;
+      return _get_field_str(p,colIdx,valRef);
+    }
+
+  }
+}
+
+cw::rc_t cw::csv::create( handle_t& hRef, const char* fname, const char** titleA, unsigned titleN )
+{
+  rc_t rc;
+  if((rc = destroy(hRef)) != kOkRC )
+    return rc;
+
+  csv_t* p = mem::allocZ<csv_t>();
+
+  if((rc = file::open(p->fH,fname,file::kReadFl)) != kOkRC )
+    goto errLabel;
+
+  if((rc = _create_col_ref_array( p, titleA, titleN )) != kOkRC )
+    goto errLabel;
+
+  hRef.set(p);
+
+ errLabel:
+
+  if( rc != kOkRC )
+  {
+    rc = cwLogError(rc,"CSV file open failed.");
+    _destroy(p);
+  }
+
+  return rc;
+}
+
+cw::rc_t cw::csv::destroy(handle_t& hRef )
+{
+  rc_t rc = kOkRC;
+  if(!hRef.isValid() )
+    return rc;
+
+  csv_t* p = _handleToPtr(hRef);
+
+  if((rc = _destroy(p)) != kOkRC )
+    return rc;
+
+  hRef.clear();
+
+  return rc;
+}
+
+cw::rc_t cw::csv::line_count( handle_t h, unsigned& lineCntRef )
+{
+  rc_t   rc = kOkRC;
+  csv_t* p  = _handleToPtr(h);
+  lineCntRef = 0;
+
+  long offset = 0;
+  if((rc = file::tell(p->fH,&offset)) != kOkRC )
+  {
+    rc = cwLogError(rc,"file tell() failed.");
+    goto errLabel;
+  }
+
+  if((rc = file::seek(p->fH,file::kBeginFl,0)) != kOkRC )
+  {
+    rc = cwLogError(rc,"file seek failed.");
+    goto errLabel;
+  }
+
+  if((rc = file::lineCount(p->fH, &lineCntRef)) != kOkRC )
+  {
+    rc = cwLogError(rc,"CSV line count query failed.");
+    goto errLabel;
+  }
+
+  // return to the position the file had on entry
+  if((rc = file::seek(p->fH,file::kBeginFl,offset)) != kOkRC )
+  {
+    rc = cwLogError(rc,"file seek() failed.");
+    goto errLabel;
+  }
+
+ errLabel:
+  return rc;
+}
+
+unsigned cw::csv::title_col_index( handle_t h, const char* title )
+{
+  csv_t* p = _handleToPtr(h);
+  return _title_to_col_index(p,title);
+}
+
+cw::rc_t cw::csv::rewind( handle_t h )
+{
+  rc_t   rc = kOkRC;
+  csv_t* p  = _handleToPtr(h);
+
+  if((rc = file::seek(p->fH,file::kBeginFl,0)) != kOkRC )
+  {
+    rc = cwLogError(rc,"Rewind failed on CSV.");
+    goto errLabel;
+  }
+
+  if((rc = _fill_line_buffer(p)) != kOkRC )
+    goto errLabel;
+
+  p->curLineIdx = 0;
+
+  // lineBuf now holds the unparsed title line, so the offsets in colA[] are stale.
+  // Report zero fields until next_line() parses a data line.
+  p->curColCnt = 0;
+
+ errLabel:
+  return rc;
+}
+
+cw::rc_t cw::csv::next_line( handle_t h )
+{
+  rc_t   rc = kOkRC;
+  csv_t* p  = _handleToPtr(h);
+
+  p->curLineIdx += 1;
+
+  if((rc = _fill_line_buffer(p)) != kOkRC )
+    goto errLabel;
+
+  // locate the fields on the new line
+  if((rc = _parse_line_buf(p, p->curColCnt, _update_col_array)) != kOkRC )
+  {
+    rc = cwLogError(rc,"Error parsing line index %i.",p->curLineIdx);
+    goto errLabel;
+  }
+
+ errLabel:
+  return rc;
+}
+
+unsigned cw::csv::cur_line_index( handle_t h )
+{
+  csv_t* p = _handleToPtr(h);
+  return p->curLineIdx;
+}
+
+cw::rc_t cw::csv::field_char_count( handle_t h, unsigned colIdx, unsigned& charCntRef )
+{
+  rc_t        rc       = kOkRC;
+  csv_t*      p        = _handleToPtr(h);
+  const char* fieldStr = nullptr;
+
+  charCntRef = 0;
+
+  if((rc = _get_field_str(p,colIdx,fieldStr)) != kOkRC )
+  {
+    rc = cwLogError(rc,"Field char count failed.");
+    goto errLabel;
+  }
+
+  charCntRef = textLength(fieldStr);
+
+ errLabel:
+  return rc;
+}
+
+cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, unsigned& valRef )
+{
+  csv_t* p = _handleToPtr(h);
+  return _parse_number_field( p, colIdx, valRef );
+}
+
+cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, int& valRef )
+{
+  csv_t* p = _handleToPtr(h);
+  return _parse_number_field( p, colIdx, valRef );
+}
+
+cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, double& valRef )
+{
+  csv_t* p = _handleToPtr(h);
+  return _parse_number_field( p, colIdx, valRef );
+}
+
+cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, const char*& valRef )
+{
+  csv_t* p = _handleToPtr(h);
+  return _parse_string_field( p, colIdx, valRef );
+}
+
+cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, unsigned& valRef )
+{
+  csv_t* p = _handleToPtr(h);
+  return _parse_number_field(p, colLabel, valRef );
+}
+
+cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, int& valRef )
+{
+  csv_t* p = _handleToPtr(h);
+  return _parse_number_field(p, colLabel, valRef );
+}
+
+cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, double& valRef )
+{
+  csv_t* p = _handleToPtr(h);
+  return _parse_number_field(p, colLabel, valRef );
+}
+
+cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, const char*& valRef )
+{
+  csv_t* p = _handleToPtr(h);
+
+  unsigned colIdx;
+  if((colIdx = _title_to_col_index(p, colLabel)) == kInvalidIdx )
+    return cwLogError(kInvalidArgRC,"The column label '%s' is not valid.",cwStringNullGuard(colLabel));
+
+  return _parse_string_field(p,colIdx,valRef);
+}
+
+cw::rc_t cw::csv::test( const object_t* args )
+{
+  rc_t            rc     = kOkRC;
+  const char*     fname  = nullptr;
+  const object_t* titleL = nullptr;
+  csv::handle_t   csvH;
+
+  if((rc = args->getv("fname",fname,
+                      "titleL",titleL )) != kOkRC )
+  {
+    rc = cwLogError(rc,"CSV test arg. parse failed.");
+    goto errLabel;
+  }
+  else
+  {
+    // Note that 'rc' is deliberately not redeclared in this block:
+    // a local 'rc' would hide every failure below from the caller.
+    unsigned    titleN   = titleL == nullptr ? 0 : titleL->child_count();
+    const char* titleA[ titleN ];
+    unsigned    line_cnt = 0;
+
+    for(unsigned i=0; i<titleN; ++i)
+    {
+      if((rc = titleL->child_ele(i)->value(titleA[i])) != kOkRC )
+      {
+        rc = cwLogError(rc,"Reference title array parsing failed.");
+        goto errLabel;
+      }
+    }
+
+    if((rc = csv::create(csvH,fname,titleA,titleN)) != kOkRC )
+    {
+      rc = cwLogError(rc,"CSV create failed.");
+      goto errLabel;
+    }
+
+    line_count(csvH,line_cnt);
+    printf("lines:%i\n",line_cnt);
+
+    for(unsigned i=0; i<10 && (rc = next_line(csvH)) == kOkRC; ++i )
+    {
+      const char* opcode = nullptr;
+      unsigned    d0=0,d1=0;
+      if((rc = getv(csvH,"opcode",opcode,
+                    "d0",d0,
+                    "d1",d1)) != kOkRC )
+      {
+        rc = cwLogError(rc,"CSV get failed.");
+        break;
+      }
+
+      printf("%s %i %i\n",cwStringNullGuard(opcode),d0,d1);
+    }
+
+    // reaching the end of the file is the normal way for the loop to end
+    if( rc == kEofRC )
+      rc = kOkRC;
+
+    line_count(csvH,line_cnt);
+    printf("lines:%i\n",line_cnt);
+  }
+
+ errLabel:
+
+  destroy(csvH);
+
+  return rc;
+}
diff --git a/cwCsv.h b/cwCsv.h
new file mode 100644
index 0000000..c2795ac
--- /dev/null
+++ b/cwCsv.h
@@ -0,0 +1,73 @@
+#ifndef cwCsv_h
+#define cwCsv_h
+
+namespace cw
+{
+  namespace csv
+  {
+    typedef handle<struct csv_str> handle_t;
+
+    // Open the CSV file 'fname' and read its title line.
+    rc_t create( handle_t& hRef, const char* fname, const char** titleA=nullptr, unsigned titleN=0 );
+
+    rc_t destroy(handle_t& hRef );
+
+    // Count the lines in the file. On success the file position is the same as it was on entry.
+    rc_t line_count( handle_t h, unsigned& lineCntRef );
+
+    // Return the index of the column titled 'title' or kInvalidIdx if there is no such column.
+    unsigned title_col_index( handle_t h, const char* title );
+
+    // Return to the title line. Call next_line() to move to the first line following it.
+    rc_t rewind( handle_t h );
+
+    // Read the next line and locate its fields. kEofRC from the file reader is passed through.
+    rc_t next_line( handle_t h );
+
+    // line index (first line==0) of the line currently being parsed.
+    unsigned cur_line_index( handle_t h );
+
+    // Length of the text of the field in column 'colIdx' of the current line.
+    rc_t field_char_count( handle_t h, unsigned colIdx, unsigned& charCntRef );
+
+    // Get the value of a field on the current line, selected by column index ...
+    rc_t parse_field( handle_t h, unsigned colIdx, unsigned& valRef );
+    rc_t parse_field( handle_t h, unsigned colIdx, int& valRef );
+    rc_t parse_field( handle_t h, unsigned colIdx, double& valRef );
+    rc_t parse_field( handle_t h, unsigned colIdx, const char*& valRef );
+
+    // ... or selected by column title.
+    rc_t parse_field( handle_t h, const char* colLabel, unsigned& valRef );
+    rc_t parse_field( handle_t h, const char* colLabel, int& valRef );
+    rc_t parse_field( handle_t h, const char* colLabel, double& valRef );
+    rc_t parse_field( handle_t h, const char* colLabel, const char*& valRef );
+
+    // Terminates the _getv() recursion once every label/value pair has been consumed.
+    inline rc_t _getv(handle_t) { return kOkRC; }
+
+    // Parse the first label/value pair and then recurse on the remaining pairs.
+    template< typename T0, typename T1, typename... ARGS >
+      rc_t _getv( handle_t h, T0 label, T1& valRef, ARGS&&... args )
+    {
+      rc_t rc = parse_field(h,label,valRef);
+
+      // if no error occurred ....
+      if( rc == kOkRC )
+        rc = _getv(h,std::forward<ARGS>(args)...); // ... recurse to find next label/value pair
+      else
+        rc = cwLogError(rc,"CSV parse failed for column label:'%s' on line index:%i.",cwStringNullGuard(label),cur_line_index(h));
+
+      return rc;
+    }
+
+    // getv("label0",v0,"label1",v1, ... )
+    // Read several fields of the current line, each selected by column title, in one call.
+    template< typename T0, typename T1, typename... ARGS >
+      rc_t getv( handle_t h, T0 label, T1& valRef, ARGS&&... args )
+    { return _getv(h,label,valRef,std::forward<ARGS>(args)...); }
+
+    rc_t test( const object_t* args );
+  }
+}
+
+#endif