2024-12-01 19:35:24 +00:00
//| Copyright: (C) 2020-2024 Kevin Larke <contact AT larke DOT org>
//| License: GNU GPL version 3.0 or above. See the accompanying LICENSE file.
2023-05-12 01:09:04 +00:00
#include "cwCommon.h"
#include "cwLog.h"
#include "cwCommonImpl.h"
2024-05-29 16:36:57 +00:00
#include "cwTest.h"
2023-05-12 01:09:04 +00:00
#include "cwMem.h"
#include "cwText.h"
#include "cwFile.h"
#include "cwObject.h"
#include "cwCsv.h"
#include "cwNumericConvert.h"
2024-07-03 18:24:18 +00:00
#include <type_traits>
2023-05-12 01:09:04 +00:00
namespace cw
namespace csv
// Per-column bookkeeping record. One record exists for each column found
// on the title line of the CSV file.
typedef struct col_str
{
  char*    title;     // copy of the column title (made with mem::duplStr() by _fill_titles_field_handler())
  unsigned char_idx;  // offset into csv_t::lineBuf of this column's field on the current line,
                      // or kInvalidIdx if the current line has no field for this column
} col_t;
typedef struct csv_str
file::handle_t fH;
char* lineBuf;
unsigned lineCharCnt;
col_t* colA;
unsigned colN;
unsigned curLineIdx;
unsigned curColCnt;
} csv_t;
typedef rc_t (*field_handler_t)( csv_t* p, unsigned col_idx, unsigned lineBufCharIdx );
// Convert a public CSV handle to the csv_t record it refers to.
csv_t* _handleToPtr( handle_t h )
{
  csv_t* csvPtr = handleToPtr<handle_t,csv_t>(h);
  return csvPtr;
}
// Release every resource owned by a CSV reader, including the record itself.
// 'p' must not be used after this call.
//
// NOTE(review): only the title loop header survived in this copy of the file.
// The release/close calls below were reconstructed from what the rest of the
// file allocates (mem::duplStr() titles, mem::allocZ() column array and record,
// the line buffer filled by getLineAuto(), the file opened by file::open()).
// Confirm the exact mem::/file:: calls against the project headers.
rc_t _destroy( csv_t* p )
{
  rc_t rc = kOkRC;

  if( p == nullptr )
    return rc;

  // release the duplicated title strings before the array that holds them
  for(unsigned i=0; i<p->colN; ++i)
    mem::release(p->colA[i].title);

  mem::release(p->colA);
  mem::release(p->lineBuf);

  file::close(p->fH);

  mem::release(p);

  return rc;
}
// Field handler which prints the field index and the field text to stdout.
// Always returns kOkRC.
rc_t _print_field_handler( csv_t* p, unsigned col_idx, unsigned lineBufCharIdx )
{
  printf("%i '%s'\n",col_idx, p->lineBuf + lineBufCharIdx);
  return kOkRC;
}
// Field handler used on the title line: store a copy of the field text as
// the title of column 'col_idx' and record where the field begins in the
// line buffer. p->colA must already hold at least col_idx+1 records.
// Always returns kOkRC.
rc_t _fill_titles_field_handler( csv_t* p, unsigned col_idx, unsigned lineBufCharIdx )
{
  assert( col_idx < p->colN );

  p->colA[col_idx].title    = mem::duplStr(p->lineBuf + lineBufCharIdx);
  p->colA[col_idx].char_idx = lineBufCharIdx;

  return kOkRC;
}
// Field handler used on data lines: record the line buffer offset of
// field 'col_idx' in the column array.
//
// Returns kSyntaxErrorRC if the line has more fields than the title line
// has columns.
rc_t _update_col_array( csv_t* p, unsigned col_idx, unsigned lineBufCharIdx )
{
  rc_t rc = kOkRC;

  // Valid column indexes are 0..colN-1. The test must be '>=': with '>'
  // a field at index colN would be written one past the end of colA[].
  if( col_idx >= p->colN )
  {
    rc = cwLogError(kSyntaxErrorRC,"Too many CSV columns on line index:%i",p->curLineIdx);
    goto errLabel;
  }

  p->colA[col_idx].char_idx = lineBufCharIdx;

errLabel:
  return rc;
}
// Scan the current line buffer and split it into comma separated fields.
//
// p              - reader whose lineBuf/lineCharCnt describe the line to scan.
// fieldN_Ref     - set to 0 on entry and to the count of fields found on success.
// fieldHandlerCb - if not nullptr, called once per field with the field index and
//                  the offset of the field's first char in p->lineBuf. The separator
//                  which ends a field is overwritten with '\0' before the call, so the
//                  line buffer is modified in place. Pass nullptr to only count fields.
//
// A field which begins with a double quote may contain separators, and a pair of
// double quotes inside such a field is treated as an escaped quote. This function
// does not remove the quote characters from the field text.
//
// If p->colA exists, the records of columns for which the line has no field are
// marked with kInvalidIdx.
//
// Returns kOkRC, or the first error returned by the callback.
//
// NOTE(review): the enum members, 'break' statements, 'else' and 'errLabel:' were
// missing from this copy of the file and have been reconstructed from the state
// names used in the switch - confirm against the original source.
rc_t _parse_line_buf( csv_t* p, unsigned& fieldN_Ref, field_handler_t fieldHandlerCb )
{
  enum {
    kBeforeField,    // skipping white space before the first char of a field
    kInField,        // inside an unquoted field, or after the closing quote of a quoted field
    kInQuotedField,  // inside a double quoted field
    kEscapeState     // the previous char was the first quote of an escaped quote pair
  };

  rc_t       rc                   = kOkRC;
  unsigned   fieldN               = 0;
  unsigned   state                = kBeforeField;
  unsigned   bi                   = 0;   // offset of the first char of the current field
  const char field_seperator_char = ',';
  const char dquote               = '"';

  fieldN_Ref = 0;

  for(unsigned i=0; i<p->lineCharCnt; ++i)
  {
    char c = p->lineBuf[i];

    switch( state )
    {
      case kBeforeField:
        // the cast keeps isspace() defined for chars with the high bit set
        if( isspace(static_cast<unsigned char>(c)) )
          break;

        state = c==dquote ? kInQuotedField : kInField;

        // fall through: the first char may itself be a separator (empty field)

      case kInField:
        if(c == field_seperator_char )
        {
          if( fieldHandlerCb != nullptr )
          {
            p->lineBuf[i] = 0; // zero terminate the field

            if((rc = fieldHandlerCb( p, fieldN, bi)) != kOkRC )
              goto errLabel;
          }

          fieldN += 1;
          bi      = i+1;
          state   = kBeforeField;
        }
        break;

      case kInQuotedField:
        if(c == dquote)
        {
          // a doubled quote is an escaped quote, a single quote closes the field
          if( i+1 < p->lineCharCnt && p->lineBuf[i+1] == dquote )
            state = kEscapeState;
          else
            state = kInField;
        }
        break;

      case kEscapeState:
        assert( c == dquote );
        state = kInQuotedField;
        break;
    }
  }

  // the last field on the line is ended by the end of the line rather than a separator
  if( fieldHandlerCb != nullptr )
    if((rc = fieldHandlerCb( p, fieldN, bi )) != kOkRC )
      goto errLabel;

  fieldN_Ref = fieldN + 1;

  // Invalidate any fields that were not populated
  // This allows rows that have fewer than p->colN columns to be legal.
  if( p->colA != nullptr && fieldN_Ref < p->colN )
    for(unsigned i=fieldN_Ref; i<p->colN; ++i)
      p->colA[i].char_idx = kInvalidIdx;

errLabel:
  return rc;
}
// Read the next line of the file into p->lineBuf and trim trailing white
// space from it. On success p->lineCharCnt holds the trimmed length.
//
// Returns kOkRC, kEofRC (not logged) when getLineAuto() reports the end of
// the file, or the logged error returned by getLineAuto().
//
// NOTE(review): the statement which steps the trim loop backward and the
// 'else' which separates the success path from the failure path were missing
// from this copy of the file and have been reconstructed.
rc_t _fill_line_buffer( csv_t* p )
{
  rc_t rc;

  // read the next line
  if((rc = getLineAuto( p->fH, &p->lineBuf, &p->lineCharCnt )) != kOkRC )
  {
    if( rc != kEofRC )
      rc = cwLogError(rc,"Line buf alloc failed on line index:%i.",p->curLineIdx);
  }
  else
  {
    p->lineCharCnt = textLength(p->lineBuf);

    // trim trailing white space from the line buffer.
    // (the cast keeps isspace() defined for chars with the high bit set)
    while( p->lineCharCnt > 0 && isspace(static_cast<unsigned char>(p->lineBuf[ p->lineCharCnt-1 ])) )
    {
      p->lineCharCnt -= 1;
      p->lineBuf[ p->lineCharCnt ] = '\0';
    }
  }

  return rc;
}
// Return the index of the first column whose title compares equal to
// 'title' (using textCompare()), or kInvalidIdx if there is no such column.
unsigned _title_to_col_index( csv_t* p, const char* title )
{
  for(unsigned i=0; i<p->colN; ++i)
    if( textCompare(p->colA[i].title,title) == 0 )
      return i;

  return kInvalidIdx;
}
// Read the title line of the file and build the column array from it.
//
// The title line is parsed twice: once to count the columns so that
// p->colA can be allocated, and once to store the column titles.
//
// titleA[titleN] - optional list of column titles which must exist in the
//                  file. kEleNotFoundRC is returned if one of them is missing.
//
// Returns kOkRC on success. Every failure is logged.
rc_t _create_col_ref_array( csv_t* p, const char** titleA=nullptr, unsigned titleN=0 )
{
  rc_t     rc   = kOkRC;
  unsigned colN = 0;

  if((rc = _fill_line_buffer(p)) != kOkRC )
    goto errLabel;

  // get a column count
  if((rc = _parse_line_buf(p, colN, nullptr)) != kOkRC )
  {
    rc = cwLogError(rc,"Error parsing the title line.");
    goto errLabel;
  }

  // allocate the column reference array
  p->colA = mem::allocZ<col_t>(colN);
  p->colN = colN;

  // set the column titles
  if((rc = _parse_line_buf(p, colN, _fill_titles_field_handler)) != kOkRC )
  {
    rc = cwLogError(rc,"Error setting the column reference titles.");
    goto errLabel;
  }

  // verify the reference titles (skipped when no title array was given)
  if( titleA != nullptr )
  {
    for(unsigned i=0; i<titleN; ++i)
      if( _title_to_col_index( p, titleA[i] ) == kInvalidIdx )
      {
        rc = cwLogError(kEleNotFoundRC,"The required column '%s' does not exist.",titleA[i]);
        goto errLabel;
      }
  }

errLabel:
  if( rc != kOkRC )
    rc = cwLogError(rc,"col ref array create failed.");

  return rc;
}
// Get a pointer to the text of field 'colIdx' on the current line.
//
// fieldStr_Ref is set to nullptr on entry and, on success, to the zero
// terminated field text inside p->lineBuf. The pointer is only meaningful
// until the line buffer is refilled.
//
// Returns kInvalidArgRC if 'colIdx' is not a column of the file or if the
// current line has no field for that column.
rc_t _get_field_str( csv_t* p, unsigned colIdx, const char*& fieldStr_Ref )
{
  rc_t rc = kOkRC;

  fieldStr_Ref = nullptr;

  // Valid indexes are 0..curColCnt-1 and must also be inside colA[colN].
  // The range tests come first so that colA[] is never read out of bounds.
  if( colIdx >= p->colN || colIdx >= p->curColCnt || p->colA[colIdx].char_idx == kInvalidIdx )
  {
    rc = cwLogError(kInvalidArgRC,"The CSV column index '%i' is not valid on line index:%i. Field count:%i", colIdx,p->curLineIdx,p->curColCnt );
    goto errLabel;
  }

  fieldStr_Ref = p->lineBuf + p->colA[colIdx].char_idx;

errLabel:
  return rc;
}
// Convert the text of field 'colIdx' on the current line to a number.
//
// Leading white space is skipped. The conversion is only attempted when the
// first remaining char is a digit or, for floating point types, a decimal
// point. Otherwise 'valueRef' is left unchanged and kOkRC is returned, so
// an empty field keeps whatever value the caller put in 'valueRef'.
//
// NOTE(review): because of the first-char test a field with a leading sign
// (e.g. "-5") is also left unconverted - confirm that this is intended.
//
// Returns the error from _get_field_str() if the field does not exist, or
// the logged error from string_to_number() if the conversion fails.
template < typename T >
rc_t _parse_number_field( csv_t* p, unsigned colIdx, T& valueRef )
{
  rc_t        rc       = kOkRC;
  const char* fieldStr = nullptr;

  if((rc = _get_field_str(p,colIdx,fieldStr)) != kOkRC )
    goto errLabel;

  if( fieldStr != nullptr )
  {
    // advance past white space
    // (the casts keep isspace()/isdigit() defined for chars with the high bit set)
    while( *fieldStr && isspace(static_cast<unsigned char>(*fieldStr)) )
      ++fieldStr;

    // the first char must be a number or decimal point
    if( isdigit(static_cast<unsigned char>(*fieldStr)) || (*fieldStr=='.' && std::is_floating_point<T>()) )
    {
      if((rc = string_to_number(fieldStr,valueRef)) != kOkRC )
      {
        rc = cwLogError(rc,"Numeric parse failed on column '%s' on line index:%i",cwStringNullGuard(p->colA[colIdx].title),p->curLineIdx);
        goto errLabel;
      }
    }
  }

errLabel:
  return rc;
}
// Convert the field in the column titled 'colLabel' on the current line to
// a number. Returns kInvalidArgRC if no column has that title, otherwise
// the result of the by-index version of this function.
template < typename T >
rc_t _parse_number_field( csv_t* p, const char* colLabel, T& valueRef )
{
  unsigned colIdx;

  if((colIdx = _title_to_col_index(p, colLabel)) == kInvalidIdx )
    return cwLogError(kInvalidArgRC,"The column label '%s' is not valid.",cwStringNullGuard(colLabel));

  return _parse_number_field(p,colIdx,valueRef);
}
// Get the text of field 'colIdx' on the current line.
//
// valRef is set to nullptr on failure. On success it points into the
// reader's line buffer and is not a copy.
//
// Returns the result of _get_field_str().
rc_t _parse_string_field( csv_t* p, unsigned colIdx, const char*& valRef )
{
  // _get_field_str() sets valRef to nullptr before it does anything else
  return _get_field_str(p,colIdx,valRef);
}
2024-02-20 02:57:54 +00:00
// Convert the text of field 'colIdx' on the current line to a bool.
//
// The field must match "true" or "false" as tested by textIsEqualI().
// 'valRef' is only written when the field matches one of the two words.
//
// Returns the error from _get_field_str() if the field does not exist, or
// kSyntaxErrorRC if the field is neither word.
//
// NOTE(review): the 'else' keywords of the if/else chain were missing from
// this copy of the file and have been reconstructed.
rc_t _parse_bool_field( csv_t* p, unsigned colIdx, bool& valRef )
{
  rc_t        rc       = kOkRC;
  const char* fieldStr = nullptr;

  if((rc = _get_field_str(p,colIdx,fieldStr)) != kOkRC )
    goto errLabel;

  if( textIsEqualI(fieldStr,"true") )
    valRef = true;
  else
    if( textIsEqualI(fieldStr,"false") )
      valRef = false;
    else
      rc = cwLogError(kSyntaxErrorRC,"The value of a boolean must be either 'true' or 'false'.");

errLabel:
  return rc;
}
// Convert the field in the column titled 'colLabel' on the current line to
// a bool. Returns kInvalidArgRC if no column has that title, otherwise the
// result of the by-index version of this function.
rc_t _parse_bool_field( csv_t* p, const char* colLabel, bool& valRef )
{
  unsigned colIdx;

  if((colIdx = _title_to_col_index(p, colLabel)) == kInvalidIdx )
    return cwLogError(kInvalidArgRC,"The column label '%s' is not valid.",cwStringNullGuard(colLabel));

  return _parse_bool_field(p,colIdx,valRef);
}
2023-05-12 01:09:04 +00:00
// Open the CSV file 'fname' and read its title line.
//
// hRef           - if valid on entry the reader it refers to is destroyed first.
//                  On success it refers to the new reader.
// titleA[titleN] - optional list of column titles which must exist in the file.
//
// Returns kOkRC on success. On failure the error is logged and the partially
// built reader is released.
//
// NOTE(review): the statement which stores the new reader in 'hRef', the
// 'errLabel:' label and the cleanup on failure were missing from this copy of
// the file and have been reconstructed - confirm the handle API (set()).
cw::rc_t cw::csv::create( handle_t& hRef, const char* fname, const char** titleA, unsigned titleN )
{
  rc_t rc;

  if((rc = destroy(hRef)) != kOkRC )
    return rc;

  csv_t* p = mem::allocZ<csv_t>();

  //_store_title_array( p, titleA, titleN );

  if((rc = file::open(p->fH,fname,file::kReadFl)) != kOkRC )
    goto errLabel;

  if((rc = _create_col_ref_array( p, titleA, titleN )) != kOkRC )
    goto errLabel;

  hRef.set(p);

errLabel:
  if( rc != kOkRC )
  {
    rc = cwLogError(rc,"CSV file open failed.");

    // without this the record, the open file and the column array would leak
    _destroy(p);
  }

  return rc;
}
// Destroy the reader referred to by 'hRef' and invalidate the handle.
// Calling this with a handle that is not valid is legal and does nothing.
//
// NOTE(review): the statement which clears the handle was missing from this
// copy of the file and has been reconstructed - confirm the handle API (clear()).
cw::rc_t cw::csv::destroy(handle_t& hRef )
{
  rc_t rc = kOkRC;

  if(!hRef.isValid() )
    return rc;

  csv_t* p = _handleToPtr(hRef);

  if((rc = _destroy(p)) != kOkRC )
    return rc;

  // the object is gone - make sure the handle can not be used to reach it
  hRef.clear();

  return rc;
}
// Count the lines in the file.
//
// The current file position is saved, the file is rewound, the lines are
// counted with file::lineCount() and the saved position is restored.
// 'lineCntRef' is set to 0 on entry.
//
// Returns kOkRC on success. Every failure is logged. If a step fails the
// steps after it, including the restore of the file position, are skipped.
cw::rc_t cw::csv::line_count( handle_t h, unsigned& lineCntRef )
{
  rc_t   rc     = kOkRC;
  csv_t* p      = _handleToPtr(h);
  long   offset = 0;

  lineCntRef = 0;

  if((rc = file::tell(p->fH,&offset)) != kOkRC )
  {
    rc = cwLogError(rc,"file tell() failed.");
    goto errLabel;
  }

  if((rc = file::seek(p->fH,file::kBeginFl,0)) != kOkRC )
  {
    rc = cwLogError(rc,"file seek failed.");
    goto errLabel;
  }

  if((rc = file::lineCount(p->fH, &lineCntRef)) != kOkRC )
  {
    rc = cwLogError(rc,"CSV line count query failed.");
    goto errLabel;
  }

  // put the file position back where the caller left it
  if((rc = file::seek(p->fH,file::kBeginFl,offset)) != kOkRC )
  {
    rc = cwLogError(rc,"file seek() failed.");
    goto errLabel;
  }

errLabel:
  return rc;
}
2023-05-12 13:00:50 +00:00
// Return the count of columns found on the title line of the file.
unsigned cw::csv::col_count( handle_t h )
{
  csv_t* p = _handleToPtr(h);
  return p->colN;
}
// Return the title of column 'idx', or nullptr if 'idx' is not a valid
// column index. The returned string is owned by the reader.
const char* cw::csv::col_title( handle_t h, unsigned idx )
{
  csv_t* p = _handleToPtr(h);

  if( idx >= p->colN )
    return nullptr;

  return p->colA[idx].title;
}
2023-05-12 01:09:04 +00:00
// Return the index of the column titled 'title', or kInvalidIdx if there
// is no such column.
unsigned cw::csv::title_col_index( handle_t h, const char* title )
{
  csv_t* p = _handleToPtr(h);
  return _title_to_col_index(p,title);
}
2024-01-28 18:20:10 +00:00
// Return true if the file has a column titled 'title'.
bool cw::csv::has_field( handle_t h, const char* title )
{
  csv_t* p = _handleToPtr(h);
  return _title_to_col_index(p,title) != kInvalidIdx;
}
2023-05-12 01:09:04 +00:00
// Move back to the start of the file.
//
// The file is rewound and its first line (the title line) is read into the
// line buffer, so that the next call to next_line() reads the first data
// line. The current line index is set to 0 on success only.
//
// Returns kOkRC, the logged seek error, or the result of _fill_line_buffer().
cw::rc_t cw::csv::rewind( handle_t h )
{
  rc_t   rc = kOkRC;
  csv_t* p  = _handleToPtr(h);

  if((rc = file::seek(p->fH,file::kBeginFl,0)) != kOkRC )
  {
    rc = cwLogError(rc,"Rewind failed on CSV.");
    goto errLabel;
  }

  // consume the title line
  if((rc = _fill_line_buffer(p)) != kOkRC )
    goto errLabel;

  p->curLineIdx = 0;

errLabel:
  return rc;
}
// Advance to the next line of the file and locate its fields.
//
// The current line index is incremented before the line is read, so it is
// incremented even if the read fails.
//
// Returns kOkRC on success, kEofRC (not logged) when _fill_line_buffer()
// reports the end of the file, or the logged parse error.
cw::rc_t cw::csv::next_line( handle_t h )
{
  rc_t   rc = kOkRC;
  csv_t* p  = _handleToPtr(h);

  p->curLineIdx += 1;

  if((rc = _fill_line_buffer(p)) != kOkRC )
    goto errLabel;

  // split the line into fields and record where each field begins
  if((rc = _parse_line_buf(p, p->curColCnt, _update_col_array)) != kOkRC )
  {
    rc = cwLogError(rc,"Error parsing line index %i.",p->curLineIdx);
    goto errLabel;
  }

errLabel:
  return rc;
}
// Return the index of the current line.
unsigned cw::csv::cur_line_index( handle_t h )
{
  csv_t* p = _handleToPtr(h);
  return p->curLineIdx;
}
// Get the length, as measured by textLength(), of the text of field
// 'colIdx' on the current line.
//
// 'charCntRef' is set to 0 on entry and is left at 0 on failure.
//
// Returns kOkRC, or the logged error if the field does not exist.
cw::rc_t cw::csv::field_char_count( handle_t h, unsigned colIdx, unsigned& charCntRef )
{
  rc_t        rc       = kOkRC;
  csv_t*      p        = _handleToPtr(h);
  const char* fieldStr = nullptr;

  charCntRef = 0;

  if((rc = _get_field_str(p,colIdx,fieldStr)) != kOkRC )
  {
    rc = cwLogError(rc,"Field char count failed.");
    goto errLabel;
  }

  charCntRef = textLength(fieldStr);

errLabel:
  return rc;
}
2024-02-20 02:57:54 +00:00
// parse_field() overloads which select the field by column index.
//
// Each overload converts the text of field 'colIdx' on the current line to
// the type of 'valRef' by forwarding to the matching _parse_xxx_field()
// helper, and returns that helper's result code.

// Parse a bool field.
cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, bool& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_bool_field( p, colIdx, valRef ) ;
}

// Parse a uint8_t field.
cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, uint8_t& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_number_field( p, colIdx, valRef ) ;
}

// Parse an unsigned field.
cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, unsigned& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_number_field( p, colIdx, valRef ) ;
}

// Parse an int field.
cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, int& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_number_field( p, colIdx, valRef ) ;
}

// Parse a double field.
cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, double& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_number_field( p, colIdx, valRef ) ;
}

// Get the text of a field. 'valRef' points into the reader's line buffer.
cw::rc_t cw::csv::parse_field( handle_t h, unsigned colIdx, const char*& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_string_field( p, colIdx, valRef );
}
2024-02-20 02:57:54 +00:00
// parse_field() overloads which select the field by column title.
//
// Each overload converts the field in the column titled 'colLabel' on the
// current line to the type of 'valRef'. kInvalidArgRC is returned if no
// column has that title.

// Parse a bool field.
cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, bool& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_bool_field(p, colLabel, valRef );
}

// Parse a uint8_t field.
cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, uint8_t& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_number_field(p, colLabel, valRef );
}

// Parse an unsigned field.
cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, unsigned& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_number_field(p, colLabel, valRef );
}

// Parse an int field.
cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, int& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_number_field(p, colLabel, valRef );
}

// Parse a double field.
cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, double& valRef )
{
  csv_t* p = _handleToPtr(h);
  return _parse_number_field(p, colLabel, valRef );
}

// Get the text of a field. 'valRef' points into the reader's line buffer.
cw::rc_t cw::csv::parse_field( handle_t h, const char* colLabel, const char*& valRef )
{
  csv_t*   p = _handleToPtr(h);
  unsigned colIdx;

  if((colIdx = _title_to_col_index(p, colLabel)) == kInvalidIdx )
    return cwLogError(kInvalidArgRC,"The column label '%s' is not valid.",cwStringNullGuard(colLabel));

  return _parse_string_field(p,colIdx,valRef);
}
// Exercise the CSV reader.
//
// args - configuration object holding:
//          "fname"  : name of the CSV file to read
//          "titleL" : list of column titles which the file must contain
//
// The file is opened and, for up to the first 10 data lines, the "opcode"
// and "d1" fields are read and printed.
//
// Returns kOkRC on success, including when the file has fewer than 10 data
// lines, or the first error encountered.
//
// NOTE(review): the braces, the 'else', the 'errLabel:' label and any
// single-token statements were missing from this copy of the file. The
// structure below is reconstructed. Three deliberate changes were made:
//  - the inner 'rc_t rc' which shadowed the function's 'rc' is removed, so
//    that failures inside the nested scope are actually returned,
//  - the reader is destroyed before returning so that it does not leak,
//  - reaching the end of the file is not reported as an error.
cw::rc_t cw::csv::test( const object_t* args )
{
  rc_t            rc     = kOkRC;
  const char*     fname  = nullptr;
  const object_t* titleL = nullptr;
  csv::handle_t   csvH;

  if((rc = args->getv("fname",fname,
                      "titleL",titleL )) != kOkRC )
  {
    rc = cwLogError(rc,"CSV test arg. parse failed.");
    goto errLabel;
  }
  else
  {
    // These declarations live in a nested scope so that the 'goto' above
    // does not jump over them.
    unsigned    titleN = titleL == nullptr ? 0 : titleL->child_count();
    const char* titleA[ titleN ];   // relies on the compiler's variable length array extension

    for(unsigned i=0; i<titleN; ++i)
      if((rc = titleL->child_ele(i)->value(titleA[i])) != kOkRC )
      {
        cwLogError(rc,"Reference title array parsing failed.");
        goto errLabel;
      }

    if((rc = csv::create(csvH,fname,titleA,titleN)) != kOkRC )
    {
      rc = cwLogError(rc,"CSV create failed.");
      goto errLabel;
    }

    for(unsigned i=0; i<10 && (rc = next_line(csvH)) == kOkRC; ++i )
    {
      const char* opcode = nullptr;
      unsigned    d0=0xff,d1=0xff;

      // NOTE(review): 'd0' is printed but not read here - a '"d0",d0,'
      // argument line may have been lost from this copy of the file.
      if((rc = getv(csvH,"opcode",opcode,
                    "d1",d1)) != kOkRC )
      {
        rc = cwLogError(rc,"CSV get failed.");
        break;  // keep the error: the loop condition would overwrite 'rc'
      }

      printf("%s %i %i\n",cwStringNullGuard(opcode),d0,d1);
    }

    // running out of lines before 10 have been read is not a failure
    if( rc == kEofRC )
      rc = kOkRC;
  }

errLabel:
  // destroy() does nothing if the reader was never created
  destroy(csvH);

  return rc;
}