2024-12-01 19:35:24 +00:00
|
|
|
//| Copyright: (C) 2020-2024 Kevin Larke <contact AT larke DOT org>
|
|
|
|
//| License: GNU GPL version 3.0 or above. See the accompanying LICENSE file.
|
2019-12-19 03:24:12 +00:00
|
|
|
#include "cwCommon.h"
|
|
|
|
#include "cwLog.h"
|
|
|
|
#include "cwCommonImpl.h"
|
|
|
|
#include "cwMem.h"
|
|
|
|
#include "cwFile.h"
|
2024-05-29 16:37:53 +00:00
|
|
|
#include "cwTest.h"
|
|
|
|
#include "cwObject.h"
|
2019-12-19 03:24:12 +00:00
|
|
|
#include "cwLex.h"
|
|
|
|
|
|
|
|
|
|
|
|
namespace cw
|
|
|
|
{
|
2019-12-27 21:52:45 +00:00
|
|
|
namespace lex
|
2019-12-19 03:24:12 +00:00
|
|
|
{
|
2019-12-27 21:52:45 +00:00
|
|
|
// Attribute flags kept in lex_t.attrFlags which record the suffix
// found on the current numeric token.
enum
{
  kRealFloatLexFl   = 1 << 0,  // a real literal ended with an 'f' or 'F' suffix
  kIntUnsignedLexFl = 1 << 1   // an integer literal ended with a 'u' or 'U' suffix
};
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
struct lex_str;

// Signature of the built-in token recognizer functions.
//   p      - lexer state record
//   cp     - first character of the text to examine
//   cn     - count of characters available at cp[]
//   keyStr - fixed string associated with the recognizer (may be nullptr)
// The return value is used by the lexer as the count of characters matched
// at cp[]; 0 means the text did not match.
using lexMatcherFuncPtr_t = unsigned (*)( struct lex_str* p, const char* cp, unsigned cn, const char* keyStr );
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// token match function record
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
unsigned typeId; // token type this matcher recognizes
|
|
|
|
lexMatcherFuncPtr_t funcPtr; // recognizer function (only used if userPtr==nullptr)
|
|
|
|
char* tokenStr; // fixed string data used by the recognizer (only used if userPtr==nullptr)
|
|
|
|
lexUserMatcherPtr_t userPtr; // user defined recognizer function (only used if funcPtr==nullptr)
|
|
|
|
bool enableFl; // true if this matcher is enabled
|
|
|
|
} lexMatcher;
|
2019-12-19 03:24:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
typedef struct lex_str
|
|
|
|
{
|
|
|
|
const char* cp; // character buffer
|
|
|
|
unsigned cn; // count of characters in buffer
|
|
|
|
unsigned ci; // current buffer index position
|
|
|
|
unsigned flags; // lexer control flags
|
|
|
|
unsigned curTokenId; // type id of the current token
|
|
|
|
unsigned curTokenCharIdx; // index into cp[] of the current token
|
|
|
|
unsigned curTokenCharCnt; // count of characters in the current token
|
|
|
|
unsigned curLine; // line number of the current token
|
|
|
|
unsigned curCol; // column number of the current token
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
unsigned nextLine;
|
|
|
|
unsigned nextCol;
|
|
|
|
char* blockBegCmtStr;
|
|
|
|
char* blockEndCmtStr;
|
|
|
|
char* lineCmtStr;
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
lexMatcher* mfp; // base of matcher array
|
|
|
|
unsigned mfi; // next available matcher array slot
|
|
|
|
unsigned mfn; // count of elementes in mfp[]
|
|
|
|
char* textBuf; // text buf used by lexSetFile()
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
unsigned attrFlags; // used to store the int and real suffix type flags
|
|
|
|
unsigned lastRC;
|
|
|
|
} lex_t;
|
2019-12-19 03:24:12 +00:00
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Convert a lexer handle to a pointer to the lex_t record behind it.
lex_t* _handleToPtr(handle_t h)
{
  lex_t* p = handleToPtr<handle_t,lex_t>(h);
  return p;
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return true if 'c' is the line-feed character.
bool _lexIsNewline( char c )
{
  const char newlineChar = '\n';
  return newlineChar == c;
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
bool _lexIsCommentTypeId( unsigned typeId )
|
|
|
|
{ return typeId == kBlockCmtLexTId || typeId == kLineCmtLexTId; }
|
2019-12-19 03:24:12 +00:00
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Locate 'keyStr' in cp[cn] and return the index into cp[cn] of the character
|
|
|
|
// following the last char in 'keyStr'. If keyStr is not found return kInvalidIdx.
|
|
|
|
unsigned _lexScanTo( const char* cp, unsigned cn, const char* keyStr )
|
|
|
|
{
|
|
|
|
unsigned i = 0;
|
|
|
|
unsigned n = strlen(keyStr);
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
if( n <= cn )
|
|
|
|
for(; i<=cn-n; ++i)
|
|
|
|
if( strncmp(cp + i, keyStr, n ) == 0 )
|
|
|
|
return i+n;
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
return kInvalidIdx;
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match the fixed string 'keyStr' at the beginning of cp[cn].
// Returns the length of 'keyStr' on a match and 0 otherwise.
// 'p' is not used.
unsigned _lexExactStringMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  const unsigned n = strlen(keyStr);

  // The key must fit inside the remaining text - otherwise the comparison
  // could match (and read) characters beyond the end of the buffer.
  if( n > cn )
    return 0;

  return strncmp(keyStr,cp,n) == 0 ? n : 0;
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match a run of white space at the beginning of cp[cn].
// Returns the count of leading white space characters (0 if cp[0] is not a space).
// 'p' and 'keyStr' are not used.
unsigned _lexSpaceMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  unsigned i = 0;

  // The <cctype> classifiers require a value representable as unsigned char;
  // a plain (possibly negative) char must be converted before the call.
  while( i<cn && isspace(static_cast<unsigned char>(cp[i])) )
    ++i;

  return i;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match a real number at the beginning of cp[cn].
//
// Accepted form: ['-'] digits ['.' digits] [('e'|'E') ['-'] digits] ['f'|'F']
// A numeric string is only reported as a real if it contains a decimal
// point, a complete exponent or the 'f'/'F' suffix - otherwise 0 is
// returned so that it can be recognized as an integer.
//
// When the suffix is present kRealFloatLexFl is set in p->attrFlags.
// Returns the count of characters in the real or 0 if there is no match.
// 'keyStr' is not used.
unsigned _lexRealMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  unsigned i        = 0;
  unsigned pointCnt = 0;      // count of decimal points (0 or 1)
  unsigned digitCnt = 0;      // count of mantissa digits
  bool     expFl    = false;  // true if a complete exponent was found

  // scan the mantissa
  for(; i<cn; ++i)
  {
    // allow a leading '-'
    if( i==0 && cp[i]=='-' )
      continue;

    // allow digits (the <cctype> classifiers require an unsigned char value)
    if( isdigit(static_cast<unsigned char>(cp[i])) )
    {
      ++digitCnt;
      continue;
    }

    // allow exactly one decimal point
    if( cp[i]=='.' && pointCnt==0 )
    {
      ++pointCnt;
      continue;
    }

    break;
  }

  // no digits - no number
  if( digitCnt == 0 )
    return 0;

  // if the mantissa is followed by an 'e' then look for an exponent
  if( i<cn && (cp[i]=='e' || cp[i]=='E') )
  {
    unsigned j           = i + 1;
    unsigned expDigitCnt = 0;

    // allow the char following the 'e' to be '-'
    if( j<cn && cp[j]=='-' )
      ++j;

    for(; j<cn && isdigit(static_cast<unsigned char>(cp[j])); ++j)
      ++expDigitCnt;

    // An exponent only exists if digits followed the 'e'.  Otherwise the 'e'
    // is not part of the number and 'i' is left on it so that it is not
    // consumed as part of this token.
    if( expDigitCnt > 0 )
    {
      expFl = true;
      i     = j;
    }
  }

  // if the real has a single precision suffix
  if( i<cn && (cp[i]=='f' || cp[i]=='F') )
  {
    p->attrFlags = cwSetFlag(p->attrFlags,kRealFloatLexFl);
    ++i;
  }

  // Note that a string w/o a decimal point will still trigger a match
  // if it has an exponent or a float suffix.
  if( expFl || pointCnt==1 || cwIsFlag(p->attrFlags,kRealFloatLexFl) )
    return i;

  return 0; // no-match return
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match a decimal integer at the beginning of cp[cn].
//
// Accepted form: ['-'] digits ['u'|'U']
// When an unsigned suffix follows an unsigned digit string kIntUnsignedLexFl is
// set in p->attrFlags and the suffix is included in the match.  A suffix
// following a negative integer is logged as a syntax error and is not
// included in the match.
// Returns the count of characters matched or 0 if there is no match.
// 'keyStr' is not used.
unsigned _lexIntMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  unsigned i        = 0;
  bool     signFl   = false;
  unsigned digitCnt = 0;

  for(; i<cn; ++i)
  {
    if( i==0 && cp[i]=='-' )
    {
      signFl = true;
      continue;
    }

    // the <cctype> classifiers require a value representable as unsigned char
    if( !isdigit(static_cast<unsigned char>(cp[i])) )
      break;

    ++digitCnt;
  }

  // BUG BUG BUG
  // If an integer is specified using 'e' notation
  // (see _lexRealMatcher()) and the number of exponent places
  // specified following the 'e' is positive and >= number of
  // digits following the decimal point (in effect zeros are
  // padded on the right side) then the value is an integer.
  //
  // The current implementation recognizes all numeric strings
  // containing a decimal point as reals.

  // if no integer was found
  if( digitCnt==0 )
    return 0;

  // check for the unsigned suffix
  if( i<cn && (cp[i]=='u' || cp[i]=='U') )
  {
    if( signFl )
      cwLogError(kSyntaxErrorRC,"A signed integer has a 'u' or 'U' suffix on line %i",p->curLine);
    else
    {
      p->attrFlags = cwSetFlag(p->attrFlags,kIntUnsignedLexFl);
      ++i;
    }
  }

  return i;
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match a hexadecimal literal at the beginning of cp[cn].
//
// Accepted form: "0x" followed by one or more hex digits.
// Returns the count of characters matched (prefix included) or 0 if there is no match.
// 'p' and 'keyStr' are not used.
unsigned _lexHexMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  const unsigned prefixCharN = 2;

  // the shortest possible literal is the prefix followed by one digit
  if( cn < prefixCharN + 1 )
    return 0;

  if( cp[0]!='0' || cp[1]!='x' )
    return 0;

  unsigned i = prefixCharN;

  // the <cctype> classifiers require a value representable as unsigned char
  while( i<cn && isxdigit(static_cast<unsigned char>(cp[i])) )
    ++i;

  // a bare "0x" which is not followed by a hex digit is not a hex literal
  return i > prefixCharN ? i : 0;
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match an identifier at the beginning of cp[cn].
//
// An identifier begins with a letter or '_' and continues with letters,
// digits and '_'.  cp[0] is read unconditionally, so cn must be at least 1.
// Returns the count of characters in the identifier or 0 if there is no match.
// 'p' and 'keyStr' are not used.
unsigned _lexIdentMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  // the <cctype> classifiers require a value representable as unsigned char
  const unsigned char c0 = static_cast<unsigned char>(cp[0]);

  if( !isalpha(c0) && c0 != '_' )
    return 0;

  unsigned i = 1;

  for(; i<cn; ++i)
  {
    const unsigned char c = static_cast<unsigned char>(cp[i]);

    if( !isalnum(c) && c != '_' )
      break;
  }

  return i;
}
|
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match a double quoted string literal at the beginning of cp[cn].
//
// A backslash escapes the character which follows it, so an escaped quote
// does not terminate the literal.  cp[0] is read unconditionally, so cn
// must be at least 1.
// Returns the count of characters in the literal, including both quotes, or
// 0 if cp[0] is not a quote.  If the closing quote is missing a syntax
// error is logged, the error code is stored in p->lastRC and 0 is returned.
// 'keyStr' is not used.
unsigned _lexQStrMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  if( cp[0] != '"' )
    return 0;

  bool escFl = false;

  for(unsigned i=1; i<cn; ++i)
  {
    // the previous char was a backslash - this char is taken literally
    if( escFl )
    {
      escFl = false;
      continue;
    }

    if( cp[i] == '\\' )
    {
      escFl = true;
      continue;
    }

    if( cp[i] == '"' )
      return i+1;
  }

  // The result code must not be returned as the match length: record it in
  // the lexer state, where the scanner looks for matcher errors, and report
  // that nothing was matched.
  p->lastRC = cwLogError(kSyntaxErrorRC, "Missing string literal end quote on line: %i.", p->curLine);

  return 0;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match a single quoted character literal at the beginning of cp[cn].
//
// The literal must be exactly three characters: an opening single quote,
// any one character and a closing single quote.
// Returns 3 on a match and 0 otherwise.  'p' and 'keyStr' are not used.
unsigned _lexQCharMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  const char     quoteChar   = '\'';
  const unsigned closeIdx    = 2;
  const unsigned literalChrN = 3;

  // the opening quote
  if( cn == 0 || cp[0] != quoteChar )
    return 0;

  // the closing quote
  if( closeIdx >= cn || cp[closeIdx] != quoteChar )
    return 0;

  return literalChrN;
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match a block comment at the beginning of cp[cn].
//
// The comment begins with p->blockBegCmtStr and ends with the first
// following occurrence of p->blockEndCmtStr.
// Returns the count of characters in the comment, delimiters included, or
// 0 if cp[] does not begin with a block comment.  If the begin delimiter is
// found without a matching end delimiter a syntax error is logged and 0 is returned.
// 'keyStr' is not used.
unsigned _lexBlockCmtMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  const unsigned n = strlen(p->blockBegCmtStr);

  // The begin delimiter must fit in the remaining text. Without this check
  // 'cn-n' below would wrap around and the comparison could read past the buffer.
  if( n > cn )
    return 0;

  if( strncmp( p->blockBegCmtStr, cp, n ) != 0 )
    return 0;

  // locate the char following the end delimiter (relative to cp+n)
  const unsigned i = _lexScanTo(cp + n, cn-n, p->blockEndCmtStr);

  if( i == kInvalidIdx )
  {
    cwLogError(kSyntaxErrorRC, "Missing end of block comment on line:%i.", p->curLine);
    return 0;
  }

  return n + i;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Match a line comment at the beginning of cp[cn].
//
// The comment begins with p->lineCmtStr and runs through the next newline.
// Returns the count of characters in the comment, including the
// terminating newline, or 0 if cp[] does not begin with a line comment.
// If there is no newline the comment extends to the end of the text.
// 'keyStr' is not used.
unsigned _lexLineCmtMatcher( lex_t* p, const char* cp, unsigned cn, const char* keyStr )
{
  const unsigned n = strlen(p->lineCmtStr);

  // The comment prefix must fit in the remaining text. Without this check
  // 'cn-n' below would wrap around and the comparison could read past the buffer.
  if( n > cn )
    return 0;

  if( strncmp( p->lineCmtStr, cp, n ) != 0 )
    return 0;

  const char     newlineStr[] = "\n";
  const unsigned i            = _lexScanTo(cp + n, cn-n, newlineStr);

  // no EOL was found so the comment must be on the last line of the source
  if( i == kInvalidIdx )
    return cn;

  return n + i;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Append a recognizer to the matcher array, growing the array when it is full.
//
//   typeId  - token type id reported by the recognizer
//   funcPtr - built-in recognizer function, or nullptr if 'userPtr' is given
//   keyStr  - optional fixed string; a private copy is stored with the matcher
//   userPtr - user recognizer function, or nullptr if 'funcPtr' is given
//
// Exactly one of 'funcPtr' and 'userPtr' must be non-null.
// The new matcher is enabled.  Always returns kOkRC.
rc_t _lexInstallMatcher( lex_t* p, unsigned typeId, lexMatcherFuncPtr_t funcPtr, const char* keyStr, lexUserMatcherPtr_t userPtr )
{
  assert( funcPtr==nullptr || userPtr==nullptr );
  assert( !(funcPtr==nullptr && userPtr==nullptr));

  // if every slot of the matcher array is in use - then expand it
  if( p->mfi == p->mfn )
  {
    const int   growCnt = 10;
    lexMatcher* grownV  = mem::allocZ<lexMatcher>( p->mfn + growCnt );

    // carry the existing records over to the new array
    memcpy(grownV,p->mfp,p->mfi*sizeof(lexMatcher));
    mem::release(p->mfp);

    p->mfp  = grownV;
    p->mfn += growCnt;
  }

  // fill in the next available slot
  lexMatcher* m = p->mfp + p->mfi;

  m->tokenStr = nullptr;
  m->typeId   = typeId;
  m->funcPtr  = funcPtr;
  m->userPtr  = userPtr;
  m->enableFl = true;

  // store a copy of the token string
  if( keyStr != nullptr )
    m->tokenStr = mem::duplStr(keyStr);

  p->mfi += 1;

  return kOkRC;
}
|
|
|
|
// Rewind the lexer to the beginning of the current text buffer:
// the scan position and the line/column counters are zeroed, the current
// token is invalidated and the stored result code is cleared.
// Always returns kOkRC.
rc_t _lexReset( lex_t* p )
{
  // scan position
  p->ci       = 0;
  p->nextLine = 0;
  p->nextCol  = 0;
  p->curLine  = 0;
  p->curCol   = 0;

  // there is no current token
  p->curTokenCharCnt = 0;
  p->curTokenCharIdx = kInvalidIdx;
  p->curTokenId      = kErrorLexTId;

  // clear any previous error or end-of-file state
  p->lastRC = kOkRC;

  return kOkRC;
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Point the lexer at the text cp[cn] and rewind it.
// Only the pointer is stored - the text is not copied.
// Returns the result of _lexReset().
rc_t _lexSetTextBuffer( lex_t* p, const char* cp, unsigned cn )
{
  p->cn = cn;
  p->cp = cp;

  const rc_t rc = _lexReset(p);

  return rc;
}
|
|
|
|
|
|
|
|
// Locate an installed matcher by type id or by token string.
//
// A matcher is selected if 'id' is not kInvalidId and equals the matcher's
// type id, or if 'tokenStr' is not nullptr and equals the matcher's token string.
// Returns the first such matcher in installation order or nullptr if none was found.
lexMatcher* _lexFindUserToken( lex_t* p, unsigned id, const char* tokenStr )
{
  const bool idValidFl = id != kInvalidId;

  for(unsigned i=0; i<p->mfi; ++i)
  {
    lexMatcher* m = p->mfp + i;

    const bool idMatchFl = idValidFl && m->typeId == id;

    if( idMatchFl )
      return m;

    const bool strMatchFl = m->tokenStr != nullptr && tokenStr != nullptr && strcmp(m->tokenStr,tokenStr)==0;

    if( strMatchFl )
      return m;
  }

  return nullptr;
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
}
|
2019-12-19 03:24:12 +00:00
|
|
|
} // namespace cw
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Create a lexer which scans the text cp[cn].
//
//   hRef  - handle which receives the new lexer; a lexer already held by
//           the handle is destroyed first
//   cp,cn - text to scan (the pointer is stored, the text is not copied)
//   flags - lexer control flags
//
// The default recognizers are installed and the comment delimiters are set
// to "//", "/*" and "*/".  The quoted character recognizer is only
// installed when kReturnQCharLexFl is set in 'flags'.
cw::rc_t cw::lex::create( handle_t& hRef, const char* cp, unsigned cn, unsigned flags )
{
  // default recognizers, listed in installation order
  struct dfltMatcher_t
  {
    unsigned            typeId;
    lexMatcherFuncPtr_t funcPtr;
  };

  const dfltMatcher_t dfltMatcherA[] =
  {
    { kSpaceLexTId,    _lexSpaceMatcher    },
    { kRealLexTId,     _lexRealMatcher     },
    { kIntLexTId,      _lexIntMatcher      },
    { kHexLexTId,      _lexHexMatcher      },
    { kIdentLexTId,    _lexIdentMatcher    },
    { kQStrLexTId,     _lexQStrMatcher     },
    { kBlockCmtLexTId, _lexBlockCmtMatcher },
    { kLineCmtLexTId,  _lexLineCmtMatcher  }
  };

  const int init_mfn = 10;
  rc_t      rc       = kOkRC;

  // release any lexer already attached to the handle
  if((rc = lex::destroy(hRef)) != kOkRC )
    return rc;

  lex_t* p = mem::allocZ<lex_t>();

  p->flags = flags;

  _lexSetTextBuffer( p, cp, cn );

  // allocate the matcher array
  p->mfp = mem::allocZ<lexMatcher>( init_mfn );
  p->mfn = init_mfn;
  p->mfi = 0;

  // default comment delimiters
  p->lineCmtStr     = mem::duplStr( "//" );
  p->blockBegCmtStr = mem::duplStr( "/*" );
  p->blockEndCmtStr = mem::duplStr( "*/" );

  for(const dfltMatcher_t& m : dfltMatcherA)
    _lexInstallMatcher( p, m.typeId, m.funcPtr, nullptr, nullptr );

  if( cwIsFlag(flags,kReturnQCharLexFl) )
    _lexInstallMatcher( p, kQCharLexTId, _lexQCharMatcher, nullptr, nullptr );

  hRef.set(p);

  _lexReset(p);

  return rc;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Release a lexer and everything it owns: the token strings, the matcher
// array, the comment delimiter strings, the file text buffer and the lexer
// record.  The handle is cleared.
// An invalid handle is not an error.  Always returns kOkRC.
cw::rc_t cw::lex::destroy( handle_t& hRef )
{
  if( !hRef.isValid() )
    return kOkRC;

  lex_t* p = _handleToPtr(hRef);

  if( p == nullptr )
    return kOkRC;

  if( p->mfp != nullptr )
  {
    // free the token strings held by the matchers
    for(unsigned i=0; i<p->mfi; ++i)
    {
      if( p->mfp[i].tokenStr != nullptr )
        mem::release(p->mfp[i].tokenStr);
    }

    // free the matcher array
    mem::release(p->mfp);
    p->mfi = 0;
    p->mfn = 0;
  }

  // free the comment delimiters and the file text buffer
  mem::release(p->lineCmtStr);
  mem::release(p->blockBegCmtStr);
  mem::release(p->blockEndCmtStr);
  mem::release(p->textBuf);

  // free the lexer object
  mem::release(p);
  hRef.set(nullptr);

  return kOkRC;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Rewind the lexer to the beginning of its current text buffer.
cw::rc_t cw::lex::reset( handle_t h )
{
  return _lexReset( _handleToPtr(h) );
}
|
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return true if the handle 'h' is valid.
bool cw::lex::isValid( handle_t h )
{
  const bool validFl = h.isValid();
  return validFl;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Replace the text scanned by the lexer with cp[cn] and rewind the lexer.
// Only the pointer is stored - the text is not copied.
cw::rc_t cw::lex::setTextBuffer( handle_t h, const char* cp, unsigned cn )
{
  return _lexSetTextBuffer( _handleToPtr(h), cp, cn );
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Read the contents of the file 'fn' into a buffer owned by the lexer and
// make that buffer the text to be scanned.
//
// The file size is found by seeking to the end of the file.  The buffer is
// allocated one byte longer than the file.
// Returns the code of the first operation which failed.  Once the file has
// been opened it is closed on every path; if everything else succeeded the
// result of closing the file is returned.
cw::rc_t cw::lex::setFile( handle_t h, const char* fn )
{
  rc_t           rc = kOkRC;
  file::handle_t fh;
  lex_t*         p  = _handleToPtr(h);
  long           n  = 0;

  assert( fn != nullptr && p != nullptr );

  // open the file (nothing needs to be released if this fails)
  if((rc = file::open(fh,fn,file::kReadFl)) != kOkRC )
    return rc;

  // seek to the end of the file
  if((rc = file::seek(fh,file::kEndFl,0)) != kOkRC )
    goto errLabel;

  // get the length of the file
  if((rc = file::tell(fh,&n)) != kOkRC )
    goto errLabel;

  // rewind to the beginning of the file
  if((rc = file::seek(fh,file::kBeginFl,0)) != kOkRC )
    goto errLabel;

  // allocate the text buffer
  if((p->textBuf = mem::resizeZ<char>(p->textBuf, n+1)) == nullptr )
  {
    rc = cwLogError(kMemAllocFailRC,"Unable to allocate the text file buffer for:'%s'.",fn);
    goto errLabel;
  }

  // read the file into the buffer
  if((rc = file::read(fh,p->textBuf,n)) != kOkRC )
    goto errLabel;

  // scan the text which was just read
  if((rc = _lexSetTextBuffer( p, p->textBuf, n )) != kOkRC )
    goto errLabel;

 errLabel:
  // close the file
  rc_t rc0 = file::close(fh);

  // an earlier error takes precedence over a close error
  if( rc != kOkRC )
    return rc;

  return rc0;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Register a fixed string token.
//
//   id       - type id returned when the token is recognized
//   tokenStr - exact text of the token
//
// Fails with kInvalidArgRC if an installed matcher already has this id or
// this token string.
cw::rc_t cw::lex::registerToken( handle_t h, unsigned id, const char* tokenStr )
{
  lex_t* p = _handleToPtr(h);

  // prevent duplicate tokens
  const bool duplFl = _lexFindUserToken( p, id, tokenStr ) != nullptr;

  if( duplFl )
    return cwLogError( kInvalidArgRC, "id:%i token:%s duplicates the token string or id", id, tokenStr );

  return _lexInstallMatcher( p, id, _lexExactStringMatcher, tokenStr, nullptr );
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Register a user defined recognizer function.
//
//   id      - type id returned when the recognizer matches
//   userPtr - recognizer function
//
// Fails with kInvalidArgRC if an installed matcher already has this id.
cw::rc_t cw::lex::registerMatcher( handle_t h, unsigned id, lexUserMatcherPtr_t userPtr )
{
  lex_t* p = _handleToPtr(h);

  // prevent duplicate tokens
  const bool duplFl = _lexFindUserToken( p, id, nullptr ) != nullptr;

  if( duplFl )
    return cwLogError(kInvalidArgRC, "A token matching function has already been installed for token id: %i", id );

  return _lexInstallMatcher( p, id, nullptr, nullptr, userPtr );
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Enable or disable the first installed matcher whose type id is 'id'.
// Fails with kInvalidArgRC if no matcher has that id.
cw::rc_t cw::lex::enableToken( handle_t h, unsigned id, bool enableFl )
{
  lex_t*      p = _handleToPtr(h);
  lexMatcher* m = nullptr;

  // locate the first matcher with the requested type id
  for(unsigned mi=0; mi<p->mfi && m==nullptr; ++mi)
  {
    if( p->mfp[mi].typeId == id )
      m = p->mfp + mi;
  }

  if( m == nullptr )
    return cwLogError( kInvalidArgRC, "%i is not a valid token type id.",id);

  m->enableFl = enableFl;

  return kOkRC;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return the lexer control flags currently in effect.
unsigned cw::lex::filterFlags( handle_t h )
{
  const lex_t* p = _handleToPtr(h);
  const unsigned flags = p->flags;
  return flags;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Replace the lexer control flags with 'flags'.
void cw::lex::setFilterFlags( handle_t h, unsigned flags )
{
  _handleToPtr(h)->flags = flags;
}
|
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Scan the next token and return its type id.
//
// Every enabled matcher is tried at the current position and the longest
// match wins; among equal length matches the matcher installed first wins.
// A user defined token (type id >= kUserLexTId) replaces a previously
// selected identifier match of equal length, and also a longer one when
// kUserDefPriorityLexFl is set.
//
// Space and comment tokens are skipped unless kReturnSpaceLexFl /
// kReturnCommentsLexFl is set.  If no matcher recognizes the text, a one
// character kUnknownLexTId token is returned when kReturnUnknownLexFl is
// set; otherwise an error is logged and kErrorLexTId is returned.
//
// At the end of the text kEofLexTId is returned and the stored result
// code is set to kEofRC.  Once the stored result code is not kOkRC every
// call returns kErrorLexTId.
unsigned cw::lex::getNextToken( handle_t h )
{
  lex_t* p = _handleToPtr(h);

  if( p->lastRC != kOkRC )
    return kErrorLexTId;

  while( p->ci < p->cn )
  {
    unsigned i;
    unsigned mi         = 0;
    unsigned maxCharCnt = 0;            // length of the best match so far
    unsigned maxIdx     = kInvalidIdx;  // index into mfp[] of the best match so far

    // there is no current token until a matcher succeeds
    p->curTokenId      = kErrorLexTId;
    p->curTokenCharIdx = kInvalidIdx;
    p->curTokenCharCnt = 0;
    p->attrFlags       = 0;

    // try each matcher
    for(; mi<p->mfi; ++mi)
    {
      if( !p->mfp[mi].enableFl )
        continue;

      unsigned charCnt = 0;

      if( p->mfp[mi].funcPtr != nullptr )
        charCnt = p->mfp[mi].funcPtr(p, p->cp + p->ci, p->cn - p->ci, p->mfp[mi].tokenStr );
      else
        charCnt = p->mfp[mi].userPtr( p->cp + p->ci, p->cn - p->ci);

      // notice if the matcher set the error code
      if( p->lastRC != kOkRC )
        return kErrorLexTId;

      // if this matched token is longer than the prev. matched token or
      // if the prev matched token was an identifier and this matched token is an equal length user defined token
      // (maxIdx is always valid in the 2nd and 3rd terms because maxCharCnt>0 there)
      if( (charCnt > maxCharCnt)
          || (charCnt>0 && charCnt==maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId )
          || (charCnt>0 && charCnt<maxCharCnt  && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId && cwIsFlag(p->flags,kUserDefPriorityLexFl))
          )
      {
        maxCharCnt = charCnt;
        maxIdx     = mi;
      }
    }

    // no token was matched
    if( maxIdx == kInvalidIdx )
    {
      if( cwIsFlag(p->flags,kReturnUnknownLexFl) )
      {
        // consume a single character as an 'unknown' token
        maxCharCnt = 1;
      }
      else
      {
        cwLogError( kSyntaxErrorRC, "Unable to recognize token:'%c' on line %i.",*(p->cp+p->ci), p->curLine);
        return kErrorLexTId;
      }
    }

    // update the current line and column position
    p->curLine = p->nextLine;
    p->curCol  = p->nextCol;

    // find the next column and line position
    for(i=0; i<maxCharCnt; ++i)
    {
      if( _lexIsNewline(p->cp[ p->ci + i ]) )
      {
        p->nextLine++;

        // Columns are zero based (the first line starts at column 0 and the
        // public accessor adds 1) so a new line restarts at column 0.
        p->nextCol = 0;
      }
      else
        p->nextCol++;
    }

    bool returnFl = true;

    if( maxIdx != kInvalidIdx )
    {
      // check the space token filter
      if( (p->mfp[ maxIdx ].typeId == kSpaceLexTId) && (cwIsFlag(p->flags,kReturnSpaceLexFl)==0) )
        returnFl = false;

      // check the comment token filter
      if( _lexIsCommentTypeId(p->mfp[ maxIdx ].typeId) && (cwIsFlag(p->flags,kReturnCommentsLexFl)==0) )
        returnFl = false;
    }

    // update the lexer state
    p->curTokenId      = maxIdx==kInvalidIdx ? (unsigned)kUnknownLexTId : p->mfp[ maxIdx ].typeId;
    p->curTokenCharIdx = p->ci;
    p->curTokenCharCnt = maxCharCnt;

    // advance the text buffer
    p->ci += maxCharCnt;

    if( returnFl )
      return p->curTokenId;
  }

  p->lastRC = kEofRC;

  return kEofLexTId;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return the type id of the current token.
unsigned cw::lex::tokenId( handle_t h )
{
  const lex_t* p = _handleToPtr(h);
  const unsigned id = p->curTokenId;
  return id;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return a pointer to the first character of the current token, or nullptr
// if there is no current token.
// The pointer refers into the lexer's text buffer.  For a quoted string
// token the opening quote is skipped.  See tokenCharCount() for the
// length of the text.
const char* cw::lex::tokenText( handle_t h )
{
  lex_t* p = _handleToPtr(h);

  if( p->curTokenCharIdx == kInvalidIdx )
    return nullptr;

  const char* text = p->cp + p->curTokenCharIdx;

  // step over the opening quote of a string literal
  if( p->curTokenId == kQStrLexTId )
    text += 1;

  return text;
}
|
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return the count of characters in the text of the current token, or 0
// if there is no current token.
// For a quoted string token the two quote characters are not counted.
unsigned cw::lex::tokenCharCount( handle_t h )
{
  lex_t* p = _handleToPtr(h);

  if( p->curTokenCharIdx == kInvalidIdx )
    return 0;

  unsigned quoteCharN = 0;

  // exclude the opening and closing quote of a string literal
  if( p->curTokenId == kQStrLexTId )
    quoteCharN = 2;

  return p->curTokenCharCnt - quoteCharN;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Convert the text of the current token to an int using strtol() with
// base 0 (so the base is taken from the text's prefix).
// Returns 0 if there is no current token.
int cw::lex::tokenInt( handle_t h )
{
  const char* text = lex::tokenText(h);

  // tokenText() returns nullptr when there is no current token
  if( text == nullptr )
    return 0;

  return strtol( text,nullptr,0 );
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Convert the text of the current token to an unsigned int using
// strtoul() with base 0 (so the base is taken from the text's prefix).
// Returns 0 if there is no current token.
unsigned cw::lex::tokenUInt( handle_t h )
{
  const char* text = lex::tokenText(h);

  // tokenText() returns nullptr when there is no current token
  if( text == nullptr )
    return 0;

  // The unsigned conversion is used so that values above LONG_MAX are not
  // clamped on platforms where 'long' is no wider than 'unsigned'.
  return strtoul( text,nullptr,0 );
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Convert the text of the current token to a float using strtof().
// Returns 0 if there is no current token.
float cw::lex::tokenFloat( handle_t h )
{
  const char* text = lex::tokenText(h);

  // tokenText() returns nullptr when there is no current token
  if( text == nullptr )
    return 0.0f;

  return strtof( text,nullptr );
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Convert the text of the current token to a double using strtod().
// Returns 0 if there is no current token.
double cw::lex::tokenDouble( handle_t h )
{
  const char* text = lex::tokenText(h);

  // tokenText() returns nullptr when there is no current token
  if( text == nullptr )
    return 0.0;

  return strtod( text,nullptr );
}
|
2019-12-19 03:24:12 +00:00
|
|
|
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return true if the current token is an integer whose unsigned suffix
// flag (kIntUnsignedLexFl) was set when it was scanned.
bool cw::lex::tokenIsUnsigned( handle_t h )
{
  lex_t* p = _handleToPtr(h);

  if( p->curTokenId != kIntLexTId )
    return false;

  return cwIsFlag(p->attrFlags,kIntUnsignedLexFl);
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return true if the current token is a real whose single precision
// suffix flag (kRealFloatLexFl) was set when it was scanned.
bool cw::lex::tokenIsSinglePrecision( handle_t h )
{
  lex_t* p = _handleToPtr(h);

  if( p->curTokenId != kRealLexTId )
    return false;

  return cwIsFlag(p->attrFlags,kRealFloatLexFl);
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return the one based line number of the current token
// (the internal zero based line plus 1).
unsigned cw::lex::currentLineNumber( handle_t h )
{
  const lex_t* p = _handleToPtr(h);
  const unsigned zeroBasedLine = p->curLine;
  return zeroBasedLine + 1;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return the column number of the current token
// (the internally stored column plus 1).
unsigned cw::lex::currentColumnNumber( handle_t h )
{
  const lex_t* p = _handleToPtr(h);
  const unsigned storedCol = p->curCol;
  return storedCol + 1;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return the result code most recently stored by the lexer.
unsigned cw::lex::errorRC( handle_t h )
{
  const lex_t* p = _handleToPtr(h);
  const unsigned rc = p->lastRC;
  return rc;
}
|
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
// Return a printable label for the token type id 'typeId'.
//
// The built-in token types have fixed labels.  For any other id the token
// string of the matcher registered with that id is returned.  A matcher
// which has no token string (e.g. one registered as a function) is
// labeled "<user>" and an id with no matcher is labeled "<unknown>".
// The function never returns nullptr.
const char* cw::lex::idToLabel( handle_t h, unsigned typeId )
{
  lex_t* p = _handleToPtr(h);

  switch( typeId )
  {
    case kErrorLexTId:    return "<error>";
    case kEofLexTId:      return "<EOF>";
    case kSpaceLexTId:    return "<space>";
    case kRealLexTId:     return "<real>";
    case kIntLexTId:      return "<int>";
    case kHexLexTId:      return "<hex>";
    case kIdentLexTId:    return "<ident>";
    case kQStrLexTId:     return "<qstr>";
    case kQCharLexTId:    return "<qchar>";
    case kBlockCmtLexTId: return "<bcmt>";
    case kLineCmtLexTId:  return "<lcmt>";
    default:
      {
        lexMatcher* mp;
        if((mp = _lexFindUserToken(p,typeId,nullptr)) == nullptr )
          return "<unknown>";

        // matchers installed without a token string have no text to report
        if( mp->tokenStr == nullptr )
          return "<user>";

        return mp->tokenStr;
      }
  }

  return "<invalid>";
}
|
|
|
|
|
|
|
|
|
|
|
|
namespace cw
|
|
|
|
{
|
2019-12-27 21:52:45 +00:00
|
|
|
namespace lex
|
2019-12-19 03:24:12 +00:00
|
|
|
{
|
2019-12-27 21:52:45 +00:00
|
|
|
//{ { label:cwLexEx }
|
|
|
|
//(
|
|
|
|
// lexTest() gives a simple 'lex' example.
|
|
|
|
//)
|
2019-12-19 03:24:12 +00:00
|
|
|
|
2019-12-27 21:52:45 +00:00
|
|
|
//(
|
2024-05-29 16:37:53 +00:00
|
|
|
// Lexer example/test: scan a small fixed text which contains every default
// token type plus two registered tokens, and log each token found.
// Returns kOkRC on success; an error code if the lexer could not be
// created or if it reported an error token.  'args' is not used.
rc_t test( const test::test_args_t& args )
{
  rc_t     rc  = kOkRC;
  unsigned tid = kInvalidId;
  handle_t h;

  char buf[] =
    "123ident0\n 123.456\nident0\n"
    "0xa12+.2\n"
    "                       // comment \n"
    "/* block \n"
    "comment */"
    "\"quoted string\""
    "ident1 "
    "1234.56f"
    "345u"
    " // last line comment";

  // initialize a lexer with a buffer of text
  if((rc = lex::create(h,buf,strlen(buf), kReturnSpaceLexFl | kReturnCommentsLexFl)) != kOkRC )
  {
    rc = cwLogError(rc,"Lexer initialization failed.");
    goto errLabel;
  }

  // register some additional recognizers
  lex::registerToken(h,kUserLexTId+1,"+");
  lex::registerToken(h,kUserLexTId+2,"-");

  // ask for token id's
  while( (tid = lex::getNextToken(h)) != kEofLexTId )
  {
    // Handle errors first: there is no current token after an error so
    // the token text must not be printed, and the failure is reported
    // to the caller.
    if( tid == kErrorLexTId )
    {
      cwLogInfo("Error:%i\n", lex::errorRC(h));
      rc = kSyntaxErrorRC;
      break;
    }

    // print information about each token
    cwLogInfo("ln:%i col:%i tok:%s '%.*s' len:%i ",
              lex::currentLineNumber(h),
              lex::currentColumnNumber(h),
              lex::idToLabel(h,tid),
              lex::tokenCharCount(h),
              lex::tokenText(h) ,
              lex::tokenCharCount(h));

    // if the token is a number ...
    if( tid==kIntLexTId || tid==kRealLexTId || tid==kHexLexTId )
    {
      // ... then request the numbers value
      int    iv = lex::tokenInt(h);
      double dv = lex::tokenDouble(h);

      cwLogInfo("Number: int:%i dbl:%f unsigned:%i float:%i",iv,dv,tokenIsUnsigned(h),tokenIsSinglePrecision(h));
    }
  }

 errLabel:
  // finalize the lexer
  lex::destroy(h);

  return rc;
}
|
|
|
|
}
|
2019-12-19 03:24:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//)
|
|
|
|
//}
|