12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970 |
- #include "cmPrefix.h"
- #include "cmGlobal.h"
- #include "cmRpt.h"
- #include "cmLex.h"
- #include "cmErr.h"
- #include "cmMem.h"
- #include "cmMallocDebug.h"
- #include "cmFile.h"
-
// Bit flags kept in cmLex.attrFlags. They record which suffix, if any,
// was found on the numeric token matched most recently.
enum
{
  kRealFloatLexFl   = 1 << 0,  // set by the real matcher on an 'f' or 'F' suffix
  kIntUnsignedLexFl = 1 << 1   // set by the int matcher on a 'u' or 'U' suffix
};
-
- typedef struct
- {
- unsigned code;
- const cmChar_t* msg;
- } cmLexErrorRecd;
-
-
- cmLexErrorRecd cmLexErrorArray[] =
- {
- { kOkLexRC, "No error. The operation completed successfully."},
- { kDuplicateTokenLexRC, "The text or id passed as a user token is already in use by another token."},
- { kMissingCmtEndLexRC, "The end of a block comment could not be found."},
- { kMissingEndQuoteLexRC, "The end of a quoted string could not be found."},
- { kNoMatchLexRC, "The lexer encountered a string which could not be classified."},
- { kFileOpenErrLexRC, "File open failed on cmLexSetFile()"},
- { kFileSeekErrLexRC, "File seek failed on cmLexSetFile()"},
- { kFileTellErrLexRC, "File tell failed on cmLexSetFile()"},
- { kFileReadErrLexRC, "File read failed on cmLexSetFile()"},
- { kFileCloseErrLexRC, "File close failed on cmLexSetFile()"},
- { kMemAllocErrLexRC, "An attempted memory allocation failed"},
- { kEofRC, "The end of the input text was encountered (this is a normal condition not an error)"},
- { kInvalidLexTIdLexRC, "An invalid token id was encountered."},
- { kSignErrorLexRC, "A signed integer has a 'u' or 'U' suffix."},
- { kInvalidLexRC, "Unknown lexer error code." }
- };
-
- struct cmLex_str;
-
- typedef unsigned (*cmLexMatcherFuncPtr_t)( struct cmLex_str* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr );
-
- // token match function record
- typedef struct
- {
- unsigned typeId; // token type this matcher recognizes
- cmLexMatcherFuncPtr_t funcPtr; // recognizer function (only used if userPtr==NULL)
- cmChar_t* tokenStr; // fixed string data used by the recognizer (only used if userPtr==NULL)
- cmLexUserMatcherPtr_t userPtr; // user defined recognizer function (only used if funcPtr==NULL)
- bool enableFl; // true if this matcher is enabled
- } cmLexMatcher;
-
-
-
- typedef struct cmLex_str
- {
- cmErr_t err;
- const cmChar_t* cp; // character buffer
- unsigned cn; // count of characters in buffer
- unsigned ci; // current buffer index position
- unsigned flags; // lexer control flags
-
- unsigned curTokenId; // type id of the current token
- unsigned curTokenCharIdx; // index into cp[] of the current token
- unsigned curTokenCharCnt; // count of characters in the current token
- unsigned curLine; // line number of the current token
- unsigned curCol; // column number of the current token
-
- unsigned nextLine;
- unsigned nextCol;
-
- cmChar_t* blockBegCmtStr;
- cmChar_t* blockEndCmtStr;
- cmChar_t* lineCmtStr;
-
- cmLexMatcher* mfp; // base of matcher array
- unsigned mfi; // next available matcher array slot
- unsigned mfn; // count of elementes in mfp[]
-
- cmChar_t* textBuf; // text buf used by cmLexSetFile()
-
- unsigned attrFlags; // used to store the int and real suffix type flags
- } cmLex;
-
-
- cmLexH cmLexNullH = { NULL };
-
- bool _cmLexIsNewline( cmChar_t c )
- { return c == '\n'; }
-
- bool _cmLexIsCommentTypeId( unsigned typeId )
- { return typeId == kBlockCmtLexTId || typeId == kLineCmtLexTId; }
-
- cmLex* _cmLexHandleToPtr( cmLexH h )
- {
- cmLex* p = (cmLex*)h.h;
- assert(p != NULL);
- return p;
- };
-
- cmRC_t _cmLexError( cmLex* p, unsigned rc, const char* fmt, ... )
- {
- va_list vl;
- va_start(vl,fmt);
-
- unsigned bufCharCnt = 512;
- char buf[ bufCharCnt+1 ];
- snprintf(buf,bufCharCnt,"Error on line:%i ", p->curLine);
-
- unsigned sn = strlen(buf);
- vsnprintf(buf+sn,bufCharCnt-sn,fmt,vl);
- buf[bufCharCnt]=0;
-
- cmErrMsg(&p->err,rc,"%s",buf);
-
- va_end(vl);
- return rc;
- }
-
- // Locate 'keyStr' in cp[cn] and return the index into cp[cn] of the character
- // following the last char in 'keyStr'. If keyStr is not found return cmInvalidIdx.
- unsigned _cmLexScanTo( const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned i = 0;
- unsigned n = strlen(keyStr);
-
- if( n <= cn )
- for(; i<=cn-n; ++i)
- if( strncmp(cp + i, keyStr, n ) == 0 )
- return i+n;
-
- return cmInvalidIdx;
-
- }
-
-
- unsigned _cmLexExactStringMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned n = strlen(keyStr);
- return strncmp(keyStr,cp,n) == 0 ? n : 0;
- }
-
-
- unsigned _cmLexSpaceMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned i=0;
- for(; i<cn; ++i)
- if( !isspace(cp[i]) )
- break;
- return i;
- }
-
- unsigned _cmLexRealMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned i = 0;
- unsigned n = 0; // decimal point counter
- unsigned d = 0; // digit counter
- bool fl = false; // expo flag
-
- for(; i<cn && n<=1; ++i)
- {
- if( i==0 && cp[i]=='-' ) // allow a leading '-'
- continue;
-
- if( isdigit(cp[i]) ) // allow digits
- {
- ++d;
- continue;
- }
-
- if( cp[i] == '.' && n==0 ) // allow exactly one decimal point
- ++n;
- else
- break;
- }
-
- // if there was at least one digit and the next char is an 'e'
- if( d>0 && i<cn && (cp[i] == 'e' || cp[i] == 'E') )
- {
- unsigned e=0;
- ++i;
- unsigned j = i;
-
- fl = false;
-
- for(; i<cn; ++i)
- {
- if( i==j && cp[i]=='-' ) // allow the char following the e to be '-'
- continue;
-
- if( isdigit(cp[i]) )
- {
- ++e;
- ++d;
- continue;
- }
-
- // stop at the first non-digit
- break;
- }
-
- // an exp exists if digits follwed the 'e'
- fl = e > 0;
-
- }
-
- // if at least one digit was found
- if( d>0 )
- {
- // Note that this path allows a string w/o a decimal pt to trigger a match.
-
- if(i<cn)
- {
- // if the real has a suffix
- switch(cp[i])
- {
- case 'F':
- case 'f':
- p->attrFlags = cmSetFlag(p->attrFlags,kRealFloatLexFl);
- ++i;
- break;
- }
-
- }
-
- // match w/o suffix return
- if( d>0 && (fl || n==1 || cmIsFlag(p->attrFlags,kRealFloatLexFl)) )
- return i;
- }
-
- return 0; // no-match return
- }
-
- unsigned _cmLexIntMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned i = 0;
- bool signFl = false;
-
- for(; i<cn; ++i)
- {
- if( i==0 && cp[i]=='-' )
- {
- signFl = true;
- continue;
- }
-
- if( !isdigit(cp[i]) )
- break;
- }
-
- // BUG BUG BUG
- // If an integer is specified using 'e' notiation
- // (see _cmLexRealMatcher()) and the number of exponent places
- // specified following the 'e' is positive and >= number of
- // digits following the decimal point (in effect zeros are
- // padded on the right side) then the value is an integer.
- //
- // The current implementation recognizes all numeric strings
- // containing a decimal point as reals.
-
- // if no integer was found
- if( (signFl && i==0) || i==0 )
- return 0;
-
-
- // check for suffix
- if(i<cn )
- {
-
- switch(cp[i])
- {
- case 'u':
- case 'U':
- if( signFl )
- _cmLexError(p,kSignErrorLexRC,"A signed integer has a 'u' or 'U' suffix.");
- else
- {
- p->attrFlags = cmSetFlag(p->attrFlags,kIntUnsignedLexFl);
- ++i;
- }
- break;
-
- default:
- break;
- }
- }
-
- return i;
- }
-
- unsigned _cmLexHexMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned i = 0;
-
- if( cn < 3 )
- return 0;
-
- if( cp[0]=='0' && cp[1]=='x')
- for(i=2; i<cn; ++i)
- if( !isxdigit(cp[i]) )
- break;
-
- return i;
- }
-
-
- unsigned _cmLexIdentMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned i = 0;
- if( isalpha(cp[0]) || (cp[0]== '_'))
- {
- i = 1;
- for(; i<cn; ++i)
- if( !isalnum(cp[i]) && (cp[i] != '_') )
- break;
- }
- return i;
- }
-
-
- unsigned _cmLexQStrMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- cmChar_t qStr[]="\"";
- unsigned n = strlen(qStr);
- if( strncmp(qStr,cp,n) == 0 )
- {
- unsigned i;
- if((i = _cmLexScanTo(cp+n, cn-n, qStr)) == cmInvalidIdx )
- {
- _cmLexError( p, kMissingEndQuoteLexRC, "Missing string end quote.");
- return 0;
- }
- return n+i;
- }
- return 0;
- }
-
- unsigned _cmLexQCharMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned i = 0;
- if( i >= cn || cp[i]!='\'' )
- return 0;
-
- i+=2;
-
- if( i >= cn || cp[i]!='\'')
- return 0;
-
- return 3;
- }
-
-
- unsigned _cmLexBlockCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned n = strlen(p->blockBegCmtStr);
-
- if( strncmp( p->blockBegCmtStr, cp, n ) == 0 )
- {
- unsigned i;
- if((i = _cmLexScanTo(cp + n, cn-n,p->blockEndCmtStr)) == cmInvalidIdx )
- {
- _cmLexError(p, kMissingCmtEndLexRC, "Missing end of block comment.");
- return 0;
- }
-
- return n + i;
- }
- return 0;
- }
-
- unsigned _cmLexLineCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
- {
- unsigned n = strlen(p->lineCmtStr);
- if( strncmp( p->lineCmtStr, cp, n ) == 0)
- {
- unsigned i;
- const char newlineStr[] = "\n";
- if((i = _cmLexScanTo(cp + n, cn-n, newlineStr)) == cmInvalidIdx )
- {
- // no EOL was found so the comment must be on the last line of the source
- return cn;
- }
-
- return n + i;
- }
- return 0;
- }
-
- cmRC_t _cmLexInstallMatcher( cmLex* p, unsigned typeId, cmLexMatcherFuncPtr_t funcPtr, const cmChar_t* keyStr, cmLexUserMatcherPtr_t userPtr )
- {
- assert( funcPtr==NULL || userPtr==NULL );
- assert( !(funcPtr==NULL && userPtr==NULL));
-
- // if there is no space in the user token array - then expand it
- if( p->mfi == p->mfn )
- {
- int incr_cnt = 10;
- cmLexMatcher* np = cmMemAllocZ( cmLexMatcher, p->mfn + incr_cnt );
- memcpy(np,p->mfp,p->mfi*sizeof(cmLexMatcher));
- cmMemPtrFree(&p->mfp);
- p->mfp = np;
- p->mfn += incr_cnt;
- }
-
- p->mfp[p->mfi].tokenStr = NULL;
- p->mfp[p->mfi].typeId = typeId;
- p->mfp[p->mfi].funcPtr = funcPtr;
- p->mfp[p->mfi].userPtr = userPtr;
- p->mfp[p->mfi].enableFl = true;
-
- if( keyStr != NULL )
- {
- // allocate space for the token string and store it
- p->mfp[p->mfi].tokenStr = cmMemAlloc( cmChar_t, sizeof(cmChar_t) * (strlen(keyStr)+1) );
- strcpy(p->mfp[p->mfi].tokenStr, keyStr );
- }
-
-
- p->mfi++;
- return kOkLexRC;
- }
- cmRC_t _cmLexReset( cmLex* p )
- {
-
- p->ci = 0;
-
- p->curTokenId = kErrorLexTId;
- p->curTokenCharIdx = cmInvalidIdx;
- p->curTokenCharCnt = 0;
-
- p->curLine = 0;
- p->curCol = 0;
- p->nextLine = 0;
- p->nextCol = 0;
-
- cmErrClearRC(&p->err);
-
- return kOkLexRC;
- }
-
- cmRC_t _cmLexSetTextBuffer( cmLex* p, const cmChar_t* cp, unsigned cn )
- {
- p->cp = cp;
- p->cn = cn;
-
- return _cmLexReset(p);
- }
-
-
- cmLexH cmLexInit( const cmChar_t* cp, unsigned cn, unsigned flags, cmRpt_t* rpt )
- {
- cmLexH h;
- cmChar_t dfltLineCmt[] = "//";
- cmChar_t dfltBlockBegCmt[] = "/*";
- cmChar_t dfltBlockEndCmt[] = "*/";
-
- cmLex* p = cmMemAllocZ( cmLex, 1 );
-
- cmErrSetup(&p->err,rpt,"Lexer");
-
- p->flags = flags;
-
- _cmLexSetTextBuffer( p, cp, cn );
-
- int init_mfn = 10;
- p->mfp = cmMemAllocZ( cmLexMatcher, init_mfn );
- p->mfn = init_mfn;
- p->mfi = 0;
-
-
- p->lineCmtStr = cmMemAlloc( cmChar_t, strlen(dfltLineCmt)+1 );
- strcpy( p->lineCmtStr, dfltLineCmt );
-
- p->blockBegCmtStr = cmMemAlloc( cmChar_t, strlen(dfltBlockBegCmt)+1 );
- strcpy( p->blockBegCmtStr, dfltBlockBegCmt );
-
- p->blockEndCmtStr = cmMemAlloc( cmChar_t, strlen(dfltBlockEndCmt)+1 );
- strcpy( p->blockEndCmtStr, dfltBlockEndCmt );
-
-
- _cmLexInstallMatcher( p, kSpaceLexTId, _cmLexSpaceMatcher, NULL, NULL );
- _cmLexInstallMatcher( p, kRealLexTId, _cmLexRealMatcher, NULL, NULL );
- _cmLexInstallMatcher( p, kIntLexTId, _cmLexIntMatcher, NULL, NULL );
- _cmLexInstallMatcher( p, kHexLexTId, _cmLexHexMatcher, NULL, NULL );
- _cmLexInstallMatcher( p, kIdentLexTId, _cmLexIdentMatcher, NULL, NULL );
- _cmLexInstallMatcher( p, kQStrLexTId, _cmLexQStrMatcher, NULL, NULL );
- _cmLexInstallMatcher( p, kBlockCmtLexTId, _cmLexBlockCmtMatcher, NULL, NULL );
- _cmLexInstallMatcher( p, kLineCmtLexTId, _cmLexLineCmtMatcher, NULL, NULL );
-
- if( cmIsFlag(flags,kReturnQCharLexFl) )
- _cmLexInstallMatcher( p, kQCharLexTId, _cmLexQCharMatcher, NULL, NULL );
-
- h.h = p;
-
- _cmLexReset(p);
-
- return h;
- }
-
- cmRC_t cmLexFinal( cmLexH* hp )
- {
- if( hp == NULL || cmLexIsValid(*hp)==false )
- return cmOkRC;
-
- cmLex* p = _cmLexHandleToPtr(*hp);
-
- if( p != NULL )
- {
-
- if( p->mfp != NULL )
- {
- unsigned i = 0;
-
- // free the user token strings
- for(; i<p->mfi; ++i)
- if( p->mfp[i].tokenStr != NULL )
- cmMemPtrFree(&p->mfp[i].tokenStr);
-
- // free the matcher array
- cmMemPtrFree(&p->mfp);
- p->mfi = 0;
- p->mfn = 0;
- }
-
- cmMemPtrFree(&p->lineCmtStr);
- cmMemPtrFree(&p->blockBegCmtStr);
- cmMemPtrFree(&p->blockEndCmtStr);
- cmMemPtrFree(&p->textBuf);
-
- // free the lexer object
- cmMemPtrFree(&p);
- hp->h = NULL;
- }
-
- return kOkLexRC;
- }
-
- cmRC_t cmLexReset( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
- return _cmLexReset(p);
- }
-
-
- bool cmLexIsValid( cmLexH h )
- { return h.h != NULL; }
-
- cmRC_t cmLexSetTextBuffer( cmLexH h, const cmChar_t* cp, unsigned cn )
- {
- cmLex* p = _cmLexHandleToPtr(h);
- return _cmLexSetTextBuffer(p,cp,cn);
- }
-
- cmRC_t cmLexSetFile( cmLexH h, const cmChar_t* fn )
- {
- cmRC_t rc = kOkLexRC;
- cmFileH_t fh = cmFileNullHandle;
- cmLex* p = _cmLexHandleToPtr(h);
- long n = 0;
-
- assert( fn != NULL && p != NULL );
-
- // open the file
- if( cmFileOpen(&fh,fn,kReadFileFl,p->err.rpt) != kOkFileRC )
- return kFileOpenErrLexRC;
-
- // seek to the end of the file
- if( cmFileSeek(fh,kEndFileFl,0) != kOkFileRC )
- return kFileSeekErrLexRC;
-
- // get the length of the file
- if( cmFileTell(fh,&n) != kOkFileRC )
- return kFileTellErrLexRC;
-
- // rewind to the beginning of the file
- if( cmFileSeek(fh,kBeginFileFl,0) != kOkFileRC )
- return kFileSeekErrLexRC;
-
- // allocate the text buffer
- if((p->textBuf = cmMemResizeZ( char, p->textBuf, n+1)) == NULL )
- {
- rc = _cmLexError(p,kMemAllocErrLexRC,"Unable to allocate the text file buffer for:'%s'.",fn);
- goto errLabel;
- }
-
- // read the file into the buffer
- if( cmFileRead(fh,p->textBuf,n) != kOkFileRC )
- return kFileReadErrLexRC;
-
- if((rc = _cmLexSetTextBuffer( p, p->textBuf, n )) != kOkLexRC )
- goto errLabel;
-
- errLabel:
- // close the file
- if( cmFileClose(&fh) != kOkFileRC )
- return kFileCloseErrLexRC;
-
- return rc;
- }
-
-
- cmLexMatcher* _cmLexFindUserToken( cmLex* p, unsigned id, const cmChar_t* tokenStr )
- {
- unsigned i = 0;
- for(; i<p->mfi; ++i)
- {
- if( id != cmInvalidId && p->mfp[i].typeId == id )
- return p->mfp + i;
-
- if( p->mfp[i].tokenStr != NULL && tokenStr != NULL && strcmp(p->mfp[i].tokenStr,tokenStr)==0 )
- return p->mfp + i;
-
- }
-
- return NULL;
- }
-
-
- cmRC_t cmLexRegisterToken( cmLexH h, unsigned id, const cmChar_t* tokenStr )
- {
- cmLex* p = _cmLexHandleToPtr(h);
-
- // prevent duplicate tokens
- if( _cmLexFindUserToken( p, id, tokenStr ) != NULL )
- return _cmLexError( p, kDuplicateTokenLexRC, "id:%i token:%s duplicates the token string or id", id, tokenStr );
-
-
- return _cmLexInstallMatcher( p, id, _cmLexExactStringMatcher, tokenStr, NULL );
-
-
- }
-
- cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t userPtr )
- {
- cmLex* p = _cmLexHandleToPtr(h);
-
- // prevent duplicate tokens
- if( _cmLexFindUserToken( p, id, NULL ) != NULL )
- return _cmLexError( p, kDuplicateTokenLexRC, "A token matching function has already been installed for token id: %i", id );
-
- return _cmLexInstallMatcher( p, id, NULL, NULL, userPtr );
- }
-
- cmRC_t cmLexEnableToken( cmLexH h, unsigned id, bool enableFl )
- {
- cmLex* p = _cmLexHandleToPtr(h);
-
- unsigned mi = 0;
- for(; mi<p->mfi; ++mi)
- if( p->mfp[mi].typeId == id )
- {
- p->mfp[mi].enableFl = enableFl;
- return cmOkRC;
- }
-
- return _cmLexError( p, kInvalidLexTIdLexRC, "%i is not a valid token type id.",id);
- }
-
- unsigned cmLexFilterFlags( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
- return p->flags;
- }
-
- void cmLexSetFilterFlags( cmLexH h, unsigned flags )
- {
- cmLex* p = _cmLexHandleToPtr(h);
- p->flags = flags;
- }
-
-
- unsigned cmLexGetNextToken( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
-
- if( cmErrLastRC(&p->err) != kOkLexRC )
- return kErrorLexTId;
-
- while( p->ci < p->cn )
- {
- unsigned i;
- unsigned mi = 0;
- unsigned maxCharCnt = 0;
- unsigned maxIdx = cmInvalidIdx;
-
- p->curTokenId = kErrorLexTId;
- p->curTokenCharIdx = cmInvalidIdx;
- p->curTokenCharCnt = 0;
- p->attrFlags = 0;
-
- // try each matcher
- for(; mi<p->mfi; ++mi)
- if( p->mfp[mi].enableFl )
- {
- unsigned charCnt = 0;
- if( p->mfp[mi].funcPtr != NULL )
- charCnt = p->mfp[mi].funcPtr(p, p->cp + p->ci, p->cn - p->ci, p->mfp[mi].tokenStr );
- else
- charCnt = p->mfp[mi].userPtr( p->cp + p->ci, p->cn - p->ci);
-
- // notice if the matcher set the error code
- if( cmErrLastRC(&p->err) != kOkLexRC )
- return kErrorLexTId;
-
- // if this matched token is longer then the prev. matched token or
- // if the prev matched token was an identifier and this matched token is an equal length user defined token
- if( (charCnt > maxCharCnt)
- || (charCnt>0 && charCnt==maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId )
- || (charCnt>0 && charCnt<maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId && cmIsFlag(p->flags,kUserDefPriorityLexFl))
- )
- {
- maxCharCnt = charCnt;
- maxIdx = mi;
- }
-
- }
-
- // no token was matched
- if( maxIdx == cmInvalidIdx )
- {
- if( cmIsFlag(p->flags,kReturnUnknownLexFl) )
- {
- maxCharCnt = 1;
- }
- else
- {
- _cmLexError( p, kNoMatchLexRC, "Unable to recognize token:'%c'.",*(p->cp+p->ci));
- return kErrorLexTId;
- }
- }
-
- // update the current line and column position
- p->curLine = p->nextLine;
- p->curCol = p->nextCol;
-
-
- // find the next column and line position
- for(i=0; i<maxCharCnt; ++i)
- {
- if( _cmLexIsNewline(p->cp[ p->ci + i ]) )
- {
- p->nextLine++;
- p->nextCol = 1;
- }
- else
- p->nextCol++;
- }
-
- bool returnFl = true;
-
- if( maxIdx != cmInvalidIdx )
- {
- // check the space token filter
- if( (p->mfp[ maxIdx ].typeId == kSpaceLexTId) && (cmIsFlag(p->flags,kReturnSpaceLexFl)==0) )
- returnFl = false;
-
- // check the comment token filter
- if( _cmLexIsCommentTypeId(p->mfp[ maxIdx ].typeId) && (cmIsFlag(p->flags,kReturnCommentsLexFl)==0) )
- returnFl = false;
- }
-
- // update the lexer state
- p->curTokenId = maxIdx==cmInvalidIdx ? kUnknownLexTId : p->mfp[ maxIdx ].typeId;
- p->curTokenCharIdx = p->ci;
- p->curTokenCharCnt = maxCharCnt;
-
- // advance the text buffer
- p->ci += maxCharCnt;
-
- if( returnFl )
- return p->curTokenId;
- }
-
- cmErrSetRC(&p->err,kEofRC);
-
- return kEofLexTId;
-
- }
-
- unsigned cmLexTokenId( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
-
- return p->curTokenId;
- }
-
- const cmChar_t* cmLexTokenText( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
-
- if( p->curTokenCharIdx == cmInvalidIdx )
- return NULL;
-
- unsigned n = p->curTokenId == kQStrLexTId ? 1 : 0;
-
- return p->cp + p->curTokenCharIdx + n;
- }
-
-
- unsigned cmLexTokenCharCount( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
-
- if( p->curTokenCharIdx == cmInvalidIdx )
- return 0;
-
- unsigned n = p->curTokenId == kQStrLexTId ? 2 : 0;
-
- return p->curTokenCharCnt - n;
- }
-
- int cmLexTokenInt( cmLexH h )
- { return strtol( cmLexTokenText(h),NULL,0 ); }
-
- unsigned cmLexTokenUInt( cmLexH h )
- { return strtol( cmLexTokenText(h),NULL,0 ); }
-
- float cmLexTokenFloat( cmLexH h )
- { return strtof( cmLexTokenText(h),NULL ); }
-
- double cmLexTokenDouble( cmLexH h )
- { return strtod( cmLexTokenText(h),NULL ); }
-
-
- bool cmLexTokenIsUnsigned( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
- return p->curTokenId == kIntLexTId && cmIsFlag(p->attrFlags,kIntUnsignedLexFl);
- }
-
- bool cmLexTokenIsSinglePrecision( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
- return p->curTokenId == kRealLexTId && cmIsFlag(p->attrFlags,kRealFloatLexFl);
- }
-
- unsigned cmLexCurrentLineNumber( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
- return p->curLine + 1;
- }
-
- unsigned cmLexCurrentColumnNumber( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
- return p->curCol + 1;
- }
-
- unsigned cmLexErrorRC( cmLexH h )
- {
- cmLex* p = _cmLexHandleToPtr(h);
- return cmErrLastRC(&p->err);
- }
-
- const cmChar_t* cmLexIdToLabel( cmLexH h, unsigned typeId )
- {
- cmLex* p = _cmLexHandleToPtr(h);
-
- switch( typeId )
- {
- case kErrorLexTId: return "<error>";
- case kEofLexTId: return "<EOF>";
- case kSpaceLexTId: return "<space>";
- case kRealLexTId: return "<real>";
- case kIntLexTId: return "<int>";
- case kHexLexTId: return "<hex>";
- case kIdentLexTId: return "<ident>";
- case kQStrLexTId: return "<qstr>";
- case kBlockCmtLexTId: return "<bcmt>";
- case kLineCmtLexTId: return "<lcmt>";
- default:
- {
- cmLexMatcher* mp;
- if((mp = _cmLexFindUserToken(p,typeId,NULL)) == NULL )
- return "<unknown>";
- return mp->tokenStr;
- }
- }
- return "<invalid>";
- }
-
- const cmChar_t* cmLexRcToMsg( unsigned rc )
- {
- unsigned i=0;
- for(i=0; cmLexErrorArray[i].code != kInvalidLexRC; ++i)
- if( cmLexErrorArray[i].code == rc )
- break;
-
- return cmLexErrorArray[i].msg;
- }
-
-
- //{ { label:cmLexEx }
- //(
- // cmLexTest() gives a simple cmLex example.
- //)
-
- //(
- void cmLexTest( cmRpt_t* rpt)
- {
- cmChar_t buf[] =
- "123ident0\n 123.456\nident0\n"
- "0xa12+.2\n"
- "// comment \n"
- "/* block \n"
- "comment */"
- "\"quoted string\""
- "ident1"
- "// last line comment";
-
- // initialize a lexer with a buffer of text
- cmLexH h = cmLexInit(buf,strlen(buf),
- kReturnSpaceLexFl | kReturnCommentsLexFl,rpt);
-
- // verify that the lexer initialization succeded.
- if( cmLexIsValid(h) == false )
- {
- cmRptPrintf(rpt,"Lexer initialization failed.");
- return;
- }
-
- // register some additional recoginizers
- cmLexRegisterToken(h,kUserLexTId+1,"+");
- cmLexRegisterToken(h,kUserLexTId+2,"-");
-
- unsigned tid;
-
- // ask for token id's
- while( (tid = cmLexGetNextToken(h)) != kEofLexTId )
- {
- // print information about each token
- cmRptPrintf(rpt,"%i %i %s '%.*s' (%i) ",
- cmLexCurrentLineNumber(h),
- cmLexCurrentColumnNumber(h),
- cmLexIdToLabel(h,tid),
- cmLexTokenCharCount(h),
- cmLexTokenText(h) ,
- cmLexTokenCharCount(h));
-
- // if the token is a number ...
- if( tid==kIntLexTId || tid==kRealLexTId || tid==kHexLexTId )
- {
- // ... then request the numbers value
- int iv = cmLexTokenInt(h);
- double dv = cmLexTokenDouble(h);
-
- cmRptPrintf(rpt,"%i %f",iv,dv);
- }
-
- cmRptPrintf(rpt,"\n");
-
- // handle errors
- if( tid == kErrorLexTId )
- {
- cmRptPrintf(rpt,"Error:%i\n", cmLexErrorRC(h));
- break;
- }
-
- }
-
- // finalize the lexer
- cmLexFinal(&h);
-
- }
-
- //)
- //}
|