libcm is a C development framework with an emphasis on audio signal processing applications.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

cmLex.c 23KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952
  1. #include "cmPrefix.h"
  2. #include "cmGlobal.h"
  3. #include "cmRpt.h"
  4. #include "cmLex.h"
  5. #include "cmErr.h"
  6. #include "cmMem.h"
  7. #include "cmMallocDebug.h"
  8. #include "cmFile.h"
  // Record pairing a lexer result code with the text that describes it.
  // Instances of this record form the cmLexErrorArray[] table.
  9. typedef struct
  10. {
  11. unsigned code; // lexer result code
  12. const cmChar_t* msg; // human readable description of 'code'
  13. } cmLexErrorRecd;
  // Table of result codes and their messages, searched by cmLexRcToMsg().
  // The kInvalidLexRC record must remain last: it terminates the search and
  // its message is the one returned for any code not listed before it.
  14. cmLexErrorRecd cmLexErrorArray[] =
  15. {
  16. { kOkLexRC, "No error. The operation completed successfully."},
  17. { kDuplicateTokenLexRC, "The text or id passed as a user token is already in use by another token."},
  18. { kMissingCmtEndLexRC, "The end of a block comment could not be found."},
  19. { kMissingEndQuoteLexRC, "The end of a quoted string could not be found."},
  20. { kNoMatchLexRC, "The lexer encountered a string which could not be classified."},
  21. { kFileOpenErrLexRC, "File open failed on cmLexSetFile()"},
  22. { kFileSeekErrLexRC, "File seek failed on cmLexSetFile()"},
  23. { kFileTellErrLexRC, "File tell failed on cmLexSetFile()"},
  24. { kFileReadErrLexRC, "File read failed on cmLexSetFile()"},
  25. { kFileCloseErrLexRC, "File close failed on cmLexSetFile()"},
  26. { kMemAllocErrLexRC, "An attempted memory allocation failed"},
  27. { kEofRC, "The end of the input text was encountered (this is a normal condition not an error)"},
  28. { kInvalidLexTIdLexRC, "An invalid token id was encountered."},
  29. { kInvalidLexRC, "Unknown lexer error code." } // sentinel - keep last
  30. };
  // Forward declaration so the matcher signature can refer to the lexer record.
  31. struct cmLex_str;
  // Signature of the built-in token recognizers.
  // cmLexGetNextToken() calls them with 'cp' set to the current scan position,
  // 'cn' set to the count of characters remaining and 'keyStr' set to the
  // matcher's tokenStr. The return value is the count of characters matched
  // at 'cp'; the recognizers in this file return 0 when nothing matches.
  32. typedef unsigned (*cmLexMatcherFuncPtr_t)( struct cmLex_str* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr );
  33. // Token matcher record: one element of the cmLex.mfp[] array.
  34. typedef struct
  35. {
  36. unsigned typeId; // token type this matcher recognizes
  37. cmLexMatcherFuncPtr_t funcPtr; // recognizer function (only used if userPtr==NULL)
  38. cmChar_t* tokenStr; // fixed string data used by the recognizer (only used if userPtr==NULL); allocated by _cmLexInstallMatcher()
  39. cmLexUserMatcherPtr_t userPtr; // user defined recognizer function (only used if funcPtr==NULL)
  40. bool enableFl; // true if this matcher is enabled
  41. } cmLexMatcher;
  // Lexer state record. A cmLexH handle wraps a pointer to one of these
  // (see _cmLexHandleToPtr()).
  42. typedef struct cmLex_str
  43. {
  44. cmErr_t err; // error/result state; a result code other than kOkLexRC stops cmLexGetNextToken()
  45. const cmChar_t* cp; // character buffer
  46. unsigned cn; // count of characters in buffer
  47. unsigned ci; // current buffer index position
  48. unsigned flags; // lexer control flags
  49. unsigned curTokenId; // type id of the current token
  50. unsigned curTokenCharIdx; // index into cp[] of the current token
  51. unsigned curTokenCharCnt; // count of characters in the current token
  52. unsigned curLine; // line number of the current token
  53. unsigned curCol; // column number of the current token
  54. unsigned nextLine; // line number at which the next token will begin
  55. unsigned nextCol; // column number at which the next token will begin
  56. cmChar_t* blockBegCmtStr; // text which opens a block comment
  57. cmChar_t* blockEndCmtStr; // text which closes a block comment
  58. cmChar_t* lineCmtStr; // text which opens a line comment
  59. cmLexMatcher* mfp; // base of matcher array
  60. unsigned mfi; // next available matcher array slot
  61. unsigned mfn; // count of elements in mfp[]
  62. cmChar_t* textBuf; // text buf used by cmLexSetFile()
  63. } cmLex;
  // Null lexer handle. Its pointer member is NULL, so cmLexIsValid() reports false for it.
  64. cmLexH cmLexNullH = { NULL };
  65. bool _cmLexIsNewline( cmChar_t c )
  66. { return c == '\n'; }
  67. bool _cmLexIsCommentTypeId( unsigned typeId )
  68. { return typeId == kBlockCmtLexTId || typeId == kLineCmtLexTId; }
  69. cmLex* _cmLexHandleToPtr( cmLexH h )
  70. {
  71. cmLex* p = (cmLex*)h.h;
  72. assert(p != NULL);
  73. return p;
  74. };
  75. cmRC_t _cmLexError( cmLex* p, unsigned rc, const char* fmt, ... )
  76. {
  77. va_list vl;
  78. va_start(vl,fmt);
  79. unsigned bufCharCnt = 512;
  80. char buf[ bufCharCnt+1 ];
  81. snprintf(buf,bufCharCnt,"Error on line:%i ", p->curLine);
  82. unsigned sn = strlen(buf);
  83. vsnprintf(buf+sn,bufCharCnt-sn,fmt,vl);
  84. buf[bufCharCnt]=0;
  85. cmErrMsg(&p->err,rc,"%s",buf);
  86. va_end(vl);
  87. return rc;
  88. }
  89. unsigned _cmLexScanTo( const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  90. {
  91. unsigned i = 0;
  92. unsigned n = strlen(keyStr);
  93. if( n <= cn )
  94. for(; i<=cn-n; ++i)
  95. if( strncmp(cp + i, keyStr, n ) == 0 )
  96. return i+n;
  97. return cmInvalidIdx;
  98. }
  99. unsigned _cmLexExactStringMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  100. {
  101. unsigned n = strlen(keyStr);
  102. return strncmp(keyStr,cp,n) == 0 ? n : 0;
  103. }
  104. unsigned _cmLexSpaceMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  105. {
  106. unsigned i=0;
  107. for(; i<cn; ++i)
  108. if( !isspace(cp[i]) )
  109. break;
  110. return i;
  111. }
  112. unsigned _cmLexRealMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  113. {
  114. unsigned i = 0;
  115. unsigned n = 0; // decimal point counter
  116. unsigned d = 0; // digit counter
  117. bool fl = false; // true if this real includes an exponent
  118. for(; i<cn && n<=1; ++i)
  119. {
  120. if( i==0 && cp[i]=='-' ) // allow a leading '-'
  121. continue;
  122. if( isdigit(cp[i]) ) // allow digits
  123. {
  124. ++d;
  125. continue;
  126. }
  127. if( cp[i] == '.' && n==0 ) // allow exactly one decimal point
  128. ++n;
  129. else
  130. break;
  131. }
  132. // if there was at least one digit and the next char is an 'e'
  133. if( d>0 && i<cn && (cp[i] == 'e' || cp[i] == 'E') )
  134. {
  135. d=0;
  136. ++i;
  137. unsigned j = i;
  138. for(; i<cn; ++i)
  139. {
  140. if( i==j && cp[i]=='-' ) // allow the char following the e to be '-'
  141. continue;
  142. if( isdigit(cp[i]) )
  143. {
  144. ++d;
  145. continue;
  146. }
  147. // stop at the first non-digit
  148. break;
  149. }
  150. // an exp exists if digits follwed the 'e'
  151. fl = d > 0;
  152. }
  153. return i>1 && (n==1 || fl) ? i : 0;
  154. }
  155. unsigned _cmLexIntMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  156. {
  157. unsigned i = 0;
  158. for(; i<cn; ++i)
  159. {
  160. if( i==0 && cp[i]=='-' )
  161. continue;
  162. if( !isdigit(cp[i]) )
  163. break;
  164. }
  165. // BUG BUG BUG
  166. // If an integer is specified using 'e' notiation
  167. // (see _cmLexRealMatcher()) and the number of exponent places
  168. // specified following the 'e' is positive and >= number of
  169. // digits following the decimal point (in effect zeros are
  170. // padded on the right side) then the value is an integer.
  171. //
  172. // The current implementation recognizes all numeric strings
  173. // containing a decimal point as reals.
  174. return i;
  175. }
  176. unsigned _cmLexHexMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  177. {
  178. unsigned i = 0;
  179. if( cn < 3 )
  180. return 0;
  181. if( cp[0]=='0' && cp[1]=='x')
  182. for(i=2; i<cn; ++i)
  183. if( !isxdigit(cp[i]) )
  184. break;
  185. return i;
  186. }
  187. unsigned _cmLexIdentMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  188. {
  189. unsigned i = 0;
  190. if( isalpha(cp[0]) || (cp[0]== '_'))
  191. {
  192. i = 1;
  193. for(; i<cn; ++i)
  194. if( !isalnum(cp[i]) && (cp[i] != '_') )
  195. break;
  196. }
  197. return i;
  198. }
  199. unsigned _cmLexQStrMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  200. {
  201. cmChar_t qStr[]="\"";
  202. unsigned n = strlen(qStr);
  203. if( strncmp(qStr,cp,n) == 0 )
  204. {
  205. unsigned i;
  206. if((i = _cmLexScanTo(cp+n, cn-n, qStr)) == cmInvalidIdx )
  207. {
  208. _cmLexError( p, kMissingEndQuoteLexRC, "Missing string end quote.");
  209. return 0;
  210. }
  211. return n+i;
  212. }
  213. return 0;
  214. }
  215. unsigned _cmLexBlockCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  216. {
  217. unsigned n = strlen(p->blockBegCmtStr);
  218. if( strncmp( p->blockBegCmtStr, cp, n ) == 0 )
  219. {
  220. unsigned i;
  221. if((i = _cmLexScanTo(cp + n, cn-n,p->blockEndCmtStr)) == cmInvalidIdx )
  222. {
  223. _cmLexError(p, kMissingCmtEndLexRC, "Missing end of block comment.");
  224. return 0;
  225. }
  226. return n + i;
  227. }
  228. return 0;
  229. }
  230. unsigned _cmLexLineCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  231. {
  232. unsigned n = strlen(p->lineCmtStr);
  233. if( strncmp( p->lineCmtStr, cp, n ) == 0)
  234. {
  235. unsigned i;
  236. const char newlineStr[] = "\n";
  237. if((i = _cmLexScanTo(cp + n, cn-n, newlineStr)) == cmInvalidIdx )
  238. {
  239. // no EOL was found so the comment must be on the last line of the source
  240. return cn;
  241. }
  242. return n + i;
  243. }
  244. return 0;
  245. }
  246. cmRC_t _cmLexInstallMatcher( cmLex* p, unsigned typeId, cmLexMatcherFuncPtr_t funcPtr, const cmChar_t* keyStr, cmLexUserMatcherPtr_t userPtr )
  247. {
  248. assert( funcPtr==NULL || userPtr==NULL );
  249. assert( !(funcPtr==NULL && userPtr==NULL));
  250. // if there is no space in the user token array - then expand it
  251. if( p->mfi == p->mfn )
  252. {
  253. int incr_cnt = 10;
  254. cmLexMatcher* np = cmMemAllocZ( cmLexMatcher, p->mfn + incr_cnt );
  255. memcpy(np,p->mfp,p->mfi*sizeof(cmLexMatcher));
  256. cmMemPtrFree(&p->mfp);
  257. p->mfp = np;
  258. p->mfn += incr_cnt;
  259. }
  260. p->mfp[p->mfi].tokenStr = NULL;
  261. p->mfp[p->mfi].typeId = typeId;
  262. p->mfp[p->mfi].funcPtr = funcPtr;
  263. p->mfp[p->mfi].userPtr = userPtr;
  264. p->mfp[p->mfi].enableFl = true;
  265. if( keyStr != NULL )
  266. {
  267. // allocate space for the token string and store it
  268. p->mfp[p->mfi].tokenStr = cmMemAlloc( cmChar_t, sizeof(cmChar_t) * (strlen(keyStr)+1) );
  269. strcpy(p->mfp[p->mfi].tokenStr, keyStr );
  270. }
  271. p->mfi++;
  272. return kOkLexRC;
  273. }
  274. cmRC_t _cmLexReset( cmLex* p )
  275. {
  276. p->ci = 0;
  277. p->curTokenId = kErrorLexTId;
  278. p->curTokenCharIdx = cmInvalidIdx;
  279. p->curTokenCharCnt = 0;
  280. p->curLine = 0;
  281. p->curCol = 0;
  282. p->nextLine = 0;
  283. p->nextCol = 0;
  284. cmErrClearRC(&p->err);
  285. return kOkLexRC;
  286. }
  287. cmRC_t _cmLexSetTextBuffer( cmLex* p, const cmChar_t* cp, unsigned cn )
  288. {
  289. p->cp = cp;
  290. p->cn = cn;
  291. return _cmLexReset(p);
  292. }
  293. cmLexH cmLexInit( const cmChar_t* cp, unsigned cn, unsigned flags, cmRpt_t* rpt )
  294. {
  295. cmLexH h;
  296. cmChar_t dfltLineCmt[] = "//";
  297. cmChar_t dfltBlockBegCmt[] = "/*";
  298. cmChar_t dfltBlockEndCmt[] = "*/";
  299. cmLex* p = cmMemAllocZ( cmLex, 1 );
  300. cmErrSetup(&p->err,rpt,"Lexer");
  301. p->flags = flags;
  302. _cmLexSetTextBuffer( p, cp, cn );
  303. /*
  304. p->cp = (cn==0) ? NULL : cp;
  305. p->cn = (cp==NULL) ? 0 : cn;
  306. p->ci = 0;
  307. p->curTokenId = kErrorLexTId;
  308. p->curTokenCharIdx = cmInvalidIdx;
  309. p->curTokenCharCnt = 0;
  310. p->curLine = 0;
  311. p->curCol = 0;
  312. p->nextLine = 0;
  313. p->nextCol = 0;
  314. */
  315. int init_mfn = 10;
  316. p->mfp = cmMemAllocZ( cmLexMatcher, init_mfn );
  317. p->mfn = init_mfn;
  318. p->mfi = 0;
  319. p->lineCmtStr = cmMemAlloc( cmChar_t, strlen(dfltLineCmt)+1 );
  320. strcpy( p->lineCmtStr, dfltLineCmt );
  321. p->blockBegCmtStr = cmMemAlloc( cmChar_t, strlen(dfltBlockBegCmt)+1 );
  322. strcpy( p->blockBegCmtStr, dfltBlockBegCmt );
  323. p->blockEndCmtStr = cmMemAlloc( cmChar_t, strlen(dfltBlockEndCmt)+1 );
  324. strcpy( p->blockEndCmtStr, dfltBlockEndCmt );
  325. _cmLexInstallMatcher( p, kSpaceLexTId, _cmLexSpaceMatcher, NULL, NULL );
  326. _cmLexInstallMatcher( p, kRealLexTId, _cmLexRealMatcher, NULL, NULL );
  327. _cmLexInstallMatcher( p, kIntLexTId, _cmLexIntMatcher, NULL, NULL );
  328. _cmLexInstallMatcher( p, kHexLexTId, _cmLexHexMatcher, NULL, NULL );
  329. _cmLexInstallMatcher( p, kIdentLexTId, _cmLexIdentMatcher, NULL, NULL );
  330. _cmLexInstallMatcher( p, kQStrLexTId, _cmLexQStrMatcher, NULL, NULL );
  331. _cmLexInstallMatcher( p, kBlockCmtLexTId, _cmLexBlockCmtMatcher, NULL, NULL );
  332. _cmLexInstallMatcher( p, kLineCmtLexTId, _cmLexLineCmtMatcher, NULL, NULL );
  333. h.h = p;
  334. _cmLexReset(p);
  335. return h;
  336. }
  337. cmRC_t cmLexFinal( cmLexH* hp )
  338. {
  339. if( hp == NULL || cmLexIsValid(*hp)==false )
  340. return cmOkRC;
  341. cmLex* p = _cmLexHandleToPtr(*hp);
  342. if( p != NULL )
  343. {
  344. if( p->mfp != NULL )
  345. {
  346. unsigned i = 0;
  347. // free the user token strings
  348. for(; i<p->mfi; ++i)
  349. if( p->mfp[i].tokenStr != NULL )
  350. cmMemPtrFree(&p->mfp[i].tokenStr);
  351. // free the matcher array
  352. cmMemPtrFree(&p->mfp);
  353. p->mfi = 0;
  354. p->mfn = 0;
  355. }
  356. cmMemPtrFree(&p->lineCmtStr);
  357. cmMemPtrFree(&p->blockBegCmtStr);
  358. cmMemPtrFree(&p->blockEndCmtStr);
  359. cmMemPtrFree(&p->textBuf);
  360. // free the lexer object
  361. cmMemPtrFree(&p);
  362. hp->h = NULL;
  363. }
  364. return kOkLexRC;
  365. }
  366. cmRC_t cmLexReset( cmLexH h )
  367. {
  368. cmLex* p = _cmLexHandleToPtr(h);
  369. return _cmLexReset(p);
  370. }
  371. bool cmLexIsValid( cmLexH h )
  372. { return h.h != NULL; }
  373. cmRC_t cmLexSetTextBuffer( cmLexH h, const cmChar_t* cp, unsigned cn )
  374. {
  375. cmLex* p = _cmLexHandleToPtr(h);
  376. return _cmLexSetTextBuffer(p,cp,cn);
  377. }
  378. cmRC_t cmLexSetFile( cmLexH h, const cmChar_t* fn )
  379. {
  380. cmRC_t rc = kOkLexRC;
  381. cmFileH_t fh = cmFileNullHandle;
  382. cmLex* p = _cmLexHandleToPtr(h);
  383. long n = 0;
  384. assert( fn != NULL && p != NULL );
  385. // open the file
  386. if( cmFileOpen(&fh,fn,kReadFileFl,p->err.rpt) != kOkFileRC )
  387. return kFileOpenErrLexRC;
  388. // seek to the end of the file
  389. if( cmFileSeek(fh,kEndFileFl,0) != kOkFileRC )
  390. return kFileSeekErrLexRC;
  391. // get the length of the file
  392. if( cmFileTell(fh,&n) != kOkFileRC )
  393. return kFileTellErrLexRC;
  394. // rewind to the beginning of the file
  395. if( cmFileSeek(fh,kBeginFileFl,0) != kOkFileRC )
  396. return kFileSeekErrLexRC;
  397. // allocate the text buffer
  398. if((p->textBuf = cmMemResizeZ( char, p->textBuf, n+1)) == NULL )
  399. {
  400. rc = _cmLexError(p,kMemAllocErrLexRC,"Unable to allocate the text file buffer for:'%s'.",fn);
  401. goto errLabel;
  402. }
  403. // read the file into the buffer
  404. if( cmFileRead(fh,p->textBuf,n) != kOkFileRC )
  405. return kFileReadErrLexRC;
  406. if((rc = _cmLexSetTextBuffer( p, p->textBuf, n )) != kOkLexRC )
  407. goto errLabel;
  408. errLabel:
  409. // close the file
  410. if( cmFileClose(&fh) != kOkFileRC )
  411. return kFileCloseErrLexRC;
  412. return rc;
  413. }
  414. /*
  415. cmRC_t cmLexSetFile( cmLexH h, const cmChar_t* fn )
  416. {
  417. cmRC_t rc = kOkLexRC;
  418. FILE* fp = NULL;
  419. cmLex* p = _cmLexHandleToPtr(h);
  420. unsigned n = 0;
  421. assert( fn != NULL && p != NULL );
  422. // open the file
  423. if((fp = fopen(fn,"rb")) == NULL )
  424. return _cmLexError(p,kFileOpenErrLexRC,"Unable to open the file:'%s'.",fn);
  425. // seek to the end
  426. if( fseek(fp,0,SEEK_END) != 0 )
  427. {
  428. rc= _cmLexError(p,kFileSeekErrLexRC,"Unable to seek to the end of '%s'.",fn);
  429. goto errLabel;
  430. }
  431. // get the length of the file
  432. if( (n=ftell(fp)) == 0 )
  433. {
  434. rc = _cmLexError(p,kFileOpenErrLexRC,"The file '%s' appears to be empty.",fn);
  435. goto errLabel;
  436. }
  437. // rewind the file
  438. if( fseek(fp,0,SEEK_SET) != 0 )
  439. {
  440. rc = _cmLexError(p,kFileSeekErrLexRC,"Unable to seek to the beginning of '%s'.",fn);
  441. goto errLabel;
  442. }
  443. // allocate the text buffer
  444. if((p->textBuf = cmMemResizeZ( char, p->textBuf, n+1)) == NULL )
  445. {
  446. rc = _cmLexError(p,kMemAllocErrLexRC,"Unable to allocate the text file buffer for:'%s'.",fn);
  447. goto errLabel;
  448. }
  449. // read the file into the text buffer
  450. if( fread(p->textBuf,n,1,fp) != 1 )
  451. {
  452. rc = _cmLexError(p,kFileReadErrLexRC,"File read failed on:'%s'.",fn);
  453. goto errLabel;
  454. }
  455. if((rc = _cmLexSetTextBuffer( p, p->textBuf, n )) != kOkLexRC )
  456. goto errLabel;
  457. errLabel:
  458. // close the file
  459. if( fclose(fp) != 0 )
  460. {
  461. rc = _cmLexError(p,kFileCloseErrLexRC,"File close failed on:'%s'.",fn);
  462. goto errLabel;
  463. }
  464. return rc;
  465. }
  466. */
  467. cmLexMatcher* _cmLexFindUserToken( cmLex* p, unsigned id, const cmChar_t* tokenStr )
  468. {
  469. unsigned i = 0;
  470. for(; i<p->mfi; ++i)
  471. {
  472. if( id != cmInvalidId && p->mfp[i].typeId == id )
  473. return p->mfp + i;
  474. if( p->mfp[i].tokenStr != NULL && tokenStr != NULL && strcmp(p->mfp[i].tokenStr,tokenStr)==0 )
  475. return p->mfp + i;
  476. }
  477. return NULL;
  478. }
  479. cmRC_t cmLexRegisterToken( cmLexH h, unsigned id, const cmChar_t* tokenStr )
  480. {
  481. cmLex* p = _cmLexHandleToPtr(h);
  482. // prevent duplicate tokens
  483. if( _cmLexFindUserToken( p, id, tokenStr ) != NULL )
  484. return _cmLexError( p, kDuplicateTokenLexRC, "id:%i token:%s duplicates the token string or id", id, tokenStr );
  485. return _cmLexInstallMatcher( p, id, _cmLexExactStringMatcher, tokenStr, NULL );
  486. }
  487. cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t userPtr )
  488. {
  489. cmLex* p = _cmLexHandleToPtr(h);
  490. // prevent duplicate tokens
  491. if( _cmLexFindUserToken( p, id, NULL ) != NULL )
  492. return _cmLexError( p, kDuplicateTokenLexRC, "A token matching function has already been installed for token id: %i", id );
  493. return _cmLexInstallMatcher( p, id, NULL, NULL, userPtr );
  494. }
  495. cmRC_t cmLexEnableToken( cmLexH h, unsigned id, bool enableFl )
  496. {
  497. cmLex* p = _cmLexHandleToPtr(h);
  498. unsigned mi = 0;
  499. for(; mi<p->mfi; ++mi)
  500. if( p->mfp[mi].typeId == id )
  501. {
  502. p->mfp[mi].enableFl = enableFl;
  503. return cmOkRC;
  504. }
  505. return _cmLexError( p, kInvalidLexTIdLexRC, "%i is not a valid token type id.",id);
  506. }
  507. unsigned cmLexFilterFlags( cmLexH h )
  508. {
  509. cmLex* p = _cmLexHandleToPtr(h);
  510. return p->flags;
  511. }
  512. void cmLexSetFilterFlags( cmLexH h, unsigned flags )
  513. {
  514. cmLex* p = _cmLexHandleToPtr(h);
  515. p->flags = flags;
  516. }
  517. unsigned cmLexGetNextToken( cmLexH h )
  518. {
  519. cmLex* p = _cmLexHandleToPtr(h);
  520. if( cmErrLastRC(&p->err) != kOkLexRC )
  521. return kErrorLexTId;
  522. while( p->ci < p->cn )
  523. {
  524. unsigned i;
  525. unsigned mi = 0;
  526. unsigned maxCharCnt = 0;
  527. unsigned maxIdx = cmInvalidIdx;
  528. p->curTokenId = kErrorLexTId;
  529. p->curTokenCharIdx = cmInvalidIdx;
  530. p->curTokenCharCnt = 0;
  531. // try each mater
  532. for(; mi<p->mfi; ++mi)
  533. if( p->mfp[mi].enableFl )
  534. {
  535. unsigned charCnt = 0;
  536. if( p->mfp[mi].funcPtr != NULL )
  537. charCnt = p->mfp[mi].funcPtr(p, p->cp + p->ci, p->cn - p->ci, p->mfp[mi].tokenStr );
  538. else
  539. charCnt = p->mfp[mi].userPtr( p->cp + p->ci, p->cn - p->ci);
  540. if( cmErrLastRC(&p->err) != kOkLexRC )
  541. return kErrorLexTId;
  542. // if this matched token is longer then the prev. matched token or
  543. // if the prev matched token was an identifier and this matched token is an equal length user defined token
  544. if( (charCnt > maxCharCnt)
  545. || (charCnt>0 && charCnt==maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId )
  546. || (charCnt>0 && charCnt<maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId && cmIsFlag(p->flags,kUserDefPriorityLexFl))
  547. )
  548. {
  549. maxCharCnt = charCnt;
  550. maxIdx = mi;
  551. }
  552. }
  553. // no token was matched
  554. if( maxIdx == cmInvalidIdx )
  555. {
  556. if( cmIsFlag(p->flags,kReturnUnknownLexFl) )
  557. {
  558. maxCharCnt = 1;
  559. }
  560. else
  561. {
  562. _cmLexError( p, kNoMatchLexRC, "Unable to recognize token:'%c'.",*(p->cp+p->ci));
  563. return kErrorLexTId;
  564. }
  565. }
  566. // update the current line and column position
  567. p->curLine = p->nextLine;
  568. p->curCol = p->nextCol;
  569. // find the next column and line position
  570. for(i=0; i<maxCharCnt; ++i)
  571. {
  572. if( _cmLexIsNewline(p->cp[ p->ci + i ]) )
  573. {
  574. p->nextLine++;
  575. p->nextCol = 1;
  576. }
  577. else
  578. p->nextCol++;
  579. }
  580. bool returnFl = true;
  581. if( maxIdx != cmInvalidIdx )
  582. {
  583. // check the space token filter
  584. if( (p->mfp[ maxIdx ].typeId == kSpaceLexTId) && (cmIsFlag(p->flags,kReturnSpaceLexFl)==0) )
  585. returnFl = false;
  586. // check the comment token filter
  587. if( _cmLexIsCommentTypeId(p->mfp[ maxIdx ].typeId) && (cmIsFlag(p->flags,kReturnCommentsLexFl)==0) )
  588. returnFl = false;
  589. }
  590. // update the lexer state
  591. p->curTokenId = maxIdx==cmInvalidIdx ? kUnknownLexTId : p->mfp[ maxIdx ].typeId;
  592. p->curTokenCharIdx = p->ci;
  593. p->curTokenCharCnt = maxCharCnt;
  594. // advance the text buffer
  595. p->ci += maxCharCnt;
  596. if( returnFl )
  597. return p->curTokenId;
  598. }
  599. cmErrSetRC(&p->err,kEofRC);
  600. return kEofLexTId;
  601. }
  602. unsigned cmLexTokenId( cmLexH h )
  603. {
  604. cmLex* p = _cmLexHandleToPtr(h);
  605. return p->curTokenId;
  606. }
  607. const cmChar_t* cmLexTokenText( cmLexH h )
  608. {
  609. cmLex* p = _cmLexHandleToPtr(h);
  610. if( p->curTokenCharIdx == cmInvalidIdx )
  611. return NULL;
  612. unsigned n = p->curTokenId == kQStrLexTId ? 1 : 0;
  613. return p->cp + p->curTokenCharIdx + n;
  614. }
  615. unsigned cmLexTokenCharCount( cmLexH h )
  616. {
  617. cmLex* p = _cmLexHandleToPtr(h);
  618. if( p->curTokenCharIdx == cmInvalidIdx )
  619. return 0;
  620. unsigned n = p->curTokenId == kQStrLexTId ? 2 : 0;
  621. return p->curTokenCharCnt - n;
  622. }
  623. int cmLexTokenInt( cmLexH h )
  624. { return strtol( cmLexTokenText(h),NULL,0 ); }
  625. unsigned cmLexTokenUInt( cmLexH h )
  626. { return strtol( cmLexTokenText(h),NULL,0 ); }
  627. float cmLexTokenFloat( cmLexH h )
  628. { return strtof( cmLexTokenText(h),NULL ); }
  629. double cmLexTokenDouble( cmLexH h )
  630. { return strtod( cmLexTokenText(h),NULL ); }
  631. unsigned cmLexCurrentLineNumber( cmLexH h )
  632. {
  633. cmLex* p = _cmLexHandleToPtr(h);
  634. return p->curLine + 1;
  635. }
  636. unsigned cmLexCurrentColumnNumber( cmLexH h )
  637. {
  638. cmLex* p = _cmLexHandleToPtr(h);
  639. return p->curCol + 1;
  640. }
  641. unsigned cmLexErrorRC( cmLexH h )
  642. {
  643. cmLex* p = _cmLexHandleToPtr(h);
  644. return cmErrLastRC(&p->err);
  645. }
  646. const cmChar_t* cmLexIdToLabel( cmLexH h, unsigned typeId )
  647. {
  648. cmLex* p = _cmLexHandleToPtr(h);
  649. switch( typeId )
  650. {
  651. case kErrorLexTId: return "<error>";
  652. case kEofLexTId: return "<EOF>";
  653. case kSpaceLexTId: return "<space>";
  654. case kRealLexTId: return "<real>";
  655. case kIntLexTId: return "<int>";
  656. case kHexLexTId: return "<hex>";
  657. case kIdentLexTId: return "<ident>";
  658. case kQStrLexTId: return "<qstr>";
  659. case kBlockCmtLexTId: return "<bcmt>";
  660. case kLineCmtLexTId: return "<lcmt>";
  661. default:
  662. {
  663. cmLexMatcher* mp;
  664. if((mp = _cmLexFindUserToken(p,typeId,NULL)) == NULL )
  665. return "<unknown>";
  666. return mp->tokenStr;
  667. }
  668. }
  669. return "<invalid>";
  670. }
  671. const cmChar_t* cmLexRcToMsg( unsigned rc )
  672. {
  673. unsigned i=0;
  674. for(i=0; cmLexErrorArray[i].code != kInvalidLexRC; ++i)
  675. if( cmLexErrorArray[i].code == rc )
  676. break;
  677. return cmLexErrorArray[i].msg;
  678. }
  679. //{ { label:cmLexEx }
  680. //(
  681. // cmLexTest() gives a simple cmLex example.
  682. //)
  683. //(
  684. void cmLexTest( cmRpt_t* rpt)
  685. {
  686. cmChar_t buf[] =
  687. "123ident0\n 123.456\nident0\n"
  688. "0xa12+.2\n"
  689. "// comment \n"
  690. "/* block \n"
  691. "comment */"
  692. "\"quoted string\""
  693. "ident1"
  694. "// last line comment";
  695. // initialize a lexer with a buffer of text
  696. cmLexH h = cmLexInit(buf,strlen(buf),
  697. kReturnSpaceLexFl | kReturnCommentsLexFl,rpt);
  698. // verify that the lexer initialization succeded.
  699. if( cmLexIsValid(h) == false )
  700. {
  701. cmRptPrintf(rpt,"Lexer initialization failed.");
  702. return;
  703. }
  704. // register some additional recoginizers
  705. cmLexRegisterToken(h,kUserLexTId+1,"+");
  706. cmLexRegisterToken(h,kUserLexTId+2,"-");
  707. unsigned tid;
  708. // ask for token id's
  709. while( (tid = cmLexGetNextToken(h)) != kEofLexTId )
  710. {
  711. // print information about each token
  712. cmRptPrintf(rpt,"%i %i %s '%.*s' (%i) ",
  713. cmLexCurrentLineNumber(h),
  714. cmLexCurrentColumnNumber(h),
  715. cmLexIdToLabel(h,tid),
  716. cmLexTokenCharCount(h),
  717. cmLexTokenText(h) ,
  718. cmLexTokenCharCount(h));
  719. // if the token is a number ...
  720. if( tid==kIntLexTId || tid==kRealLexTId || tid==kHexLexTId )
  721. {
  722. // ... then request the numbers value
  723. int iv = cmLexTokenInt(h);
  724. double dv = cmLexTokenDouble(h);
  725. cmRptPrintf(rpt,"%i %f",iv,dv);
  726. }
  727. cmRptPrintf(rpt,"\n");
  728. // handle errors
  729. if( tid == kErrorLexTId )
  730. {
  731. cmRptPrintf(rpt,"Error:%i\n", cmLexErrorRC(h));
  732. break;
  733. }
  734. }
  735. // finalize the lexer
  736. cmLexFinal(&h);
  737. }
  738. //)
  739. //}