libcm is a C development framework with an emphasis on audio signal processing applications.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

cmLex.c 23KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951
  1. #include "cmPrefix.h"
  2. #include "cmGlobal.h"
  3. #include "cmRpt.h"
  4. #include "cmLex.h"
  5. #include "cmErr.h"
  6. #include "cmMem.h"
  7. #include "cmMallocDebug.h"
  8. #include "cmFile.h"
  // Token attribute flags stored in cmLex.attrFlags by the numeric matchers.
  enum
  {
    kRealFloatLexFl   = 1 << 0,  // set by _cmLexRealMatcher() when a real carries an 'f'/'F' suffix
    kIntUnsignedLexFl = 1 << 1   // set by _cmLexIntMatcher() when an integer carries a 'u'/'U' suffix
  };
  14. typedef struct
  15. {
  16. unsigned code;
  17. const cmChar_t* msg;
  18. } cmLexErrorRecd;
  19. cmLexErrorRecd cmLexErrorArray[] =
  20. {
  21. { kOkLexRC, "No error. The operation completed successfully."},
  22. { kDuplicateTokenLexRC, "The text or id passed as a user token is already in use by another token."},
  23. { kMissingCmtEndLexRC, "The end of a block comment could not be found."},
  24. { kMissingEndQuoteLexRC, "The end of a quoted string could not be found."},
  25. { kNoMatchLexRC, "The lexer encountered a string which could not be classified."},
  26. { kFileOpenErrLexRC, "File open failed on cmLexSetFile()"},
  27. { kFileSeekErrLexRC, "File seek failed on cmLexSetFile()"},
  28. { kFileTellErrLexRC, "File tell failed on cmLexSetFile()"},
  29. { kFileReadErrLexRC, "File read failed on cmLexSetFile()"},
  30. { kFileCloseErrLexRC, "File close failed on cmLexSetFile()"},
  31. { kMemAllocErrLexRC, "An attempted memory allocation failed"},
  32. { kEofRC, "The end of the input text was encountered (this is a normal condition not an error)"},
  33. { kInvalidLexTIdLexRC, "An invalid token id was encountered."},
  34. { kSignErrorLexRC, "A signed integer has a 'u' or 'U' suffix."},
  35. { kInvalidLexRC, "Unknown lexer error code." }
  36. };
  37. struct cmLex_str;
  38. typedef unsigned (*cmLexMatcherFuncPtr_t)( struct cmLex_str* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr );
  39. // token match function record
  40. typedef struct
  41. {
  42. unsigned typeId; // token type this matcher recognizes
  43. cmLexMatcherFuncPtr_t funcPtr; // recognizer function (only used if userPtr==NULL)
  44. cmChar_t* tokenStr; // fixed string data used by the recognizer (only used if userPtr==NULL)
  45. cmLexUserMatcherPtr_t userPtr; // user defined recognizer function (only used if funcPtr==NULL)
  46. bool enableFl; // true if this matcher is enabled
  47. } cmLexMatcher;
  48. typedef struct cmLex_str
  49. {
  50. cmErr_t err;
  51. const cmChar_t* cp; // character buffer
  52. unsigned cn; // count of characters in buffer
  53. unsigned ci; // current buffer index position
  54. unsigned flags; // lexer control flags
  55. unsigned curTokenId; // type id of the current token
  56. unsigned curTokenCharIdx; // index into cp[] of the current token
  57. unsigned curTokenCharCnt; // count of characters in the current token
  58. unsigned curLine; // line number of the current token
  59. unsigned curCol; // column number of the current token
  60. unsigned nextLine;
  61. unsigned nextCol;
  62. cmChar_t* blockBegCmtStr;
  63. cmChar_t* blockEndCmtStr;
  64. cmChar_t* lineCmtStr;
  65. cmLexMatcher* mfp; // base of matcher array
  66. unsigned mfi; // next available matcher array slot
  67. unsigned mfn; // count of elementes in mfp[]
  68. cmChar_t* textBuf; // text buf used by cmLexSetFile()
  69. unsigned attrFlags; // used to store the int and real suffix type flags
  70. } cmLex;
  71. cmLexH cmLexNullH = { NULL };
  72. bool _cmLexIsNewline( cmChar_t c )
  73. { return c == '\n'; }
  74. bool _cmLexIsCommentTypeId( unsigned typeId )
  75. { return typeId == kBlockCmtLexTId || typeId == kLineCmtLexTId; }
  76. cmLex* _cmLexHandleToPtr( cmLexH h )
  77. {
  78. cmLex* p = (cmLex*)h.h;
  79. assert(p != NULL);
  80. return p;
  81. };
  82. cmRC_t _cmLexError( cmLex* p, unsigned rc, const char* fmt, ... )
  83. {
  84. va_list vl;
  85. va_start(vl,fmt);
  86. unsigned bufCharCnt = 512;
  87. char buf[ bufCharCnt+1 ];
  88. snprintf(buf,bufCharCnt,"Error on line:%i ", p->curLine);
  89. unsigned sn = strlen(buf);
  90. vsnprintf(buf+sn,bufCharCnt-sn,fmt,vl);
  91. buf[bufCharCnt]=0;
  92. cmErrMsg(&p->err,rc,"%s",buf);
  93. va_end(vl);
  94. return rc;
  95. }
  96. unsigned _cmLexScanTo( const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  97. {
  98. unsigned i = 0;
  99. unsigned n = strlen(keyStr);
  100. if( n <= cn )
  101. for(; i<=cn-n; ++i)
  102. if( strncmp(cp + i, keyStr, n ) == 0 )
  103. return i+n;
  104. return cmInvalidIdx;
  105. }
  106. unsigned _cmLexExactStringMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  107. {
  108. unsigned n = strlen(keyStr);
  109. return strncmp(keyStr,cp,n) == 0 ? n : 0;
  110. }
  111. unsigned _cmLexSpaceMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  112. {
  113. unsigned i=0;
  114. for(; i<cn; ++i)
  115. if( !isspace(cp[i]) )
  116. break;
  117. return i;
  118. }
  119. unsigned _cmLexRealMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  120. {
  121. unsigned i = 0;
  122. unsigned n = 0; // decimal point counter
  123. unsigned d = 0; // digit counter
  124. bool fl = false; // expo flag
  125. for(; i<cn && n<=1; ++i)
  126. {
  127. if( i==0 && cp[i]=='-' ) // allow a leading '-'
  128. continue;
  129. if( isdigit(cp[i]) ) // allow digits
  130. {
  131. ++d;
  132. continue;
  133. }
  134. if( cp[i] == '.' && n==0 ) // allow exactly one decimal point
  135. ++n;
  136. else
  137. break;
  138. }
  139. // if there was at least one digit and the next char is an 'e'
  140. if( d>0 && i<cn && (cp[i] == 'e' || cp[i] == 'E') )
  141. {
  142. unsigned e=0;
  143. ++i;
  144. unsigned j = i;
  145. fl = false;
  146. for(; i<cn; ++i)
  147. {
  148. if( i==j && cp[i]=='-' ) // allow the char following the e to be '-'
  149. continue;
  150. if( isdigit(cp[i]) )
  151. {
  152. ++e;
  153. ++d;
  154. continue;
  155. }
  156. // stop at the first non-digit
  157. break;
  158. }
  159. // an exp exists if digits follwed the 'e'
  160. fl = e > 0;
  161. }
  162. // if at least one digit was found
  163. if( d>0 )
  164. {
  165. // Note that this path allows a string w/o a decimal pt to trigger a match.
  166. if(i<cn)
  167. {
  168. // if the real has a suffix
  169. switch(cp[i])
  170. {
  171. case 'F':
  172. case 'f':
  173. p->attrFlags = cmSetFlag(p->attrFlags,kRealFloatLexFl);
  174. ++i;
  175. break;
  176. }
  177. }
  178. // match w/o suffix return
  179. if( d>0 && (fl || n==1 || cmIsFlag(p->attrFlags,kRealFloatLexFl)) )
  180. return i;
  181. }
  182. return 0; // no-match return
  183. }
  184. unsigned _cmLexIntMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  185. {
  186. unsigned i = 0;
  187. bool signFl = false;
  188. for(; i<cn; ++i)
  189. {
  190. if( i==0 && cp[i]=='-' )
  191. {
  192. signFl = true;
  193. continue;
  194. }
  195. if( !isdigit(cp[i]) )
  196. break;
  197. }
  198. // BUG BUG BUG
  199. // If an integer is specified using 'e' notiation
  200. // (see _cmLexRealMatcher()) and the number of exponent places
  201. // specified following the 'e' is positive and >= number of
  202. // digits following the decimal point (in effect zeros are
  203. // padded on the right side) then the value is an integer.
  204. //
  205. // The current implementation recognizes all numeric strings
  206. // containing a decimal point as reals.
  207. // if no integer was found
  208. if( (signFl && i==0) || i==0 )
  209. return 0;
  210. // check for suffix
  211. if(i<cn )
  212. {
  213. switch(cp[i])
  214. {
  215. case 'u':
  216. case 'U':
  217. if( signFl )
  218. _cmLexError(p,kSignErrorLexRC,"A signed integer has a 'u' or 'U' suffix.");
  219. else
  220. {
  221. p->attrFlags = cmSetFlag(p->attrFlags,kIntUnsignedLexFl);
  222. ++i;
  223. }
  224. break;
  225. default:
  226. break;
  227. }
  228. }
  229. return i;
  230. }
  231. unsigned _cmLexHexMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  232. {
  233. unsigned i = 0;
  234. if( cn < 3 )
  235. return 0;
  236. if( cp[0]=='0' && cp[1]=='x')
  237. for(i=2; i<cn; ++i)
  238. if( !isxdigit(cp[i]) )
  239. break;
  240. return i;
  241. }
  242. unsigned _cmLexIdentMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  243. {
  244. unsigned i = 0;
  245. if( isalpha(cp[0]) || (cp[0]== '_'))
  246. {
  247. i = 1;
  248. for(; i<cn; ++i)
  249. if( !isalnum(cp[i]) && (cp[i] != '_') )
  250. break;
  251. }
  252. return i;
  253. }
  254. unsigned _cmLexQStrMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  255. {
  256. cmChar_t qStr[]="\"";
  257. unsigned n = strlen(qStr);
  258. if( strncmp(qStr,cp,n) == 0 )
  259. {
  260. unsigned i;
  261. if((i = _cmLexScanTo(cp+n, cn-n, qStr)) == cmInvalidIdx )
  262. {
  263. _cmLexError( p, kMissingEndQuoteLexRC, "Missing string end quote.");
  264. return 0;
  265. }
  266. return n+i;
  267. }
  268. return 0;
  269. }
  270. unsigned _cmLexBlockCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  271. {
  272. unsigned n = strlen(p->blockBegCmtStr);
  273. if( strncmp( p->blockBegCmtStr, cp, n ) == 0 )
  274. {
  275. unsigned i;
  276. if((i = _cmLexScanTo(cp + n, cn-n,p->blockEndCmtStr)) == cmInvalidIdx )
  277. {
  278. _cmLexError(p, kMissingCmtEndLexRC, "Missing end of block comment.");
  279. return 0;
  280. }
  281. return n + i;
  282. }
  283. return 0;
  284. }
  285. unsigned _cmLexLineCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  286. {
  287. unsigned n = strlen(p->lineCmtStr);
  288. if( strncmp( p->lineCmtStr, cp, n ) == 0)
  289. {
  290. unsigned i;
  291. const char newlineStr[] = "\n";
  292. if((i = _cmLexScanTo(cp + n, cn-n, newlineStr)) == cmInvalidIdx )
  293. {
  294. // no EOL was found so the comment must be on the last line of the source
  295. return cn;
  296. }
  297. return n + i;
  298. }
  299. return 0;
  300. }
  301. cmRC_t _cmLexInstallMatcher( cmLex* p, unsigned typeId, cmLexMatcherFuncPtr_t funcPtr, const cmChar_t* keyStr, cmLexUserMatcherPtr_t userPtr )
  302. {
  303. assert( funcPtr==NULL || userPtr==NULL );
  304. assert( !(funcPtr==NULL && userPtr==NULL));
  305. // if there is no space in the user token array - then expand it
  306. if( p->mfi == p->mfn )
  307. {
  308. int incr_cnt = 10;
  309. cmLexMatcher* np = cmMemAllocZ( cmLexMatcher, p->mfn + incr_cnt );
  310. memcpy(np,p->mfp,p->mfi*sizeof(cmLexMatcher));
  311. cmMemPtrFree(&p->mfp);
  312. p->mfp = np;
  313. p->mfn += incr_cnt;
  314. }
  315. p->mfp[p->mfi].tokenStr = NULL;
  316. p->mfp[p->mfi].typeId = typeId;
  317. p->mfp[p->mfi].funcPtr = funcPtr;
  318. p->mfp[p->mfi].userPtr = userPtr;
  319. p->mfp[p->mfi].enableFl = true;
  320. if( keyStr != NULL )
  321. {
  322. // allocate space for the token string and store it
  323. p->mfp[p->mfi].tokenStr = cmMemAlloc( cmChar_t, sizeof(cmChar_t) * (strlen(keyStr)+1) );
  324. strcpy(p->mfp[p->mfi].tokenStr, keyStr );
  325. }
  326. p->mfi++;
  327. return kOkLexRC;
  328. }
  329. cmRC_t _cmLexReset( cmLex* p )
  330. {
  331. p->ci = 0;
  332. p->curTokenId = kErrorLexTId;
  333. p->curTokenCharIdx = cmInvalidIdx;
  334. p->curTokenCharCnt = 0;
  335. p->curLine = 0;
  336. p->curCol = 0;
  337. p->nextLine = 0;
  338. p->nextCol = 0;
  339. cmErrClearRC(&p->err);
  340. return kOkLexRC;
  341. }
  342. cmRC_t _cmLexSetTextBuffer( cmLex* p, const cmChar_t* cp, unsigned cn )
  343. {
  344. p->cp = cp;
  345. p->cn = cn;
  346. return _cmLexReset(p);
  347. }
  348. cmLexH cmLexInit( const cmChar_t* cp, unsigned cn, unsigned flags, cmRpt_t* rpt )
  349. {
  350. cmLexH h;
  351. cmChar_t dfltLineCmt[] = "//";
  352. cmChar_t dfltBlockBegCmt[] = "/*";
  353. cmChar_t dfltBlockEndCmt[] = "*/";
  354. cmLex* p = cmMemAllocZ( cmLex, 1 );
  355. cmErrSetup(&p->err,rpt,"Lexer");
  356. p->flags = flags;
  357. _cmLexSetTextBuffer( p, cp, cn );
  358. int init_mfn = 10;
  359. p->mfp = cmMemAllocZ( cmLexMatcher, init_mfn );
  360. p->mfn = init_mfn;
  361. p->mfi = 0;
  362. p->lineCmtStr = cmMemAlloc( cmChar_t, strlen(dfltLineCmt)+1 );
  363. strcpy( p->lineCmtStr, dfltLineCmt );
  364. p->blockBegCmtStr = cmMemAlloc( cmChar_t, strlen(dfltBlockBegCmt)+1 );
  365. strcpy( p->blockBegCmtStr, dfltBlockBegCmt );
  366. p->blockEndCmtStr = cmMemAlloc( cmChar_t, strlen(dfltBlockEndCmt)+1 );
  367. strcpy( p->blockEndCmtStr, dfltBlockEndCmt );
  368. _cmLexInstallMatcher( p, kSpaceLexTId, _cmLexSpaceMatcher, NULL, NULL );
  369. _cmLexInstallMatcher( p, kRealLexTId, _cmLexRealMatcher, NULL, NULL );
  370. _cmLexInstallMatcher( p, kIntLexTId, _cmLexIntMatcher, NULL, NULL );
  371. _cmLexInstallMatcher( p, kHexLexTId, _cmLexHexMatcher, NULL, NULL );
  372. _cmLexInstallMatcher( p, kIdentLexTId, _cmLexIdentMatcher, NULL, NULL );
  373. _cmLexInstallMatcher( p, kQStrLexTId, _cmLexQStrMatcher, NULL, NULL );
  374. _cmLexInstallMatcher( p, kBlockCmtLexTId, _cmLexBlockCmtMatcher, NULL, NULL );
  375. _cmLexInstallMatcher( p, kLineCmtLexTId, _cmLexLineCmtMatcher, NULL, NULL );
  376. h.h = p;
  377. _cmLexReset(p);
  378. return h;
  379. }
  380. cmRC_t cmLexFinal( cmLexH* hp )
  381. {
  382. if( hp == NULL || cmLexIsValid(*hp)==false )
  383. return cmOkRC;
  384. cmLex* p = _cmLexHandleToPtr(*hp);
  385. if( p != NULL )
  386. {
  387. if( p->mfp != NULL )
  388. {
  389. unsigned i = 0;
  390. // free the user token strings
  391. for(; i<p->mfi; ++i)
  392. if( p->mfp[i].tokenStr != NULL )
  393. cmMemPtrFree(&p->mfp[i].tokenStr);
  394. // free the matcher array
  395. cmMemPtrFree(&p->mfp);
  396. p->mfi = 0;
  397. p->mfn = 0;
  398. }
  399. cmMemPtrFree(&p->lineCmtStr);
  400. cmMemPtrFree(&p->blockBegCmtStr);
  401. cmMemPtrFree(&p->blockEndCmtStr);
  402. cmMemPtrFree(&p->textBuf);
  403. // free the lexer object
  404. cmMemPtrFree(&p);
  405. hp->h = NULL;
  406. }
  407. return kOkLexRC;
  408. }
  409. cmRC_t cmLexReset( cmLexH h )
  410. {
  411. cmLex* p = _cmLexHandleToPtr(h);
  412. return _cmLexReset(p);
  413. }
  414. bool cmLexIsValid( cmLexH h )
  415. { return h.h != NULL; }
  416. cmRC_t cmLexSetTextBuffer( cmLexH h, const cmChar_t* cp, unsigned cn )
  417. {
  418. cmLex* p = _cmLexHandleToPtr(h);
  419. return _cmLexSetTextBuffer(p,cp,cn);
  420. }
  421. cmRC_t cmLexSetFile( cmLexH h, const cmChar_t* fn )
  422. {
  423. cmRC_t rc = kOkLexRC;
  424. cmFileH_t fh = cmFileNullHandle;
  425. cmLex* p = _cmLexHandleToPtr(h);
  426. long n = 0;
  427. assert( fn != NULL && p != NULL );
  428. // open the file
  429. if( cmFileOpen(&fh,fn,kReadFileFl,p->err.rpt) != kOkFileRC )
  430. return kFileOpenErrLexRC;
  431. // seek to the end of the file
  432. if( cmFileSeek(fh,kEndFileFl,0) != kOkFileRC )
  433. return kFileSeekErrLexRC;
  434. // get the length of the file
  435. if( cmFileTell(fh,&n) != kOkFileRC )
  436. return kFileTellErrLexRC;
  437. // rewind to the beginning of the file
  438. if( cmFileSeek(fh,kBeginFileFl,0) != kOkFileRC )
  439. return kFileSeekErrLexRC;
  440. // allocate the text buffer
  441. if((p->textBuf = cmMemResizeZ( char, p->textBuf, n+1)) == NULL )
  442. {
  443. rc = _cmLexError(p,kMemAllocErrLexRC,"Unable to allocate the text file buffer for:'%s'.",fn);
  444. goto errLabel;
  445. }
  446. // read the file into the buffer
  447. if( cmFileRead(fh,p->textBuf,n) != kOkFileRC )
  448. return kFileReadErrLexRC;
  449. if((rc = _cmLexSetTextBuffer( p, p->textBuf, n )) != kOkLexRC )
  450. goto errLabel;
  451. errLabel:
  452. // close the file
  453. if( cmFileClose(&fh) != kOkFileRC )
  454. return kFileCloseErrLexRC;
  455. return rc;
  456. }
  457. cmLexMatcher* _cmLexFindUserToken( cmLex* p, unsigned id, const cmChar_t* tokenStr )
  458. {
  459. unsigned i = 0;
  460. for(; i<p->mfi; ++i)
  461. {
  462. if( id != cmInvalidId && p->mfp[i].typeId == id )
  463. return p->mfp + i;
  464. if( p->mfp[i].tokenStr != NULL && tokenStr != NULL && strcmp(p->mfp[i].tokenStr,tokenStr)==0 )
  465. return p->mfp + i;
  466. }
  467. return NULL;
  468. }
  469. cmRC_t cmLexRegisterToken( cmLexH h, unsigned id, const cmChar_t* tokenStr )
  470. {
  471. cmLex* p = _cmLexHandleToPtr(h);
  472. // prevent duplicate tokens
  473. if( _cmLexFindUserToken( p, id, tokenStr ) != NULL )
  474. return _cmLexError( p, kDuplicateTokenLexRC, "id:%i token:%s duplicates the token string or id", id, tokenStr );
  475. return _cmLexInstallMatcher( p, id, _cmLexExactStringMatcher, tokenStr, NULL );
  476. }
  477. cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t userPtr )
  478. {
  479. cmLex* p = _cmLexHandleToPtr(h);
  480. // prevent duplicate tokens
  481. if( _cmLexFindUserToken( p, id, NULL ) != NULL )
  482. return _cmLexError( p, kDuplicateTokenLexRC, "A token matching function has already been installed for token id: %i", id );
  483. return _cmLexInstallMatcher( p, id, NULL, NULL, userPtr );
  484. }
  485. cmRC_t cmLexEnableToken( cmLexH h, unsigned id, bool enableFl )
  486. {
  487. cmLex* p = _cmLexHandleToPtr(h);
  488. unsigned mi = 0;
  489. for(; mi<p->mfi; ++mi)
  490. if( p->mfp[mi].typeId == id )
  491. {
  492. p->mfp[mi].enableFl = enableFl;
  493. return cmOkRC;
  494. }
  495. return _cmLexError( p, kInvalidLexTIdLexRC, "%i is not a valid token type id.",id);
  496. }
  497. unsigned cmLexFilterFlags( cmLexH h )
  498. {
  499. cmLex* p = _cmLexHandleToPtr(h);
  500. return p->flags;
  501. }
  502. void cmLexSetFilterFlags( cmLexH h, unsigned flags )
  503. {
  504. cmLex* p = _cmLexHandleToPtr(h);
  505. p->flags = flags;
  506. }
  507. unsigned cmLexGetNextToken( cmLexH h )
  508. {
  509. cmLex* p = _cmLexHandleToPtr(h);
  510. if( cmErrLastRC(&p->err) != kOkLexRC )
  511. return kErrorLexTId;
  512. while( p->ci < p->cn )
  513. {
  514. unsigned i;
  515. unsigned mi = 0;
  516. unsigned maxCharCnt = 0;
  517. unsigned maxIdx = cmInvalidIdx;
  518. p->curTokenId = kErrorLexTId;
  519. p->curTokenCharIdx = cmInvalidIdx;
  520. p->curTokenCharCnt = 0;
  521. p->attrFlags = 0;
  522. // try each matcher
  523. for(; mi<p->mfi; ++mi)
  524. if( p->mfp[mi].enableFl )
  525. {
  526. unsigned charCnt = 0;
  527. if( p->mfp[mi].funcPtr != NULL )
  528. charCnt = p->mfp[mi].funcPtr(p, p->cp + p->ci, p->cn - p->ci, p->mfp[mi].tokenStr );
  529. else
  530. charCnt = p->mfp[mi].userPtr( p->cp + p->ci, p->cn - p->ci);
  531. // notice if the matcher set the error code
  532. if( cmErrLastRC(&p->err) != kOkLexRC )
  533. return kErrorLexTId;
  534. // if this matched token is longer then the prev. matched token or
  535. // if the prev matched token was an identifier and this matched token is an equal length user defined token
  536. if( (charCnt > maxCharCnt)
  537. || (charCnt>0 && charCnt==maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId )
  538. || (charCnt>0 && charCnt<maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId && cmIsFlag(p->flags,kUserDefPriorityLexFl))
  539. )
  540. {
  541. maxCharCnt = charCnt;
  542. maxIdx = mi;
  543. }
  544. }
  545. // no token was matched
  546. if( maxIdx == cmInvalidIdx )
  547. {
  548. if( cmIsFlag(p->flags,kReturnUnknownLexFl) )
  549. {
  550. maxCharCnt = 1;
  551. }
  552. else
  553. {
  554. _cmLexError( p, kNoMatchLexRC, "Unable to recognize token:'%c'.",*(p->cp+p->ci));
  555. return kErrorLexTId;
  556. }
  557. }
  558. // update the current line and column position
  559. p->curLine = p->nextLine;
  560. p->curCol = p->nextCol;
  561. // find the next column and line position
  562. for(i=0; i<maxCharCnt; ++i)
  563. {
  564. if( _cmLexIsNewline(p->cp[ p->ci + i ]) )
  565. {
  566. p->nextLine++;
  567. p->nextCol = 1;
  568. }
  569. else
  570. p->nextCol++;
  571. }
  572. bool returnFl = true;
  573. if( maxIdx != cmInvalidIdx )
  574. {
  575. // check the space token filter
  576. if( (p->mfp[ maxIdx ].typeId == kSpaceLexTId) && (cmIsFlag(p->flags,kReturnSpaceLexFl)==0) )
  577. returnFl = false;
  578. // check the comment token filter
  579. if( _cmLexIsCommentTypeId(p->mfp[ maxIdx ].typeId) && (cmIsFlag(p->flags,kReturnCommentsLexFl)==0) )
  580. returnFl = false;
  581. }
  582. // update the lexer state
  583. p->curTokenId = maxIdx==cmInvalidIdx ? kUnknownLexTId : p->mfp[ maxIdx ].typeId;
  584. p->curTokenCharIdx = p->ci;
  585. p->curTokenCharCnt = maxCharCnt;
  586. // advance the text buffer
  587. p->ci += maxCharCnt;
  588. if( returnFl )
  589. return p->curTokenId;
  590. }
  591. cmErrSetRC(&p->err,kEofRC);
  592. return kEofLexTId;
  593. }
  594. unsigned cmLexTokenId( cmLexH h )
  595. {
  596. cmLex* p = _cmLexHandleToPtr(h);
  597. return p->curTokenId;
  598. }
  599. const cmChar_t* cmLexTokenText( cmLexH h )
  600. {
  601. cmLex* p = _cmLexHandleToPtr(h);
  602. if( p->curTokenCharIdx == cmInvalidIdx )
  603. return NULL;
  604. unsigned n = p->curTokenId == kQStrLexTId ? 1 : 0;
  605. return p->cp + p->curTokenCharIdx + n;
  606. }
  607. unsigned cmLexTokenCharCount( cmLexH h )
  608. {
  609. cmLex* p = _cmLexHandleToPtr(h);
  610. if( p->curTokenCharIdx == cmInvalidIdx )
  611. return 0;
  612. unsigned n = p->curTokenId == kQStrLexTId ? 2 : 0;
  613. return p->curTokenCharCnt - n;
  614. }
  615. int cmLexTokenInt( cmLexH h )
  616. { return strtol( cmLexTokenText(h),NULL,0 ); }
  617. unsigned cmLexTokenUInt( cmLexH h )
  618. { return strtol( cmLexTokenText(h),NULL,0 ); }
  619. float cmLexTokenFloat( cmLexH h )
  620. { return strtof( cmLexTokenText(h),NULL ); }
  621. double cmLexTokenDouble( cmLexH h )
  622. { return strtod( cmLexTokenText(h),NULL ); }
  623. bool cmLexTokenIsUnsigned( cmLexH h )
  624. {
  625. cmLex* p = _cmLexHandleToPtr(h);
  626. return p->curTokenId == kIntLexTId && cmIsFlag(p->attrFlags,kIntUnsignedLexFl);
  627. }
  628. bool cmLexTokenIsSinglePrecision( cmLexH h )
  629. {
  630. cmLex* p = _cmLexHandleToPtr(h);
  631. return p->curTokenId == kRealLexTId && cmIsFlag(p->attrFlags,kRealFloatLexFl);
  632. }
  633. unsigned cmLexCurrentLineNumber( cmLexH h )
  634. {
  635. cmLex* p = _cmLexHandleToPtr(h);
  636. return p->curLine + 1;
  637. }
  638. unsigned cmLexCurrentColumnNumber( cmLexH h )
  639. {
  640. cmLex* p = _cmLexHandleToPtr(h);
  641. return p->curCol + 1;
  642. }
  643. unsigned cmLexErrorRC( cmLexH h )
  644. {
  645. cmLex* p = _cmLexHandleToPtr(h);
  646. return cmErrLastRC(&p->err);
  647. }
  648. const cmChar_t* cmLexIdToLabel( cmLexH h, unsigned typeId )
  649. {
  650. cmLex* p = _cmLexHandleToPtr(h);
  651. switch( typeId )
  652. {
  653. case kErrorLexTId: return "<error>";
  654. case kEofLexTId: return "<EOF>";
  655. case kSpaceLexTId: return "<space>";
  656. case kRealLexTId: return "<real>";
  657. case kIntLexTId: return "<int>";
  658. case kHexLexTId: return "<hex>";
  659. case kIdentLexTId: return "<ident>";
  660. case kQStrLexTId: return "<qstr>";
  661. case kBlockCmtLexTId: return "<bcmt>";
  662. case kLineCmtLexTId: return "<lcmt>";
  663. default:
  664. {
  665. cmLexMatcher* mp;
  666. if((mp = _cmLexFindUserToken(p,typeId,NULL)) == NULL )
  667. return "<unknown>";
  668. return mp->tokenStr;
  669. }
  670. }
  671. return "<invalid>";
  672. }
  673. const cmChar_t* cmLexRcToMsg( unsigned rc )
  674. {
  675. unsigned i=0;
  676. for(i=0; cmLexErrorArray[i].code != kInvalidLexRC; ++i)
  677. if( cmLexErrorArray[i].code == rc )
  678. break;
  679. return cmLexErrorArray[i].msg;
  680. }
  681. //{ { label:cmLexEx }
  682. //(
  683. // cmLexTest() gives a simple cmLex example.
  684. //)
  685. //(
  686. void cmLexTest( cmRpt_t* rpt)
  687. {
  688. cmChar_t buf[] =
  689. "123ident0\n 123.456\nident0\n"
  690. "0xa12+.2\n"
  691. "// comment \n"
  692. "/* block \n"
  693. "comment */"
  694. "\"quoted string\""
  695. "ident1"
  696. "// last line comment";
  697. // initialize a lexer with a buffer of text
  698. cmLexH h = cmLexInit(buf,strlen(buf),
  699. kReturnSpaceLexFl | kReturnCommentsLexFl,rpt);
  700. // verify that the lexer initialization succeded.
  701. if( cmLexIsValid(h) == false )
  702. {
  703. cmRptPrintf(rpt,"Lexer initialization failed.");
  704. return;
  705. }
  706. // register some additional recoginizers
  707. cmLexRegisterToken(h,kUserLexTId+1,"+");
  708. cmLexRegisterToken(h,kUserLexTId+2,"-");
  709. unsigned tid;
  710. // ask for token id's
  711. while( (tid = cmLexGetNextToken(h)) != kEofLexTId )
  712. {
  713. // print information about each token
  714. cmRptPrintf(rpt,"%i %i %s '%.*s' (%i) ",
  715. cmLexCurrentLineNumber(h),
  716. cmLexCurrentColumnNumber(h),
  717. cmLexIdToLabel(h,tid),
  718. cmLexTokenCharCount(h),
  719. cmLexTokenText(h) ,
  720. cmLexTokenCharCount(h));
  721. // if the token is a number ...
  722. if( tid==kIntLexTId || tid==kRealLexTId || tid==kHexLexTId )
  723. {
  724. // ... then request the numbers value
  725. int iv = cmLexTokenInt(h);
  726. double dv = cmLexTokenDouble(h);
  727. cmRptPrintf(rpt,"%i %f",iv,dv);
  728. }
  729. cmRptPrintf(rpt,"\n");
  730. // handle errors
  731. if( tid == kErrorLexTId )
  732. {
  733. cmRptPrintf(rpt,"Error:%i\n", cmLexErrorRC(h));
  734. break;
  735. }
  736. }
  737. // finalize the lexer
  738. cmLexFinal(&h);
  739. }
  740. //)
  741. //}