libcm is a C development framework with an emphasis on audio signal processing applications.
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983
  1. //| Copyright: (C) 2009-2020 Kevin Larke <contact AT larke DOT org>
  2. //| License: GNU GPL version 3.0 or above. See the accompanying LICENSE file.
  3. #include "cmPrefix.h"
  4. #include "cmGlobal.h"
  5. #include "cmRpt.h"
  6. #include "cmLex.h"
  7. #include "cmErr.h"
  8. #include "cmMem.h"
  9. #include "cmMallocDebug.h"
  10. #include "cmFile.h"
  // Attribute bits stored in cmLex.attrFlags by the numeric matchers.
  enum
  {
    kRealFloatLexFl   = 0x01,  // set by _cmLexRealMatcher() when a real has an 'f'/'F' suffix
    kIntUnsignedLexFl = 0x02   // set by _cmLexIntMatcher() when an int has a 'u'/'U' suffix
  };
  16. typedef struct
  17. {
  18. unsigned code;
  19. const cmChar_t* msg;
  20. } cmLexErrorRecd;
  21. cmLexErrorRecd cmLexErrorArray[] =
  22. {
  23. { kOkLexRC, "No error. The operation completed successfully."},
  24. { kDuplicateTokenLexRC, "The text or id passed as a user token is already in use by another token."},
  25. { kMissingCmtEndLexRC, "The end of a block comment could not be found."},
  26. { kMissingEndQuoteLexRC, "The end of a quoted string could not be found."},
  27. { kNoMatchLexRC, "The lexer encountered a string which could not be classified."},
  28. { kFileOpenErrLexRC, "File open failed on cmLexSetFile()"},
  29. { kFileSeekErrLexRC, "File seek failed on cmLexSetFile()"},
  30. { kFileTellErrLexRC, "File tell failed on cmLexSetFile()"},
  31. { kFileReadErrLexRC, "File read failed on cmLexSetFile()"},
  32. { kFileCloseErrLexRC, "File close failed on cmLexSetFile()"},
  33. { kMemAllocErrLexRC, "An attempted memory allocation failed"},
  34. { kEofRC, "The end of the input text was encountered (this is a normal condition not an error)"},
  35. { kInvalidLexTIdLexRC, "An invalid token id was encountered."},
  36. { kSignErrorLexRC, "A signed integer has a 'u' or 'U' suffix."},
  37. { kInvalidLexRC, "Unknown lexer error code." }
  38. };
  39. struct cmLex_str;
  40. typedef unsigned (*cmLexMatcherFuncPtr_t)( struct cmLex_str* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr );
  41. // token match function record
  42. typedef struct
  43. {
  44. unsigned typeId; // token type this matcher recognizes
  45. cmLexMatcherFuncPtr_t funcPtr; // recognizer function (only used if userPtr==NULL)
  46. cmChar_t* tokenStr; // fixed string data used by the recognizer (only used if userPtr==NULL)
  47. cmLexUserMatcherPtr_t userPtr; // user defined recognizer function (only used if funcPtr==NULL)
  48. bool enableFl; // true if this matcher is enabled
  49. } cmLexMatcher;
  50. typedef struct cmLex_str
  51. {
  52. cmErr_t err;
  53. const cmChar_t* cp; // character buffer
  54. unsigned cn; // count of characters in buffer
  55. unsigned ci; // current buffer index position
  56. unsigned flags; // lexer control flags
  57. unsigned curTokenId; // type id of the current token
  58. unsigned curTokenCharIdx; // index into cp[] of the current token
  59. unsigned curTokenCharCnt; // count of characters in the current token
  60. unsigned curLine; // line number of the current token
  61. unsigned curCol; // column number of the current token
  62. unsigned nextLine;
  63. unsigned nextCol;
  64. cmChar_t* blockBegCmtStr;
  65. cmChar_t* blockEndCmtStr;
  66. cmChar_t* lineCmtStr;
  67. cmLexMatcher* mfp; // base of matcher array
  68. unsigned mfi; // next available matcher array slot
  69. unsigned mfn; // count of elementes in mfp[]
  70. cmChar_t* textBuf; // text buf used by cmLexSetFile()
  71. unsigned attrFlags; // used to store the int and real suffix type flags
  72. } cmLex;
  73. cmLexH cmLexNullH = { NULL };
  74. bool _cmLexIsNewline( cmChar_t c )
  75. { return c == '\n'; }
  76. bool _cmLexIsCommentTypeId( unsigned typeId )
  77. { return typeId == kBlockCmtLexTId || typeId == kLineCmtLexTId; }
  78. cmLex* _cmLexHandleToPtr( cmLexH h )
  79. {
  80. cmLex* p = (cmLex*)h.h;
  81. assert(p != NULL);
  82. return p;
  83. };
  84. cmRC_t _cmLexError( cmLex* p, unsigned rc, const char* fmt, ... )
  85. {
  86. va_list vl;
  87. va_start(vl,fmt);
  88. unsigned bufCharCnt = 512;
  89. char buf[ bufCharCnt+1 ];
  90. snprintf(buf,bufCharCnt,"Error on line:%i ", p->curLine);
  91. unsigned sn = strlen(buf);
  92. vsnprintf(buf+sn,bufCharCnt-sn,fmt,vl);
  93. buf[bufCharCnt]=0;
  94. cmErrMsg(&p->err,rc,"%s",buf);
  95. va_end(vl);
  96. return rc;
  97. }
  98. // Locate 'keyStr' in cp[cn] and return the index into cp[cn] of the character
  99. // following the last char in 'keyStr'. If keyStr is not found return cmInvalidIdx.
  100. unsigned _cmLexScanTo( const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  101. {
  102. unsigned i = 0;
  103. unsigned n = strlen(keyStr);
  104. if( n <= cn )
  105. for(; i<=cn-n; ++i)
  106. if( strncmp(cp + i, keyStr, n ) == 0 )
  107. return i+n;
  108. return cmInvalidIdx;
  109. }
  110. unsigned _cmLexExactStringMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  111. {
  112. unsigned n = strlen(keyStr);
  113. return strncmp(keyStr,cp,n) == 0 ? n : 0;
  114. }
  115. unsigned _cmLexSpaceMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  116. {
  117. unsigned i=0;
  118. for(; i<cn; ++i)
  119. if( !isspace(cp[i]) )
  120. break;
  121. return i;
  122. }
  123. unsigned _cmLexRealMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  124. {
  125. unsigned i = 0;
  126. unsigned n = 0; // decimal point counter
  127. unsigned d = 0; // digit counter
  128. bool fl = false; // expo flag
  129. for(; i<cn && n<=1; ++i)
  130. {
  131. if( i==0 && cp[i]=='-' ) // allow a leading '-'
  132. continue;
  133. if( isdigit(cp[i]) ) // allow digits
  134. {
  135. ++d;
  136. continue;
  137. }
  138. if( cp[i] == '.' && n==0 ) // allow exactly one decimal point
  139. ++n;
  140. else
  141. break;
  142. }
  143. // if there was at least one digit and the next char is an 'e'
  144. if( d>0 && i<cn && (cp[i] == 'e' || cp[i] == 'E') )
  145. {
  146. unsigned e=0;
  147. ++i;
  148. unsigned j = i;
  149. fl = false;
  150. for(; i<cn; ++i)
  151. {
  152. if( i==j && cp[i]=='-' ) // allow the char following the e to be '-'
  153. continue;
  154. if( isdigit(cp[i]) )
  155. {
  156. ++e;
  157. ++d;
  158. continue;
  159. }
  160. // stop at the first non-digit
  161. break;
  162. }
  163. // an exp exists if digits follwed the 'e'
  164. fl = e > 0;
  165. }
  166. // if at least one digit was found
  167. if( d>0 )
  168. {
  169. // Note that this path allows a string w/o a decimal pt to trigger a match.
  170. if(i<cn)
  171. {
  172. // if the real has a suffix
  173. switch(cp[i])
  174. {
  175. case 'F':
  176. case 'f':
  177. p->attrFlags = cmSetFlag(p->attrFlags,kRealFloatLexFl);
  178. ++i;
  179. break;
  180. }
  181. }
  182. // match w/o suffix return
  183. if( d>0 && (fl || n==1 || cmIsFlag(p->attrFlags,kRealFloatLexFl)) )
  184. return i;
  185. }
  186. return 0; // no-match return
  187. }
  188. unsigned _cmLexIntMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  189. {
  190. unsigned i = 0;
  191. bool signFl = false;
  192. unsigned digitCnt = 0;
  193. for(; i<cn; ++i)
  194. {
  195. if( i==0 && cp[i]=='-' )
  196. {
  197. signFl = true;
  198. continue;
  199. }
  200. if( !isdigit(cp[i]) )
  201. break;
  202. ++digitCnt;
  203. }
  204. // BUG BUG BUG
  205. // If an integer is specified using 'e' notiation
  206. // (see _cmLexRealMatcher()) and the number of exponent places
  207. // specified following the 'e' is positive and >= number of
  208. // digits following the decimal point (in effect zeros are
  209. // padded on the right side) then the value is an integer.
  210. //
  211. // The current implementation recognizes all numeric strings
  212. // containing a decimal point as reals.
  213. // if no integer was found
  214. if( digitCnt==0)
  215. return 0;
  216. // check for suffix
  217. if(i<cn )
  218. {
  219. switch(cp[i])
  220. {
  221. case 'u':
  222. case 'U':
  223. if( signFl )
  224. _cmLexError(p,kSignErrorLexRC,"A signed integer has a 'u' or 'U' suffix.");
  225. else
  226. {
  227. p->attrFlags = cmSetFlag(p->attrFlags,kIntUnsignedLexFl);
  228. ++i;
  229. }
  230. break;
  231. default:
  232. break;
  233. }
  234. }
  235. return i;
  236. }
  237. unsigned _cmLexHexMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  238. {
  239. unsigned i = 0;
  240. if( cn < 3 )
  241. return 0;
  242. if( cp[0]=='0' && cp[1]=='x')
  243. for(i=2; i<cn; ++i)
  244. if( !isxdigit(cp[i]) )
  245. break;
  246. return i;
  247. }
  248. unsigned _cmLexIdentMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  249. {
  250. unsigned i = 0;
  251. if( isalpha(cp[0]) || (cp[0]== '_'))
  252. {
  253. i = 1;
  254. for(; i<cn; ++i)
  255. if( !isalnum(cp[i]) && (cp[i] != '_') )
  256. break;
  257. }
  258. return i;
  259. }
  260. unsigned _cmLexQStrMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  261. {
  262. bool escFl = false;
  263. unsigned i = 0;
  264. if( cp[i] != '"' )
  265. return 0;
  266. for(i=1; i<cn; ++i)
  267. {
  268. if( escFl )
  269. {
  270. escFl = false;
  271. continue;
  272. }
  273. if( cp[i] == '\\' )
  274. {
  275. escFl = true;
  276. continue;
  277. }
  278. if( cp[i] == '"' )
  279. return i+1;
  280. }
  281. return _cmLexError(p, kMissingEndQuoteLexRC, "Missing string literal end quote.");
  282. }
  283. unsigned _cmLexQCharMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  284. {
  285. unsigned i = 0;
  286. if( i >= cn || cp[i]!='\'' )
  287. return 0;
  288. i+=2;
  289. if( i >= cn || cp[i]!='\'')
  290. return 0;
  291. return 3;
  292. }
  293. unsigned _cmLexBlockCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  294. {
  295. unsigned n = strlen(p->blockBegCmtStr);
  296. if( strncmp( p->blockBegCmtStr, cp, n ) == 0 )
  297. {
  298. unsigned i;
  299. if((i = _cmLexScanTo(cp + n, cn-n,p->blockEndCmtStr)) == cmInvalidIdx )
  300. {
  301. _cmLexError(p, kMissingCmtEndLexRC, "Missing end of block comment.");
  302. return 0;
  303. }
  304. return n + i;
  305. }
  306. return 0;
  307. }
  308. unsigned _cmLexLineCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
  309. {
  310. unsigned n = strlen(p->lineCmtStr);
  311. if( strncmp( p->lineCmtStr, cp, n ) == 0)
  312. {
  313. unsigned i;
  314. const char newlineStr[] = "\n";
  315. if((i = _cmLexScanTo(cp + n, cn-n, newlineStr)) == cmInvalidIdx )
  316. {
  317. // no EOL was found so the comment must be on the last line of the source
  318. return cn;
  319. }
  320. return n + i;
  321. }
  322. return 0;
  323. }
  324. cmRC_t _cmLexInstallMatcher( cmLex* p, unsigned typeId, cmLexMatcherFuncPtr_t funcPtr, const cmChar_t* keyStr, cmLexUserMatcherPtr_t userPtr )
  325. {
  326. assert( funcPtr==NULL || userPtr==NULL );
  327. assert( !(funcPtr==NULL && userPtr==NULL));
  328. // if there is no space in the user token array - then expand it
  329. if( p->mfi == p->mfn )
  330. {
  331. int incr_cnt = 10;
  332. cmLexMatcher* np = cmMemAllocZ( cmLexMatcher, p->mfn + incr_cnt );
  333. memcpy(np,p->mfp,p->mfi*sizeof(cmLexMatcher));
  334. cmMemPtrFree(&p->mfp);
  335. p->mfp = np;
  336. p->mfn += incr_cnt;
  337. }
  338. p->mfp[p->mfi].tokenStr = NULL;
  339. p->mfp[p->mfi].typeId = typeId;
  340. p->mfp[p->mfi].funcPtr = funcPtr;
  341. p->mfp[p->mfi].userPtr = userPtr;
  342. p->mfp[p->mfi].enableFl = true;
  343. if( keyStr != NULL )
  344. {
  345. // allocate space for the token string and store it
  346. p->mfp[p->mfi].tokenStr = cmMemAlloc( cmChar_t, sizeof(cmChar_t) * (strlen(keyStr)+1) );
  347. strcpy(p->mfp[p->mfi].tokenStr, keyStr );
  348. }
  349. p->mfi++;
  350. return kOkLexRC;
  351. }
  352. cmRC_t _cmLexReset( cmLex* p )
  353. {
  354. p->ci = 0;
  355. p->curTokenId = kErrorLexTId;
  356. p->curTokenCharIdx = cmInvalidIdx;
  357. p->curTokenCharCnt = 0;
  358. p->curLine = 0;
  359. p->curCol = 0;
  360. p->nextLine = 0;
  361. p->nextCol = 0;
  362. cmErrClearRC(&p->err);
  363. return kOkLexRC;
  364. }
  365. cmRC_t _cmLexSetTextBuffer( cmLex* p, const cmChar_t* cp, unsigned cn )
  366. {
  367. p->cp = cp;
  368. p->cn = cn;
  369. return _cmLexReset(p);
  370. }
  371. cmLexH cmLexInit( const cmChar_t* cp, unsigned cn, unsigned flags, cmRpt_t* rpt )
  372. {
  373. cmLexH h;
  374. cmChar_t dfltLineCmt[] = "//";
  375. cmChar_t dfltBlockBegCmt[] = "/*";
  376. cmChar_t dfltBlockEndCmt[] = "*/";
  377. cmLex* p = cmMemAllocZ( cmLex, 1 );
  378. cmErrSetup(&p->err,rpt,"Lexer");
  379. p->flags = flags;
  380. _cmLexSetTextBuffer( p, cp, cn );
  381. int init_mfn = 10;
  382. p->mfp = cmMemAllocZ( cmLexMatcher, init_mfn );
  383. p->mfn = init_mfn;
  384. p->mfi = 0;
  385. p->lineCmtStr = cmMemAlloc( cmChar_t, strlen(dfltLineCmt)+1 );
  386. strcpy( p->lineCmtStr, dfltLineCmt );
  387. p->blockBegCmtStr = cmMemAlloc( cmChar_t, strlen(dfltBlockBegCmt)+1 );
  388. strcpy( p->blockBegCmtStr, dfltBlockBegCmt );
  389. p->blockEndCmtStr = cmMemAlloc( cmChar_t, strlen(dfltBlockEndCmt)+1 );
  390. strcpy( p->blockEndCmtStr, dfltBlockEndCmt );
  391. _cmLexInstallMatcher( p, kSpaceLexTId, _cmLexSpaceMatcher, NULL, NULL );
  392. _cmLexInstallMatcher( p, kRealLexTId, _cmLexRealMatcher, NULL, NULL );
  393. _cmLexInstallMatcher( p, kIntLexTId, _cmLexIntMatcher, NULL, NULL );
  394. _cmLexInstallMatcher( p, kHexLexTId, _cmLexHexMatcher, NULL, NULL );
  395. _cmLexInstallMatcher( p, kIdentLexTId, _cmLexIdentMatcher, NULL, NULL );
  396. _cmLexInstallMatcher( p, kQStrLexTId, _cmLexQStrMatcher, NULL, NULL );
  397. _cmLexInstallMatcher( p, kBlockCmtLexTId, _cmLexBlockCmtMatcher, NULL, NULL );
  398. _cmLexInstallMatcher( p, kLineCmtLexTId, _cmLexLineCmtMatcher, NULL, NULL );
  399. if( cmIsFlag(flags,kReturnQCharLexFl) )
  400. _cmLexInstallMatcher( p, kQCharLexTId, _cmLexQCharMatcher, NULL, NULL );
  401. h.h = p;
  402. _cmLexReset(p);
  403. return h;
  404. }
  405. cmRC_t cmLexFinal( cmLexH* hp )
  406. {
  407. if( hp == NULL || cmLexIsValid(*hp)==false )
  408. return cmOkRC;
  409. cmLex* p = _cmLexHandleToPtr(*hp);
  410. if( p != NULL )
  411. {
  412. if( p->mfp != NULL )
  413. {
  414. unsigned i = 0;
  415. // free the user token strings
  416. for(; i<p->mfi; ++i)
  417. if( p->mfp[i].tokenStr != NULL )
  418. cmMemPtrFree(&p->mfp[i].tokenStr);
  419. // free the matcher array
  420. cmMemPtrFree(&p->mfp);
  421. p->mfi = 0;
  422. p->mfn = 0;
  423. }
  424. cmMemPtrFree(&p->lineCmtStr);
  425. cmMemPtrFree(&p->blockBegCmtStr);
  426. cmMemPtrFree(&p->blockEndCmtStr);
  427. cmMemPtrFree(&p->textBuf);
  428. // free the lexer object
  429. cmMemPtrFree(&p);
  430. hp->h = NULL;
  431. }
  432. return kOkLexRC;
  433. }
  434. cmRC_t cmLexReset( cmLexH h )
  435. {
  436. cmLex* p = _cmLexHandleToPtr(h);
  437. return _cmLexReset(p);
  438. }
  439. bool cmLexIsValid( cmLexH h )
  440. { return h.h != NULL; }
  441. cmRC_t cmLexSetTextBuffer( cmLexH h, const cmChar_t* cp, unsigned cn )
  442. {
  443. cmLex* p = _cmLexHandleToPtr(h);
  444. return _cmLexSetTextBuffer(p,cp,cn);
  445. }
  446. cmRC_t cmLexSetFile( cmLexH h, const cmChar_t* fn )
  447. {
  448. cmRC_t rc = kOkLexRC;
  449. cmFileH_t fh = cmFileNullHandle;
  450. cmLex* p = _cmLexHandleToPtr(h);
  451. long n = 0;
  452. assert( fn != NULL && p != NULL );
  453. // open the file
  454. if( cmFileOpen(&fh,fn,kReadFileFl,p->err.rpt) != kOkFileRC )
  455. return kFileOpenErrLexRC;
  456. // seek to the end of the file
  457. if( cmFileSeek(fh,kEndFileFl,0) != kOkFileRC )
  458. return kFileSeekErrLexRC;
  459. // get the length of the file
  460. if( cmFileTell(fh,&n) != kOkFileRC )
  461. return kFileTellErrLexRC;
  462. // rewind to the beginning of the file
  463. if( cmFileSeek(fh,kBeginFileFl,0) != kOkFileRC )
  464. return kFileSeekErrLexRC;
  465. // allocate the text buffer
  466. if((p->textBuf = cmMemResizeZ( char, p->textBuf, n+1)) == NULL )
  467. {
  468. rc = _cmLexError(p,kMemAllocErrLexRC,"Unable to allocate the text file buffer for:'%s'.",fn);
  469. goto errLabel;
  470. }
  471. // read the file into the buffer
  472. if( cmFileRead(fh,p->textBuf,n) != kOkFileRC )
  473. return kFileReadErrLexRC;
  474. if((rc = _cmLexSetTextBuffer( p, p->textBuf, n )) != kOkLexRC )
  475. goto errLabel;
  476. errLabel:
  477. // close the file
  478. if( cmFileClose(&fh) != kOkFileRC )
  479. return kFileCloseErrLexRC;
  480. return rc;
  481. }
  482. cmLexMatcher* _cmLexFindUserToken( cmLex* p, unsigned id, const cmChar_t* tokenStr )
  483. {
  484. unsigned i = 0;
  485. for(; i<p->mfi; ++i)
  486. {
  487. if( id != cmInvalidId && p->mfp[i].typeId == id )
  488. return p->mfp + i;
  489. if( p->mfp[i].tokenStr != NULL && tokenStr != NULL && strcmp(p->mfp[i].tokenStr,tokenStr)==0 )
  490. return p->mfp + i;
  491. }
  492. return NULL;
  493. }
  494. cmRC_t cmLexRegisterToken( cmLexH h, unsigned id, const cmChar_t* tokenStr )
  495. {
  496. cmLex* p = _cmLexHandleToPtr(h);
  497. // prevent duplicate tokens
  498. if( _cmLexFindUserToken( p, id, tokenStr ) != NULL )
  499. return _cmLexError( p, kDuplicateTokenLexRC, "id:%i token:%s duplicates the token string or id", id, tokenStr );
  500. return _cmLexInstallMatcher( p, id, _cmLexExactStringMatcher, tokenStr, NULL );
  501. }
  502. cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t userPtr )
  503. {
  504. cmLex* p = _cmLexHandleToPtr(h);
  505. // prevent duplicate tokens
  506. if( _cmLexFindUserToken( p, id, NULL ) != NULL )
  507. return _cmLexError( p, kDuplicateTokenLexRC, "A token matching function has already been installed for token id: %i", id );
  508. return _cmLexInstallMatcher( p, id, NULL, NULL, userPtr );
  509. }
  510. cmRC_t cmLexEnableToken( cmLexH h, unsigned id, bool enableFl )
  511. {
  512. cmLex* p = _cmLexHandleToPtr(h);
  513. unsigned mi = 0;
  514. for(; mi<p->mfi; ++mi)
  515. if( p->mfp[mi].typeId == id )
  516. {
  517. p->mfp[mi].enableFl = enableFl;
  518. return cmOkRC;
  519. }
  520. return _cmLexError( p, kInvalidLexTIdLexRC, "%i is not a valid token type id.",id);
  521. }
  522. unsigned cmLexFilterFlags( cmLexH h )
  523. {
  524. cmLex* p = _cmLexHandleToPtr(h);
  525. return p->flags;
  526. }
  527. void cmLexSetFilterFlags( cmLexH h, unsigned flags )
  528. {
  529. cmLex* p = _cmLexHandleToPtr(h);
  530. p->flags = flags;
  531. }
  532. unsigned cmLexGetNextToken( cmLexH h )
  533. {
  534. cmLex* p = _cmLexHandleToPtr(h);
  535. if( cmErrLastRC(&p->err) != kOkLexRC )
  536. return kErrorLexTId;
  537. while( p->ci < p->cn )
  538. {
  539. unsigned i;
  540. unsigned mi = 0;
  541. unsigned maxCharCnt = 0;
  542. unsigned maxIdx = cmInvalidIdx;
  543. p->curTokenId = kErrorLexTId;
  544. p->curTokenCharIdx = cmInvalidIdx;
  545. p->curTokenCharCnt = 0;
  546. p->attrFlags = 0;
  547. // try each matcher
  548. for(; mi<p->mfi; ++mi)
  549. if( p->mfp[mi].enableFl )
  550. {
  551. unsigned charCnt = 0;
  552. if( p->mfp[mi].funcPtr != NULL )
  553. charCnt = p->mfp[mi].funcPtr(p, p->cp + p->ci, p->cn - p->ci, p->mfp[mi].tokenStr );
  554. else
  555. charCnt = p->mfp[mi].userPtr( p->cp + p->ci, p->cn - p->ci);
  556. // notice if the matcher set the error code
  557. if( cmErrLastRC(&p->err) != kOkLexRC )
  558. return kErrorLexTId;
  559. // if this matched token is longer then the prev. matched token or
  560. // if the prev matched token was an identifier and this matched token is an equal length user defined token
  561. if( (charCnt > maxCharCnt)
  562. || (charCnt>0 && charCnt==maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId )
  563. || (charCnt>0 && charCnt<maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId && cmIsFlag(p->flags,kUserDefPriorityLexFl))
  564. )
  565. {
  566. maxCharCnt = charCnt;
  567. maxIdx = mi;
  568. }
  569. }
  570. // no token was matched
  571. if( maxIdx == cmInvalidIdx )
  572. {
  573. if( cmIsFlag(p->flags,kReturnUnknownLexFl) )
  574. {
  575. maxCharCnt = 1;
  576. }
  577. else
  578. {
  579. _cmLexError( p, kNoMatchLexRC, "Unable to recognize token:'%c'.",*(p->cp+p->ci));
  580. return kErrorLexTId;
  581. }
  582. }
  583. // update the current line and column position
  584. p->curLine = p->nextLine;
  585. p->curCol = p->nextCol;
  586. // find the next column and line position
  587. for(i=0; i<maxCharCnt; ++i)
  588. {
  589. if( _cmLexIsNewline(p->cp[ p->ci + i ]) )
  590. {
  591. p->nextLine++;
  592. p->nextCol = 1;
  593. }
  594. else
  595. p->nextCol++;
  596. }
  597. bool returnFl = true;
  598. if( maxIdx != cmInvalidIdx )
  599. {
  600. // check the space token filter
  601. if( (p->mfp[ maxIdx ].typeId == kSpaceLexTId) && (cmIsFlag(p->flags,kReturnSpaceLexFl)==0) )
  602. returnFl = false;
  603. // check the comment token filter
  604. if( _cmLexIsCommentTypeId(p->mfp[ maxIdx ].typeId) && (cmIsFlag(p->flags,kReturnCommentsLexFl)==0) )
  605. returnFl = false;
  606. }
  607. // update the lexer state
  608. p->curTokenId = maxIdx==cmInvalidIdx ? kUnknownLexTId : p->mfp[ maxIdx ].typeId;
  609. p->curTokenCharIdx = p->ci;
  610. p->curTokenCharCnt = maxCharCnt;
  611. // advance the text buffer
  612. p->ci += maxCharCnt;
  613. if( returnFl )
  614. return p->curTokenId;
  615. }
  616. cmErrSetRC(&p->err,kEofRC);
  617. return kEofLexTId;
  618. }
  619. unsigned cmLexTokenId( cmLexH h )
  620. {
  621. cmLex* p = _cmLexHandleToPtr(h);
  622. return p->curTokenId;
  623. }
  624. const cmChar_t* cmLexTokenText( cmLexH h )
  625. {
  626. cmLex* p = _cmLexHandleToPtr(h);
  627. if( p->curTokenCharIdx == cmInvalidIdx )
  628. return NULL;
  629. unsigned n = p->curTokenId == kQStrLexTId ? 1 : 0;
  630. return p->cp + p->curTokenCharIdx + n;
  631. }
  632. unsigned cmLexTokenCharCount( cmLexH h )
  633. {
  634. cmLex* p = _cmLexHandleToPtr(h);
  635. if( p->curTokenCharIdx == cmInvalidIdx )
  636. return 0;
  637. unsigned n = p->curTokenId == kQStrLexTId ? 2 : 0;
  638. return p->curTokenCharCnt - n;
  639. }
  640. int cmLexTokenInt( cmLexH h )
  641. { return strtol( cmLexTokenText(h),NULL,0 ); }
  642. unsigned cmLexTokenUInt( cmLexH h )
  643. { return strtol( cmLexTokenText(h),NULL,0 ); }
  644. float cmLexTokenFloat( cmLexH h )
  645. { return strtof( cmLexTokenText(h),NULL ); }
  646. double cmLexTokenDouble( cmLexH h )
  647. { return strtod( cmLexTokenText(h),NULL ); }
  648. bool cmLexTokenIsUnsigned( cmLexH h )
  649. {
  650. cmLex* p = _cmLexHandleToPtr(h);
  651. return p->curTokenId == kIntLexTId && cmIsFlag(p->attrFlags,kIntUnsignedLexFl);
  652. }
  653. bool cmLexTokenIsSinglePrecision( cmLexH h )
  654. {
  655. cmLex* p = _cmLexHandleToPtr(h);
  656. return p->curTokenId == kRealLexTId && cmIsFlag(p->attrFlags,kRealFloatLexFl);
  657. }
  658. unsigned cmLexCurrentLineNumber( cmLexH h )
  659. {
  660. cmLex* p = _cmLexHandleToPtr(h);
  661. return p->curLine + 1;
  662. }
  663. unsigned cmLexCurrentColumnNumber( cmLexH h )
  664. {
  665. cmLex* p = _cmLexHandleToPtr(h);
  666. return p->curCol + 1;
  667. }
  668. unsigned cmLexErrorRC( cmLexH h )
  669. {
  670. cmLex* p = _cmLexHandleToPtr(h);
  671. return cmErrLastRC(&p->err);
  672. }
  673. const cmChar_t* cmLexIdToLabel( cmLexH h, unsigned typeId )
  674. {
  675. cmLex* p = _cmLexHandleToPtr(h);
  676. switch( typeId )
  677. {
  678. case kErrorLexTId: return "<error>";
  679. case kEofLexTId: return "<EOF>";
  680. case kSpaceLexTId: return "<space>";
  681. case kRealLexTId: return "<real>";
  682. case kIntLexTId: return "<int>";
  683. case kHexLexTId: return "<hex>";
  684. case kIdentLexTId: return "<ident>";
  685. case kQStrLexTId: return "<qstr>";
  686. case kBlockCmtLexTId: return "<bcmt>";
  687. case kLineCmtLexTId: return "<lcmt>";
  688. default:
  689. {
  690. cmLexMatcher* mp;
  691. if((mp = _cmLexFindUserToken(p,typeId,NULL)) == NULL )
  692. return "<unknown>";
  693. return mp->tokenStr;
  694. }
  695. }
  696. return "<invalid>";
  697. }
  698. const cmChar_t* cmLexRcToMsg( unsigned rc )
  699. {
  700. unsigned i=0;
  701. for(i=0; cmLexErrorArray[i].code != kInvalidLexRC; ++i)
  702. if( cmLexErrorArray[i].code == rc )
  703. break;
  704. return cmLexErrorArray[i].msg;
  705. }
  706. //( { label:cmLexEx }
  707. //
  708. // cmLexTest() gives a simple cmLex example.
  709. //
  710. void cmLexTest( cmRpt_t* rpt)
  711. {
  712. cmChar_t buf[] =
  713. "123ident0\n 123.456\nident0\n"
  714. "0xa12+.2\n"
  715. "// comment \n"
  716. "/* block \n"
  717. "comment */"
  718. "\"quoted string\""
  719. "ident1"
  720. "// last line comment";
  721. // initialize a lexer with a buffer of text
  722. cmLexH h = cmLexInit(buf,strlen(buf),
  723. kReturnSpaceLexFl | kReturnCommentsLexFl,rpt);
  724. // verify that the lexer initialization succeded.
  725. if( cmLexIsValid(h) == false )
  726. {
  727. cmRptPrintf(rpt,"Lexer initialization failed.");
  728. return;
  729. }
  730. // register some additional recoginizers
  731. cmLexRegisterToken(h,kUserLexTId+1,"+");
  732. cmLexRegisterToken(h,kUserLexTId+2,"-");
  733. unsigned tid;
  734. // ask for token id's
  735. while( (tid = cmLexGetNextToken(h)) != kEofLexTId )
  736. {
  737. // print information about each token
  738. cmRptPrintf(rpt,"%i %i %s '%.*s' (%i) ",
  739. cmLexCurrentLineNumber(h),
  740. cmLexCurrentColumnNumber(h),
  741. cmLexIdToLabel(h,tid),
  742. cmLexTokenCharCount(h),
  743. cmLexTokenText(h) ,
  744. cmLexTokenCharCount(h));
  745. // if the token is a number ...
  746. if( tid==kIntLexTId || tid==kRealLexTId || tid==kHexLexTId )
  747. {
  748. // ... then request the numbers value
  749. int iv = cmLexTokenInt(h);
  750. double dv = cmLexTokenDouble(h);
  751. cmRptPrintf(rpt,"%i %f",iv,dv);
  752. }
  753. cmRptPrintf(rpt,"\n");
  754. // handle errors
  755. if( tid == kErrorLexTId )
  756. {
  757. cmRptPrintf(rpt,"Error:%i\n", cmLexErrorRC(h));
  758. break;
  759. }
  760. }
  761. // finalize the lexer
  762. cmLexFinal(&h);
  763. }
  764. //)