libcm is a C development framework with an emphasis on audio signal processing applications.
Du kannst nicht mehr als 25 Themen auswählen Themen müssen mit entweder einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

cmXml.c 15KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705
  1. #include "cmPrefix.h"
  2. #include "cmGlobal.h"
  3. #include "cmFloatTypes.h"
  4. #include "cmRpt.h"
  5. #include "cmErr.h"
  6. #include "cmCtx.h"
  7. #include "cmJson.h"
  8. #include "cmMem.h"
  9. #include "cmMallocDebug.h"
  10. #include "cmLex.h"
  11. #include "cmLinkedHeap.h"
  12. #include "cmFile.h"
  13. #include "cmXml.h"
  14. /*
  15. file -> decl doctype node
  16. decl -> "<?" attr-list "?>"
  17. doctype -> "<!DOCTYPE" dt-text ">"
  18. node -> beg-node node-body end-node
  19. | "<!--" cmmt-text "-->"
  20. node-body -> data-text
  21. | node
  22. beg-node -> "<" tag-label attr-list {"/"} ">"
  23. end-node -> "</" tag-label ">"
  24. attr-list -> attr*
  25. attr -> attr-label "=" qstring
  26. attr-label -> A string of characters ending with an '=' or <space>.
  27. Attribute labels may not contain '<' or '>'.
  28. tag-label -> A string of characters ending with:
  29. <space>, '>' or '/>'.
  30. Tag labels may not contain '<' or '>'.
  31. data-text -> A string of characters ending with '<'.
  32. dt-text -> A string of characters beginning with a non-whitespace
  33. and ending with '>'
  34. cmmt-text -> A string of characters ending with '-->'
  35. */
  36. /*
  37. t = get_next_attr_token(p,end_char, tn* )
  38. {
  39. }
  40. parse_attr_list(p,end_char)
  41. {
  42. }
  43. read_beg_tag(p)
  44. {
  45. c = goto_next_non_white_char(p)
  46. if( c != '<' )
  47. error();
  48. c = goto_next_non_white_char(p)
  49. if c == '?'
  50. {
  51. end_tag_str = "?";
  52. if( scan_past(p,"xml") == false )
  53. error();
  54. parse_attr_list(p,'?');
  55. }
  56. if c == '!'
  57. {
  58. if( scan_past(p,"--") )
  59. {
  60. if(go_past(p,"-->")==false)
  61. error();
  62. }
  63. if( scan_past(p,"DOCTYPE") )
  64. {
  65. while( s = get_next_attr_token(p,'>') != NULL )
  66. store_attr(p,s,"");
  67. }
  68. }
  69. }
  70. read_body( p )
  71. {
  72. c = goto_next_non_white_char(p);
  73. if c == '<'
  74. read_node(p)
  75. else
  76. read_data_string(p)
  77. }
  78. n = read_node( p )
  79. {
  80. t = read_beg_tag(p);
  81. if( is_beg_tag(t) )
  82. {
  83. read_body()
  84. read_end_tag()
  85. }
  86. }
  87. */
  88. cmXmlH_t cmXmlNullHandle = cmSTATIC_NULL_HANDLE;
  89. typedef struct
  90. {
  91. cmErr_t err; //
  92. cmLHeapH_t heapH; // linked heap stores all node memory
  93. cmChar_t* b; // base of the text buffer
  94. unsigned bn; // length of the text buffer in characters
  95. cmChar_t* c; // current lexer position
  96. cmXmlNode_t* root; // root XML tree node
  97. cmXmlNode_t* decl; // xml declaratoin node <? ... ?>
  98. cmXmlNode_t* doctype; // DOCTYPE node
  99. cmXmlNode_t* stack; // parsing stack
  100. } cmXml_t;
  101. cmXml_t* _cmXmlHandleToPtr( cmXmlH_t h )
  102. {
  103. cmXml_t* p = (cmXml_t*)h.h;
  104. assert( p != NULL );
  105. return p;
  106. }
  107. cmXmlRC_t _cmXmlFree( cmXml_t* p )
  108. {
  109. cmLHeapDestroy( &p->heapH );
  110. cmLexDestroy( &p->lexH );
  111. }
  112. cmXmlRC_t _cmXmlParse( cmXml_t* p, const cmChar_t* fn )
  113. {
  114. cmXmlRC_t rc = kOkXmlRC;
  115. if( cmLexReset( p->lexH ) != kOkLexRC )
  116. {
  117. rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer reset failed.");
  118. goto errLabel:
  119. }
  120. if( cmLexSetFile( p->lexH, fn ) != kOkLexRC )
  121. {
  122. rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer parse failed on '%s'.",cmStringNullGuard(fn));
  123. goto errLabel;
  124. }
  125. unsigned tokId;
  126. while((tokId = cmLexGetNextToken( cmLexH h )) != kEofRC && tokId != kErrorLexTId )
  127. {
  128. switch(tokId)
  129. {
  130. case kTagBegLexTId:
  131. case kTagEndLexTid:
  132. case kEqualLexTId:
  133. case kQStrLexTId:
  134. }
  135. }
  136. errLabel:
  137. return rc;
  138. }
  139. cmXmlRC_t cmXmlAlloc( cmCtx_t* ctx, cmXmlH_t* hp, const cmChar_t* fn )
  140. {
  141. cmXmlRC_t rc = kOkXmlRC;
  142. cmXml_t* p = NULL;
  143. // finalize before initialize
  144. if((rc = cmXmlFree(hp)) != kOkXmlRC )
  145. return rc;
  146. // allocate the main object record
  147. if((p = cmMemAllocZ( cmXml_t, 1 )) == NULL )
  148. return cmErrMsg(&ctx->err,kMemAllocErrXmlRC,"Object memory allocation failed.");
  149. cmErrSetup(&p->err,&ctx->rpt,"XML Parser");
  150. // allocate the linked heap mgr
  151. if( cmLHeapIsValid(p->heapH = cmLHeapCreate(1024,ctx)) == false )
  152. {
  153. rc = cmErrMsg(&p->err,kMemAllocErrXmlRC,"Linked heap object allocation failed.");
  154. goto errLabel;
  155. }
  156. // allocate the lexer
  157. if(cmLexIsValid(p->lexH = cmLexInit(NULL,0,0,&ctx->rpt)) == false )
  158. {
  159. rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex allocation failed.");
  160. goto errLabel;
  161. }
  162. // register xml specific tokens with the lexer
  163. for(i=0; _cmXmlTokenArray[i].id != kErrorLexTId; ++i)
  164. {
  165. cmRC_t lexRC;
  166. if( (lexRC = cmLexRegisterToken(p->lexH, _cmXmlTokenArray[i].id, _cmXmlTokenArray[i].text )) != kOkLexRC )
  167. {
  168. rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex token registration failed for:'%s'.",_cmXmlTokenArray[i].text );
  169. goto errLabel;
  170. }
  171. }
  172. hp->h = p;
  173. errLabel:
  174. if(rc != kOkXmlRC )
  175. _cmXmlFree(p);
  176. return rc;
  177. }
  178. cmXmlRC_t cmXmlFree( cmXmlH_t* hp )
  179. {
  180. cmXmlRC_t rc = kOkXmlRC;
  181. if( hp!=NULL || cmXmlIsValid(*hp)==false )
  182. return kOkXmlRC;
  183. cmXml_t* p = _cmXmlHandleToPtr(*hp);
  184. if((rc = _cmXmlFree(p)) != kOkXmlRC )
  185. return rc;
  186. hp->h = NULL;
  187. return rc;
  188. }
  189. bool cmXmlIsValid( cmXmlH_t h )
  190. { return h.h != NULL; }
  191. cmXmlRC_t cmXmlParse( cmXmlH_t h, const cmChar_t* fn )
  192. {
  193. }
  194. cmXmlRC_t cmXmlClear( cmXmlH_t h )
  195. {
  196. }
  197. cmXmlRC_t _cmXmlSyntaxError( cmXml_t* p )
  198. {
  199. return _cmErrMsg(&p->err,kSyntaxErrorXmlRC,"Syntax error on line '%i.",p->line);
  200. }
  201. cmXmlNode_t* _cmXmlNodeAlloc( cmXml_t* p, unsigned flags, const cmChar_t* label, unsigned labelN )
  202. {
  203. cmXmlNode_t* np = cmLhAllocZ(p->heapH,cmXmlNode_t,1);
  204. if( cmIsFlag(kNormalXmlFl) )
  205. {
  206. if( p->root == NULL )
  207. p->root = np;
  208. if( p->stack == NULL )
  209. p->stack = np;
  210. else
  211. {
  212. np->parent = p->stack;
  213. if( p->stack->children == NULL )
  214. p->stack->children = np;
  215. else
  216. {
  217. cmXmlNode_t* n0p = NULL;
  218. cmXmlNode_t* n1p = p->stack->children;
  219. for(; n1p != NULL; n1p=n1p->sibling )
  220. n0p = n1p;
  221. n0p->sibling = np;
  222. }
  223. }
  224. }
  225. else
  226. {
  227. if( cmIsFlag(kDeclXmlFl) )
  228. p->decl = np;
  229. else
  230. {
  231. if( cmIsFlag(kDoctypeXmlF0 ) )
  232. p->doctype = np;
  233. else
  234. {
  235. _cmXmlSyntaxError(p);
  236. return NULL;
  237. }
  238. }
  239. }
  240. if( label != NULL )
  241. np->label = cmLhAllocStrN(p->heapH,label,labelN);
  242. return np;
  243. }
  244. cmXmlNode_t* _cmXmlAttrAlloc( cmXml_t* p, cmXmlNode_t* np, const cmChar_t* label, unsigned labelN, const cmChar_t* value, unsigned valueN )
  245. {
  246. cmXmlAttr_t* ap = cmLhAllocZ(p->heapH, cmXmlAttr_t,1);
  247. if( label != NULL && labelN > 0 )
  248. ap->label = cmLhAllocStr(p->heapH,label,labelN);
  249. if( value != NULL and valueN > 0 )
  250. ap->value = cmLhAllocStr(p->attrH,value,valueN);
  251. ap->link = np->attr;
  252. np->attr = ap;
  253. return np;
  254. }
  255. bool _cmXmlIsEof( cmXml_t* p )
  256. { return p->c >= p->b + p->bn; }
  257. // Return false if EOF is encountered
  258. bool _cmXmlAdvance( cmXml_t* p )
  259. {
  260. if( _cmXmlIsEof(p) )
  261. return false;
  262. p->c += 1;
  263. if( *p->c == '\n' )
  264. p->line += 1;
  265. return true;
  266. }
  267. // Advance the cursor to the next non-white char
  268. // Return a pointer to a non-space character.
  269. // Return NULL if the EOF is encountered.
  270. const cmChar_t* _cmXmlAdvanceToNextNonWhite( cmXml_t* p )
  271. {
  272. if( _cmXmlIsEof(p) )
  273. return NULL;
  274. while( isspace(*p->c) )
  275. if( _cmXmlAdvance(p) == false )
  276. return NULL;
  277. return p->c;
  278. }
  279. // Advance to the next white space character or 'c'.
  280. // Returns a pointer to a white space or 'c'.
  281. const cmChar_t* _cmXmlAdvanceToNextWhiteOr( cmXml_t* p, cmChar_t c0, cmChar_t c1 )
  282. {
  283. if( _cmXmlIsEof(p) )
  284. return NULL;
  285. while( isspace(*p->c)==false && *p->c!=c0 && *p->c!=c1 )
  286. if(_cmXmlAdvance(p) == false )
  287. return NULL;
  288. return p->c;
  289. }
  290. // Advance past leading white space followed by 's'.
  291. // Note that 's' is expected to immediately follow any leading white space.
  292. // Returns a pointer to the character after 's'.
  293. // Returns NULL if 'c' is not encountered
  294. const cmChar_t* _cmXmlAdvancePast( cmXml_t* p, const cmChar_t* s )
  295. {
  296. if( _cmXmlIsEof(p) )
  297. return NULL;
  298. while( isspace(*p->c) )
  299. if( _cmXmlAdvance(p) == false )
  300. return NULL;
  301. for(; *s && *p->c == *s; ++s )
  302. if( _cmXmlAdvance(p) == false )
  303. return NULL;
  304. return *s==0 ? p->c : NULL;
  305. }
  306. // Advance past the current character and then
  307. // advance to the next occurrence of 's' and return
  308. // a pointer to the last char in 's'.
  309. const cmChar_t* _cmXmlAdvanceToNext( cmXml_t* p, cmChar_t* s )
  310. {
  311. unsigned i = 0;
  312. unsigned n = strlen(s);
  313. while( _cmXmlAdvance(p) )
  314. {
  315. if( *p->c != s[i] )
  316. i = 0;
  317. else
  318. {
  319. i+= 1;
  320. if( i == n )
  321. break;
  322. }
  323. }
  324. return p->c;
  325. }
  326. // Return the character following the current character.
  327. const cmChar_t* _cmXmlAdvanceOne( cmXml_t* p )
  328. {
  329. if( _cmXmlIsEof(p) )
  330. return NULL;
  331. p->c += 1;
  332. return _cmXmlIsEof(p) ? NULL : p->c;
  333. }
  334. cmXmlRC_t _cmXmlParseAttr( cmXml_t* p, cmChar_t endChar )
  335. {
  336. cmXmlRC_t rc = kOkXmlRC;
  337. const cmChar_t* l0 = NULL;
  338. const cmChar_t* l1 = NULL;
  339. const cmChar_t* v0 = NULL;
  340. const cmChar_t* v1 = NULL;
  341. // advance to the next label
  342. if(( l0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
  343. return _cmXmlSyntaxError(p);
  344. // if the 'endChar' was encountered
  345. if( *p->c == endChar )
  346. return kOkXmlRC;
  347. // advance past last character in label
  348. if((l1 = _cmXmlAdvanceToNextWhiteOr(p,'=',' ')) == NULL )
  349. return _cmXmlSyntaxError(p);
  350. // advance past the next '='
  351. if( _cmXmlAdvancePast(p,"=") == NULL )
  352. return _cmXmlSyntaxError(p);
  353. // advance to the next non-white character
  354. if( (v0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
  355. return _cmXmlSyntaxError(p);
  356. // the first character in the value must be a single quote
  357. if( *p->c != '\'' )
  358. return _cmXmlSyntaxError(p);
  359. // advance to the next single quote
  360. if( (v1 = _cmXmlAdvanceToNext(p,"'")) == NULL )
  361. return _cmXmlSyntaxError(p);
  362. // advance past the ending single quote
  363. if( _cmXmlAdvanceOne(p) == NULL )
  364. return _cmXmlSyntaxError(p);
  365. // p->c now points just past the ending single quote
  366. return rc;
  367. }
  368. cmXmlRC_t _cmXmlParseAttrList( cmXml_t* p, cmChar_t endChar )
  369. {
  370. cmXmlRC_t rc = kOkXmlRC;
  371. while( *p->c != endChar && *p->c != '>' )
  372. if((rc = _cmXmlParseAttr(p,endChar)) != kOkXmlRC )
  373. break;
  374. if( *p->c == endChar )
  375. {
  376. if( endChar = '/' )
  377. {
  378. // this is a simple node
  379. }
  380. if( _cmXmlAdvanceOne(p) == NULL )
  381. return _cmXmlSyntaxError(p);
  382. }
  383. if( *p->c != '>' )
  384. return _cmXmlSyntaxError(p);
  385. if( _cmXmlAdvancePast(p,">") == NULL )
  386. return _cmXmlSyntaxError(p);
  387. // p->c is now past the ending '>'
  388. return rc;
  389. }
  390. cmXmlRC_t _cmXmlParseDoctypeToken( cmXml_t* p, cmXmlNode_t* np )
  391. {
  392. const cmChar_t* t0 = NULL;
  393. const cmChar_t* t1 = NULL;
  394. // advance to the first char in the doctype token
  395. if((t0 = _cmXmlAdvanceToNextNonWhite(p) ) == NULL )
  396. {
  397. return _cmXmlSyntaxError(p);
  398. }
  399. // if the end of the tag was encountered
  400. if( *p->c == '>' )
  401. return kOkXmlRC;
  402. // if the token begins with a quote
  403. if( *p->c == '\'' )
  404. {
  405. if((t1 = _cmXmlAdvanceToNext(p,"'")) == NULL )
  406. return _cmXmlSyntaxError(p);
  407. }
  408. else
  409. {
  410. if((t1 = _cmXmlAdvanceToNextWhiteOr(p,'>',' ')) == NULL )
  411. return _cmXmlSyntaxError(p);
  412. }
  413. // t1 and p->c now point just past the last character in the token
  414. return rc;
  415. }
  416. cmXmlRC_t _cmXmlParseDoctype( cmXml_t* p )
  417. {
  418. cmXmlRC_t rc = kOkXmlRC;
  419. cmXmlNode_t* np;
  420. if((np = _cmXmlNodeAlloc(p,kDoctypeXmlFl,"DOCTYPE",strlen("DOCTYPE"))) == NULL )
  421. return cmErrLastRC(&p->err);
  422. while( *p->c != '>' )
  423. if((rc = _cmXmlParseDoctypeToken(p,np)) != kOkXmlRC )
  424. break;
  425. return rc;
  426. }
  427. // Node tags are tags that begin with a '<' and are not
  428. // followed by any special character.
  429. cmXmlRC_t _cmXmlParseNodeTag( cmXml_t* p )
  430. {
  431. cmXmlRC_t rc = kOkXmlRC;
  432. const cmChar_t* l0 = NULL;
  433. const cmChar_t* l1 = NULL;
  434. // Advance to the first character of the tag label.
  435. if((l0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
  436. return _cmXmlSyntaxError(p);
  437. // Advance to the last character following the tag label.
  438. if((l1 = _cmXmlAdvanceToNextWhiteOr(p,'/','>')) == NULL )
  439. return _cmXmlSyntaxError(p);
  440. // look for attributes
  441. if((rc = _cmXmlParseAttrList(p,'/')) != kOkXmlRC )
  442. return _cmXmlSyntaxError(p);
  443. // p->c is now past the ending '>'
  444. return rc;
  445. }
  446. cmXmlRC_t _cmXmlReadEndTag( cmXml_t* p )
  447. {
  448. const cmChar_t* l0 = NULL;
  449. const cmChar_t* l1 = NULL;
  450. assert( *p->c == '/' );
  451. // advance past the '/'
  452. if(( l0 = _cmXmlAdvanceOne(p)) == NULL )
  453. return _cmXmlSyntaxError(p);
  454. // advance to the ending '>'
  455. if(( l1 = _cmXmlAdvanceToNext(p,">")) == NULL )
  456. return _cmXmlSyntaxError(p);
  457. // advance past the
  458. if( _cmXmlAdvanceOne(p) == NULL )
  459. return _cmXmlSyntaxError(p);
  460. // trim trailing space on label
  461. l1 -= 1;
  462. while( l1>l0 && isspace(*l1) )
  463. --l1;
  464. // verify that the label has a length
  465. if( l0 == l1 )
  466. return _cmXmlSyntaxError(p);
  467. assert( !isspace(*l1) );
  468. // the label should match the node on the top of the stack
  469. if( strncmp( p->stack->label, l0, (l1-l0)+1 ) )
  470. return _cmXmlSyntaxError(p);
  471. // since we just parsed an end-tag there should be at least one node on the stack
  472. if( p->stack == NULL )
  473. return _cmXmlSyntaxError(p);
  474. // pop the stack
  475. p->stack = p->stack->parent;
  476. return kOkXmlRC;
  477. }
  478. //
  479. cmXmlRC_t _cmXmlReadTag( cmXml_t* p, cmXmlNode_t** newNodeRef )
  480. {
  481. cmXmlRC_t rc = kOkXmlRC;
  482. assert(newNodeRef != NULL );
  483. *newNodeRef = NULL;
  484. // No leading '<' was found
  485. if( _cmXmlAdvancePast(p,"<") == NULL )
  486. {
  487. // error or EOF
  488. return NULL;
  489. }
  490. // examine the character following the opening '<'
  491. switch( *p->c )
  492. {
  493. // node end tag
  494. case '/':
  495. return _cmXmlReadEndTag(p);
  496. // declaration tag
  497. case '?':
  498. if( _cmXmlAdvancePast(p,"xml") == NULL )
  499. return _cmXmlSyntaxError(p);
  500. if( _cmXmlNodeAlloc(p,kDeclXmlFl, "xml",strlen("xml") ) == NULL )
  501. return cmErrLastRC(&p->err);
  502. if((rc = _cmXmlParseAttrList(p,'?')) != kOkXmlRC )
  503. return rc;
  504. break;
  505. case '!':
  506. switch( *(p->c+1) )
  507. {
  508. // comment node
  509. case '-':
  510. if( _cmXmlAdvancePast(p,"--") == NULL )
  511. return _cmXmlSyntaxError(p);
  512. if( _cmXmlAdvanceToNext("->") == NULL )
  513. return _cmXmlSyntaxError(p);
  514. // p->c is just after "-->"
  515. break;
  516. // DOCTYPE node
  517. case 'D':
  518. if( _cmXmlAdvancePast(P,"DOCTYPE")==NULL )
  519. return _cmXmlSyntaxError(p);
  520. if((rc = _cmXmlParseDocType(p)) != kOkXmlRC )
  521. return _cmXmlSyntaxError(p);
  522. // p->c is just after ">"
  523. break;
  524. default:
  525. return _cmXmlSyntaxError(p);
  526. }
  527. break;
  528. default:
  529. // normal node
  530. if((rc = _cmXmlParseNodeTag(p)) != kOkXmlRC )
  531. return rc;
  532. // p->c is just after ">"
  533. }
  534. return rc;
  535. }
  536. cmXmlRC_t _cmXmlReadNode( cmXml_t* p )
  537. {
  538. }