libcm is a C development framework with an emphasis on audio signal processing applications.
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

cmXml.c 15KB


  1. #include "cmPrefix.h"
  2. #include "cmGlobal.h"
  3. #include "cmFloatTypes.h"
  4. #include "cmRpt.h"
  5. #include "cmErr.h"
  6. #include "cmCtx.h"
  7. #include "cmJson.h"
  8. #include "cmMem.h"
  9. #include "cmMallocDebug.h"
  10. #include "cmLex.h"
  11. #include "cmLinkedHeap.h"
  12. #include "cmFile.h"
  13. #include "cmXml.h"
  14. /*
  15. file -> decl doctype node
  16. decl -> "<?" attr-list "?>"
  17. doctype -> "<!DOCTYPE" dt-text ">"
  18. node -> beg-node node-body end-node
  19. | "<!--" cmmt-text "-->"
  20. node-body -> data-text
  21. | node
  22. beg-node -> "<" tag-label attr-list {"/"} ">"
  23. end-node -> "</" tag-label ">"
  24. attr-list -> attr*
  25. attr -> attr-label "=" qstring
  26. attr-label -> A string of characters ending with an '=' or <space>.
  27. Attribute labels may not contain '<' or '>'.
  28. tag-label -> A string of characters ending with:
  29. <space>, '>' or '/>'.
  30. Tag labels may not contain '<' or '>'.
  31. data-text -> A string of characters ending with '<'.
  32. dt-text -> A string of characters beginning with a non-whitespace
  33. and ending with '>'
  34. cmmt-text -> A string of characters ending with '-->'
  35. */
  36. /*
  37. t = get_next_attr_token(p,end_char, tn* )
  38. {
  39. }
  40. parse_attr_list(p,end_char)
  41. {
  42. }
  43. read_beg_tag(p)
  44. {
  45. c = goto_next_non_white_char(p)
  46. if( c != '<' )
  47. error();
  48. c = goto_next_non_white_char(p)
  49. if c == '?'
  50. {
  51. end_tag_str = "?";
  52. if( scan_past(p,"xml") == false )
  53. error();
  54. parse_attr_list(p,'?');
  55. }
  56. if c == '!'
  57. {
  58. if( scan_past(p,"--") )
  59. {
  60. if(go_past(p,"-->")==false)
  61. error();
  62. }
  63. if( scan_past(p,"DOCTYPE") )
  64. {
  65. while( s = get_next_attr_token(p,'>') != NULL )
  66. store_attr(p,s,"");
  67. }
  68. }
  69. }
  70. read_body( p )
  71. {
  72. c = goto_next_non_white_char(p);
  73. if c == '<'
  74. read_node(p)
  75. else
  76. read_data_string(p)
  77. }
  78. n = read_node( p )
  79. {
  80. t = read_beg_tag(p);
  81. if( is_beg_tag(t) )
  82. {
  83. read_body()
  84. read_end_tag()
  85. }
  86. }
  87. */
  88. cmXmlH_t cmXmlNullHandle = cmSTATIC_NULL_HANDLE;
  89. typedef struct
  90. {
  91. cmErr_t err; //
  92. cmLHeapH_t heapH; // linked heap stores all node memory
  93. cmChar_t* b; // base of the text buffer
  94. unsigned bn; // length of the text buffer in characters
  95. cmChar_t* c; // current lexer position
  96. cmXmlNode_t* root; // root XML tree node
  97. cmXmlNode_t* decl; // xml declaratoin node <? ... ?>
  98. cmXmlNode_t* doctype; // DOCTYPE node
  99. cmXmlNode_t* stack; // parsing stack
  100. } cmXml_t;
  101. cmXml_t* _cmXmlHandleToPtr( cmXmlH_t h )
  102. {
  103. cmXml_t* p = (cmXml_t*)h.h;
  104. assert( p != NULL );
  105. return p;
  106. }
  107. cmXmlRC_t _cmXmlFree( cmXml_t* p )
  108. {
  109. cmLHeapDestroy( &p->heapH );
  110. cmLexDestroy( &p->lexH );
  111. }
  112. cmXmlRC_t _cmXmlParse( cmXml_t* p, const cmChar_t* fn )
  113. {
  114. cmXmlRC_t rc = kOkXmlRC;
  115. if( cmLexReset( p->lexH ) != kOkLexRC )
  116. {
  117. rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer reset failed.");
  118. goto errLabel:
  119. }
  120. if( cmLexSetFile( p->lexH, fn ) != kOkLexRC )
  121. {
  122. rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer parse failed on '%s'.",cmStringNullGuard(fn));
  123. goto errLabel;
  124. }
  125. unsigned tokId;
  126. while((tokId = cmLexGetNextToken( cmLexH h )) != kEofRC && tokId != kErrorLexTId )
  127. {
  128. switch(tokId)
  129. {
  130. case kTagBegLexTId:
  131. case kTagEndLexTid:
  132. case kEqualLexTId:
  133. case kQStrLexTId:
  134. }
  135. }
  136. errLabel:
  137. return rc;
  138. }
  139. cmXmlRC_t cmXmlAlloc( cmCtx_t* ctx, cmXmlH_t* hp, const cmChar_t* fn )
  140. {
  141. cmXmlRC_t rc = kOkXmlRC;
  142. cmXml_t* p = NULL;
  143. // finalize before initialize
  144. if((rc = cmXmlFree(hp)) != kOkXmlRC )
  145. return rc;
  146. // allocate the main object record
  147. if((p = cmMemAllocZ( cmXml_t, 1 )) == NULL )
  148. return cmErrMsg(&ctx->err,kMemAllocErrXmlRC,"Object memory allocation failed.");
  149. cmErrSetup(&p->err,&ctx->rpt,"XML Parser");
  150. // allocate the linked heap mgr
  151. if( cmLHeapIsValid(p->heapH = cmLHeapCreate(1024,ctx)) == false )
  152. {
  153. rc = cmErrMsg(&p->err,kMemAllocErrXmlRC,"Linked heap object allocation failed.");
  154. goto errLabel;
  155. }
  156. // allocate the lexer
  157. if(cmLexIsValid(p->lexH = cmLexInit(NULL,0,0,&ctx->rpt)) == false )
  158. {
  159. rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex allocation failed.");
  160. goto errLabel;
  161. }
  162. // register xml specific tokens with the lexer
  163. for(i=0; _cmXmlTokenArray[i].id != kErrorLexTId; ++i)
  164. {
  165. cmRC_t lexRC;
  166. if( (lexRC = cmLexRegisterToken(p->lexH, _cmXmlTokenArray[i].id, _cmXmlTokenArray[i].text )) != kOkLexRC )
  167. {
  168. rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex token registration failed for:'%s'.",_cmXmlTokenArray[i].text );
  169. goto errLabel;
  170. }
  171. }
  172. hp->h = p;
  173. errLabel:
  174. if(rc != kOkXmlRC )
  175. _cmXmlFree(p);
  176. return rc;
  177. }
  178. cmXmlRC_t cmXmlFree( cmXmlH_t* hp )
  179. {
  180. cmXmlRC_t rc = kOkXmlRC;
  181. if( hp!=NULL || cmXmlIsValid(*hp)==false )
  182. return kOkXmlRC;
  183. cmXml_t* p = _cmXmlHandleToPtr(*hp);
  184. if((rc = _cmXmlFree(p)) != kOkXmlRC )
  185. return rc;
  186. hp->h = NULL;
  187. return rc;
  188. }
  189. bool cmXmlIsValid( cmXmlH_t h )
  190. { return h.h != NULL; }
  // NOTE(review): unimplemented stub.  The body is empty, so 'h' and 'fn'
  // are ignored and control reaches the end of a function declared to
  // return cmXmlRC_t without returning a value.  Callers must not use the
  // result until a body is supplied.
  191. cmXmlRC_t cmXmlParse( cmXmlH_t h, const cmChar_t* fn )
  192. {
  193. }
  // NOTE(review): unimplemented stub.  The body is empty, so 'h' is ignored,
  // no parser state is modified, and no value is returned although the
  // return type is cmXmlRC_t.
  194. cmXmlRC_t cmXmlClear( cmXmlH_t h )
  195. {
  196. }
  197. cmXmlRC_t _cmXmlSyntaxError( cmXml_t* p )
  198. {
  199. return _cmErrMsg(&p->err,kSyntaxErrorXmlRC,"Syntax error on line '%i.",p->line);
  200. }
  201. cmXmlNode_t* _cmXmlNodeAlloc( cmXml_t* p, unsigned flags, const cmChar_t* label, unsigned labelN )
  202. {
  203. cmXmlNode_t* np = cmLhAllocZ(p->heapH,cmXmlNode_t,1);
  204. if( cmIsFlag(kNormalXmlFl) )
  205. {
  206. if( p->root == NULL )
  207. p->root = np;
  208. if( p->stack == NULL )
  209. p->stack = np;
  210. else
  211. {
  212. np->parent = p->stack;
  213. if( p->stack->children == NULL )
  214. p->stack->children = np;
  215. else
  216. {
  217. cmXmlNode_t* n0p = NULL;
  218. cmXmlNode_t* n1p = p->stack->children;
  219. for(; n1p != NULL; n1p=n1p->sibling )
  220. n0p = n1p;
  221. n0p->sibling = np;
  222. }
  223. }
  224. }
  225. else
  226. {
  227. if( cmIsFlag(kDeclXmlFl) )
  228. p->decl = np;
  229. else
  230. {
  231. if( cmIsFlag(kDoctypeXmlF0 ) )
  232. p->doctype = np;
  233. else
  234. {
  235. _cmXmlSyntaxError(p);
  236. return NULL;
  237. }
  238. }
  239. }
  240. if( label != NULL )
  241. np->label = cmLhAllocStrN(p->heapH,label,labelN);
  242. return np;
  243. }
  244. cmXmlNode_t* _cmXmlAttrAlloc( cmXml_t* p, cmXmlNode_t* np, const cmChar_t* label, unsigned labelN, const cmChar_t* value, unsigned valueN )
  245. {
  246. cmXmlAttr_t* ap = cmLhAllocZ(p->heapH, cmXmlAttr_t,1);
  247. if( label != NULL && labelN > 0 )
  248. ap->label = cmLhAllocStr(p->heapH,label,labelN);
  249. if( value != NULL and valueN > 0 )
  250. ap->value = cmLhAllocStr(p->attrH,value,valueN);
  251. ap->link = np->attr;
  252. np->attr = ap;
  253. return np;
  254. }
  255. bool _cmXmlIsEof( cmXml_t* p )
  256. { return p->c >= p->b + p->bn; }
  257. // Return false if EOF is encountered
  258. bool _cmXmlAdvance( cmXml_t* p )
  259. {
  260. if( _cmXmlIsEof(p) )
  261. return false;
  262. p->c += 1;
  263. if( *p->c == '\n' )
  264. p->line += 1;
  265. return true;
  266. }
  267. // Advance the cursor to the next non-white char
  268. // Return a pointer to a non-space character.
  269. // Return NULL if the EOF is encountered.
  270. const cmChar_t* _cmXmlAdvanceToNextNonWhite( cmXml_t* p )
  271. {
  272. if( _cmXmlIsEof(p) )
  273. return NULL;
  274. while( isspace(*p->c) )
  275. if( _cmXmlAdvance(p) == false )
  276. return NULL;
  277. return p->c;
  278. }
  279. // Advance to the next white space character or 'c'.
  280. // Returns a pointer to a white space or 'c'.
  281. const cmChar_t* _cmXmlAdvanceToNextWhiteOr( cmXml_t* p, cmChar_t c0, cmChar_t c1 )
  282. {
  283. if( _cmXmlIsEof(p) )
  284. return NULL;
  285. while( isspace(*p->c)==false && *p->c!=c0 && *p->c!=c1 )
  286. if(_cmXmlAdvance(p) == false )
  287. return NULL;
  288. return p->c;
  289. }
  290. // Advance past leading white space followed by 's'.
  291. // Note that 's' is expected to immediately follow any leading white space.
  292. // Returns a pointer to the character after 's'.
  293. // Returns NULL if 'c' is not encountered
  294. const cmChar_t* _cmXmlAdvancePast( cmXml_t* p, const cmChar_t* s )
  295. {
  296. if( _cmXmlIsEof(p) )
  297. return NULL;
  298. while( isspace(*p->c) )
  299. if( _cmXmlAdvance(p) == false )
  300. return NULL;
  301. for(; *s && *p->c == *s; ++s )
  302. if( _cmXmlAdvance(p) == false )
  303. return NULL;
  304. return *s==0 ? p->c : NULL;
  305. }
  306. // Advance past the current character and then
  307. // advance to the next occurrence of 's' and return
  308. // a pointer to the last char in 's'.
  309. const cmChar_t* _cmXmlAdvanceToNext( cmXml_t* p, cmChar_t* s )
  310. {
  311. unsigned i = 0;
  312. unsigned n = strlen(s);
  313. while( _cmXmlAdvance(p) )
  314. {
  315. if( *p->c != s[i] )
  316. i = 0;
  317. else
  318. {
  319. i+= 1;
  320. if( i == n )
  321. break;
  322. }
  323. }
  324. return p->c;
  325. }
  326. // Return the character following the current character.
  327. const cmChar_t* _cmXmlAdvanceOne( cmXml_t* p )
  328. {
  329. if( _cmXmlIsEof(p) )
  330. return NULL;
  331. p->c += 1;
  332. return _cmXmlIsEof(p) ? NULL : p->c;
  333. }
  334. cmXmlRC_t _cmXmlParseAttr( cmXml_t* p, cmChar_t endChar )
  335. {
  336. cmXmlRC_t rc = kOkXmlRC;
  337. const cmChar_t* l0 = NULL;
  338. const cmChar_t* l1 = NULL;
  339. const cmChar_t* v0 = NULL;
  340. const cmChar_t* v1 = NULL;
  341. // advance to the next label
  342. if(( l0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
  343. return _cmXmlSyntaxError(p);
  344. // if the 'endChar' was encountered
  345. if( *p->c == endChar )
  346. return kOkXmlRC;
  347. // advance past last character in label
  348. if((l1 = _cmXmlAdvanceToNextWhiteOr(p,'=',' ')) == NULL )
  349. return _cmXmlSyntaxError(p);
  350. // advance past the next '='
  351. if( _cmXmlAdvancePast(p,"=") == NULL )
  352. return _cmXmlSyntaxError(p);
  353. // advance to the next non-white character
  354. if( (v0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
  355. return _cmXmlSyntaxError(p);
  356. // the first character in the value must be a single quote
  357. if( *p->c != '\'' )
  358. return _cmXmlSyntaxError(p);
  359. // advance to the next single quote
  360. if( (v1 = _cmXmlAdvanceToNext(p,"'")) == NULL )
  361. return _cmXmlSyntaxError(p);
  362. // advance past the ending single quote
  363. if( _cmXmlAdvanceOne(p) == NULL )
  364. return _cmXmlSyntaxError(p);
  365. // p->c now points just past the ending single quote
  366. return rc;
  367. }
  368. cmXmlRC_t _cmXmlParseAttrList( cmXml_t* p, cmChar_t endChar )
  369. {
  370. cmXmlRC_t rc = kOkXmlRC;
  371. while( *p->c != endChar && *p->c != '>' )
  372. if((rc = _cmXmlParseAttr(p,endChar)) != kOkXmlRC )
  373. break;
  374. if( *p->c == endChar )
  375. {
  376. if( endChar = '/' )
  377. {
  378. // this is a simple node
  379. }
  380. if( _cmXmlAdvanceOne(p) == NULL )
  381. return _cmXmlSyntaxError(p);
  382. }
  383. if( *p->c != '>' )
  384. return _cmXmlSyntaxError(p);
  385. if( _cmXmlAdvancePast(p,">") == NULL )
  386. return _cmXmlSyntaxError(p);
  387. // p->c is now past the ending '>'
  388. return rc;
  389. }
  390. cmXmlRC_t _cmXmlParseDoctypeToken( cmXml_t* p, cmXmlNode_t* np )
  391. {
  392. const cmChar_t* t0 = NULL;
  393. const cmChar_t* t1 = NULL;
  394. // advance to the first char in the doctype token
  395. if((t0 = _cmXmlAdvanceToNextNonWhite(p) ) == NULL )
  396. {
  397. return _cmXmlSyntaxError(p);
  398. }
  399. // if the end of the tag was encountered
  400. if( *p->c == '>' )
  401. return kOkXmlRC;
  402. // if the token begins with a quote
  403. if( *p->c == '\'' )
  404. {
  405. if((t1 = _cmXmlAdvanceToNext(p,"'")) == NULL )
  406. return _cmXmlSyntaxError(p);
  407. }
  408. else
  409. {
  410. if((t1 = _cmXmlAdvanceToNextWhiteOr(p,'>',' ')) == NULL )
  411. return _cmXmlSyntaxError(p);
  412. }
  413. // t1 and p->c now point just past the last character in the token
  414. return rc;
  415. }
  416. cmXmlRC_t _cmXmlParseDoctype( cmXml_t* p )
  417. {
  418. cmXmlRC_t rc = kOkXmlRC;
  419. cmXmlNode_t* np;
  420. if((np = _cmXmlNodeAlloc(p,kDoctypeXmlFl,"DOCTYPE",strlen("DOCTYPE"))) == NULL )
  421. return cmErrLastRC(&p->err);
  422. while( *p->c != '>' )
  423. if((rc = _cmXmlParseDoctypeToken(p,np)) != kOkXmlRC )
  424. break;
  425. return rc;
  426. }
  427. // Node tags are tags that begin with a '<' and are not
  428. // followed by any special character.
  429. cmXmlRC_t _cmXmlParseNodeTag( cmXml_t* p )
  430. {
  431. cmXmlRC_t rc = kOkXmlRC;
  432. const cmChar_t* l0 = NULL;
  433. const cmChar_t* l1 = NULL;
  434. // Advance to the first character of the tag label.
  435. if((l0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
  436. return _cmXmlSyntaxError(p);
  437. // Advance to the last character following the tag label.
  438. if((l1 = _cmXmlAdvanceToNextWhiteOr(p,'/','>')) == NULL )
  439. return _cmXmlSyntaxError(p);
  440. // look for attributes
  441. if((rc = _cmXmlParseAttrList(p,'/')) != kOkXmlRC )
  442. return _cmXmlSyntaxError(p);
  443. // p->c is now past the ending '>'
  444. return rc;
  445. }
  446. cmXmlRC_t _cmXmlReadEndTag( cmXml_t* p )
  447. {
  448. const cmChar_t* l0 = NULL;
  449. const cmChar_t* l1 = NULL;
  450. assert( *p->c == '/' );
  451. // advance past the '/'
  452. if(( l0 = _cmXmlAdvanceOne(p)) == NULL )
  453. return _cmXmlSyntaxError(p);
  454. // advance to the ending '>'
  455. if(( l1 = _cmXmlAdvanceToNext(p,">")) == NULL )
  456. return _cmXmlSyntaxError(p);
  457. // advance past the
  458. if( _cmXmlAdvanceOne(p) == NULL )
  459. return _cmXmlSyntaxError(p);
  460. // trim trailing space on label
  461. l1 -= 1;
  462. while( l1>l0 && isspace(*l1) )
  463. --l1;
  464. // verify that the label has a length
  465. if( l0 == l1 )
  466. return _cmXmlSyntaxError(p);
  467. assert( !isspace(*l1) );
  468. // the label should match the node on the top of the stack
  469. if( strncmp( p->stack->label, l0, (l1-l0)+1 ) )
  470. return _cmXmlSyntaxError(p);
  471. // since we just parsed an end-tag there should be at least one node on the stack
  472. if( p->stack == NULL )
  473. return _cmXmlSyntaxError(p);
  474. // pop the stack
  475. p->stack = p->stack->parent;
  476. return kOkXmlRC;
  477. }
  478. //
  479. cmXmlRC_t _cmXmlReadTag( cmXml_t* p, cmXmlNode_t** newNodeRef )
  480. {
  481. cmXmlRC_t rc = kOkXmlRC;
  482. assert(newNodeRef != NULL );
  483. *newNodeRef = NULL;
  484. // No leading '<' was found
  485. if( _cmXmlAdvancePast(p,"<") == NULL )
  486. {
  487. // error or EOF
  488. return NULL;
  489. }
  490. // examine the character following the opening '<'
  491. switch( *p->c )
  492. {
  493. // node end tag
  494. case '/':
  495. return _cmXmlReadEndTag(p);
  496. // declaration tag
  497. case '?':
  498. if( _cmXmlAdvancePast(p,"xml") == NULL )
  499. return _cmXmlSyntaxError(p);
  500. if( _cmXmlNodeAlloc(p,kDeclXmlFl, "xml",strlen("xml") ) == NULL )
  501. return cmErrLastRC(&p->err);
  502. if((rc = _cmXmlParseAttrList(p,'?')) != kOkXmlRC )
  503. return rc;
  504. break;
  505. case '!':
  506. switch( *(p->c+1) )
  507. {
  508. // comment node
  509. case '-':
  510. if( _cmXmlAdvancePast(p,"--") == NULL )
  511. return _cmXmlSyntaxError(p);
  512. if( _cmXmlAdvanceToNext("->") == NULL )
  513. return _cmXmlSyntaxError(p);
  514. // p->c is just after "-->"
  515. break;
  516. // DOCTYPE node
  517. case 'D':
  518. if( _cmXmlAdvancePast(P,"DOCTYPE")==NULL )
  519. return _cmXmlSyntaxError(p);
  520. if((rc = _cmXmlParseDocType(p)) != kOkXmlRC )
  521. return _cmXmlSyntaxError(p);
  522. // p->c is just after ">"
  523. break;
  524. default:
  525. return _cmXmlSyntaxError(p);
  526. }
  527. break;
  528. default:
  529. // normal node
  530. if((rc = _cmXmlParseNodeTag(p)) != kOkXmlRC )
  531. return rc;
  532. // p->c is just after ">"
  533. }
  534. return rc;
  535. }
  // NOTE(review): unimplemented stub.  The body is empty, so 'p' is not
  // touched and no value is returned although the return type is
  // cmXmlRC_t.
  536. cmXmlRC_t _cmXmlReadNode( cmXml_t* p )
  537. {
  538. }