diff --git a/cmXml.c b/cmXml.c index 63596ea..19b0810 100644 --- a/cmXml.c +++ b/cmXml.c @@ -12,15 +12,99 @@ #include "cmFile.h" #include "cmXml.h" +/* +file -> decl doctype node +decl -> "" +doctype -> "" +node -> beg-node node-body end-node + | "" + +node-body -> data-text + | node + +beg-node -> "<" tag-label attr-list ">" +end-node -> "<" tag-label "/>" +attr-list -> attr* +attr -> attr-label "=" qstring + +attr-label -> A string of characters ending with an '=' or . + Attribute labels may not contain '<' or '>'. + +tag-label -> A string of characters ending with: + , '>' or '/>'. + Tag labels may not contain '<' or '>'. + +data-text -> A string of characters ending with '<'. + +dt-text -> A string of characters beginning with a non-whitespace + and ending with '>' + +cmmt-text -> A string of characters ending with '-->' + +*/ + cmXmlH_t cmXmlNullHandle = cmSTATIC_NULL_HANDLE; typedef struct { cmErr_t err; // cmLHeapH_t heapH; // linked heap stores all node memory + cmLexH lexH; cmXmlNode_t* root; } cmXml_t; +enum +{ + kTagBegLexTId = kUserLexTId+1, + kTagEndLexTId, + kDeclBegLexTId, + kDeclEndLexTId, + kSpclBegLexTId, + kDocTypeLexTId, + kCmmtBegLexTId, + kCmmtEndLexTId, + kEqualLexTId +}; + +cmXmlToken_t _cmXmlTokenArray[] = +{ + { kTagBegLexTId = kUserLexId+1, "<" }, + { kTagEndLexTid, ">" }, + { kDeclBegLexTId, "" }, + { kSpclBegLexTId, "" }, + { kEqualLexTid, "=" }, + { kErrorLexTId,""} +}; + +// Match a tag label. +// A string ending with a or '>' +unsigned cmLexTagLabelMatcher( const cmChar_t* cp, unsigned cn ) +{ + for(i=0; i' || isspace(cp[i]) ) + break; + return i>0 ? i-1 : 0; +} + +unsigned cmLexStringMatcher( const cmChar_t* cp, unsigned cn ) +{ + for(i=0; i0 ? +} + + cmXml_t* _cmXmlHandleToPtr( cmXmlH_t h ) { cmXml_t* p = (cmXml_t*)h.h; @@ -30,9 +114,41 @@ cmXml_t* _cmXmlHandleToPtr( cmXmlH_t h ) cmXmlRC_t _cmXmlFree( cmXml_t* p ) { - // free the internal heap object cmLHeapDestroy( &p->heapH ); + cmLexDestroy( &p->lexH ); +} +cmXmlRC_t _cmXmlParse( cmXml_t* p, const cmChar_t* fn ) +{ + cmXmlRC_t rc = kOkXmlRC; + + if( cmLexReset( p->lexH ) != kOkLexRC ) + { + rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer reset failed."); + goto errLabel: + } + + if( cmLexSetFile( p->lexH, fn ) != kOkLexRC ) + { + rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer parse failed on '%s'.",cmStringNullGuard(fn)); + goto errLabel; + } + + unsigned tokId; + + while((tokId = cmLexGetNextToken( cmLexH h )) != kEofRC && tokId != kErrorLexTId ) + { + switch(tokId) + { + case kTagBegLexTId: + case kTagEndLexTid: + case kEqualLexTId: + case kQStrLexTId: + } + } + + errLabel: + return rc; } cmXmlRC_t cmXmlAlloc( cmCtx_t* ctx, cmXmlH_t* hp, const cmChar_t* fn ) @@ -41,7 +157,7 @@ cmXmlRC_t cmXmlAlloc( cmCtx_t* ctx, cmXmlH_t* hp, const cmChar_t* fn ) cmXml_t* p = NULL; // finalize before initialize - if((rc = cmXmlFree(hp)) != kOkJsRC ) + if((rc = cmXmlFree(hp)) != kOkXmlRC ) return rc; // allocate the main object record @@ -56,6 +172,24 @@ cmXmlRC_t cmXmlAlloc( cmCtx_t* ctx, cmXmlH_t* hp, const cmChar_t* fn ) rc = cmErrMsg(&p->err,kMemAllocErrXmlRC,"Linked heap object allocation failed."); goto errLabel; } + + // allocate the lexer + if(cmLexIsValid(p->lexH = cmLexInit(NULL,0,0,&ctx->rpt)) == false ) + { + rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex allocation failed."); + goto errLabel; + } + + // register xml specific tokens with the lexer + for(i=0; _cmXmlTokenArray[i].id != kErrorLexTId; ++i) + { + cmRC_t lexRC; + if( (lexRC = cmLexRegisterToken(p->lexH, _cmXmlTokenArray[i].id, _cmXmlTokenArray[i].text )) != kOkLexRC ) + { + rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex token registration failed for:'%s'.",_cmXmlTokenArray[i].text ); + goto errLabel; + } + } hp->h = p; diff --git a/cmXml.h b/cmXml.h index ee9211d..1b910c0 100644 --- a/cmXml.h +++ b/cmXml.h @@ -9,7 +9,8 @@ extern "C" { { kOkXmlRC = cmOkRC, kMemAllocErrXmlRC, - kLHeapXmlRC + kLHeapXmlRC, + kLexErrXmlRC }; typedef struct cmXmlAttr_str