cmXml.h/c : Updated but still incomplete.
This commit is contained in:
parent
a2a613f3b8
commit
a839c3dd92
589
cmXml.c
589
cmXml.c
@ -22,7 +22,7 @@ node -> beg-node node-body end-node
|
||||
node-body -> data-text
|
||||
| node
|
||||
|
||||
beg-node -> "<" tag-label attr-list ">"
|
||||
beg-node -> "<" tag-label attr-list {"/"} ">"
|
||||
end-node -> "<" tag-label "/>"
|
||||
attr-list -> attr*
|
||||
attr -> attr-label "=" qstring
|
||||
@ -41,69 +41,96 @@ dt-text -> A string of characters beginning with a non-whitespace
|
||||
|
||||
cmmt-text -> A string of characters ending with '-->'
|
||||
|
||||
*/
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
|
||||
t = get_next_attr_token(p,end_char, tn* )
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
parse_attr_list(p,end_char)
|
||||
{
|
||||
}
|
||||
|
||||
read_beg_tag(p)
|
||||
{
|
||||
c = goto_next_non_white_char(p)
|
||||
|
||||
if( c != '<' )
|
||||
error();
|
||||
|
||||
c = goto_next_non_white_char(p)
|
||||
|
||||
if c == '?'
|
||||
{
|
||||
end_tag_str = "?";
|
||||
if( scan_past(p,"xml") == false )
|
||||
error();
|
||||
|
||||
parse_attr_list(p,'?');
|
||||
}
|
||||
|
||||
if c == '!'
|
||||
{
|
||||
if( scan_past(p,"--") )
|
||||
{
|
||||
if(go_past(p,"-->")==false)
|
||||
error();
|
||||
}
|
||||
|
||||
if( scan_past(p,"DOCTYPE") )
|
||||
{
|
||||
while( s = get_next_attr_token(p,'>') != NULL )
|
||||
store_attr(p,s,"");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
read_body( p )
|
||||
{
|
||||
c = goto_next_non_white_char(p);
|
||||
|
||||
if c == '<'
|
||||
read_node(p)
|
||||
else
|
||||
read_data_string(p)
|
||||
}
|
||||
|
||||
n = read_node( p )
|
||||
{
|
||||
t = read_beg_tag(p);
|
||||
|
||||
if( is_beg_tag(t) )
|
||||
{
|
||||
read_body()
|
||||
read_end_tag()
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
cmXmlH_t cmXmlNullHandle = cmSTATIC_NULL_HANDLE;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
cmErr_t err; //
|
||||
cmLHeapH_t heapH; // linked heap stores all node memory
|
||||
cmLexH lexH;
|
||||
cmXmlNode_t* root;
|
||||
cmErr_t err; //
|
||||
cmLHeapH_t heapH; // linked heap stores all node memory
|
||||
|
||||
cmChar_t* b; // base of the text buffer
|
||||
unsigned bn; // length of the text buffer in characters
|
||||
cmChar_t* c; // current lexer position
|
||||
|
||||
cmXmlNode_t* root; // root XML tree node
|
||||
cmXmlNode_t* decl; // xml declaratoin node <? ... ?>
|
||||
cmXmlNode_t* doctype; // DOCTYPE node
|
||||
|
||||
cmXmlNode_t* stack; // parsing stack
|
||||
} cmXml_t;
|
||||
|
||||
enum
|
||||
{
|
||||
kTagBegLexTId = kUserLexTId+1,
|
||||
kTagEndLexTId,
|
||||
kDeclBegLexTId,
|
||||
kDeclEndLexTId,
|
||||
kSpclBegLexTId,
|
||||
kDocTypeLexTId,
|
||||
kCmmtBegLexTId,
|
||||
kCmmtEndLexTId,
|
||||
kEqualLexTId
|
||||
};
|
||||
|
||||
cmXmlToken_t _cmXmlTokenArray[] =
|
||||
{
|
||||
{ kTagBegLexTId = kUserLexId+1, "<" },
|
||||
{ kTagEndLexTid, ">" },
|
||||
{ kDeclBegLexTId, "<?" },
|
||||
{ kDeclEndLexTid, "?>" },
|
||||
{ kSpclBegLexTId, "<!" },
|
||||
{ kDocTypeLexTId, "<!DOCTYPE" },
|
||||
{ kCmmtBegLexTId, "<!--" },
|
||||
{ kCmmtEndLexTid, "-->" },
|
||||
{ kEqualLexTid, "=" },
|
||||
{ kErrorLexTId,""}
|
||||
};
|
||||
|
||||
// Match a tag label.
|
||||
// A string ending with a <space> or '>'
|
||||
unsigned cmLexTagLabelMatcher( const cmChar_t* cp, unsigned cn )
|
||||
{
|
||||
for(i=0; i<cn; ++i)
|
||||
if( cp[i] == '>' || isspace(cp[i]) )
|
||||
break;
|
||||
return i>0 ? i-1 : 0;
|
||||
}
|
||||
|
||||
unsigned cmLexStringMatcher( const cmChar_t* cp, unsigned cn )
|
||||
{
|
||||
for(i=0; i<cn; ++i)
|
||||
{
|
||||
if( cp[i] == ' ')
|
||||
break;
|
||||
|
||||
if( cp[i] == '<' )
|
||||
break;
|
||||
|
||||
}
|
||||
return i>0 ?
|
||||
}
|
||||
|
||||
|
||||
cmXml_t* _cmXmlHandleToPtr( cmXmlH_t h )
|
||||
{
|
||||
@ -228,3 +255,451 @@ cmXmlRC_t cmXmlParse( cmXmlH_t h, const cmChar_t* fn )
|
||||
cmXmlRC_t cmXmlClear( cmXmlH_t h )
|
||||
{
|
||||
}
|
||||
|
||||
cmXmlRC_t _cmXmlSyntaxError( cmXml_t* p )
|
||||
{
|
||||
return _cmErrMsg(&p->err,kSyntaxErrorXmlRC,"Syntax error on line '%i.",p->line);
|
||||
}
|
||||
|
||||
cmXmlNode_t* _cmXmlNodeAlloc( cmXml_t* p, unsigned flags, const cmChar_t* label, unsigned labelN )
|
||||
{
|
||||
cmXmlNode_t* np = cmLhAllocZ(p->heapH,cmXmlNode_t,1);
|
||||
|
||||
if( cmIsFlag(kNormalXmlFl) )
|
||||
{
|
||||
if( p->root == NULL )
|
||||
p->root = np;
|
||||
|
||||
if( p->stack == NULL )
|
||||
p->stack = np;
|
||||
else
|
||||
{
|
||||
np->parent = p->stack;
|
||||
|
||||
if( p->stack->children == NULL )
|
||||
p->stack->children = np;
|
||||
else
|
||||
{
|
||||
cmXmlNode_t* n0p = NULL;
|
||||
cmXmlNode_t* n1p = p->stack->children;
|
||||
|
||||
for(; n1p != NULL; n1p=n1p->sibling )
|
||||
n0p = n1p;
|
||||
|
||||
n0p->sibling = np;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( cmIsFlag(kDeclXmlFl) )
|
||||
p->decl = np;
|
||||
else
|
||||
{
|
||||
if( cmIsFlag(kDoctypeXmlF0 ) )
|
||||
p->doctype = np;
|
||||
else
|
||||
{
|
||||
_cmXmlSyntaxError(p);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( label != NULL )
|
||||
np->label = cmLhAllocStrN(p->heapH,label,labelN);
|
||||
|
||||
return np;
|
||||
}
|
||||
|
||||
cmXmlNode_t* _cmXmlAttrAlloc( cmXml_t* p, cmXmlNode_t* np, const cmChar_t* label, unsigned labelN, const cmChar_t* value, unsigned valueN )
|
||||
{
|
||||
cmXmlAttr_t* ap = cmLhAllocZ(p->heapH, cmXmlAttr_t,1);
|
||||
|
||||
if( label != NULL && labelN > 0 )
|
||||
ap->label = cmLhAllocStr(p->heapH,label,labelN);
|
||||
|
||||
if( value != NULL and valueN > 0 )
|
||||
ap->value = cmLhAllocStr(p->attrH,value,valueN);
|
||||
|
||||
ap->link = np->attr;
|
||||
np->attr = ap;
|
||||
|
||||
return np;
|
||||
}
|
||||
|
||||
|
||||
bool _cmXmlIsEof( cmXml_t* p )
|
||||
{ return p->c >= p->b + p->bn; }
|
||||
|
||||
// Return false if EOF is encountered
|
||||
bool _cmXmlAdvance( cmXml_t* p )
|
||||
{
|
||||
if( _cmXmlIsEof(p) )
|
||||
return false;
|
||||
|
||||
p->c += 1;
|
||||
|
||||
if( *p->c == '\n' )
|
||||
p->line += 1;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Advance the cursor to the next non-white char
|
||||
// Return a pointer to a non-space character.
|
||||
// Return NULL if the EOF is encountered.
|
||||
const cmChar_t* _cmXmlAdvanceToNextNonWhite( cmXml_t* p )
|
||||
{
|
||||
if( _cmXmlIsEof(p) )
|
||||
return NULL;
|
||||
|
||||
while( isspace(*p->c) )
|
||||
if( _cmXmlAdvance(p) == false )
|
||||
return NULL;
|
||||
|
||||
return p->c;
|
||||
}
|
||||
|
||||
// Advance to the next white space character or 'c'.
|
||||
// Returns a pointer to a white space or 'c'.
|
||||
const cmChar_t* _cmXmlAdvanceToNextWhiteOr( cmXml_t* p, cmChar_t c0, cmChar_t c1 )
|
||||
{
|
||||
if( _cmXmlIsEof(p) )
|
||||
return NULL;
|
||||
|
||||
while( isspace(*p->c)==false && *p->c!=c0 && *p->c!=c1 )
|
||||
if(_cmXmlAdvance(p) == false )
|
||||
return NULL;
|
||||
|
||||
return p->c;
|
||||
}
|
||||
|
||||
// Advance past leading white space followed by 's'.
|
||||
// Note that 's' is expected to immediately follow any leading white space.
|
||||
// Returns a pointer to the character after 's'.
|
||||
// Returns NULL if 'c' is not encountered
|
||||
const cmChar_t* _cmXmlAdvancePast( cmXml_t* p, const cmChar_t* s )
|
||||
{
|
||||
if( _cmXmlIsEof(p) )
|
||||
return NULL;
|
||||
|
||||
while( isspace(*p->c) )
|
||||
if( _cmXmlAdvance(p) == false )
|
||||
return NULL;
|
||||
|
||||
for(; *s && *p->c == *s; ++s )
|
||||
if( _cmXmlAdvance(p) == false )
|
||||
return NULL;
|
||||
|
||||
return *s==0 ? p->c : NULL;
|
||||
}
|
||||
|
||||
// Advance past the current character and then
|
||||
// advance to the next occurrence of 's' and return
|
||||
// a pointer to the last char in 's'.
|
||||
const cmChar_t* _cmXmlAdvanceToNext( cmXml_t* p, cmChar_t* s )
|
||||
{
|
||||
unsigned i = 0;
|
||||
unsigned n = strlen(s);
|
||||
|
||||
while( _cmXmlAdvance(p) )
|
||||
{
|
||||
if( *p->c != s[i] )
|
||||
i = 0;
|
||||
else
|
||||
{
|
||||
i+= 1;
|
||||
if( i == n )
|
||||
break;
|
||||
}
|
||||
}
|
||||
return p->c;
|
||||
}
|
||||
|
||||
// Return the character following the current character.
|
||||
const cmChar_t* _cmXmlAdvanceOne( cmXml_t* p )
|
||||
{
|
||||
if( _cmXmlIsEof(p) )
|
||||
return NULL;
|
||||
|
||||
p->c += 1;
|
||||
|
||||
return _cmXmlIsEof(p) ? NULL : p->c;
|
||||
}
|
||||
|
||||
cmXmlRC_t _cmXmlParseAttr( cmXml_t* p, cmChar_t endChar )
|
||||
{
|
||||
cmXmlRC_t rc = kOkXmlRC;
|
||||
const cmChar_t* l0 = NULL;
|
||||
const cmChar_t* l1 = NULL;
|
||||
const cmChar_t* v0 = NULL;
|
||||
const cmChar_t* v1 = NULL;
|
||||
|
||||
// advance to the next label
|
||||
if(( l0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// if the 'endChar' was encountered
|
||||
if( *p->c == endChar )
|
||||
return kOkXmlRC;
|
||||
|
||||
// advance past last character in label
|
||||
if((l1 = _cmXmlAdvanceToNextWhiteOr(p,'=',' ')) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// advance past the next '='
|
||||
if( _cmXmlAdvancePast(p,"=") == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// advance to the next non-white character
|
||||
if( (v0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// the first character in the value must be a single quote
|
||||
if( *p->c != '\'' )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// advance to the next single quote
|
||||
if( (v1 = _cmXmlAdvanceToNext(p,"'")) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// advance past the ending single quote
|
||||
if( _cmXmlAdvanceOne(p) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// p->c now points just past the ending single quote
|
||||
return rc;
|
||||
}
|
||||
|
||||
cmXmlRC_t _cmXmlParseAttrList( cmXml_t* p, cmChar_t endChar )
|
||||
{
|
||||
cmXmlRC_t rc = kOkXmlRC;
|
||||
|
||||
|
||||
while( *p->c != endChar && *p->c != '>' )
|
||||
if((rc = _cmXmlParseAttr(p,endChar)) != kOkXmlRC )
|
||||
break;
|
||||
|
||||
if( *p->c == endChar )
|
||||
{
|
||||
if( endChar = '/' )
|
||||
{
|
||||
// this is a simple node
|
||||
}
|
||||
|
||||
if( _cmXmlAdvanceOne(p) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
}
|
||||
|
||||
if( *p->c != '>' )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
if( _cmXmlAdvancePast(p,">") == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// p->c is now past the ending '>'
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
cmXmlRC_t _cmXmlParseDoctypeToken( cmXml_t* p, cmXmlNode_t* np )
|
||||
{
|
||||
const cmChar_t* t0 = NULL;
|
||||
const cmChar_t* t1 = NULL;
|
||||
|
||||
// advance to the first char in the doctype token
|
||||
if((t0 = _cmXmlAdvanceToNextNonWhite(p) ) == NULL )
|
||||
{
|
||||
return _cmXmlSyntaxError(p);
|
||||
}
|
||||
|
||||
// if the end of the tag was encountered
|
||||
if( *p->c == '>' )
|
||||
return kOkXmlRC;
|
||||
|
||||
|
||||
// if the token begins with a quote
|
||||
if( *p->c == '\'' )
|
||||
{
|
||||
if((t1 = _cmXmlAdvanceToNext(p,"'")) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
}
|
||||
else
|
||||
{
|
||||
if((t1 = _cmXmlAdvanceToNextWhiteOr(p,'>',' ')) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
}
|
||||
|
||||
// t1 and p->c now point just past the last character in the token
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
cmXmlRC_t _cmXmlParseDoctype( cmXml_t* p )
|
||||
{
|
||||
cmXmlRC_t rc = kOkXmlRC;
|
||||
cmXmlNode_t* np;
|
||||
|
||||
if((np = _cmXmlNodeAlloc(p,kDoctypeXmlFl,"DOCTYPE",strlen("DOCTYPE"))) == NULL )
|
||||
return cmErrLastRC(&p->err);
|
||||
|
||||
while( *p->c != '>' )
|
||||
if((rc = _cmXmlParseDoctypeToken(p,np)) != kOkXmlRC )
|
||||
break;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
// Node tags are tags that begin with a '<' and are not
|
||||
// followed by any special character.
|
||||
cmXmlRC_t _cmXmlParseNodeTag( cmXml_t* p )
|
||||
{
|
||||
cmXmlRC_t rc = kOkXmlRC;
|
||||
const cmChar_t* l0 = NULL;
|
||||
const cmChar_t* l1 = NULL;
|
||||
|
||||
// Advance to the first character of the tag label.
|
||||
if((l0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// Advance to the last character following the tag label.
|
||||
if((l1 = _cmXmlAdvanceToNextWhiteOr(p,'/','>')) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// look for attributes
|
||||
if((rc = _cmXmlParseAttrList(p,'/')) != kOkXmlRC )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// p->c is now past the ending '>'
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
cmXmlRC_t _cmXmlReadEndTag( cmXml_t* p )
|
||||
{
|
||||
const cmChar_t* l0 = NULL;
|
||||
const cmChar_t* l1 = NULL;
|
||||
|
||||
assert( *p->c == '/' );
|
||||
|
||||
// advance past the '/'
|
||||
if(( l0 = _cmXmlAdvanceOne(p)) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// advance to the ending '>'
|
||||
if(( l1 = _cmXmlAdvanceToNext(p,">")) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// advance past the
|
||||
if( _cmXmlAdvanceOne(p) == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// trim trailing space on label
|
||||
l1 -= 1;
|
||||
while( l1>l0 && isspace(*l1) )
|
||||
--l1;
|
||||
|
||||
// verify that the label has a length
|
||||
if( l0 == l1 )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
assert( !isspace(*l1) );
|
||||
|
||||
// the label should match the node on the top of the stack
|
||||
if( strncmp( p->stack->label, l0, (l1-l0)+1 ) )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// since we just parsed an end-tag there should be at least one node on the stack
|
||||
if( p->stack == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// pop the stack
|
||||
p->stack = p->stack->parent;
|
||||
|
||||
return kOkXmlRC;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
cmXmlRC_t _cmXmlReadTag( cmXml_t* p, cmXmlNode_t** newNodeRef )
|
||||
{
|
||||
cmXmlRC_t rc = kOkXmlRC;
|
||||
|
||||
assert(newNodeRef != NULL );
|
||||
*newNodeRef = NULL;
|
||||
|
||||
// No leading '<' was found
|
||||
if( _cmXmlAdvancePast(p,"<") == NULL )
|
||||
{
|
||||
// error or EOF
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// examine the character following the opening '<'
|
||||
switch( *p->c )
|
||||
{
|
||||
// node end tag
|
||||
case '/':
|
||||
return _cmXmlReadEndTag(p);
|
||||
|
||||
// declaration tag
|
||||
case '?':
|
||||
if( _cmXmlAdvancePast(p,"xml") == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
if( _cmXmlNodeAlloc(p,kDeclXmlFl, "xml",strlen("xml") ) == NULL )
|
||||
return cmErrLastRC(&p->err);
|
||||
|
||||
if((rc = _cmXmlParseAttrList(p,'?')) != kOkXmlRC )
|
||||
return rc;
|
||||
|
||||
break;
|
||||
|
||||
case '!':
|
||||
switch( *(p->c+1) )
|
||||
{
|
||||
// comment node
|
||||
case '-':
|
||||
if( _cmXmlAdvancePast(p,"--") == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
if( _cmXmlAdvanceToNext("->") == NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// p->c is just after "-->"
|
||||
break;
|
||||
|
||||
// DOCTYPE node
|
||||
case 'D':
|
||||
if( _cmXmlAdvancePast(P,"DOCTYPE")==NULL )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
if((rc = _cmXmlParseDocType(p)) != kOkXmlRC )
|
||||
return _cmXmlSyntaxError(p);
|
||||
|
||||
// p->c is just after ">"
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
return _cmXmlSyntaxError(p);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
// normal node
|
||||
if((rc = _cmXmlParseNodeTag(p)) != kOkXmlRC )
|
||||
return rc;
|
||||
|
||||
// p->c is just after ">"
|
||||
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
cmXmlRC_t _cmXmlReadNode( cmXml_t* p )
|
||||
{
|
||||
}
|
||||
|
25
cmXml.h
25
cmXml.h
@ -10,22 +10,39 @@ extern "C" {
|
||||
kOkXmlRC = cmOkRC,
|
||||
kMemAllocErrXmlRC,
|
||||
kLHeapXmlRC,
|
||||
kLexErrXmlRC
|
||||
kLexErrXmlRC,
|
||||
kSyntaxErrorXmlRC
|
||||
};
|
||||
|
||||
typedef struct cmXmlAttr_str
|
||||
{
|
||||
const cmChar_t* label;
|
||||
const cmChar_t* value;
|
||||
const cmChar_t* label;
|
||||
const cmChar_t* value;
|
||||
struct cmXmlAttr_str* link;
|
||||
} cmXmlAttr_t;
|
||||
|
||||
enum
|
||||
{
|
||||
kDeclXmlFl = 0x0001,
|
||||
kDoctypeXmlFl = 0x0002,
|
||||
kNormalXmlFl = 0x0004,
|
||||
|
||||
};
|
||||
|
||||
typedef struct cmXmlNode_str
|
||||
{
|
||||
unsigned flags;
|
||||
|
||||
const cmChar_t* label;
|
||||
const cmChar_t* dataStr;
|
||||
|
||||
cmXmlAttr_t* attr;
|
||||
|
||||
struct cmXmlNode_str* parent;
|
||||
struct cmXmlNode_str* children;
|
||||
struct cmXmlNode_str* sibling;
|
||||
cmXmlAttr_t* attr;
|
||||
|
||||
|
||||
} cmXmlNode_t;
|
||||
|
||||
typedef cmHandle_t cmXmlH_t;
|
||||
|
Loading…
Reference in New Issue
Block a user