|
@@ -12,15 +12,99 @@
|
12
|
12
|
#include "cmFile.h"
|
13
|
13
|
#include "cmXml.h"
|
14
|
14
|
|
|
15
|
+/*
|
|
16
|
+file -> decl doctype node
|
|
17
|
+decl -> "<?" attr-list "?>"
|
|
18
|
+doctype -> "<!DOCTYPE" dt-text ">"
|
|
19
|
+node -> beg-node node-body end-node
|
|
20
|
+ | "<!--" cmmt-text "-->"
|
|
21
|
+
|
|
22
|
+node-body -> data-text
|
|
23
|
+ | node
|
|
24
|
+
|
|
25
|
+beg-node -> "<" tag-label attr-list ">"
|
|
26
|
+end-node -> "</" tag-label ">"
|
|
27
|
+attr-list -> attr*
|
|
28
|
+attr -> attr-label "=" qstring
|
|
29
|
+
|
|
30
|
+attr-label -> A string of characters ending with an '=' or <space>.
|
|
31
|
+ Attribute labels may not contain '<' or '>'.
|
|
32
|
+
|
|
33
|
+tag-label -> A string of characters ending with:
|
|
34
|
+ <space>, '>' or '/>'.
|
|
35
|
+ Tag labels may not contain '<' or '>'.
|
|
36
|
+
|
|
37
|
+data-text -> A string of characters ending with '<'.
|
|
38
|
+
|
|
39
|
+dt-text -> A string of characters beginning with a non-whitespace
|
|
40
|
+ and ending with '>'
|
|
41
|
+
|
|
42
|
+cmmt-text -> A string of characters ending with '-->'
|
|
43
|
+
|
|
44
|
+*/
|
|
45
|
+
|
15
|
46
|
// Null XML handle value, initialized from cmSTATIC_NULL_HANDLE.
cmXmlH_t cmXmlNullHandle = cmSTATIC_NULL_HANDLE;
|
16
|
47
|
|
17
|
48
|
typedef struct
|
18
|
49
|
{
|
19
|
50
|
cmErr_t err; //
|
20
|
51
|
cmLHeapH_t heapH; // linked heap stores all node memory
|
|
52
|
+ cmLexH lexH;
|
21
|
53
|
cmXmlNode_t* root;
|
22
|
54
|
} cmXml_t;
|
23
|
55
|
|
|
56
|
+enum
|
|
57
|
+{
|
|
58
|
+ kTagBegLexTId = kUserLexTId+1,
|
|
59
|
+ kTagEndLexTId,
|
|
60
|
+ kDeclBegLexTId,
|
|
61
|
+ kDeclEndLexTId,
|
|
62
|
+ kSpclBegLexTId,
|
|
63
|
+ kDocTypeLexTId,
|
|
64
|
+ kCmmtBegLexTId,
|
|
65
|
+ kCmmtEndLexTId,
|
|
66
|
+ kEqualLexTId
|
|
67
|
+};
|
|
68
|
+
|
|
69
|
+cmXmlToken_t _cmXmlTokenArray[] =
|
|
70
|
+{
|
|
71
|
+ { kTagBegLexTId = kUserLexId+1, "<" },
|
|
72
|
+ { kTagEndLexTid, ">" },
|
|
73
|
+ { kDeclBegLexTId, "<?" },
|
|
74
|
+ { kDeclEndLexTid, "?>" },
|
|
75
|
+ { kSpclBegLexTId, "<!" },
|
|
76
|
+ { kDocTypeLexTId, "<!DOCTYPE" },
|
|
77
|
+ { kCmmtBegLexTId, "<!--" },
|
|
78
|
+ { kCmmtEndLexTid, "-->" },
|
|
79
|
+ { kEqualLexTid, "=" },
|
|
80
|
+ { kErrorLexTId,""}
|
|
81
|
+};
|
|
82
|
+
|
|
83
|
+// Match a tag label.
|
|
84
|
+// A string ending with a <space> or '>'
|
|
85
|
+unsigned cmLexTagLabelMatcher( const cmChar_t* cp, unsigned cn )
|
|
86
|
+{
|
|
87
|
+ for(i=0; i<cn; ++i)
|
|
88
|
+ if( cp[i] == '>' || isspace(cp[i]) )
|
|
89
|
+ break;
|
|
90
|
+ return i>0 ? i-1 : 0;
|
|
91
|
+}
|
|
92
|
+
|
|
93
|
+unsigned cmLexStringMatcher( const cmChar_t* cp, unsigned cn )
|
|
94
|
+{
|
|
95
|
+ for(i=0; i<cn; ++i)
|
|
96
|
+ {
|
|
97
|
+ if( cp[i] == ' ')
|
|
98
|
+ break;
|
|
99
|
+
|
|
100
|
+ if( cp[i] == '<' )
|
|
101
|
+ break;
|
|
102
|
+
|
|
103
|
+ }
|
|
104
|
+ return i>0 ?
|
|
105
|
+}
|
|
106
|
+
|
|
107
|
+
|
24
|
108
|
cmXml_t* _cmXmlHandleToPtr( cmXmlH_t h )
|
25
|
109
|
{
|
26
|
110
|
cmXml_t* p = (cmXml_t*)h.h;
|
|
@@ -30,9 +114,41 @@ cmXml_t* _cmXmlHandleToPtr( cmXmlH_t h )
|
30
|
114
|
|
31
|
115
|
// Release the resources held by the XML object 'p':
// the linked heap and the lexer.
// Returns kOkXmlRC.
cmXmlRC_t _cmXmlFree( cmXml_t* p )
{
  // free the internal heap object
  cmLHeapDestroy( &p->heapH );

  // free the lexer
  cmLexDestroy( &p->lexH );

  return kOkXmlRC;
}
|
35
|
120
|
|
|
121
|
+cmXmlRC_t _cmXmlParse( cmXml_t* p, const cmChar_t* fn )
|
|
122
|
+{
|
|
123
|
+ cmXmlRC_t rc = kOkXmlRC;
|
|
124
|
+
|
|
125
|
+ if( cmLexReset( p->lexH ) != kOkLexRC )
|
|
126
|
+ {
|
|
127
|
+ rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer reset failed.");
|
|
128
|
+ goto errLabel:
|
|
129
|
+ }
|
|
130
|
+
|
|
131
|
+ if( cmLexSetFile( p->lexH, fn ) != kOkLexRC )
|
|
132
|
+ {
|
|
133
|
+ rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer parse failed on '%s'.",cmStringNullGuard(fn));
|
|
134
|
+ goto errLabel;
|
|
135
|
+ }
|
|
136
|
+
|
|
137
|
+ unsigned tokId;
|
|
138
|
+
|
|
139
|
+ while((tokId = cmLexGetNextToken( cmLexH h )) != kEofRC && tokId != kErrorLexTId )
|
|
140
|
+ {
|
|
141
|
+ switch(tokId)
|
|
142
|
+ {
|
|
143
|
+ case kTagBegLexTId:
|
|
144
|
+ case kTagEndLexTid:
|
|
145
|
+ case kEqualLexTId:
|
|
146
|
+ case kQStrLexTId:
|
|
147
|
+ }
|
|
148
|
+ }
|
|
149
|
+
|
|
150
|
+ errLabel:
|
|
151
|
+ return rc;
|
36
|
152
|
}
|
37
|
153
|
|
38
|
154
|
cmXmlRC_t cmXmlAlloc( cmCtx_t* ctx, cmXmlH_t* hp, const cmChar_t* fn )
|
|
@@ -41,7 +157,7 @@ cmXmlRC_t cmXmlAlloc( cmCtx_t* ctx, cmXmlH_t* hp, const cmChar_t* fn )
|
41
|
157
|
cmXml_t* p = NULL;
|
42
|
158
|
|
43
|
159
|
// finalize before initialize
|
44
|
|
- if((rc = cmXmlFree(hp)) != kOkJsRC )
|
|
160
|
+ if((rc = cmXmlFree(hp)) != kOkXmlRC )
|
45
|
161
|
return rc;
|
46
|
162
|
|
47
|
163
|
// allocate the main object record
|
|
@@ -56,6 +172,24 @@ cmXmlRC_t cmXmlAlloc( cmCtx_t* ctx, cmXmlH_t* hp, const cmChar_t* fn )
|
56
|
172
|
rc = cmErrMsg(&p->err,kMemAllocErrXmlRC,"Linked heap object allocation failed.");
|
57
|
173
|
goto errLabel;
|
58
|
174
|
}
|
|
175
|
+
|
|
176
|
+ // allocate the lexer
|
|
177
|
+ if(cmLexIsValid(p->lexH = cmLexInit(NULL,0,0,&ctx->rpt)) == false )
|
|
178
|
+ {
|
|
179
|
+ rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex allocation failed.");
|
|
180
|
+ goto errLabel;
|
|
181
|
+ }
|
|
182
|
+
|
|
183
|
+ // register xml specific tokens with the lexer
|
|
184
|
+ for(i=0; _cmXmlTokenArray[i].id != kErrorLexTId; ++i)
|
|
185
|
+ {
|
|
186
|
+ cmRC_t lexRC;
|
|
187
|
+ if( (lexRC = cmLexRegisterToken(p->lexH, _cmXmlTokenArray[i].id, _cmXmlTokenArray[i].text )) != kOkLexRC )
|
|
188
|
+ {
|
|
189
|
+ rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex token registration failed for:'%s'.",_cmXmlTokenArray[i].text );
|
|
190
|
+ goto errLabel;
|
|
191
|
+ }
|
|
192
|
+ }
|
59
|
193
|
|
60
|
194
|
hp->h = p;
|
61
|
195
|
|