/* cml / libcm */
							#include "cmPrefix.h"
#include "cmGlobal.h"
#include "cmFloatTypes.h"
#include "cmRpt.h"
#include "cmErr.h"
#include "cmCtx.h"
#include "cmJson.h"
#include "cmMem.h"
#include "cmMallocDebug.h"
#include "cmLex.h"
#include "cmLinkedHeap.h"
#include "cmFile.h"
#include "cmXml.h"

/*
file     -> decl doctype node
decl     -> "<?" attr-list "?>"
doctype  -> "<!DOCTYPE" dt-text ">"
node     -> beg-node node-body end-node
         |  "<!--" cmmt-text "-->"

node-body -> data-text
          |  node
  
beg-node   -> "<" tag-label  attr-list {"/"} ">"
end-node   -> "</" tag-label ">"
attr-list  -> attr*
attr       -> attr-label "=" qstring

attr-label -> A string of characters ending with an '=' or <space>.
              Attribute labels may not contain '<' or '>'.

tag-label  -> A string of characters ending with:
                <space>, '>' or '/>'.
              Tag labels may not contain '<' or '>'.

data-text  -> A string of characters ending with '<'.

dt-text    -> A string of characters beginning with a non-whitespace
               and ending with '>'

cmmt-text  -> A string of characters ending with '-->'

*/

/*


t = get_next_attr_token(p,end_char, tn* )
{

}

parse_attr_list(p,end_char)
{
}

read_beg_tag(p)
{
   c = goto_next_non_white_char(p)

   if( c != '<' )
     error();

   c = goto_next_non_white_char(p)

   if c == '?'
   {
      end_tag_str = "?";
      if( scan_past(p,"xml") == false )
        error();

      parse_attr_list(p,'?');
   }

   if c == '!'
   {
      if( scan_past(p,"--") )
      {
         if(go_past(p,"-->")==false)
           error();
      }

      if( scan_past(p,"DOCTYPE") )
      {
          while( s = get_next_attr_token(p,'>') != NULL )
             store_attr(p,s,"");
      }
   }
    
}

read_body( p )
{
  c = goto_next_non_white_char(p);

  if c == '<'
    read_node(p)
  else
    read_data_string(p)
}

n = read_node( p )
{
   t = read_beg_tag(p);

   if( is_beg_tag(t) )
   {
      read_body()
      read_end_tag()
   }
}

 */
 
// Null XML parser handle, initialized from cmSTATIC_NULL_HANDLE.
cmXmlH_t cmXmlNullHandle = cmSTATIC_NULL_HANDLE;

typedef struct
{
  cmErr_t      err;     // 
  cmLHeapH_t   heapH;   // linked heap stores all node memory
  
  cmChar_t*    b;       // base of the text buffer
  unsigned     bn;      // length of the text buffer in characters
  cmChar_t*    c;       // current lexer position
  
  cmXmlNode_t* root;    // root XML tree node
  cmXmlNode_t* decl;    // xml declaratoin node <? ... ?>
  cmXmlNode_t* doctype; // DOCTYPE  node

  cmXmlNode_t* stack;   // parsing stack
} cmXml_t;


// Convert a public handle into a pointer to the parser record it refers to.
// Asserts that the handle does not hold a NULL pointer.
cmXml_t* _cmXmlHandleToPtr( cmXmlH_t h )
{
  cmXml_t* xmlPtr = (cmXml_t*)h.h;

  assert( xmlPtr != NULL );

  return xmlPtr;
}

// Release the resources held by the parser record 'p': the linked heap
// and the lexer.
// Always returns kOkXmlRC.
// NOTE(review): the record 'p' itself (allocated with cmMemAllocZ() in
// cmXmlAlloc()) is not released here - confirm where it is meant to be freed.
cmXmlRC_t _cmXmlFree( cmXml_t* p )
{
  cmLHeapDestroy( &p->heapH );
  cmLexDestroy( &p->lexH );

  // cmXmlFree() tests the value returned from this function, so a result
  // code must be returned explicitly.
  return kOkXmlRC;
}

// Run the lexer attached to 'p' over the file 'fn'.
// Returns kOkXmlRC on success, or kLexErrXmlRC if the lexer could not be
// reset, could not be given the file, or produced an error token.
// NOTE(review): the token cases are still empty - no tree is built here yet.
// NOTE(review): 'kEofRC' and 'kTagEndLexTid' are kept exactly as written;
// the other ids follow the 'k...LexTId' pattern - confirm the spelling.
cmXmlRC_t _cmXmlParse( cmXml_t* p, const cmChar_t* fn )
{
  cmXmlRC_t rc = kOkXmlRC;
  unsigned  tokId;
  
  if( cmLexReset( p->lexH ) != kOkLexRC )
  {
    rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer reset failed.");
    goto errLabel;
  }

  if( cmLexSetFile( p->lexH, fn ) != kOkLexRC )
  {
    rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer parse failed on '%s'.",cmStringNullGuard(fn));
    goto errLabel;
  }

  while((tokId = cmLexGetNextToken( p->lexH )) != kEofRC && tokId != kErrorLexTId )
  {
    switch(tokId)
    {
      case kTagBegLexTId:
      case kTagEndLexTid:
      case kEqualLexTId:
      case kQStrLexTId:
        // TODO: token handling is not implemented yet.
        break;

      default:
        break;
    }
  }

  // The loop also ends when the lexer reports an error token; do not
  // let that case be returned as success.
  if( tokId == kErrorLexTId )
    rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lexer failed while scanning '%s'.",cmStringNullGuard(fn));

 errLabel:
  return rc;
}

// Allocate a new XML parser object and return its handle in '*hp'.
//
// ctx : application context; supplies the error state and report object
//       used by this function.
// hp  : any parser already referred to by '*hp' is released first via
//       cmXmlFree(). On success '*hp' refers to the new parser.
// fn  : NOTE(review): not used by this function - confirm whether the file
//       is meant to be parsed here.
//
// Returns kOkXmlRC on success. On failure returns the code from cmXmlFree(),
// kMemAllocErrXmlRC or kLexErrXmlRC, and the partially built parser is
// passed to _cmXmlFree().
cmXmlRC_t cmXmlAlloc( cmCtx_t* ctx, cmXmlH_t* hp, const cmChar_t* fn )
{
  cmXmlRC_t rc = kOkXmlRC;
  cmXml_t*   p = NULL;
  unsigned   i = 0;     // index into _cmXmlTokenArray[] (was used without a declaration)
  
  // finalize before initialize 
  if((rc = cmXmlFree(hp)) != kOkXmlRC )
    return rc;

  // allocate the main object record
  if((p = cmMemAllocZ( cmXml_t, 1 )) == NULL )
    return cmErrMsg(&ctx->err,kMemAllocErrXmlRC,"Object memory allocation failed.");

  cmErrSetup(&p->err,&ctx->rpt,"XML Parser");

  // allocate the linked heap mgr
  if( cmLHeapIsValid(p->heapH = cmLHeapCreate(1024,ctx)) == false )
  {
    rc = cmErrMsg(&p->err,kMemAllocErrXmlRC,"Linked heap object allocation failed.");
    goto errLabel;
  }

  // allocate the lexer
  if(cmLexIsValid(p->lexH = cmLexInit(NULL,0,0,&ctx->rpt)) == false )
  {
    rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex allocation failed.");
    goto errLabel;
  }

  // register xml specific tokens with the lexer
  // (the array is terminated by an entry whose id is kErrorLexTId)
  for(i=0; _cmXmlTokenArray[i].id != kErrorLexTId; ++i)
  {
    cmRC_t lexRC;
    if( (lexRC = cmLexRegisterToken(p->lexH, _cmXmlTokenArray[i].id, _cmXmlTokenArray[i].text )) != kOkLexRC )
    {
      rc = cmErrMsg(&p->err,kLexErrXmlRC,"Lex token registration failed for:'%s'.",_cmXmlTokenArray[i].text );
      goto errLabel;
    }
  }
  
  hp->h = p;
  
 errLabel:
  if(rc != kOkXmlRC )
    _cmXmlFree(p);
    
  return rc;
}

// Release the parser referred to by '*hp' and set the handle to NULL.
// Calling this with a NULL pointer or with an invalid (never allocated)
// handle is not an error: kOkXmlRC is returned and nothing is done.
// Otherwise returns the result of _cmXmlFree().
cmXmlRC_t cmXmlFree(  cmXmlH_t* hp )
{
  cmXmlRC_t rc = kOkXmlRC;
  
  // The test must be 'hp==NULL': with 'hp!=NULL' every valid pointer
  // returned early (nothing was ever freed) and a NULL pointer was
  // dereferenced by cmXmlIsValid(*hp).
  if( hp==NULL || cmXmlIsValid(*hp)==false )
    return kOkXmlRC;

  cmXml_t* p = _cmXmlHandleToPtr(*hp);

  if((rc = _cmXmlFree(p)) != kOkXmlRC )
    return rc;

  hp->h = NULL;
  
  return rc;  
}
  
// A handle is valid when it holds a non-NULL pointer.
bool      cmXmlIsValid( cmXmlH_t h )
{
  if( h.h == NULL )
    return false;

  return true;
}

  
// Parse the file 'fn' with the parser referred to by 'h'.
// Returns the result code produced by _cmXmlParse().
cmXmlRC_t cmXmlParse( cmXmlH_t h, const cmChar_t* fn )
{
  // The body was empty, so callers received an indeterminate result code.
  // Forward to the internal routine which has the matching signature.
  cmXml_t* p = _cmXmlHandleToPtr(h);

  return _cmXmlParse(p,fn);
}

// Clear the parser referred to by 'h'.
// TODO: not implemented yet. Currently does nothing and returns kOkXmlRC
// so that callers do not read an indeterminate result code.
cmXmlRC_t cmXmlClear( cmXmlH_t h )
{
  (void)h;
  return kOkXmlRC;
}

// Report a syntax error at the current line through the parser's error
// object and return the resulting code (the value returned by cmErrMsg()).
cmXmlRC_t _cmXmlSyntaxError( cmXml_t* p )
{
  // cmErrMsg() is the reporting call used everywhere else in this file.
  // The line number is printed as an unsigned value and the stray
  // opening quote has been removed from the message.
  return cmErrMsg(&p->err,kSyntaxErrorXmlRC,"Syntax error on line %u.",(unsigned)p->line);
}

// Allocate a node from the parser's linked heap and attach it to the parser.
//
// flags  : selects the kind of node:
//          kNormalXmlFl  - tree node: becomes the root if there is none, and
//                          is appended as the last child of the node on top
//                          of the stack (or becomes the stack top if the
//                          stack is empty).
//          kDeclXmlFl    - stored as the declaration node.
//          kDoctypeXmlFl - stored as the DOCTYPE node.
//          Any other value is reported as a syntax error.
// label  : optional node label (need not be zero terminated).
// labelN : count of characters in 'label'.
//
// Returns the new node, or NULL if 'flags' did not select a node kind.
//
// NOTE(review): a normal node is only placed on the stack when the stack is
// empty, so nested nodes all become children of the first node while
// _cmXmlReadEndTag() pops one level per end tag. Confirm where the push of
// nested nodes is supposed to happen.
cmXmlNode_t* _cmXmlNodeAlloc( cmXml_t* p, unsigned flags, const cmChar_t* label, unsigned labelN )
{
  cmXmlNode_t* np = cmLhAllocZ(p->heapH,cmXmlNode_t,1);

  // cmIsFlag() must be given the flag word as well as the mask.
  if( cmIsFlag(flags,kNormalXmlFl) )
  {  
    if( p->root == NULL )
      p->root = np;

    if( p->stack == NULL )
      p->stack = np;
    else
    {
      np->parent = p->stack;
      
      if( p->stack->children == NULL )
        p->stack->children = np;
      else
      {
        // walk to the last existing child and link the new node after it
        cmXmlNode_t* n0p = NULL;
        cmXmlNode_t* n1p = p->stack->children;
        
        for(; n1p != NULL; n1p=n1p->sibling )
          n0p = n1p;

        n0p->sibling = np;
      }
    }
  }
  else
  {
    if( cmIsFlag(flags,kDeclXmlFl) )
      p->decl = np;
    else
    {
      if( cmIsFlag(flags,kDoctypeXmlFl) )
        p->doctype = np;
      else
      {
        _cmXmlSyntaxError(p);
        return NULL;
      }
    }
  }
  
  if( label != NULL )
    np->label = cmLhAllocStrN(p->heapH,label,labelN);

  return np;
}

// Allocate an attribute record from the parser's linked heap and push it
// onto the front of the attribute list of node 'np'.
//
// label/labelN : attribute label and its character count. The label is
//                left unset when 'label' is NULL or 'labelN' is 0.
// value/valueN : attribute value and its character count. The value is
//                left unset when 'value' is NULL or 'valueN' is 0.
//
// Returns 'np'.
cmXmlNode_t* _cmXmlAttrAlloc( cmXml_t* p, cmXmlNode_t* np, const cmChar_t* label, unsigned labelN, const cmChar_t* value, unsigned valueN )
{
  cmXmlAttr_t* ap = cmLhAllocZ(p->heapH, cmXmlAttr_t,1);

  // The strings are given with explicit lengths, so the length-taking
  // allocator (as used by _cmXmlNodeAlloc()) is required.
  if( label != NULL && labelN > 0 )
    ap->label = cmLhAllocStrN(p->heapH,label,labelN);

  // All parser memory lives in the single linked heap 'heapH'.
  if( value != NULL && valueN > 0 )
    ap->value = cmLhAllocStrN(p->heapH,value,valueN);
  
  ap->link  = np->attr;
  np->attr  = ap;
  
  return np;
}


// True when the cursor is at or beyond the end of the text buffer.
bool _cmXmlIsEof( cmXml_t* p )
{
  const cmChar_t* endPtr = p->b + p->bn;

  return p->c >= endPtr;
}

// Move the cursor forward by one character and count a new line each
// time the cursor lands on a '\n'.
// Returns false, without moving, if the cursor was already at EOF.
// Returns true otherwise - note that the cursor may then be at EOF.
bool _cmXmlAdvance( cmXml_t* p )
{
  if( _cmXmlIsEof(p) )
    return false;

  p->c += 1;

  // The step may have moved the cursor onto the end of the buffer:
  // only examine the character when it is still inside the buffer.
  if( !_cmXmlIsEof(p) && *p->c == '\n' )
    p->line += 1;
  
  return true;
}

// Advance the cursor to the next non-white space character. The cursor
// does not move if it is already on a non-white space character.
// Returns a pointer to the non-white space character.
// Returns NULL if EOF is encountered.
const cmChar_t*  _cmXmlAdvanceToNextNonWhite( cmXml_t* p )
{
  if( _cmXmlIsEof(p) )
    return NULL;
  
  // isspace() requires a value representable as unsigned char; a plain
  // (possibly signed) character holding a byte >= 0x80 is not.
  while( isspace((unsigned char)*p->c) )
  {
    // stop as soon as the cursor reaches the end of the buffer so that
    // the character at the end is never read
    if( _cmXmlAdvance(p) == false || _cmXmlIsEof(p) )
      return NULL;
  }

  return p->c;
}

// Advance to the next white space character, 'c0' or 'c1'. The cursor does
// not move if it is already on such a character.
// Returns a pointer to the white space character, 'c0' or 'c1'.
// Returns NULL if EOF is encountered first.
const cmChar_t*  _cmXmlAdvanceToNextWhiteOr( cmXml_t* p, cmChar_t c0, cmChar_t c1 )
{
  if( _cmXmlIsEof(p) )
    return NULL;

  // isspace() requires a value representable as unsigned char.
  while( !isspace((unsigned char)*p->c) && *p->c!=c0 && *p->c!=c1 )
  {
    // never read the character at the end of the buffer
    if( _cmXmlAdvance(p) == false || _cmXmlIsEof(p) )
      return NULL;
  }

  return p->c;
}

// Advance past leading white space followed by 's'.
// Note that 's' is expected to immediately follow any leading white space.
// Returns a pointer to the character after 's' (which may be the end of
// the buffer).
// Returns NULL if 's' is not encountered or EOF is reached first. In that
// case the cursor is left where the match failed; it is not restored.
const cmChar_t* _cmXmlAdvancePast( cmXml_t* p, const cmChar_t* s )
{
  if( _cmXmlIsEof(p) )
    return NULL;

  // isspace() requires a value representable as unsigned char.
  while( isspace((unsigned char)*p->c) )
    if( _cmXmlAdvance(p) == false || _cmXmlIsEof(p) )
      return NULL;

  // The EOF test keeps the comparison from reading the end of the buffer
  // when the text ends in the middle of 's'.
  for(; *s && !_cmXmlIsEof(p) && *p->c == *s; ++s )
    if( _cmXmlAdvance(p) == false )
      return NULL;

  return *s==0 ? p->c : NULL; 
}

// Advance past the current character and then advance to the next
// occurrence of 's'. The occurrence must begin after the character the
// cursor started on.
// Returns a pointer to the last character of the occurrence (the cursor is
// left on that character).
// Returns NULL if 's' is NULL or empty, or if EOF is reached before 's' is
// found.
const cmChar_t* _cmXmlAdvanceToNext( cmXml_t* p, cmChar_t* s )
{
  const cmChar_t* begPtr = p->c;
  size_t          n      = 0;

  if( s == NULL || (n = strlen(s)) == 0 )
    return NULL;

  while( _cmXmlAdvance(p) && !_cmXmlIsEof(p) )
  {
    // Compare the 'n' characters ending at the cursor with 's'.
    // A running match index that is reset to zero on a mismatch misses
    // overlapping candidates (e.g. "-->" inside "--->"); comparing the
    // whole window does not.
    if( (size_t)(p->c - begPtr) >= n && strncmp(p->c - (n-1), s, n) == 0 )
      return p->c;
  }

  // Callers test the result against NULL to detect a missing terminator.
  return NULL;
}

// Move the cursor forward by one character.
// Returns a pointer to the character following the original character.
// Returns NULL if the cursor was already at EOF (the cursor does not move)
// or if the step placed the cursor at EOF.
const cmChar_t* _cmXmlAdvanceOne( cmXml_t* p )
{
  if( _cmXmlIsEof(p) )
    return NULL;

  p->c += 1;

  if( _cmXmlIsEof(p) )
    return NULL;

  // keep the line count in step with _cmXmlAdvance() so that the line
  // number reported in error messages does not drift
  if( *p->c == '\n' )
    p->line += 1;

  return p->c;
}

// Parse one attribute of the form:  label = 'value'  or  label = "value".
//
// endChar : character which, together with '>', ends the attribute list
//           of the enclosing tag.
//
// If the next non-white space character is 'endChar' or '>' there is no
// further attribute: kOkXmlRC is returned with the cursor on that character.
// Otherwise, on success, the cursor is left just past the closing quote.
// Returns the code from _cmXmlSyntaxError() on malformed input or EOF.
//
// NOTE(review): the label (l0..l1) and value (v0..v1) are located but not
// stored - this function has no node to attach them to. Confirm how the
// owning node is meant to be supplied.
cmXmlRC_t  _cmXmlParseAttr( cmXml_t* p, cmChar_t endChar )
{
  cmXmlRC_t       rc = kOkXmlRC;
  const cmChar_t* l0 = NULL;   // first char of the label
  const cmChar_t* l1 = NULL;   // char following the label
  const cmChar_t* v0 = NULL;   // opening quote of the value
  const cmChar_t* v1 = NULL;   // closing quote of the value
  cmChar_t        quote[2] = { 0, 0 };

  // advance to the next label
  if(( l0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
    return _cmXmlSyntaxError(p);

  // if the end of the attribute list was encountered. '>' must be tested
  // here too, otherwise "<tag >" treats the '>' as the start of a label.
  if( *p->c == endChar || *p->c == '>' )
    return kOkXmlRC;
  
  // advance past last character in label
  if((l1 = _cmXmlAdvanceToNextWhiteOr(p,'=',' ')) == NULL )
    return _cmXmlSyntaxError(p);

  // an attribute must have a label
  if( l1 == l0 )
    return _cmXmlSyntaxError(p);

  // advance past the next '='
  if( _cmXmlAdvancePast(p,"=") == NULL )
    return _cmXmlSyntaxError(p);
  
  // advance to the next non-white character
  if( (v0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
    return _cmXmlSyntaxError(p);

  // the first character in the value must be a single or double quote
  if( *p->c != '\'' && *p->c != '"' )
    return _cmXmlSyntaxError(p);

  quote[0] = *p->c;
  
  // advance to the matching closing quote
  if( (v1 = _cmXmlAdvanceToNext(p,quote)) == NULL )
    return _cmXmlSyntaxError(p);

  // advance past the closing quote (at least the end of the tag must follow)
  if( _cmXmlAdvanceOne(p) == NULL )
    return _cmXmlSyntaxError(p);

  // p->c now points just past the closing quote
  (void)v0;
  (void)v1;
  return rc;
}

// Parse the attributes of a tag up to and including the closing '>'.
//
// endChar : character which may precede the closing '>'
//           (callers in this file pass '?' and '/').
//
// On success the cursor is left just past the closing '>'.
// Returns the code from _cmXmlParseAttr() if an attribute is malformed, or
// the code from _cmXmlSyntaxError() if the tag is not closed properly.
cmXmlRC_t _cmXmlParseAttrList( cmXml_t* p, cmChar_t endChar )
{
  cmXmlRC_t rc = kOkXmlRC;

  // Stop at the first failed attribute rather than continuing to
  // interpret the rest of the tag from an undefined cursor position.
  while( !_cmXmlIsEof(p) && *p->c != endChar && *p->c != '>' )
    if((rc = _cmXmlParseAttr(p,endChar)) != kOkXmlRC )
      return rc;

  // the text ended inside the tag
  if( _cmXmlIsEof(p) )
    return _cmXmlSyntaxError(p);

  if( *p->c == endChar )
  {
    // comparison, not assignment
    if( endChar == '/' )
    {
      // this is a simple node
    }
    
    if( _cmXmlAdvanceOne(p) == NULL )
      return _cmXmlSyntaxError(p);
  }
  
  if( *p->c != '>' )
    return _cmXmlSyntaxError(p);
  
  if( _cmXmlAdvancePast(p,">") == NULL )
    return _cmXmlSyntaxError(p);

  // p->c is now past the ending '>'
  
  return rc;
}

// Parse one token inside a DOCTYPE tag. A token is either a quoted string
// (single or double quotes) or a run of characters ended by white space
// or '>'.
//
// If the next non-white space character is '>' there is no further token:
// kOkXmlRC is returned with the cursor on the '>'.
// Otherwise, on success, the cursor is left just past the token (past the
// closing quote for a quoted token).
// Returns the code from _cmXmlSyntaxError() on EOF or an unterminated quote.
//
// NOTE(review): the token (t0..t1) is located but not stored on 'np' -
// confirm whether it should be recorded with _cmXmlAttrAlloc().
cmXmlRC_t _cmXmlParseDoctypeToken( cmXml_t* p, cmXmlNode_t* np )
{
  cmXmlRC_t       rc = kOkXmlRC;
  const cmChar_t* t0 = NULL;   // first char of the token
  const cmChar_t* t1 = NULL;   // char following the token

  // advance to the first char in the doctype token
  if((t0 = _cmXmlAdvanceToNextNonWhite(p) ) == NULL )
  {
    return _cmXmlSyntaxError(p);
  }

  // if the end of the tag was encountered
  if( *p->c == '>' )
      return kOkXmlRC;
    

  // if the token begins with a quote
  if( *p->c == '\'' || *p->c == '"' )
  {
    cmChar_t quote[2] = { *p->c, 0 };

    // advance to the matching closing quote
    if((t1 = _cmXmlAdvanceToNext(p,quote)) == NULL )
      return _cmXmlSyntaxError(p);

    // Step past the closing quote. Without this the next call would
    // take the closing quote for the start of a new quoted token.
    if((t1 = _cmXmlAdvanceOne(p)) == NULL )
      return _cmXmlSyntaxError(p);
  }
  else
  {
    if((t1 = _cmXmlAdvanceToNextWhiteOr(p,'>',' ')) == NULL )
      return _cmXmlSyntaxError(p);
  }

  // t1 and p->c now point just past the last character in the token
  (void)np;
  (void)t0;
  (void)t1;

  return rc;  
}

// Parse the body of a DOCTYPE tag. The cursor is expected to be just past
// the "DOCTYPE" keyword.
// Allocates the DOCTYPE node and then reads tokens until the closing '>'.
// On success the cursor is left just past the closing '>'.
// Returns cmErrLastRC() if the node could not be allocated, the code from
// _cmXmlParseDoctypeToken() if a token is malformed, or the code from
// _cmXmlSyntaxError() if the text ends before the closing '>'.
cmXmlRC_t _cmXmlParseDoctype( cmXml_t* p )
{
  cmXmlRC_t rc = kOkXmlRC;
  cmXmlNode_t* np;
  
  if((np = _cmXmlNodeAlloc(p,kDoctypeXmlFl,"DOCTYPE",strlen("DOCTYPE"))) == NULL )
    return cmErrLastRC(&p->err);
  
  while( !_cmXmlIsEof(p) && *p->c != '>' )
    if((rc = _cmXmlParseDoctypeToken(p,np)) != kOkXmlRC )
      return rc;

  // the text ended inside the tag
  if( _cmXmlIsEof(p) )
    return _cmXmlSyntaxError(p);

  // Consume the closing '>': the caller expects the cursor to be just
  // past it. The result is ignored because the end of the text may
  // legitimately follow.
  _cmXmlAdvanceOne(p);

  return rc;
}

// Node tags are tags that begin with a '<' and are not
// followed by any special character.
// The cursor is expected to be on the first character after the '<'.
// On success the cursor is left just past the closing '>'.
// Returns the code from _cmXmlSyntaxError() if the label is missing or the
// text ends, or the code from _cmXmlParseAttrList() if the attribute list
// is malformed.
//
// NOTE(review): the tag label (l0..l1) is located but no node is allocated
// for it here - confirm where normal nodes are meant to be created.
cmXmlRC_t _cmXmlParseNodeTag( cmXml_t* p )
{
  cmXmlRC_t       rc = kOkXmlRC;
  const cmChar_t* l0 = NULL;   // first char of the tag label
  const cmChar_t* l1 = NULL;   // char following the tag label

  // Advance to the first character of the tag label.
  if((l0 = _cmXmlAdvanceToNextNonWhite(p)) == NULL )
    return _cmXmlSyntaxError(p);

  // Advance to the character following the tag label.
  if((l1 = _cmXmlAdvanceToNextWhiteOr(p,'/','>')) == NULL )
    return _cmXmlSyntaxError(p);

  // a tag must have a label ("<>" and "</" style input is rejected)
  if( l1 == l0 )
    return _cmXmlSyntaxError(p);

  // Look for attributes. The callee has already reported any error, so
  // pass its code on rather than reporting a second syntax error.
  if((rc = _cmXmlParseAttrList(p,'/')) != kOkXmlRC )
      return rc;

  // p->c is now past the ending '>'
  
  return rc;
}

// Parse an end tag: "</" label ">".
// The cursor is expected to be on the '/' which follows the '<'.
// The label must be identical to the label of the node on top of the
// parsing stack; on success that node is popped and the cursor is left
// just past the closing '>'.
// Returns kOkXmlRC on success, otherwise the code from _cmXmlSyntaxError().
cmXmlRC_t _cmXmlReadEndTag( cmXml_t* p )
{
  const cmChar_t* l0 = NULL;   // first char of the label
  const cmChar_t* l1 = NULL;   // char following the label
  size_t          n  = 0;      // count of chars in the label

  assert( *p->c == '/' );

  // Since this is an end tag there must be a labelled node on the stack.
  // This is checked before the stack is dereferenced below.
  if( p->stack == NULL || p->stack->label == NULL )
    return _cmXmlSyntaxError(p);

  // advance past the '/'
  if(( l0 = _cmXmlAdvanceOne(p)) == NULL )
    return _cmXmlSyntaxError(p);

  // "</>" has no label. (The search below starts after the current
  // character and so would skip this '>'.)
  if( *l0 == '>' )
    return _cmXmlSyntaxError(p);

  // advance to the ending '>'
  if(( l1 = _cmXmlAdvanceToNext(p,">")) == NULL )
    return _cmXmlSyntaxError(p);

  // verify that the search stopped on a '>' and not at the end of the text
  if( _cmXmlIsEof(p) || *l1 != '>' )
    return _cmXmlSyntaxError(p);

  // Advance past the '>'. The result is ignored because the final end
  // tag may be the last thing in the text.
  _cmXmlAdvanceOne(p);

  // trim trailing space on label ('l1' is one past the last label char)
  while( l1>l0 && isspace((unsigned char)*(l1-1)) )
    --l1;

  // verify that the label has a length
  n = (size_t)(l1 - l0);
  if( n == 0 )
    return _cmXmlSyntaxError(p);

  // The label must match the node on the top of the stack exactly:
  // the lengths are compared as well so that a prefix is not accepted.
  if( strlen(p->stack->label) != n || strncmp( p->stack->label, l0, n ) != 0 )
    return _cmXmlSyntaxError(p);

  // pop the stack
  p->stack = p->stack->parent;
  
  return kOkXmlRC;  
}
  

// Read the next tag: skip leading white space, expect a '<' and then
// dispatch on the character which follows it:
//   '/'   end tag            (_cmXmlReadEndTag())
//   '?'   "<?xml ... ?>"     declaration
//   '!'   "<!-- ... -->"     comment, or "<!DOCTYPE ... >"
//   other normal node tag    (_cmXmlParseNodeTag())
//
// newNodeRef : must not be NULL. '*newNodeRef' is set to NULL.
//              NOTE(review): it is never set to a node here - confirm how
//              the new node is meant to be returned.
//
// Returns kOkXmlRC if a tag was read. kOkXmlRC is also returned when no
// leading '<' was found (error or EOF); the original code returned NULL
// there, which is assumed to convert to the same value - TODO confirm.
// Otherwise returns the error code of the failing step.
//
// NOTE(review): an empty comment "<!---->" is not recognized because
// _cmXmlAdvanceToNext() begins its search after the current character.
cmXmlRC_t  _cmXmlReadTag( cmXml_t* p, cmXmlNode_t** newNodeRef )
{
  cmXmlRC_t rc = kOkXmlRC;

  assert(newNodeRef != NULL );
  *newNodeRef = NULL;
  
  // No leading '<' was found 
  if( _cmXmlAdvancePast(p,"<") == NULL )
  {
    // error or EOF
    return kOkXmlRC;  
  }

  // the text ended directly after the '<'
  if( _cmXmlIsEof(p) )
    return _cmXmlSyntaxError(p);

  // examine the character following the opening '<'
  switch( *p->c )
  {
    // node end tag
    case '/':
      return _cmXmlReadEndTag(p);
    
    // declaration tag
    case '?':
      // the cursor is on the '?', so it is part of the text to skip
      if( _cmXmlAdvancePast(p,"?xml") == NULL )
        return _cmXmlSyntaxError(p);

      if( _cmXmlNodeAlloc(p,kDeclXmlFl, "xml",strlen("xml") ) == NULL )
        return cmErrLastRC(&p->err);
      
      if((rc = _cmXmlParseAttrList(p,'?')) != kOkXmlRC )
        return rc;
      
      break;
      
    case '!':
      {
        // peek at the character after the '!' only if it is inside the buffer
        const cmChar_t nextCh = (p->c + 1 < p->b + p->bn) ? *(p->c+1) : 0;

        switch( nextCh )
        {
          // comment node
          case '-':
            // the cursor is on the '!', so it is part of the text to skip
            if( _cmXmlAdvancePast(p,"!--") == NULL )
              return _cmXmlSyntaxError(p);
        
            if( _cmXmlAdvanceToNext(p,"-->") == NULL )
              return _cmXmlSyntaxError(p);

            // verify that the search stopped on the final '>'
            if( _cmXmlIsEof(p) || *p->c != '>' )
              return _cmXmlSyntaxError(p);

            // Step past the '>'. The result is ignored because the
            // comment may be the last thing in the text.
            _cmXmlAdvanceOne(p);

            // p->c is just after "-->"
            break;
          
          // DOCTYPE node
          case 'D':
            if( _cmXmlAdvancePast(p,"!DOCTYPE")==NULL )
              return _cmXmlSyntaxError(p);
        
            // the callee has already reported any error
            if((rc = _cmXmlParseDoctype(p)) != kOkXmlRC )
              return rc;

            break;
        
          default:
            return _cmXmlSyntaxError(p);
        }
      }
      break;
      
    default:
      // normal node
      if((rc = _cmXmlParseNodeTag(p)) != kOkXmlRC )
        return rc;

      // p->c is just after ">"
      break;
  }
  
  return rc;
}

// Read one complete node.
// TODO: not implemented yet. Currently does nothing and returns kOkXmlRC
// so that callers do not read an indeterminate result code.
cmXmlRC_t  _cmXmlReadNode( cmXml_t* p )
{
  (void)p;
  return kOkXmlRC;
}