Browse Source

cmLex.h/c: Additions made to support cmTextFmt.h/c.

Added kReturnUnknownLexFl configuration flag.  If this flag is set,
tokens that are not recognized will be returned with the
token id set to kUnknownLexTId.  This is useful for returning all text.

Added kUserDefPriorityLexFl configuration flag.  User defined tokens
take priority even if an identifier (kIdentLexTId) has a longer match.

Added cmLexEnableToken().  This function allows token recognizers to be
enabled and disabled.
master
kevin 11 years ago
parent
commit
445824a5ed
2 changed files with 78 additions and 38 deletions
  1. 59
    26
      cmLex.c
  2. 19
    12
      cmLex.h

+ 59
- 26
cmLex.c View File

28
   { kFileCloseErrLexRC,    "File close failed on cmLexSetFile()"},
28
   { kFileCloseErrLexRC,    "File close failed on cmLexSetFile()"},
29
   { kMemAllocErrLexRC,     "An attempted memory allocation failed"},
29
   { kMemAllocErrLexRC,     "An attempted memory allocation failed"},
30
   { kEofRC,                "The end of the input text was encountered (this is a normal condition not an error)"},
30
   { kEofRC,                "The end of the input text was encountered (this is a normal condition not an error)"},
31
+  { kInvalidLexTIdLexRC,   "An invalid token id was encountered."},
31
   { kInvalidLexRC,         "Unknown lexer error code." }
32
   { kInvalidLexRC,         "Unknown lexer error code." }
32
 };
33
 };
33
 
34
 
42
   cmLexMatcherFuncPtr_t funcPtr;  // recognizer function (only used if userPtr==NULL)
43
   cmLexMatcherFuncPtr_t funcPtr;  // recognizer function (only used if userPtr==NULL)
43
   cmChar_t*             tokenStr; // fixed string data used by the recognizer (only used if userPtr==NULL)
44
   cmChar_t*             tokenStr; // fixed string data used by the recognizer (only used if userPtr==NULL)
44
   cmLexUserMatcherPtr_t userPtr;  // user defined recognizer function (only used if funcPtr==NULL)
45
   cmLexUserMatcherPtr_t userPtr;  // user defined recognizer function (only used if funcPtr==NULL)
46
+  bool                  enableFl; // true if this matcher is enabled
45
 } cmLexMatcher;
47
 } cmLexMatcher;
46
 
48
 
47
 
49
 
325
   p->mfp[p->mfi].typeId   = typeId;
327
   p->mfp[p->mfi].typeId   = typeId;
326
   p->mfp[p->mfi].funcPtr  = funcPtr;
328
   p->mfp[p->mfi].funcPtr  = funcPtr;
327
   p->mfp[p->mfi].userPtr  = userPtr;
329
   p->mfp[p->mfi].userPtr  = userPtr;
330
+  p->mfp[p->mfi].enableFl = true;
328
 
331
 
329
   if( keyStr != NULL )
332
   if( keyStr != NULL )
330
   {
333
   {
637
   return _cmLexInstallMatcher( p, id, NULL, NULL, userPtr );
640
   return _cmLexInstallMatcher( p, id, NULL, NULL, userPtr );
638
 }
641
 }
639
 
642
 
643
+cmRC_t             cmLexEnableToken( cmLexH h, unsigned id, bool enableFl )
644
+{
645
+  cmLex* p = _cmLexHandleToPtr(h);
646
+
647
+  unsigned mi = 0;
648
+  for(; mi<p->mfi; ++mi)
649
+    if( p->mfp[mi].typeId == id )
650
+    {
651
+      p->mfp[mi].enableFl = enableFl;
652
+      return cmOkRC;
653
+    }
654
+
655
+  return _cmLexError( p, kInvalidLexTIdLexRC, "%i is not a valid token type id.",id);
656
+}
657
+
640
 unsigned           cmLexFilterFlags( cmLexH h )
658
 unsigned           cmLexFilterFlags( cmLexH h )
641
 {
659
 {
642
   cmLex* p = _cmLexHandleToPtr(h);
660
   cmLex* p = _cmLexHandleToPtr(h);
669
     p->curTokenCharCnt = 0;
687
     p->curTokenCharCnt = 0;
670
 
688
 
671
 
689
 
690
+    // try each matcher
672
     for(; mi<p->mfi; ++mi)
691
     for(; mi<p->mfi; ++mi)
673
-    {
674
-      unsigned charCnt = 0;
675
-      if( p->mfp[mi].funcPtr != NULL )
676
-        charCnt = p->mfp[mi].funcPtr(p, p->cp + p->ci, p->cn - p->ci, p->mfp[mi].tokenStr );
677
-      else
678
-        charCnt = p->mfp[mi].userPtr( p->cp + p->ci, p->cn - p->ci);
679
-
680
-      if( cmErrLastRC(&p->err) != kOkLexRC )
681
-        return kErrorLexTId;
682
-
683
-      // if this matched token is longer then the prev. matched token or
684
-      // if the prev matched token was an identifier and this matched token is an equal length user defined token
685
-      if( (charCnt > maxCharCnt) || (charCnt>0 && charCnt==maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId ) )
692
+      if( p->mfp[mi].enableFl )
686
       {
693
       {
687
-        maxCharCnt = charCnt;
688
-        maxIdx     = mi;
689
-      }
694
+        unsigned charCnt = 0;
695
+        if( p->mfp[mi].funcPtr != NULL )
696
+          charCnt = p->mfp[mi].funcPtr(p, p->cp + p->ci, p->cn - p->ci, p->mfp[mi].tokenStr );
697
+        else
698
+          charCnt = p->mfp[mi].userPtr( p->cp + p->ci, p->cn - p->ci);
699
+
700
+        if( cmErrLastRC(&p->err) != kOkLexRC )
701
+          return kErrorLexTId;
702
+
703
+        // if this matched token is longer than the prev. matched token or
704
+        // if the prev matched token was an identifier and this matched token is an equal length user defined token
705
+        if( (charCnt > maxCharCnt) 
706
+          || (charCnt>0 && charCnt==maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId ) 
707
+          || (charCnt>0 && charCnt<maxCharCnt  && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId && cmIsFlag(p->flags,kUserDefPriorityLexFl))
708
+            )
709
+        {
710
+          maxCharCnt = charCnt;
711
+          maxIdx     = mi;
712
+        }
690
 
713
 
691
-    }
714
+      }
692
 
715
 
693
     // no token was matched
716
     // no token was matched
694
     if( maxIdx == cmInvalidIdx )
717
     if( maxIdx == cmInvalidIdx )
695
     {
718
     {
696
-      _cmLexError( p, kNoMatchLexRC, "Unable to recognize token:'%c'.",*(p->cp+p->ci));
697
-      return kErrorLexTId;     
719
+      if( cmIsFlag(p->flags,kReturnUnknownLexFl) )
720
+      {
721
+        maxCharCnt = 1;
722
+      }
723
+      else
724
+      {
725
+        _cmLexError( p, kNoMatchLexRC, "Unable to recognize token:'%c'.",*(p->cp+p->ci));
726
+        return kErrorLexTId;     
727
+      }
698
     }
728
     }
699
 
729
 
700
     // update the current line and column position    
730
     // update the current line and column position    
716
 
746
 
717
     bool returnFl = true;
747
     bool returnFl = true;
718
 
748
 
719
-    // check the space token filter
720
-    if( (p->mfp[ maxIdx ].typeId == kSpaceLexTId) && (cmIsFlag(p->flags,kReturnSpaceLexFl)==0) )
721
-      returnFl = false;
749
+    if( maxIdx != cmInvalidIdx )
750
+    {
751
+      // check the space token filter
752
+      if( (p->mfp[ maxIdx ].typeId == kSpaceLexTId) && (cmIsFlag(p->flags,kReturnSpaceLexFl)==0) )
753
+        returnFl = false;
722
 
754
 
723
-    // check the comment token filter
724
-    if( _cmLexIsCommentTypeId(p->mfp[ maxIdx ].typeId) && (cmIsFlag(p->flags,kReturnCommentsLexFl)==0) )
725
-      returnFl = false;
755
+      // check the comment token filter
756
+      if( _cmLexIsCommentTypeId(p->mfp[ maxIdx ].typeId) && (cmIsFlag(p->flags,kReturnCommentsLexFl)==0) )
757
+        returnFl = false;
758
+    }
726
 
759
 
727
     // update the lexer state
760
     // update the lexer state
728
-    p->curTokenId      = p->mfp[ maxIdx ].typeId;    
761
+    p->curTokenId      = maxIdx==cmInvalidIdx ? kUnknownLexTId : p->mfp[ maxIdx ].typeId;    
729
     p->curTokenCharIdx = p->ci;
762
     p->curTokenCharIdx = p->ci;
730
     p->curTokenCharCnt = maxCharCnt;
763
     p->curTokenCharCnt = maxCharCnt;
731
       
764
       

+ 19
- 12
cmLex.h View File

12
 enum
12
 enum
13
 {
13
 {
14
   kErrorLexTId,    // 0  the lexer was unable to identify the current token
14
   kErrorLexTId,    // 0  the lexer was unable to identify the current token
15
-  kEofLexTId,      // 1  the lexer reached the end of input
16
-  kSpaceLexTId,    // 2  white space
17
-  kRealLexTId,     // 3  real number (contains a decimal point or is in scientific notation) 
18
-  kIntLexTId,      // 4  decimal integer
19
-  kHexLexTId,      // 5  hexidecimal integer
20
-  kIdentLexTId,    // 6  identifier
21
-  kQStrLexTId,     // 7  quoted string
22
-  kBlockCmtLexTId, // 8  block comment
23
-  kLineCmtLexTId,  // 9  line comment
24
-  kUserLexTId      // 10 user registered token (See cmLexRegisterToken().)
15
+  kUnknownLexTId,  // 1  the token is of an unknown type (only used when kReturnUnknownLexFl is set)
16
+  kEofLexTId,      // 2  the lexer reached the end of input
17
+  kSpaceLexTId,    // 3  white space
18
+  kRealLexTId,     // 4  real number (contains a decimal point or is in scientific notation) 
19
+  kIntLexTId,      // 5  decimal integer
20
+  kHexLexTId,      // 6  hexadecimal integer
21
+  kIdentLexTId,    // 7  identifier
22
+  kQStrLexTId,     // 8  quoted string
23
+  kBlockCmtLexTId, // 9  block comment
24
+  kLineCmtLexTId,  // 10  line comment
25
+  kUserLexTId      // 11 user registered token (See cmLexRegisterToken().)
25
 };
26
 };
26
 
27
 
27
 // Lexer control flags used with cmLexInit().
28
 // Lexer control flags used with cmLexInit().
28
 enum
29
 enum
29
 {
30
 {
30
   kReturnSpaceLexFl    = 0x01, //< Return space tokens
31
   kReturnSpaceLexFl    = 0x01, //< Return space tokens
31
-  kReturnCommentsLexFl = 0x02  //< Return comment tokens
32
+  kReturnCommentsLexFl = 0x02, //< Return comment tokens
33
+  kReturnUnknownLexFl  = 0x04, //< Return unknown tokens
34
+  kUserDefPriorityLexFl= 0x08  //< User defined tokens take priority even if a kIdentLexTId token has a longer match
32
 };
35
 };
33
 
36
 
34
 // cmLex result codes.
37
 // cmLex result codes.
46
   kFileCloseErrLexRC,      //< 9  File close failed on cmLexSetFile()
49
   kFileCloseErrLexRC,      //< 9  File close failed on cmLexSetFile()
47
   kMemAllocErrLexRC,       //< 10  An attempted memory allocation failed
50
   kMemAllocErrLexRC,       //< 10  An attempted memory allocation failed
48
   kEofRC,                  //< 11 The end of the input text was encountered (this is a normal condition not an error)
51
   kEofRC,                  //< 11 The end of the input text was encountered (this is a normal condition not an error)
49
-  kInvalidLexRC            //< 12 Sentinal value.
52
+  kInvalidLexTIdLexRC,     //< 12 An invalid lex token id was encountered.
53
+  kInvalidLexRC            //< 13 Sentinel value.
50
 
54
 
51
 };
55
 };
52
 
56
 
84
 
88
 
85
 cmRC_t             cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t funcPtr );
89
 cmRC_t             cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t funcPtr );
86
 
90
 
91
+// Enable or disable the specified token type.
92
+cmRC_t             cmLexEnableToken( cmLexH h, unsigned id, bool enableFl );
93
+
87
 // Get and set the lexer filter flags kReturnXXXLexFl.
94
 // Get and set the lexer filter flags kReturnXXXLexFl.
88
 // These flags can be safely enabled and disabled between
95
 // These flags can be safely enabled and disabled between
89
 // calls to cmLexGetNextToken().
96
 // calls to cmLexGetNextToken().

Loading…
Cancel
Save