cmLex.h/c: Additions made to support cmTextFmt.h/c.

Added kReturnUnknownLexFl configuration flag.  If this flag is set
tokens which are not recognized will be returned with the
token id set to kUnknownLexTId.  This is useful for returning all text.

Added kUserDefPriorityLexFl configuration flag.  User defined tokens
take priority even if an identifier (kIdentLexTId) has a longer match.

Added cmLexEnableToken().  This function allows token recognizers to be
enabled and disabled.
This commit is contained in:
kevin 2013-02-18 14:30:24 -08:00
parent 7bfa8a109e
commit 445824a5ed
2 changed files with 78 additions and 38 deletions

85
cmLex.c
View File

@ -28,6 +28,7 @@ cmLexErrorRecd cmLexErrorArray[] =
{ kFileCloseErrLexRC, "File close failed on cmLexSetFile()"}, { kFileCloseErrLexRC, "File close failed on cmLexSetFile()"},
{ kMemAllocErrLexRC, "An attempted memory allocation failed"}, { kMemAllocErrLexRC, "An attempted memory allocation failed"},
{ kEofRC, "The end of the input text was encountered (this is a normal condition not an error)"}, { kEofRC, "The end of the input text was encountered (this is a normal condition not an error)"},
{ kInvalidLexTIdLexRC, "An invalid token id was encountered."},
{ kInvalidLexRC, "Unknown lexer error code." } { kInvalidLexRC, "Unknown lexer error code." }
}; };
@ -42,6 +43,7 @@ typedef struct
cmLexMatcherFuncPtr_t funcPtr; // recognizer function (only used if userPtr==NULL) cmLexMatcherFuncPtr_t funcPtr; // recognizer function (only used if userPtr==NULL)
cmChar_t* tokenStr; // fixed string data used by the recognizer (only used if userPtr==NULL) cmChar_t* tokenStr; // fixed string data used by the recognizer (only used if userPtr==NULL)
cmLexUserMatcherPtr_t userPtr; // user defined recognizer function (only used if funcPtr==NULL) cmLexUserMatcherPtr_t userPtr; // user defined recognizer function (only used if funcPtr==NULL)
bool enableFl; // true if this matcher is enabled
} cmLexMatcher; } cmLexMatcher;
@ -325,6 +327,7 @@ cmRC_t _cmLexInstallMatcher( cmLex* p, unsigned typeId, cmLexMatcherFuncPtr_t f
p->mfp[p->mfi].typeId = typeId; p->mfp[p->mfi].typeId = typeId;
p->mfp[p->mfi].funcPtr = funcPtr; p->mfp[p->mfi].funcPtr = funcPtr;
p->mfp[p->mfi].userPtr = userPtr; p->mfp[p->mfi].userPtr = userPtr;
p->mfp[p->mfi].enableFl = true;
if( keyStr != NULL ) if( keyStr != NULL )
{ {
@ -637,6 +640,21 @@ cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcher
return _cmLexInstallMatcher( p, id, NULL, NULL, userPtr ); return _cmLexInstallMatcher( p, id, NULL, NULL, userPtr );
} }
// Enable or disable the matcher registered under the token type id 'id'.
//
// h        - lexer handle, converted to the internal cmLex record via _cmLexHandleToPtr().
// id       - token type id compared against each installed matcher's typeId.
// enableFl - new value for the matcher's enableFl field.
//
// Only the first installed matcher whose typeId equals 'id' is updated.
// Returns cmOkRC when a matcher was updated.  If no installed matcher has
// this id, the failure is reported through _cmLexError() with
// kInvalidLexTIdLexRC and the value returned by that call is returned.
cmRC_t cmLexEnableToken( cmLexH h, unsigned id, bool enableFl )
{
  cmLex*   p  = _cmLexHandleToPtr(h);
  unsigned mi = 0;

  // scan the installed matchers for the requested token type id
  for(; mi<p->mfi; ++mi)
    if( p->mfp[mi].typeId == id )
    {
      p->mfp[mi].enableFl = enableFl;
      return cmOkRC;
    }

  // 'id' is unsigned, so it is formatted with %u rather than %i
  // (assumes _cmLexError() takes printf-style format strings - confirm).
  return _cmLexError( p, kInvalidLexTIdLexRC, "%u is not a valid token type id.",id);
}
unsigned cmLexFilterFlags( cmLexH h ) unsigned cmLexFilterFlags( cmLexH h )
{ {
cmLex* p = _cmLexHandleToPtr(h); cmLex* p = _cmLexHandleToPtr(h);
@ -669,32 +687,44 @@ unsigned cmLexGetNextToken( cmLexH h )
p->curTokenCharCnt = 0; p->curTokenCharCnt = 0;
// try each matcher
for(; mi<p->mfi; ++mi) for(; mi<p->mfi; ++mi)
{ if( p->mfp[mi].enableFl )
unsigned charCnt = 0;
if( p->mfp[mi].funcPtr != NULL )
charCnt = p->mfp[mi].funcPtr(p, p->cp + p->ci, p->cn - p->ci, p->mfp[mi].tokenStr );
else
charCnt = p->mfp[mi].userPtr( p->cp + p->ci, p->cn - p->ci);
if( cmErrLastRC(&p->err) != kOkLexRC )
return kErrorLexTId;
// if this matched token is longer then the prev. matched token or
// if the prev matched token was an identifier and this matched token is an equal length user defined token
if( (charCnt > maxCharCnt) || (charCnt>0 && charCnt==maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId ) )
{ {
maxCharCnt = charCnt; unsigned charCnt = 0;
maxIdx = mi; if( p->mfp[mi].funcPtr != NULL )
} charCnt = p->mfp[mi].funcPtr(p, p->cp + p->ci, p->cn - p->ci, p->mfp[mi].tokenStr );
else
charCnt = p->mfp[mi].userPtr( p->cp + p->ci, p->cn - p->ci);
} if( cmErrLastRC(&p->err) != kOkLexRC )
return kErrorLexTId;
// if this matched token is longer than the prev. matched token or
// if the prev matched token was an identifier and this matched token is an equal length user defined token
if( (charCnt > maxCharCnt)
|| (charCnt>0 && charCnt==maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId )
|| (charCnt>0 && charCnt<maxCharCnt && p->mfp[maxIdx].typeId==kIdentLexTId && p->mfp[mi].typeId >=kUserLexTId && cmIsFlag(p->flags,kUserDefPriorityLexFl))
)
{
maxCharCnt = charCnt;
maxIdx = mi;
}
}
// no token was matched // no token was matched
if( maxIdx == cmInvalidIdx ) if( maxIdx == cmInvalidIdx )
{ {
_cmLexError( p, kNoMatchLexRC, "Unable to recognize token:'%c'.",*(p->cp+p->ci)); if( cmIsFlag(p->flags,kReturnUnknownLexFl) )
return kErrorLexTId; {
maxCharCnt = 1;
}
else
{
_cmLexError( p, kNoMatchLexRC, "Unable to recognize token:'%c'.",*(p->cp+p->ci));
return kErrorLexTId;
}
} }
// update the current line and column position // update the current line and column position
@ -716,16 +746,19 @@ unsigned cmLexGetNextToken( cmLexH h )
bool returnFl = true; bool returnFl = true;
// check the space token filter if( maxIdx != cmInvalidIdx )
if( (p->mfp[ maxIdx ].typeId == kSpaceLexTId) && (cmIsFlag(p->flags,kReturnSpaceLexFl)==0) ) {
returnFl = false; // check the space token filter
if( (p->mfp[ maxIdx ].typeId == kSpaceLexTId) && (cmIsFlag(p->flags,kReturnSpaceLexFl)==0) )
returnFl = false;
// check the comment token filter // check the comment token filter
if( _cmLexIsCommentTypeId(p->mfp[ maxIdx ].typeId) && (cmIsFlag(p->flags,kReturnCommentsLexFl)==0) ) if( _cmLexIsCommentTypeId(p->mfp[ maxIdx ].typeId) && (cmIsFlag(p->flags,kReturnCommentsLexFl)==0) )
returnFl = false; returnFl = false;
}
// update the lexer state // update the lexer state
p->curTokenId = p->mfp[ maxIdx ].typeId; p->curTokenId = maxIdx==cmInvalidIdx ? kUnknownLexTId : p->mfp[ maxIdx ].typeId;
p->curTokenCharIdx = p->ci; p->curTokenCharIdx = p->ci;
p->curTokenCharCnt = maxCharCnt; p->curTokenCharCnt = maxCharCnt;

31
cmLex.h
View File

@ -12,23 +12,26 @@
enum enum
{ {
kErrorLexTId, // 0 the lexer was unable to identify the current token kErrorLexTId, // 0 the lexer was unable to identify the current token
kEofLexTId, // 1 the lexer reached the end of input kUnknownLexTId, // 1 the token is of an unknown type (only used when kReturnUnknownLexFl is set)
kSpaceLexTId, // 2 white space kEofLexTId, // 2 the lexer reached the end of input
kRealLexTId, // 3 real number (contains a decimal point or is in scientific notation) kSpaceLexTId, // 3 white space
kIntLexTId, // 4 decimal integer kRealLexTId, // 4 real number (contains a decimal point or is in scientific notation)
kHexLexTId, // 5 hexidecimal integer kIntLexTId, // 5 decimal integer
kIdentLexTId, // 6 identifier kHexLexTId, // 6 hexidecimal integer
kQStrLexTId, // 7 quoted string kIdentLexTId, // 7 identifier
kBlockCmtLexTId, // 8 block comment kQStrLexTId, // 8 quoted string
kLineCmtLexTId, // 9 line comment kBlockCmtLexTId, // 9 block comment
kUserLexTId // 10 user registered token (See cmLexRegisterToken().) kLineCmtLexTId, // 10 line comment
kUserLexTId // 11 user registered token (See cmLexRegisterToken().)
}; };
// Lexer control flags used with cmLexInit(). // Lexer control flags used with cmLexInit().
enum enum
{ {
kReturnSpaceLexFl = 0x01, //< Return space tokens kReturnSpaceLexFl = 0x01, //< Return space tokens
kReturnCommentsLexFl = 0x02 //< Return comment tokens kReturnCommentsLexFl = 0x02, //< Return comment tokens
kReturnUnknownLexFl = 0x04, //< Return unknown tokens
kUserDefPriorityLexFl= 0x08 //< User defined tokens take priority even if a kIdentLexTId token has a longer match
}; };
// cmLex result codes. // cmLex result codes.
@ -46,7 +49,8 @@ enum
kFileCloseErrLexRC, //< 9 File close failed on cmLexSetFile() kFileCloseErrLexRC, //< 9 File close failed on cmLexSetFile()
kMemAllocErrLexRC, //< 10 An attempted memory allocation failed kMemAllocErrLexRC, //< 10 An attempted memory allocation failed
kEofRC, //< 11 The end of the input text was encountered (this is a normal condition not an error) kEofRC, //< 11 The end of the input text was encountered (this is a normal condition not an error)
kInvalidLexRC //< 12 Sentinal value. kInvalidLexTIdLexRC, //< 12 An invalid lex token id was encountered.
kInvalidLexRC //< 13 Sentinal value.
}; };
@ -84,6 +88,9 @@ typedef unsigned (*cmLexUserMatcherPtr_t)( const cmChar_t* cp, unsigned cn );
cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t funcPtr ); cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t funcPtr );
// Enable or disable the specified token type.
// 'id' is the token type id of the matcher to update and 'enableFl' is its
// new enable state.  Matchers whose enable flag is false are skipped by
// cmLexGetNextToken().  Newly installed matchers start out enabled.
// Returns cmOkRC on success.  If no matcher is registered with 'id' a
// kInvalidLexTIdLexRC error is reported.
cmRC_t cmLexEnableToken( cmLexH h, unsigned id, bool enableFl );
// Get and set the lexer filter flags kReturnXXXLexFl. // Get and set the lexer filter flags kReturnXXXLexFl.
// These flags can be safely enabled and disabled between // These flags can be safely enabled and disabled between
// calls to cmLexGetNextToken(). // calls to cmLexGetNextToken().