cmLex.h/c : Added optional quoted character token recognizer.

This commit is contained in:
kevin 2014-01-06 16:10:05 -05:00
parent 0a71501780
commit 83314506d7
2 changed files with 35 additions and 14 deletions

19
cmLex.c
View File

@ -120,6 +120,8 @@ cmRC_t _cmLexError( cmLex* p, unsigned rc, const char* fmt, ... )
return rc; return rc;
} }
// Locate 'keyStr' in cp[cn] and return the index into cp[cn] of the character
// following the last char in 'keyStr'. If keyStr is not found return cmInvalidIdx.
unsigned _cmLexScanTo( const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr ) unsigned _cmLexScanTo( const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
{ {
unsigned i = 0; unsigned i = 0;
@ -336,6 +338,20 @@ unsigned _cmLexQStrMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmC
return 0; return 0;
} }
// Quoted character matcher: recognizes a token of the form 'x', that is a
// single quote, exactly one character, and a closing single quote.
// Returns the token length (3) when cp[cn] begins with such a token, otherwise 0.
// The character between the quotes is not inspected. 'p' and 'keyStr' are
// not used by this matcher.
unsigned _cmLexQCharMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
{
  // open quote + one character + close quote
  const unsigned tokCharCnt = 3;

  // not enough input left to hold a complete quoted character
  if( cn < tokCharCnt )
    return 0;

  // both the first and the third character must be single quotes
  if( cp[0] != '\'' || cp[tokCharCnt-1] != '\'' )
    return 0;

  return tokCharCnt;
}
unsigned _cmLexBlockCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr ) unsigned _cmLexBlockCmtMatcher( cmLex* p, const cmChar_t* cp, unsigned cn, const cmChar_t* keyStr )
{ {
@ -474,6 +490,9 @@ cmLexH cmLexInit( const cmChar_t* cp, unsigned cn, unsigned flags, cmRpt_t* rpt
_cmLexInstallMatcher( p, kBlockCmtLexTId, _cmLexBlockCmtMatcher, NULL, NULL ); _cmLexInstallMatcher( p, kBlockCmtLexTId, _cmLexBlockCmtMatcher, NULL, NULL );
_cmLexInstallMatcher( p, kLineCmtLexTId, _cmLexLineCmtMatcher, NULL, NULL ); _cmLexInstallMatcher( p, kLineCmtLexTId, _cmLexLineCmtMatcher, NULL, NULL );
if( cmIsFlag(flags,kReturnQCharLexFl) )
_cmLexInstallMatcher( p, kQCharLexTId, _cmLexQCharMatcher, NULL, NULL );
h.h = p; h.h = p;
_cmLexReset(p); _cmLexReset(p);

30
cmLex.h
View File

@ -11,18 +11,19 @@
// Predefined Lexer Id's // Predefined Lexer Id's
enum enum
{ {
kErrorLexTId, // 0 the lexer was unable to identify the current token kErrorLexTId, // 0 the lexer was unable to identify the current token
kUnknownLexTId, // 1 the token is of an unknown type (only used when kReturnUnknownLexFl is set) kUnknownLexTId, // 1 the token is of an unknown type (only used when kReturnUnknownLexFl is set)
kEofLexTId, // 2 the lexer reached the end of input kEofLexTId, // 2 the lexer reached the end of input
kSpaceLexTId, // 3 white space kSpaceLexTId, // 3 white space
kRealLexTId, // 4 real number (contains a decimal point or is in scientific notation) kRealLexTId, // 4 real number (contains a decimal point or is in scientific notation)
kIntLexTId, // 5 decimal integer kIntLexTId, // 5 decimal integer
kHexLexTId, // 6 hexadecimal integer kHexLexTId, // 6 hexadecimal integer
kIdentLexTId, // 7 identifier kIdentLexTId, // 7 identifier
kQStrLexTId, // 8 quoted string kQStrLexTId, // 8 quoted string
kBlockCmtLexTId, // 9 block comment kQCharLexTId, // 9 quoted char
kLineCmtLexTId, // 10 line comment kBlockCmtLexTId, // 10 block comment
kUserLexTId // 11 user registered token (See cmLexRegisterToken().) kLineCmtLexTId, // 11 line comment
kUserLexTId // 12 user registered token (See cmLexRegisterToken().)
}; };
// Lexer control flags used with cmLexInit(). // Lexer control flags used with cmLexInit().
@ -31,7 +32,8 @@ enum
kReturnSpaceLexFl = 0x01, //< Return space tokens kReturnSpaceLexFl = 0x01, //< Return space tokens
kReturnCommentsLexFl = 0x02, //< Return comment tokens kReturnCommentsLexFl = 0x02, //< Return comment tokens
kReturnUnknownLexFl = 0x04, //< Return unknown tokens kReturnUnknownLexFl = 0x04, //< Return unknown tokens
kUserDefPriorityLexFl= 0x08 //< User defined tokens take priority even if a kIdentLexTId token has a longer match kReturnQCharLexFl = 0x08, //< Return quoted characters
kUserDefPriorityLexFl= 0x10 //< User defined tokens take priority even if a kIdentLexTId token has a longer match
}; };
// cmLex result codes. // cmLex result codes.
@ -84,7 +86,7 @@ cmRC_t cmLexSetFile( cmLexH h, const cmChar_t* fn );
cmRC_t cmLexRegisterToken( cmLexH h, unsigned id, const cmChar_t* token ); cmRC_t cmLexRegisterToken( cmLexH h, unsigned id, const cmChar_t* token );
// Register a user defined token recognition function. This function should return the count // Register a user defined token recognition function. This function should return the count
// of initial, consecutive, characters in 'cp' which match its token pattern. // of initial, consecutive, characters in 'cp[cn]' which match its token pattern.
typedef unsigned (*cmLexUserMatcherPtr_t)( const cmChar_t* cp, unsigned cn ); typedef unsigned (*cmLexUserMatcherPtr_t)( const cmChar_t* cp, unsigned cn );
cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t funcPtr ); cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t funcPtr );