libcm/cmLex.h
kevin 445824a5ed cmLex.h/c: Additions made to support cmTextFmt.h/c.
Added kReturnUnknownLexFl configuration flag.  If this flag is set
tokens which are not recognized will be returned with the
token id set to kUnknownLexTId.  This is useful for returning all text.

Added kUserDefPriorityLexFl configuration flag.  User defined tokens
take priority even if an identifier (kIdentLexTId) has a longer match.

Added cmLexEnableToken().  This function allows tokens recognizers to be
enabled and disabled.
2013-02-18 14:30:24 -08:00

153 regels
6.0 KiB
C

#ifndef cmLex_h
#define cmLex_h
//{
//(
//)
//(
// Predefined lexer token type id's.
// Id's above kUserLexTId are available for user registered tokens
// (see cmLexRegisterToken()).
enum
{
  kErrorLexTId    = 0,   // the lexer was unable to identify the current token
  kUnknownLexTId  = 1,   // token of unknown type (only used when kReturnUnknownLexFl is set)
  kEofLexTId      = 2,   // the lexer reached the end of input
  kSpaceLexTId    = 3,   // white space
  kRealLexTId     = 4,   // real number (contains a decimal point or is in scientific notation)
  kIntLexTId      = 5,   // decimal integer
  kHexLexTId      = 6,   // hexadecimal integer
  kIdentLexTId    = 7,   // identifier
  kQStrLexTId     = 8,   // quoted string
  kBlockCmtLexTId = 9,   // block comment
  kLineCmtLexTId  = 10,  // line comment
  kUserLexTId     = 11   // user registered token (See cmLexRegisterToken().)
};
// Lexer configuration flags passed to cmLexInit().
// Each flag is a distinct bit.
enum
{
  kReturnSpaceLexFl     = 1 << 0, //< Return white space tokens
  kReturnCommentsLexFl  = 1 << 1, //< Return block and line comment tokens
  kReturnUnknownLexFl   = 1 << 2, //< Return unrecognized text as kUnknownLexTId tokens
  kUserDefPriorityLexFl = 1 << 3  //< User defined tokens win even if a kIdentLexTId token has a longer match
};
// cmLex result codes.
// The codes are numbered consecutively starting at cmOkRC; the numeric
// annotations below assume cmOkRC == 0 (cmOkRC is defined outside this file).
// NOTE(review): kEofRC does not follow the kXXXLexRC naming pattern used by
// the other codes; it is kept as-is because callers may reference it.
enum
{
kOkLexRC = cmOkRC, //< 0 No error. The operation completed successfully.
kDuplicateTokenLexRC, //< 1 The text or id passed as a user token is already in use by another token.
kMissingCmtEndLexRC, //< 2 The end of a block comment could not be found.
kMissingEndQuoteLexRC, //< 3 The end of a quoted string could not be found.
kNoMatchLexRC, //< 4 The lexer encountered a string which could not be classified.
kFileOpenErrLexRC, //< 5 File open failed on cmLexSetFile().
kFileSeekErrLexRC, //< 6 File seek failed on cmLexSetFile().
kFileTellErrLexRC, //< 7 File tell failed on cmLexSetFile().
kFileReadErrLexRC, //< 8 File read failed on cmLexSetFile().
kFileCloseErrLexRC, //< 9 File close failed on cmLexSetFile().
kMemAllocErrLexRC, //< 10 An attempted memory allocation failed.
kEofRC, //< 11 The end of the input text was encountered (this is a normal condition not an error).
kInvalidLexTIdLexRC, //< 12 An invalid lex token id was encountered.
kInvalidLexRC //< 13 Sentinel value (always the last code).
};
// Lexer handle type. A handle is returned by cmLexInit() and is passed
// to the other cmLexXXX() functions to identify the lexer instance.
typedef cmHandle_t cmLexH;
// Declared here and defined elsewhere.
// NOTE(review): presumably the 'null' (not yet initialized) handle value - confirm in cmLex.c.
extern cmLexH cmLexNullH;
// Initialize the lexer and receive a lexer handle in return.
// 'cp' is the text buffer to be tokenized.
// Set cp to NULL if the buffer will be set later via cmLexSetTextBuffer().
// NOTE(review): 'cn' is presumably the count of characters in 'cp' - confirm in cmLex.c.
// See the kXXXLexFl enum's above for possible 'flags' values.
// Token text is returned as pointers directly into the buffer
// (see cmLexTokenText()), so the buffer must outlive its use by the lexer.
cmLexH cmLexInit( const cmChar_t* cp, unsigned cn, unsigned flags, cmRpt_t* rpt );
// Finalize a lexer created by an earlier call to cmLexInit().
// Note that the handle is passed by pointer.
cmRC_t cmLexFinal( cmLexH* hp );
// Rewind the lexer to the beginning of the buffer (the same as the post initialize state).
cmRC_t cmLexReset( cmLexH h );
// Verify that a lexer handle is valid.
bool cmLexIsValid( cmLexH h );
// Set a new text buffer and reset the lexer to the post initialize state.
cmRC_t cmLexSetTextBuffer( cmLexH h, const cmChar_t* cp, unsigned cn );
// Set the lexer input from the file named 'fn'.
// File open, seek, tell, read and close failures are reported via the
// kFileXXXErrLexRC result codes.
// NOTE(review): presumably the lexer is reset as in cmLexSetTextBuffer() - confirm in cmLex.c.
cmRC_t cmLexSetFile( cmLexH h, const cmChar_t* fn );
// Register a user defined token. The id of the first user defined token should be
// kUserLexTId+1. Neither the id nor the token text may already be in use by a
// previously registered or built-in token (see kDuplicateTokenLexRC).
cmRC_t cmLexRegisterToken( cmLexH h, unsigned id, const cmChar_t* token );
// User defined token recognition function type. The function should return the count
// of initial, consecutive, characters in 'cp' which match its token pattern.
// NOTE(review): 'cn' is presumably the count of characters available at 'cp' - confirm in cmLex.c.
typedef unsigned (*cmLexUserMatcherPtr_t)( const cmChar_t* cp, unsigned cn );
// Register a user defined token recognition function for the token type 'id'.
cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t funcPtr );
// Enable or disable the recognizer for the specified token type.
cmRC_t cmLexEnableToken( cmLexH h, unsigned id, bool enableFl );
// Get the lexer filter flags (kReturnXXXLexFl).
unsigned cmLexFilterFlags( cmLexH h );
// Set the lexer filter flags (kReturnXXXLexFl).
// The filter flags can be safely enabled and disabled between
// calls to cmLexGetNextToken().
void cmLexSetFilterFlags( cmLexH h, unsigned flags );
// Return the type id of the current token and advance to the next token.
unsigned cmLexGetNextToken( cmLexH h );
// Return the type id associated with the current token. This is the same value
// returned by the previous call to cmLexGetNextToken().
unsigned cmLexTokenId( cmLexH h );
// Return a pointer to the first character of text associated with the
// current token. The returned pointer directly references the text contained
// in the buffer given to the lexer in the call to cmLexInit(). The string
// is therefore not zero terminated. Use cmLexTokenCharCount() to get the
// length of the token string.
const cmChar_t* cmLexTokenText( cmLexH h );
// Return the count of characters in the text associated with the current token.
// This is the only way to get this count since the string returned by
// cmLexTokenText() is not zero terminated.
unsigned cmLexTokenCharCount( cmLexH h );
// Return the value of the current token as a signed integer.
int cmLexTokenInt( cmLexH h );
// Return the value of the current token as an unsigned integer.
unsigned cmLexTokenUInt( cmLexH h );
// Return the value of the current token as a float.
float cmLexTokenFloat( cmLexH h );
// Return the value of the current token as a double.
double cmLexTokenDouble( cmLexH h );
// Return the line number associated with the current token.
unsigned cmLexCurrentLineNumber( cmLexH h );
// Return the starting column of the current token.
unsigned cmLexCurrentColumnNumber( cmLexH h );
// Return the RC code associated with the last error.
// NOTE(review): presumably one of the kXXXLexRC result codes - confirm in cmLex.c.
unsigned cmLexErrorRC( cmLexH h );
// Return the label associated with the token type id 'typeId'.
const cmChar_t* cmLexIdToLabel( cmLexH h, unsigned typeId );
// Return the text message associated with the return code 'rc'.
// Unlike the other functions this one does not take a lexer handle.
const cmChar_t* cmLexRcToMsg( unsigned rc );
// Lexer testing stub.
void cmLexTest( cmRpt_t* rpt );
//)
//}
#endif