123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- #ifndef cmLex_h
- #define cmLex_h
-
-
- //( { file_desc:"User configurable lexer for tokenizing text files." kw:[text]}
-
-
- // Predefined lexer token type ids
- enum
- {
- kErrorLexTId, // 0 the lexer was unable to identify the current token
- kUnknownLexTId, // 1 the token is of an unknown type (only used when kReturnUnknownLexFl is set)
- kEofLexTId, // 2 the lexer reached the end of input
- kSpaceLexTId, // 3 white space
- kRealLexTId, // 4 real number (contains a decimal point or is in scientific notation)
- kIntLexTId, // 5 decimal integer
- kHexLexTId, // 6 hexadecimal integer
- kIdentLexTId, // 7 identifier
- kQStrLexTId, // 8 quoted string
- kQCharLexTId, // 9 quoted char
- kBlockCmtLexTId, // 10 block comment
- kLineCmtLexTId, // 11 line comment
- kUserLexTId // 12 user registered token; user defined ids begin at kUserLexTId+1 (See cmLexRegisterToken().)
- };
-
- // Lexer control flags used with cmLexInit(). See also cmLexFilterFlags() and cmLexSetFilterFlags().
- enum
- {
- kReturnSpaceLexFl = 0x01, //< Return space tokens
- kReturnCommentsLexFl = 0x02, //< Return comment tokens
- kReturnUnknownLexFl = 0x04, //< Return unknown tokens (see kUnknownLexTId)
- kReturnQCharLexFl = 0x08, //< Return quoted characters
- kUserDefPriorityLexFl= 0x10 //< User defined tokens take priority even if a kIdentLexTId token has a longer match
- };
-
- // cmLex result codes. The numbers in the comments assume that cmOkRC is 0.
- enum
- {
- kOkLexRC = cmOkRC, //< 0 No error. The operation completed successfully
- kDuplicateTokenLexRC, //< 1 The text or id passed as a user token is already in use by another token
- kMissingCmtEndLexRC, //< 2 The end of a block comment could not be found.
- kMissingEndQuoteLexRC, //< 3 The end of a quoted string could not be found.
- kNoMatchLexRC, //< 4 The lexer encountered a string which could not be classified.
- kFileOpenErrLexRC, //< 5 File open failed on cmLexSetFile()
- kFileSeekErrLexRC, //< 6 File seek failed on cmLexSetFile()
- kFileTellErrLexRC, //< 7 File tell failed on cmLexSetFile()
- kFileReadErrLexRC, //< 8 File read failed on cmLexSetFile()
- kFileCloseErrLexRC, //< 9 File close failed on cmLexSetFile()
- kMemAllocErrLexRC, //< 10 An attempted memory allocation failed
- kEofRC, //< 11 The end of the input text was encountered (this is a normal condition not an error)
- kInvalidLexTIdLexRC, //< 12 An invalid lex token id was encountered.
- kSignErrorLexRC, //< 13 A signed integer has a 'u' or 'U' suffix.
- kInvalidLexRC //< 14 Sentinel value.
-
- };
-
- typedef cmHandle_t cmLexH; // Handle type used to refer to a lexer (see cmLexInit()).
-
- extern cmLexH cmLexNullH; // NOTE(review): presumably the null (invalid) lexer handle value; defined outside this header - confirm.
-
-
- // Initialize the lexer and receive a lexer handle in return.
- // Set cp to NULL if the buffer will be set later via cmLexSetTextBuffer().
- // See the kXXXLexFl enums above for possible flag values.
- cmLexH cmLexInit( const cmChar_t* cp, unsigned cn, unsigned flags, cmRpt_t* rpt );
-
- // Finalize a lexer created by an earlier call to cmLexInit(). NOTE(review): the handle is passed by pointer; presumably *hp is invalidated on return - confirm.
- cmRC_t cmLexFinal( cmLexH* hp );
-
- // Rewind the lexer to the beginning of the buffer (the same as the post-initialize state).
- cmRC_t cmLexReset( cmLexH h );
-
- // Verify that a lexer handle is valid. The result is reported as a bool.
- bool cmLexIsValid( cmLexH h );
-
- // Set a new text buffer and reset the lexer to the post-initialize state.
- cmRC_t cmLexSetTextBuffer( cmLexH h, const cmChar_t* cp, unsigned cn );
- cmRC_t cmLexSetFile( cmLexH h, const cmChar_t* fn ); // NOTE(review): presumably takes the text from the file named 'fn'; see the kFileXXXLexRC result codes for its failure modes.
-
- // Register a user defined token. The id of the first user defined token should be
- // kUserLexTId+1. Neither the id nor the token text may already be in use by a previously
- // registered or built-in token (see kDuplicateTokenLexRC).
- cmRC_t cmLexRegisterToken( cmLexH h, unsigned id, const cmChar_t* token );
-
- // Register a user defined token recognition function. The function should return the count
- // of initial, consecutive characters in 'cp[cn]' which match its token pattern. NOTE(review): presumably 0 means no match - confirm.
- typedef unsigned (*cmLexUserMatcherPtr_t)( const cmChar_t* cp, unsigned cn );
-
- cmRC_t cmLexRegisterMatcher( cmLexH h, unsigned id, cmLexUserMatcherPtr_t funcPtr );
-
- // Enable or disable the token type identified by 'id' according to 'enableFl'.
- cmRC_t cmLexEnableToken( cmLexH h, unsigned id, bool enableFl );
-
- // Get and set the lexer filter flags (kReturnXXXLexFl).
- // These flags can be safely enabled and disabled between
- // calls to cmLexGetNextToken().
- unsigned cmLexFilterFlags( cmLexH h );
- void cmLexSetFilterFlags( cmLexH h, unsigned flags );
-
- // Return the type id of the current token and advance to the next token.
- unsigned cmLexGetNextToken( cmLexH h );
-
- // Return the type id associated with the current token. This is the same value
- // that was returned by the previous call to cmLexGetNextToken().
- unsigned cmLexTokenId( cmLexH h );
-
- // Return a pointer to the first character of the text associated with the
- // current token. The returned pointer directly references the text contained
- // in the buffer given to the lexer in the call to cmLexInit(). The string
- // is therefore NOT zero terminated. Use cmLexTokenCharCount() to get the
- // length of the token string.
- const cmChar_t* cmLexTokenText( cmLexH h );
-
- // Return the count of characters in the text associated with the current token.
- // This is the only way to get the count because the string returned by
- // cmLexTokenText() is not zero terminated.
- unsigned cmLexTokenCharCount( cmLexH h );
-
- // Return the value of the current token as an integer. NOTE(review): behavior for non-numeric tokens is not documented here.
- int cmLexTokenInt( cmLexH h );
-
- // Return the value of the current token as an unsigned integer. See also cmLexTokenIsUnsigned().
- unsigned cmLexTokenUInt( cmLexH h );
-
- // Return the value of the current token as a float. See also cmLexTokenIsSinglePrecision().
- float cmLexTokenFloat( cmLexH h );
-
- // Return the value of the current token as a double.
- double cmLexTokenDouble( cmLexH h );
-
- // Return true if the current token is an int and it was suffixed
- // with 'u' to indicate that it is unsigned.
- bool cmLexTokenIsUnsigned( cmLexH h );
-
- // Return true if the current token is a real and it was suffixed
- // with 'f' to indicate that it is a single precision float.
- bool cmLexTokenIsSinglePrecision( cmLexH h );
-
- // Return the line number associated with the current token. NOTE(review): whether numbering is 0 or 1 based is not stated here.
- unsigned cmLexCurrentLineNumber( cmLexH h );
-
- // Return the starting column of the current token. NOTE(review): whether numbering is 0 or 1 based is not stated here.
- unsigned cmLexCurrentColumnNumber( cmLexH h );
-
- // Return the result code associated with the last error (presumably one of the kXXXLexRC values above - confirm).
- unsigned cmLexErrorRC( cmLexH h );
-
- // Return the label associated with a token type id.
- const cmChar_t* cmLexIdToLabel( cmLexH h, unsigned typeId );
-
- // Return the text message associated with a result code.
- const cmChar_t* cmLexRcToMsg( unsigned rc );
-
- // Lexer testing stub. NOTE(review): 'rpt' is presumably where the test output is reported - confirm.
- void cmLexTest( cmRpt_t* rpt );
-
- //)
-
- #endif
|