+++ /dev/null
-#ifndef INC_CharScanner_hpp__
-#define INC_CharScanner_hpp__
-
-/* ANTLR Translator Generator
- * Project led by Terence Parr at http://www.jGuru.com
- * Software rights: http://www.antlr.org/license.html
- *
- * $Id: //depot/code/org.antlr/release/antlr-2.7.7/lib/cpp/antlr/CharScanner.hpp#2 $
- */
-
-#include <antlr/config.hpp>
-
-#include <map>
-
-#ifdef HAS_NOT_CCTYPE_H
-#include <ctype.h>
-#else
-#include <cctype>
-#endif
-
-#if ( _MSC_VER == 1200 )
-// VC6 seems to need this
-// note that this is not a standard C++ include file.
-# include <stdio.h>
-#endif
-
-#include <antlr/TokenStream.hpp>
-#include <antlr/RecognitionException.hpp>
-#include <antlr/SemanticException.hpp>
-#include <antlr/MismatchedCharException.hpp>
-#include <antlr/InputBuffer.hpp>
-#include <antlr/BitSet.hpp>
-#include <antlr/LexerSharedInputState.hpp>
-
-#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
-namespace antlr {
-#endif
-
-class ANTLR_API CharScanner;
-
-ANTLR_C_USING(tolower)
-
-#ifdef ANTLR_REALLY_NO_STRCASECMP
-// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
-// on the mac has neither...
-inline int strcasecmp(const char *s1, const char *s2)
-{
- while (true)
- {
- char c1 = tolower(*s1++),
- c2 = tolower(*s2++);
- if (c1 < c2) return -1;
- if (c1 > c2) return 1;
- if (c1 == 0) return 0;
- }
-}
-#else
-#ifdef NO_STRCASECMP
-ANTLR_C_USING(stricmp)
-#else
-ANTLR_C_USING(strcasecmp)
-#endif
-#endif
-
-/** Functor for the literals map
- */
-class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
-private:
- const CharScanner* scanner;
-public:
-#ifdef NO_TEMPLATE_PARTS
- CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
-#endif
- CharScannerLiteralsLess(const CharScanner* theScanner)
- : scanner(theScanner)
- {
- }
- bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
-// defaults are good enough..
- // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
- // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
-};
-
-/** Superclass of generated lexers
- */
-class ANTLR_API CharScanner : public TokenStream {
-protected:
- typedef RefToken (*factory_type)();
-public:
- CharScanner(InputBuffer& cb, bool case_sensitive );
- CharScanner(InputBuffer* cb, bool case_sensitive );
- CharScanner(const LexerSharedInputState& state, bool case_sensitive );
-
- virtual ~CharScanner()
- {
- }
-
- virtual int LA(unsigned int i);
-
- virtual void append(char c)
- {
- if (saveConsumedInput)
- {
- size_t l = text.length();
-
- if ((l%256) == 0)
- text.reserve(l+256);
-
- text.replace(l,0,&c,1);
- }
- }
-
- virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
- {
- if( saveConsumedInput )
- text += s;
- }
-
- virtual void commit()
- {
- inputState->getInput().commit();
- }
-
- /** called by the generated lexer to do error recovery, override to
- * customize the behaviour.
- */
- virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
- {
- consume();
- consumeUntil(tokenSet);
- }
-
- virtual void consume()
- {
- if (inputState->guessing == 0)
- {
- int c = LA(1);
- if (caseSensitive)
- {
- append(c);
- }
- else
- {
- // use input.LA(), not LA(), to get original case
- // CharScanner.LA() would toLower it.
- append(inputState->getInput().LA(1));
- }
-
- // RK: in a sense I don't like this automatic handling.
- if (c == '\t')
- tab();
- else
- inputState->column++;
- }
- inputState->getInput().consume();
- }
-
- /** Consume chars until one matches the given char */
- virtual void consumeUntil(int c)
- {
- for(;;)
- {
- int la_1 = LA(1);
- if( la_1 == EOF_CHAR || la_1 == c )
- break;
- consume();
- }
- }
-
- /** Consume chars until one matches the given set */
- virtual void consumeUntil(const BitSet& set)
- {
- for(;;)
- {
- int la_1 = LA(1);
- if( la_1 == EOF_CHAR || set.member(la_1) )
- break;
- consume();
- }
- }
-
- /// Mark the current position and return a id for it
- virtual unsigned int mark()
- {
- return inputState->getInput().mark();
- }
- /// Rewind the scanner to a previously marked position
- virtual void rewind(unsigned int pos)
- {
- inputState->getInput().rewind(pos);
- }
-
- /// See if input contains character 'c' throw MismatchedCharException if not
- virtual void match(int c)
- {
- int la_1 = LA(1);
- if ( la_1 != c )
- throw MismatchedCharException(la_1, c, false, this);
- consume();
- }
-
- /** See if input contains element from bitset b
- * throw MismatchedCharException if not
- */
- virtual void match(const BitSet& b)
- {
- int la_1 = LA(1);
-
- if ( !b.member(la_1) )
- throw MismatchedCharException( la_1, b, false, this );
- consume();
- }
-
- /** See if input contains string 's' throw MismatchedCharException if not
- * @note the string cannot match EOF
- */
- virtual void match( const char* s )
- {
- while( *s != '\0' )
- {
- // the & 0xFF is here to prevent sign extension lateron
- int la_1 = LA(1), c = (*s++ & 0xFF);
-
- if ( la_1 != c )
- throw MismatchedCharException(la_1, c, false, this);
-
- consume();
- }
- }
- /** See if input contains string 's' throw MismatchedCharException if not
- * @note the string cannot match EOF
- */
- virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
- {
- size_t len = s.length();
-
- for (size_t i = 0; i < len; i++)
- {
- // the & 0xFF is here to prevent sign extension lateron
- int la_1 = LA(1), c = (s[i] & 0xFF);
-
- if ( la_1 != c )
- throw MismatchedCharException(la_1, c, false, this);
-
- consume();
- }
- }
- /** See if input does not contain character 'c'
- * throw MismatchedCharException if not
- */
- virtual void matchNot(int c)
- {
- int la_1 = LA(1);
-
- if ( la_1 == c )
- throw MismatchedCharException(la_1, c, true, this);
-
- consume();
- }
- /** See if input contains character in range c1-c2
- * throw MismatchedCharException if not
- */
- virtual void matchRange(int c1, int c2)
- {
- int la_1 = LA(1);
-
- if ( la_1 < c1 || la_1 > c2 )
- throw MismatchedCharException(la_1, c1, c2, false, this);
-
- consume();
- }
-
- virtual bool getCaseSensitive() const
- {
- return caseSensitive;
- }
-
- virtual void setCaseSensitive(bool t)
- {
- caseSensitive = t;
- }
-
- virtual bool getCaseSensitiveLiterals() const=0;
-
- /// Get the line the scanner currently is in (starts at 1)
- virtual int getLine() const
- {
- return inputState->line;
- }
-
- /// set the line number
- virtual void setLine(int l)
- {
- inputState->line = l;
- }
-
- /// Get the column the scanner currently is in (starts at 1)
- virtual int getColumn() const
- {
- return inputState->column;
- }
- /// set the column number
- virtual void setColumn(int c)
- {
- inputState->column = c;
- }
-
- /// get the filename for the file currently used
- virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
- {
- return inputState->filename;
- }
- /// Set the filename the scanner is using (used in error messages)
- virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
- {
- inputState->filename = f;
- }
-
- virtual bool getCommitToPath() const
- {
- return commitToPath;
- }
-
- virtual void setCommitToPath(bool commit)
- {
- commitToPath = commit;
- }
-
- /** return a copy of the current text buffer */
- virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
- {
- return text;
- }
-
- virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
- {
- text = s;
- }
-
- virtual void resetText()
- {
- text = "";
- inputState->tokenStartColumn = inputState->column;
- inputState->tokenStartLine = inputState->line;
- }
-
- virtual RefToken getTokenObject() const
- {
- return _returnToken;
- }
-
- /** Used to keep track of line breaks, needs to be called from
- * within generated lexers when a \n \r is encountered.
- */
- virtual void newline()
- {
- ++inputState->line;
- inputState->column = 1;
- }
-
- /** Advance the current column number by an appropriate amount according
- * to the tabsize. This method needs to be explicitly called from the
- * lexer rules encountering tabs.
- */
- virtual void tab()
- {
- int c = getColumn();
- int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
- setColumn( nc );
- }
- /// set the tabsize. Returns the old tabsize
- int setTabsize( int size )
- {
- int oldsize = tabsize;
- tabsize = size;
- return oldsize;
- }
- /// Return the tabsize used by the scanner
- int getTabSize() const
- {
- return tabsize;
- }
-
- /** Report exception errors caught in nextToken() */
- virtual void reportError(const RecognitionException& e);
-
- /** Parser error-reporting function can be overridden in subclass */
- virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
-
- /** Parser warning-reporting function can be overridden in subclass */
- virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
-
- virtual InputBuffer& getInputBuffer()
- {
- return inputState->getInput();
- }
-
- virtual LexerSharedInputState getInputState()
- {
- return inputState;
- }
-
- /** set the input state for the lexer.
- * @note state is a reference counted object, hence no reference */
- virtual void setInputState(LexerSharedInputState state)
- {
- inputState = state;
- }
-
- /// Set the factory for created tokens
- virtual void setTokenObjectFactory(factory_type factory)
- {
- tokenFactory = factory;
- }
-
- /** Test the token text against the literals table
- * Override this method to perform a different literals test
- */
- virtual int testLiteralsTable(int ttype) const
- {
- ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
- if (i != literals.end())
- ttype = (*i).second;
- return ttype;
- }
-
- /** Test the text passed in against the literals table
- * Override this method to perform a different literals test
- * This is used primarily when you want to test a portion of
- * a token
- */
- virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
- {
- ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
- if (i != literals.end())
- ttype = (*i).second;
- return ttype;
- }
-
- /// Override this method to get more specific case handling
- virtual int toLower(int c) const
- {
- // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
- // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
- // this one is more structural. Maybe make this configurable.
- return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
- }
-
- /** This method is called by YourLexer::nextToken() when the lexer has
- * hit EOF condition. EOF is NOT a character.
- * This method is not called if EOF is reached during
- * syntactic predicate evaluation or during evaluation
- * of normal lexical rules, which presumably would be
- * an IOException. This traps the "normal" EOF condition.
- *
- * uponEOF() is called after the complete evaluation of
- * the previous token and only if your parser asks
- * for another token beyond that last non-EOF token.
- *
- * You might want to throw token or char stream exceptions
- * like: "Heh, premature eof" or a retry stream exception
- * ("I found the end of this file, go back to referencing file").
- */
- virtual void uponEOF()
- {
- }
-
- /// Methods used to change tracing behavior
- virtual void traceIndent();
- virtual void traceIn(const char* rname);
- virtual void traceOut(const char* rname);
-
-#ifndef NO_STATIC_CONSTS
- static const int EOF_CHAR = EOF;
-#else
- enum {
- EOF_CHAR = EOF
- };
-#endif
-protected:
- ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
- /// flag indicating wether consume saves characters
- bool saveConsumedInput;
- factory_type tokenFactory; ///< Factory for tokens
- bool caseSensitive; ///< Is this lexer case sensitive
- ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
-
- RefToken _returnToken; ///< used to return tokens w/o using return val
-
- /// Input state, gives access to input stream, shared among different lexers
- LexerSharedInputState inputState;
-
- /** Used during filter mode to indicate that path is desired.
- * A subsequent scan error will report an error as usual
- * if acceptPath=true;
- */
- bool commitToPath;
-
- int tabsize; ///< tab size the scanner uses.
-
- /// Create a new RefToken of type t
- virtual RefToken makeToken(int t)
- {
- RefToken tok = tokenFactory();
- tok->setType(t);
- tok->setColumn(inputState->tokenStartColumn);
- tok->setLine(inputState->tokenStartLine);
- return tok;
- }
-
- /** Tracer class, used when -traceLexer is passed to antlr
- */
- class Tracer {
- private:
- CharScanner* parser;
- const char* text;
-
- Tracer(const Tracer& other); // undefined
- Tracer& operator=(const Tracer& other); // undefined
- public:
- Tracer( CharScanner* p,const char* t )
- : parser(p), text(t)
- {
- parser->traceIn(text);
- }
- ~Tracer()
- {
- parser->traceOut(text);
- }
- };
-
- int traceDepth;
-private:
- CharScanner( const CharScanner& other ); // undefined
- CharScanner& operator=( const CharScanner& other ); // undefined
-
-#ifndef NO_STATIC_CONSTS
- static const int NO_CHAR = 0;
-#else
- enum {
- NO_CHAR = 0
- };
-#endif
-};
-
-inline int CharScanner::LA(unsigned int i)
-{
- int c = inputState->getInput().LA(i);
-
- if ( caseSensitive )
- return c;
- else
- return toLower(c); // VC 6 tolower bug caught in toLower.
-}
-
-inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
-{
- if (scanner->getCaseSensitiveLiterals())
- return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
- else
- {
-#ifdef NO_STRCASECMP
- return (stricmp(x.c_str(),y.c_str())<0);
-#else
- return (strcasecmp(x.c_str(),y.c_str())<0);
-#endif
- }
-}
-
-#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
-}
-#endif
-
-#endif //INC_CharScanner_hpp__