Security/libsecurity_codesigning/antlr2/antlr/CharScanner.hpp

   1 #ifndef INC_CharScanner_hpp__
   2 #define INC_CharScanner_hpp__
   3
   4 /* ANTLR Translator Generator
   5  * Project led by Terence Parr at http://www.jGuru.com
   6  * Software rights: http://www.antlr.org/license.html
   7  *
   8  * $Id: //depot/code/org.antlr/release/antlr-2.7.7/lib/cpp/antlr/CharScanner.hpp#2 $
   9  */
  10
  11 #include <antlr/config.hpp>
  12
  13 #include <map>
  14
  15 #ifdef HAS_NOT_CCTYPE_H
  16 #include <ctype.h>
  17 #else
  18 #include <cctype>
  19 #endif
  20
  21 #if ( _MSC_VER == 1200 )
  22 // VC6 seems to need this
  23 // note that this is not a standard C++ include file.
  24 # include <stdio.h>
  25 #endif
  26
  27 #include <antlr/TokenStream.hpp>
  28 #include <antlr/RecognitionException.hpp>
  29 #include <antlr/SemanticException.hpp>
  30 #include <antlr/MismatchedCharException.hpp>
  31 #include <antlr/InputBuffer.hpp>
  32 #include <antlr/BitSet.hpp>
  33 #include <antlr/LexerSharedInputState.hpp>
  34
  35 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
  36 namespace antlr {
  37 #endif
  38
  39 class ANTLR_API CharScanner;
  40
  41 ANTLR_C_USING(tolower)
  42
  43 #ifdef ANTLR_REALLY_NO_STRCASECMP
  44 // Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
  45 // on the mac has neither...
  46 inline int strcasecmp(const char *s1, const char *s2)
  47 {
  48         while (true)
  49         {
  50                 char  c1 = tolower(*s1++),
  51                                 c2 = tolower(*s2++);
  52                 if (c1 < c2) return -1;
  53                 if (c1 > c2) return 1;
  54                 if (c1 == 0) return 0;
  55         }
  56 }
  57 #else
  58 #ifdef NO_STRCASECMP
  59 ANTLR_C_USING(stricmp)
  60 #else
  61 ANTLR_C_USING(strcasecmp)
  62 #endif
  63 #endif
  64
  65 /** Functor for the literals map
  66  */
  67 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
  68 private:
  69         const CharScanner* scanner;
  70 public:
  71 #ifdef NO_TEMPLATE_PARTS
  72         CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
  73 #endif
  74         CharScannerLiteralsLess(const CharScanner* theScanner)
  75         : scanner(theScanner)
  76         {
  77         }
  78         bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
  79 // defaults are good enough..
  80         //      CharScannerLiteralsLess(const CharScannerLiteralsLess&);
  81         //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
  82 };
  83
  84 /** Superclass of generated lexers
  85  */
  86 class ANTLR_API CharScanner : public TokenStream {
  87 protected:
  88         typedef RefToken (*factory_type)();
  89 public:
  90         CharScanner(InputBuffer& cb, bool case_sensitive );
  91         CharScanner(InputBuffer* cb, bool case_sensitive );
  92         CharScanner(const LexerSharedInputState& state, bool case_sensitive );
  93
  94         virtual ~CharScanner()
  95         {
  96         }
  97
  98         virtual int LA(unsigned int i);
  99
 100         virtual void append(char c)
 101         {
 102                 if (saveConsumedInput)
 103                 {
 104                         size_t l = text.length();
 105
 106                         if ((l%256) == 0)
 107                                 text.reserve(l+256);
 108
 109                         text.replace(l,0,&c,1);
 110                 }
 111         }
 112
 113         virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
 114         {
 115                 if( saveConsumedInput )
 116                         text += s;
 117         }
 118
 119         virtual void commit()
 120         {
 121                 inputState->getInput().commit();
 122         }
 123
 124         /** called by the generated lexer to do error recovery, override to
 125          * customize the behaviour.
 126          */
 127         virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
 128         {
 129                 consume();
 130                 consumeUntil(tokenSet);
 131         }
 132
 133         virtual void consume()
 134         {
 135                 if (inputState->guessing == 0)
 136                 {
 137                         int c = LA(1);
 138                         if (caseSensitive)
 139                         {
 140                                 append(c);
 141                         }
 142                         else
 143                         {
 144                                 // use input.LA(), not LA(), to get original case
 145                                 // CharScanner.LA() would toLower it.
 146                                 append(inputState->getInput().LA(1));
 147                         }
 148
 149                         // RK: in a sense I don't like this automatic handling.
 150                         if (c == '\t')
 151                                 tab();
 152                         else
 153                                 inputState->column++;
 154                 }
 155                 inputState->getInput().consume();
 156         }
 157
 158         /** Consume chars until one matches the given char */
 159         virtual void consumeUntil(int c)
 160         {
 161                 for(;;)
 162                 {
 163                         int la_1 = LA(1);
 164                         if( la_1 == EOF_CHAR || la_1 == c )
 165                                 break;
 166                         consume();
 167                 }
 168         }
 169
 170         /** Consume chars until one matches the given set */
 171         virtual void consumeUntil(const BitSet& set)
 172         {
 173                 for(;;)
 174                 {
 175                         int la_1 = LA(1);
 176                         if( la_1 == EOF_CHAR || set.member(la_1) )
 177                                 break;
 178                         consume();
 179                 }
 180         }
 181
 182         /// Mark the current position and return a id for it
 183         virtual unsigned int mark()
 184         {
 185                 return inputState->getInput().mark();
 186         }
 187         /// Rewind the scanner to a previously marked position
 188         virtual void rewind(unsigned int pos)
 189         {
 190                 inputState->getInput().rewind(pos);
 191         }
 192
 193         /// See if input contains character 'c' throw MismatchedCharException if not
 194         virtual void match(int c)
 195         {
 196                 int la_1 = LA(1);
 197                 if ( la_1 != c )
 198                         throw MismatchedCharException(la_1, c, false, this);
 199                 consume();
 200         }
 201
 202         /** See if input contains element from bitset b
 203          * throw MismatchedCharException if not
 204          */
 205         virtual void match(const BitSet& b)
 206         {
 207                 int la_1 = LA(1);
 208
 209                 if ( !b.member(la_1) )
 210                         throw MismatchedCharException( la_1, b, false, this );
 211                 consume();
 212         }
 213
 214         /** See if input contains string 's' throw MismatchedCharException if not
 215          * @note the string cannot match EOF
 216          */
 217         virtual void match( const char* s )
 218         {
 219                 while( *s != '\0' )
 220                 {
 221                         // the & 0xFF is here to prevent sign extension lateron
 222                         int la_1 = LA(1), c = (*s++ & 0xFF);
 223
 224                         if ( la_1 != c )
 225                                 throw MismatchedCharException(la_1, c, false, this);
 226
 227                         consume();
 228                 }
 229         }
 230         /** See if input contains string 's' throw MismatchedCharException if not
 231          * @note the string cannot match EOF
 232          */
 233         virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
 234         {
 235                 size_t len = s.length();
 236
 237                 for (size_t i = 0; i < len; i++)
 238                 {
 239                         // the & 0xFF is here to prevent sign extension lateron
 240                         int la_1 = LA(1), c = (s[i] & 0xFF);
 241
 242                         if ( la_1 != c )
 243                                 throw MismatchedCharException(la_1, c, false, this);
 244
 245                         consume();
 246                 }
 247         }
 248         /** See if input does not contain character 'c'
 249          * throw MismatchedCharException if not
 250          */
 251         virtual void matchNot(int c)
 252         {
 253                 int la_1 = LA(1);
 254
 255                 if ( la_1 == c )
 256                         throw MismatchedCharException(la_1, c, true, this);
 257
 258                 consume();
 259         }
 260         /** See if input contains character in range c1-c2
 261          * throw MismatchedCharException if not
 262          */
 263         virtual void matchRange(int c1, int c2)
 264         {
 265                 int la_1 = LA(1);
 266
 267                 if ( la_1 < c1 || la_1 > c2 )
 268                         throw MismatchedCharException(la_1, c1, c2, false, this);
 269
 270                 consume();
 271         }
 272
 273         virtual bool getCaseSensitive() const
 274         {
 275                 return caseSensitive;
 276         }
 277
 278         virtual void setCaseSensitive(bool t)
 279         {
 280                 caseSensitive = t;
 281         }
 282
 283         virtual bool getCaseSensitiveLiterals() const=0;
 284
 285         /// Get the line the scanner currently is in (starts at 1)
 286         virtual int getLine() const
 287         {
 288                 return inputState->line;
 289         }
 290
 291         /// set the line number
 292         virtual void setLine(int l)
 293         {
 294                 inputState->line = l;
 295         }
 296
 297         /// Get the column the scanner currently is in (starts at 1)
 298         virtual int getColumn() const
 299         {
 300                 return inputState->column;
 301         }
 302         /// set the column number
 303         virtual void setColumn(int c)
 304         {
 305                 inputState->column = c;
 306         }
 307
 308         /// get the filename for the file currently used
 309         virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
 310         {
 311                 return inputState->filename;
 312         }
 313         /// Set the filename the scanner is using (used in error messages)
 314         virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
 315         {
 316                 inputState->filename = f;
 317         }
 318
 319         virtual bool getCommitToPath() const
 320         {
 321                 return commitToPath;
 322         }
 323
 324         virtual void setCommitToPath(bool commit)
 325         {
 326                 commitToPath = commit;
 327         }
 328
 329         /** return a copy of the current text buffer */
 330         virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
 331         {
 332                 return text;
 333         }
 334
 335         virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
 336         {
 337                 text = s;
 338         }
 339
 340         virtual void resetText()
 341         {
 342                 text = "";
 343                 inputState->tokenStartColumn = inputState->column;
 344                 inputState->tokenStartLine = inputState->line;
 345         }
 346
 347         virtual RefToken getTokenObject() const
 348         {
 349                 return _returnToken;
 350         }
 351
 352         /** Used to keep track of line breaks, needs to be called from
 353          * within generated lexers when a \n \r is encountered.
 354          */
 355         virtual void newline()
 356         {
 357                 ++inputState->line;
 358                 inputState->column = 1;
 359         }
 360
 361         /** Advance the current column number by an appropriate amount according
 362          * to the tabsize. This method needs to be explicitly called from the
 363          * lexer rules encountering tabs.
 364          */
 365         virtual void tab()
 366         {
 367                 int c = getColumn();
 368                 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;      // calculate tab stop
 369                 setColumn( nc );
 370         }
 371         /// set the tabsize. Returns the old tabsize
 372         int setTabsize( int size )
 373         {
 374                 int oldsize = tabsize;
 375                 tabsize = size;
 376                 return oldsize;
 377         }
 378         /// Return the tabsize used by the scanner
 379         int getTabSize() const
 380         {
 381                 return tabsize;
 382         }
 383
 384         /** Report exception errors caught in nextToken() */
 385         virtual void reportError(const RecognitionException& e);
 386
 387         /** Parser error-reporting function can be overridden in subclass */
 388         virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
 389
 390         /** Parser warning-reporting function can be overridden in subclass */
 391         virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
 392
 393         virtual InputBuffer& getInputBuffer()
 394         {
 395                 return inputState->getInput();
 396         }
 397
 398         virtual LexerSharedInputState getInputState()
 399         {
 400                 return inputState;
 401         }
 402
 403         /** set the input state for the lexer.
 404          * @note state is a reference counted object, hence no reference */
 405         virtual void setInputState(LexerSharedInputState state)
 406         {
 407                 inputState = state;
 408         }
 409
 410         /// Set the factory for created tokens
 411         virtual void setTokenObjectFactory(factory_type factory)
 412         {
 413                 tokenFactory = factory;
 414         }
 415
 416         /** Test the token text against the literals table
 417          * Override this method to perform a different literals test
 418          */
 419         virtual int testLiteralsTable(int ttype) const
 420         {
 421                 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
 422                 if (i != literals.end())
 423                         ttype = (*i).second;
 424                 return ttype;
 425         }
 426
 427         /** Test the text passed in against the literals table
 428          * Override this method to perform a different literals test
 429          * This is used primarily when you want to test a portion of
 430          * a token
 431          */
 432         virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
 433         {
 434                 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
 435                 if (i != literals.end())
 436                         ttype = (*i).second;
 437                 return ttype;
 438         }
 439
 440         /// Override this method to get more specific case handling
 441         virtual int toLower(int c) const
 442         {
 443                 // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
 444                 // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
 445                 // this one is more structural. Maybe make this configurable.
 446                 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
 447         }
 448
 449         /** This method is called by YourLexer::nextToken() when the lexer has
 450          *  hit EOF condition.  EOF is NOT a character.
 451          *  This method is not called if EOF is reached during
 452          *  syntactic predicate evaluation or during evaluation
 453          *  of normal lexical rules, which presumably would be
 454          *  an IOException.  This traps the "normal" EOF condition.
 455          *
 456          *  uponEOF() is called after the complete evaluation of
 457          *  the previous token and only if your parser asks
 458          *  for another token beyond that last non-EOF token.
 459          *
 460          *  You might want to throw token or char stream exceptions
 461          *  like: "Heh, premature eof" or a retry stream exception
 462          *  ("I found the end of this file, go back to referencing file").
 463          */
 464         virtual void uponEOF()
 465         {
 466         }
 467
 468         /// Methods used to change tracing behavior
 469         virtual void traceIndent();
 470         virtual void traceIn(const char* rname);
 471         virtual void traceOut(const char* rname);
 472
 473 #ifndef NO_STATIC_CONSTS
 474         static const int EOF_CHAR = EOF;
 475 #else
 476         enum {
 477                 EOF_CHAR = EOF
 478         };
 479 #endif
 480 protected:
 481         ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
 482         /// flag indicating wether consume saves characters
 483         bool saveConsumedInput;
 484         factory_type tokenFactory;                              ///< Factory for tokens
 485         bool caseSensitive;                                             ///< Is this lexer case sensitive
 486         ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
 487
 488         RefToken _returnToken;          ///< used to return tokens w/o using return val
 489
 490         /// Input state, gives access to input stream, shared among different lexers
 491         LexerSharedInputState inputState;
 492
 493         /** Used during filter mode to indicate that path is desired.
 494          * A subsequent scan error will report an error as usual
 495          * if acceptPath=true;
 496          */
 497         bool commitToPath;
 498
 499         int tabsize;    ///< tab size the scanner uses.
 500
 501         /// Create a new RefToken of type t
 502         virtual RefToken makeToken(int t)
 503         {
 504                 RefToken tok = tokenFactory();
 505                 tok->setType(t);
 506                 tok->setColumn(inputState->tokenStartColumn);
 507                 tok->setLine(inputState->tokenStartLine);
 508                 return tok;
 509         }
 510
 511         /** Tracer class, used when -traceLexer is passed to antlr
 512          */
 513         class Tracer {
 514         private:
 515                 CharScanner* parser;
 516                 const char* text;
 517
 518                 Tracer(const Tracer& other);                                    // undefined
 519                 Tracer& operator=(const Tracer& other);         // undefined
 520         public:
 521                 Tracer( CharScanner* p,const char* t )
 522                 : parser(p), text(t)
 523                 {
 524                         parser->traceIn(text);
 525                 }
 526                 ~Tracer()
 527                 {
 528                         parser->traceOut(text);
 529                 }
 530         };
 531
 532         int traceDepth;
 533 private:
 534         CharScanner( const CharScanner& other );                                        // undefined
 535         CharScanner& operator=( const CharScanner& other );     // undefined
 536
 537 #ifndef NO_STATIC_CONSTS
 538         static const int NO_CHAR = 0;
 539 #else
 540         enum {
 541                 NO_CHAR = 0
 542         };
 543 #endif
 544 };
 545
 546 inline int CharScanner::LA(unsigned int i)
 547 {
 548         int c = inputState->getInput().LA(i);
 549
 550         if ( caseSensitive )
 551                 return c;
 552         else
 553                 return toLower(c);      // VC 6 tolower bug caught in toLower.
 554 }
 555
 556 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
 557 {
 558         if (scanner->getCaseSensitiveLiterals())
 559                 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
 560         else
 561         {
 562 #ifdef NO_STRCASECMP
 563                 return (stricmp(x.c_str(),y.c_str())<0);
 564 #else
 565                 return (strcasecmp(x.c_str(),y.c_str())<0);
 566 #endif
 567         }
 568 }
 569
 570 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
 571 }
 572 #endif
 573
 574 #endif //INC_CharScanner_hpp__