]> git.saurik.com Git - apple/security.git/blob - Security/libsecurity_codesigning/antlr2/antlr/CharScanner.hpp
Security-57031.40.6.tar.gz
[apple/security.git] / Security / libsecurity_codesigning / antlr2 / antlr / CharScanner.hpp
1 #ifndef INC_CharScanner_hpp__
2 #define INC_CharScanner_hpp__
3
4 /* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
7 *
8 * $Id: //depot/code/org.antlr/release/antlr-2.7.7/lib/cpp/antlr/CharScanner.hpp#2 $
9 */
10
11 #include <antlr/config.hpp>
12
13 #include <map>
14
15 #ifdef HAS_NOT_CCTYPE_H
16 #include <ctype.h>
17 #else
18 #include <cctype>
19 #endif
20
21 #if ( _MSC_VER == 1200 )
22 // VC6 seems to need this
23 // note that this is not a standard C++ include file.
24 # include <stdio.h>
25 #endif
26
27 #include <antlr/TokenStream.hpp>
28 #include <antlr/RecognitionException.hpp>
29 #include <antlr/SemanticException.hpp>
30 #include <antlr/MismatchedCharException.hpp>
31 #include <antlr/InputBuffer.hpp>
32 #include <antlr/BitSet.hpp>
33 #include <antlr/LexerSharedInputState.hpp>
34
35 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
36 namespace antlr {
37 #endif
38
39 class ANTLR_API CharScanner;
40
41 ANTLR_C_USING(tolower)
42
43 #ifdef ANTLR_REALLY_NO_STRCASECMP
// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
// on the mac has neither...
/** Fallback case-insensitive comparison of two NUL-terminated strings.
 *
 * Both strings are walked in lock step; each byte is folded with tolower()
 * and the folded values are compared.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 sorts before s2, 1 if it sorts after, 0 if they are
 *         equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	for (;; ++s1, ++s2)
	{
		// tolower() is only defined for values representable as unsigned
		// char (or EOF), so go through unsigned char first. Keeping the
		// results as int also makes bytes >= 0x80 sort after ASCII, which
		// matches the unsigned byte ordering of the system strcasecmp.
		const int c1 = tolower(static_cast<unsigned char>(*s1));
		const int c2 = tolower(static_cast<unsigned char>(*s2));

		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		if (c1 == 0) return 0;	// both strings ended at the same position
	}
}
57 #else
58 #ifdef NO_STRCASECMP
59 ANTLR_C_USING(stricmp)
60 #else
61 ANTLR_C_USING(strcasecmp)
62 #endif
63 #endif
64
65 /** Functor for the literals map
66 */
67 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
68 private:
69 const CharScanner* scanner;
70 public:
71 #ifdef NO_TEMPLATE_PARTS
72 CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
73 #endif
74 CharScannerLiteralsLess(const CharScanner* theScanner)
75 : scanner(theScanner)
76 {
77 }
78 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
79 // defaults are good enough..
80 // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
81 // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
82 };
83
84 /** Superclass of generated lexers
85 */
86 class ANTLR_API CharScanner : public TokenStream {
87 protected:
88 typedef RefToken (*factory_type)();
89 public:
90 CharScanner(InputBuffer& cb, bool case_sensitive );
91 CharScanner(InputBuffer* cb, bool case_sensitive );
92 CharScanner(const LexerSharedInputState& state, bool case_sensitive );
93
94 virtual ~CharScanner()
95 {
96 }
97
98 virtual int LA(unsigned int i);
99
100 virtual void append(char c)
101 {
102 if (saveConsumedInput)
103 {
104 size_t l = text.length();
105
106 if ((l%256) == 0)
107 text.reserve(l+256);
108
109 text.replace(l,0,&c,1);
110 }
111 }
112
113 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
114 {
115 if( saveConsumedInput )
116 text += s;
117 }
118
119 virtual void commit()
120 {
121 inputState->getInput().commit();
122 }
123
124 /** called by the generated lexer to do error recovery, override to
125 * customize the behaviour.
126 */
127 virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
128 {
129 consume();
130 consumeUntil(tokenSet);
131 }
132
133 virtual void consume()
134 {
135 if (inputState->guessing == 0)
136 {
137 int c = LA(1);
138 if (caseSensitive)
139 {
140 append(c);
141 }
142 else
143 {
144 // use input.LA(), not LA(), to get original case
145 // CharScanner.LA() would toLower it.
146 append(inputState->getInput().LA(1));
147 }
148
149 // RK: in a sense I don't like this automatic handling.
150 if (c == '\t')
151 tab();
152 else
153 inputState->column++;
154 }
155 inputState->getInput().consume();
156 }
157
158 /** Consume chars until one matches the given char */
159 virtual void consumeUntil(int c)
160 {
161 for(;;)
162 {
163 int la_1 = LA(1);
164 if( la_1 == EOF_CHAR || la_1 == c )
165 break;
166 consume();
167 }
168 }
169
170 /** Consume chars until one matches the given set */
171 virtual void consumeUntil(const BitSet& set)
172 {
173 for(;;)
174 {
175 int la_1 = LA(1);
176 if( la_1 == EOF_CHAR || set.member(la_1) )
177 break;
178 consume();
179 }
180 }
181
182 /// Mark the current position and return a id for it
183 virtual unsigned int mark()
184 {
185 return inputState->getInput().mark();
186 }
187 /// Rewind the scanner to a previously marked position
188 virtual void rewind(unsigned int pos)
189 {
190 inputState->getInput().rewind(pos);
191 }
192
193 /// See if input contains character 'c' throw MismatchedCharException if not
194 virtual void match(int c)
195 {
196 int la_1 = LA(1);
197 if ( la_1 != c )
198 throw MismatchedCharException(la_1, c, false, this);
199 consume();
200 }
201
202 /** See if input contains element from bitset b
203 * throw MismatchedCharException if not
204 */
205 virtual void match(const BitSet& b)
206 {
207 int la_1 = LA(1);
208
209 if ( !b.member(la_1) )
210 throw MismatchedCharException( la_1, b, false, this );
211 consume();
212 }
213
214 /** See if input contains string 's' throw MismatchedCharException if not
215 * @note the string cannot match EOF
216 */
217 virtual void match( const char* s )
218 {
219 while( *s != '\0' )
220 {
221 // the & 0xFF is here to prevent sign extension lateron
222 int la_1 = LA(1), c = (*s++ & 0xFF);
223
224 if ( la_1 != c )
225 throw MismatchedCharException(la_1, c, false, this);
226
227 consume();
228 }
229 }
230 /** See if input contains string 's' throw MismatchedCharException if not
231 * @note the string cannot match EOF
232 */
233 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
234 {
235 size_t len = s.length();
236
237 for (size_t i = 0; i < len; i++)
238 {
239 // the & 0xFF is here to prevent sign extension lateron
240 int la_1 = LA(1), c = (s[i] & 0xFF);
241
242 if ( la_1 != c )
243 throw MismatchedCharException(la_1, c, false, this);
244
245 consume();
246 }
247 }
248 /** See if input does not contain character 'c'
249 * throw MismatchedCharException if not
250 */
251 virtual void matchNot(int c)
252 {
253 int la_1 = LA(1);
254
255 if ( la_1 == c )
256 throw MismatchedCharException(la_1, c, true, this);
257
258 consume();
259 }
260 /** See if input contains character in range c1-c2
261 * throw MismatchedCharException if not
262 */
263 virtual void matchRange(int c1, int c2)
264 {
265 int la_1 = LA(1);
266
267 if ( la_1 < c1 || la_1 > c2 )
268 throw MismatchedCharException(la_1, c1, c2, false, this);
269
270 consume();
271 }
272
273 virtual bool getCaseSensitive() const
274 {
275 return caseSensitive;
276 }
277
278 virtual void setCaseSensitive(bool t)
279 {
280 caseSensitive = t;
281 }
282
283 virtual bool getCaseSensitiveLiterals() const=0;
284
285 /// Get the line the scanner currently is in (starts at 1)
286 virtual int getLine() const
287 {
288 return inputState->line;
289 }
290
291 /// set the line number
292 virtual void setLine(int l)
293 {
294 inputState->line = l;
295 }
296
297 /// Get the column the scanner currently is in (starts at 1)
298 virtual int getColumn() const
299 {
300 return inputState->column;
301 }
302 /// set the column number
303 virtual void setColumn(int c)
304 {
305 inputState->column = c;
306 }
307
308 /// get the filename for the file currently used
309 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
310 {
311 return inputState->filename;
312 }
313 /// Set the filename the scanner is using (used in error messages)
314 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
315 {
316 inputState->filename = f;
317 }
318
319 virtual bool getCommitToPath() const
320 {
321 return commitToPath;
322 }
323
324 virtual void setCommitToPath(bool commit)
325 {
326 commitToPath = commit;
327 }
328
329 /** return a copy of the current text buffer */
330 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
331 {
332 return text;
333 }
334
335 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
336 {
337 text = s;
338 }
339
340 virtual void resetText()
341 {
342 text = "";
343 inputState->tokenStartColumn = inputState->column;
344 inputState->tokenStartLine = inputState->line;
345 }
346
347 virtual RefToken getTokenObject() const
348 {
349 return _returnToken;
350 }
351
352 /** Used to keep track of line breaks, needs to be called from
353 * within generated lexers when a \n \r is encountered.
354 */
355 virtual void newline()
356 {
357 ++inputState->line;
358 inputState->column = 1;
359 }
360
361 /** Advance the current column number by an appropriate amount according
362 * to the tabsize. This method needs to be explicitly called from the
363 * lexer rules encountering tabs.
364 */
365 virtual void tab()
366 {
367 int c = getColumn();
368 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
369 setColumn( nc );
370 }
371 /// set the tabsize. Returns the old tabsize
372 int setTabsize( int size )
373 {
374 int oldsize = tabsize;
375 tabsize = size;
376 return oldsize;
377 }
378 /// Return the tabsize used by the scanner
379 int getTabSize() const
380 {
381 return tabsize;
382 }
383
384 /** Report exception errors caught in nextToken() */
385 virtual void reportError(const RecognitionException& e);
386
387 /** Parser error-reporting function can be overridden in subclass */
388 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
389
390 /** Parser warning-reporting function can be overridden in subclass */
391 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
392
393 virtual InputBuffer& getInputBuffer()
394 {
395 return inputState->getInput();
396 }
397
398 virtual LexerSharedInputState getInputState()
399 {
400 return inputState;
401 }
402
403 /** set the input state for the lexer.
404 * @note state is a reference counted object, hence no reference */
405 virtual void setInputState(LexerSharedInputState state)
406 {
407 inputState = state;
408 }
409
410 /// Set the factory for created tokens
411 virtual void setTokenObjectFactory(factory_type factory)
412 {
413 tokenFactory = factory;
414 }
415
416 /** Test the token text against the literals table
417 * Override this method to perform a different literals test
418 */
419 virtual int testLiteralsTable(int ttype) const
420 {
421 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
422 if (i != literals.end())
423 ttype = (*i).second;
424 return ttype;
425 }
426
427 /** Test the text passed in against the literals table
428 * Override this method to perform a different literals test
429 * This is used primarily when you want to test a portion of
430 * a token
431 */
432 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
433 {
434 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
435 if (i != literals.end())
436 ttype = (*i).second;
437 return ttype;
438 }
439
440 /// Override this method to get more specific case handling
441 virtual int toLower(int c) const
442 {
443 // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
444 // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
445 // this one is more structural. Maybe make this configurable.
446 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
447 }
448
449 /** This method is called by YourLexer::nextToken() when the lexer has
450 * hit EOF condition. EOF is NOT a character.
451 * This method is not called if EOF is reached during
452 * syntactic predicate evaluation or during evaluation
453 * of normal lexical rules, which presumably would be
454 * an IOException. This traps the "normal" EOF condition.
455 *
456 * uponEOF() is called after the complete evaluation of
457 * the previous token and only if your parser asks
458 * for another token beyond that last non-EOF token.
459 *
460 * You might want to throw token or char stream exceptions
461 * like: "Heh, premature eof" or a retry stream exception
462 * ("I found the end of this file, go back to referencing file").
463 */
464 virtual void uponEOF()
465 {
466 }
467
468 /// Methods used to change tracing behavior
469 virtual void traceIndent();
470 virtual void traceIn(const char* rname);
471 virtual void traceOut(const char* rname);
472
473 #ifndef NO_STATIC_CONSTS
474 static const int EOF_CHAR = EOF;
475 #else
476 enum {
477 EOF_CHAR = EOF
478 };
479 #endif
480 protected:
481 ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
482 /// flag indicating wether consume saves characters
483 bool saveConsumedInput;
484 factory_type tokenFactory; ///< Factory for tokens
485 bool caseSensitive; ///< Is this lexer case sensitive
486 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
487
488 RefToken _returnToken; ///< used to return tokens w/o using return val
489
490 /// Input state, gives access to input stream, shared among different lexers
491 LexerSharedInputState inputState;
492
493 /** Used during filter mode to indicate that path is desired.
494 * A subsequent scan error will report an error as usual
495 * if acceptPath=true;
496 */
497 bool commitToPath;
498
499 int tabsize; ///< tab size the scanner uses.
500
501 /// Create a new RefToken of type t
502 virtual RefToken makeToken(int t)
503 {
504 RefToken tok = tokenFactory();
505 tok->setType(t);
506 tok->setColumn(inputState->tokenStartColumn);
507 tok->setLine(inputState->tokenStartLine);
508 return tok;
509 }
510
511 /** Tracer class, used when -traceLexer is passed to antlr
512 */
513 class Tracer {
514 private:
515 CharScanner* parser;
516 const char* text;
517
518 Tracer(const Tracer& other); // undefined
519 Tracer& operator=(const Tracer& other); // undefined
520 public:
521 Tracer( CharScanner* p,const char* t )
522 : parser(p), text(t)
523 {
524 parser->traceIn(text);
525 }
526 ~Tracer()
527 {
528 parser->traceOut(text);
529 }
530 };
531
532 int traceDepth;
533 private:
534 CharScanner( const CharScanner& other ); // undefined
535 CharScanner& operator=( const CharScanner& other ); // undefined
536
537 #ifndef NO_STATIC_CONSTS
538 static const int NO_CHAR = 0;
539 #else
540 enum {
541 NO_CHAR = 0
542 };
543 #endif
544 };
545
546 inline int CharScanner::LA(unsigned int i)
547 {
548 int c = inputState->getInput().LA(i);
549
550 if ( caseSensitive )
551 return c;
552 else
553 return toLower(c); // VC 6 tolower bug caught in toLower.
554 }
555
556 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
557 {
558 if (scanner->getCaseSensitiveLiterals())
559 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
560 else
561 {
562 #ifdef NO_STRCASECMP
563 return (stricmp(x.c_str(),y.c_str())<0);
564 #else
565 return (strcasecmp(x.c_str(),y.c_str())<0);
566 #endif
567 }
568 }
569
570 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
571 }
572 #endif
573
574 #endif //INC_CharScanner_hpp__