]> git.saurik.com Git - bison.git/blame - src/lex.c
* src/reader.c (grammar_rule_begin, previous_rule, current_rule):
[bison.git] / src / lex.c
CommitLineData
40675e7c 1/* Token-reader for Bison's input parser,
5e424082
AD
2 Copyright (C) 1984, 1986, 1989, 1992, 2000, 2001, 2002
3 Free Software Foundation, Inc.
40675e7c 4
a0f6b076 5 This file is part of Bison, the GNU Compiler Compiler.
40675e7c 6
a0f6b076
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
40675e7c 11
a0f6b076
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
40675e7c 16
a0f6b076
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
40675e7c 21
40675e7c 22#include "system.h"
ceed8467 23#include "getargs.h"
40675e7c
DM
24#include "files.h"
25#include "symtab.h"
82b6d266 26#include "options.h"
40675e7c 27#include "lex.h"
a0f6b076 28#include "complain.h"
b2ca4022 29#include "gram.h"
ff4a34be 30#include "quote.h"
40675e7c
DM
31
32/* Buffer for storing the current token. */
0846f581 33static struct obstack token_obstack;
09b503c8 34const char *token_buffer = NULL;
40675e7c 35
db8837cb 36symbol_t *symval = NULL;
40675e7c
DM
37int numval;
38
561f9a30 39/* A token to be reread, see unlex and lex. */
342b8b6e 40static token_t unlexed = tok_undef;
db8837cb 41static symbol_t *unlexed_symval = NULL;
09b503c8 42static const char *unlexed_token_buffer = NULL;
40675e7c
DM
43
44void
342b8b6e 45lex_init (void)
40675e7c 46{
f17bcd1f 47 obstack_init (&token_obstack);
342b8b6e
AD
48 unlexed = tok_undef;
49}
50
51
52void
53lex_free (void)
54{
55 obstack_free (&token_obstack, NULL);
40675e7c
DM
56}
57
58
40675e7c 59int
d2729d44 60skip_white_space (void)
40675e7c 61{
abadc117
AD
62 int c;
63 int inside;
40675e7c 64
abadc117 65 c = getc (finput);
40675e7c
DM
66
67 for (;;)
68 {
69 int cplus_comment;
70
71 switch (c)
72 {
73 case '/':
79282c5a 74 /* FIXME: Should probably be merged with copy_comment. */
abadc117 75 c = getc (finput);
a083fbbf 76 if (c != '*' && c != '/')
a44c2277 77 {
a0f6b076 78 complain (_("unexpected `/' found and ignored"));
a44c2277
RS
79 break;
80 }
40675e7c
DM
81 cplus_comment = (c == '/');
82
abadc117 83 c = getc (finput);
40675e7c
DM
84
85 inside = 1;
86 while (inside)
87 {
88 if (!cplus_comment && c == '*')
89 {
90 while (c == '*')
abadc117 91 c = getc (finput);
40675e7c
DM
92
93 if (c == '/')
94 {
95 inside = 0;
abadc117 96 c = getc (finput);
40675e7c
DM
97 }
98 }
99 else if (c == '\n')
100 {
101 lineno++;
102 if (cplus_comment)
103 inside = 0;
abadc117 104 c = getc (finput);
40675e7c
DM
105 }
106 else if (c == EOF)
a0f6b076 107 fatal (_("unterminated comment"));
40675e7c 108 else
abadc117 109 c = getc (finput);
40675e7c
DM
110 }
111
112 break;
113
114 case '\n':
115 lineno++;
116
117 case ' ':
118 case '\t':
119 case '\f':
abadc117 120 c = getc (finput);
40675e7c
DM
121 break;
122
123 default:
36281465 124 return c;
40675e7c
DM
125 }
126 }
127}
128
79282c5a
AD
129
/*----------------------------------------------------------------.
| Read one character from F.  Hitting the end of file is reported |
| through fatal instead of being handed back to the caller.       |
`----------------------------------------------------------------*/

int
xgetc (FILE *f)
{
  const int ch = getc (f);

  if (ch != EOF)
    return ch;

  fatal (_("unexpected end of file"));
  return ch;
}
142
abadc117 143
2648a72d
AD
144/*---------------------------------------------------------------.
145| Read one literal character from FINPUT, process \-escapes, and |
146| return the character. |
147`---------------------------------------------------------------*/
f17bcd1f 148
2648a72d
AD
149char
150literalchar (void)
a44c2277 151{
abadc117 152 int c;
2648a72d 153 int res;
a44c2277 154
abadc117 155 c = xgetc (finput);
a083fbbf 156 if (c == '\n')
a44c2277 157 {
a0f6b076 158 complain (_("unescaped newline in constant"));
abadc117 159 ungetc (c, finput);
2648a72d 160 res = '?';
a44c2277
RS
161 }
162 else if (c != '\\')
163 {
2648a72d 164 res = c;
a44c2277
RS
165 }
166 else
167 {
abadc117
AD
168 c = xgetc (finput);
169 if (c == 't')
2648a72d 170 res = '\t';
abadc117 171 else if (c == 'n')
2648a72d 172 res = '\n';
abadc117 173 else if (c == 'a')
2648a72d 174 res = '\007';
abadc117 175 else if (c == 'r')
2648a72d 176 res = '\r';
abadc117 177 else if (c == 'f')
2648a72d 178 res = '\f';
abadc117 179 else if (c == 'b')
2648a72d 180 res = '\b';
abadc117 181 else if (c == 'v')
2648a72d 182 res = '\013';
abadc117 183 else if (c == '\\')
2648a72d 184 res = '\\';
abadc117 185 else if (c == '\'')
2648a72d 186 res = '\'';
abadc117 187 else if (c == '\"')
2648a72d 188 res = '\"';
a44c2277
RS
189 else if (c <= '7' && c >= '0')
190 {
2648a72d 191 res = 0;
a44c2277
RS
192 while (c <= '7' && c >= '0')
193 {
2648a72d
AD
194 res = (res * 8) + (c - '0');
195 if (res >= 256 || res < 0)
a44c2277 196 {
a0f6b076 197 complain (_("octal value outside range 0...255: `\\%o'"),
2648a72d
AD
198 res);
199 res &= 0xFF;
a44c2277
RS
200 break;
201 }
abadc117 202 c = xgetc (finput);
a44c2277 203 }
abadc117 204 ungetc (c, finput);
a44c2277
RS
205 }
206 else if (c == 'x')
207 {
abadc117 208 c = xgetc (finput);
2648a72d 209 res = 0;
a44c2277
RS
210 while (1)
211 {
212 if (c >= '0' && c <= '9')
2648a72d 213 res *= 16, res += c - '0';
a44c2277 214 else if (c >= 'a' && c <= 'f')
2648a72d 215 res *= 16, res += c - 'a' + 10;
a44c2277 216 else if (c >= 'A' && c <= 'F')
2648a72d 217 res *= 16, res += c - 'A' + 10;
a083fbbf 218 else
a44c2277 219 break;
2648a72d 220 if (res >= 256 || res < 0)
a44c2277 221 {
2648a72d
AD
222 complain (_("hexadecimal value above 255: `\\x%x'"), res);
223 res &= 0xFF;
a44c2277
RS
224 break;
225 }
abadc117 226 c = xgetc (finput);
a44c2277 227 }
abadc117 228 ungetc (c, finput);
a44c2277
RS
229 }
230 else
231 {
b0ce6046
AD
232 char badchar [] = "c";
233 badchar[0] = c;
a0f6b076 234 complain (_("unknown escape sequence: `\\' followed by `%s'"),
b0ce6046 235 quote (badchar));
2648a72d 236 res = '?';
a44c2277 237 }
abadc117 238 } /* has \ */
a44c2277 239
2648a72d 240 return res;
a44c2277
RS
241}
242
40675e7c
DM
243
244void
342b8b6e 245unlex (token_t token)
40675e7c
DM
246{
247 unlexed = token;
561f9a30 248 unlexed_token_buffer = token_buffer;
40675e7c
DM
249 unlexed_symval = symval;
250}
251
f282676b
AD
252/*-----------------------------------------------------------------.
253| We just read `<' from FIN. Store in TOKEN_BUFFER, the type name |
254| specified between the `<...>'. |
255`-----------------------------------------------------------------*/
256
257void
258read_type_name (FILE *fin)
259{
f282676b
AD
260 int c = getc (fin);
261
262 while (c != '>')
263 {
264 if (c == EOF)
265 fatal (_("unterminated type name at end of file"));
266 if (c == '\n')
267 {
268 complain (_("unterminated type name"));
269 ungetc (c, fin);
270 break;
271 }
272
f17bcd1f 273 obstack_1grow (&token_obstack, c);
f282676b
AD
274 c = getc (fin);
275 }
f17bcd1f
AD
276 obstack_1grow (&token_obstack, '\0');
277 token_buffer = obstack_finish (&token_obstack);
f282676b
AD
278}
279
40675e7c 280
511e79b3 281token_t
d2729d44 282lex (void)
40675e7c 283{
abadc117 284 int c;
f17bcd1f
AD
285
286 /* Just to make sure. */
287 token_buffer = NULL;
40675e7c 288
342b8b6e 289 if (unlexed != tok_undef)
40675e7c 290 {
342b8b6e 291 token_t res = unlexed;
40675e7c 292 symval = unlexed_symval;
561f9a30 293 token_buffer = unlexed_token_buffer;
342b8b6e
AD
294 unlexed = tok_undef;
295 return res;
40675e7c
DM
296 }
297
abadc117 298 c = skip_white_space ();
40675e7c
DM
299
300 switch (c)
301 {
302 case EOF:
f17bcd1f 303 token_buffer = "EOF";
511e79b3 304 return tok_eof;
40675e7c 305
abadc117
AD
306 case 'A': case 'B': case 'C': case 'D': case 'E':
307 case 'F': case 'G': case 'H': case 'I': case 'J':
308 case 'K': case 'L': case 'M': case 'N': case 'O':
309 case 'P': case 'Q': case 'R': case 'S': case 'T':
310 case 'U': case 'V': case 'W': case 'X': case 'Y':
40675e7c 311 case 'Z':
abadc117
AD
312 case 'a': case 'b': case 'c': case 'd': case 'e':
313 case 'f': case 'g': case 'h': case 'i': case 'j':
314 case 'k': case 'l': case 'm': case 'n': case 'o':
315 case 'p': case 'q': case 'r': case 's': case 't':
316 case 'u': case 'v': case 'w': case 'x': case 'y':
40675e7c 317 case 'z':
abadc117
AD
318 case '.': case '_':
319
abadc117 320 while (isalnum (c) || c == '_' || c == '.')
40675e7c 321 {
f17bcd1f 322 obstack_1grow (&token_obstack, c);
abadc117 323 c = getc (finput);
40675e7c 324 }
f17bcd1f
AD
325 obstack_1grow (&token_obstack, '\0');
326 token_buffer = obstack_finish (&token_obstack);
abadc117
AD
327 ungetc (c, finput);
328 symval = getsym (token_buffer);
511e79b3 329 return tok_identifier;
40675e7c 330
abadc117
AD
331 case '0': case '1': case '2': case '3': case '4':
332 case '5': case '6': case '7': case '8': case '9':
40675e7c
DM
333 {
334 numval = 0;
335
abadc117 336 while (isdigit (c))
40675e7c 337 {
f17bcd1f 338 obstack_1grow (&token_obstack, c);
abadc117
AD
339 numval = numval * 10 + c - '0';
340 c = getc (finput);
40675e7c 341 }
f17bcd1f
AD
342 obstack_1grow (&token_obstack, '\0');
343 token_buffer = obstack_finish (&token_obstack);
abadc117 344 ungetc (c, finput);
511e79b3 345 return tok_number;
40675e7c
DM
346 }
347
348 case '\'':
40675e7c
DM
349 /* parse the literal token and compute character code in code */
350
40675e7c 351 {
2648a72d 352 int code = literalchar ();
5ce94c29 353
f17bcd1f 354 obstack_1grow (&token_obstack, '\'');
2648a72d 355 obstack_1grow (&token_obstack, code);
40675e7c 356
abadc117 357 c = getc (finput);
a44c2277 358 if (c != '\'')
40675e7c 359 {
a0f6b076 360 complain (_("use \"...\" for multi-character literal tokens"));
2648a72d
AD
361 while (literalchar () != '\'')
362 /* Skip. */;
40675e7c 363 }
f17bcd1f
AD
364 obstack_1grow (&token_obstack, '\'');
365 obstack_1grow (&token_obstack, '\0');
366 token_buffer = obstack_finish (&token_obstack);
abadc117 367 symval = getsym (token_buffer);
5e424082
AD
368 symbol_class_set (symval, token_sym);
369 symbol_user_token_number_set (symval, code);
511e79b3 370 return tok_identifier;
a44c2277 371 }
40675e7c 372
a44c2277 373 case '\"':
a44c2277
RS
374 /* parse the literal string token and treat as an identifier */
375
a44c2277 376 {
5e424082 377 int code;
f17bcd1f
AD
378
379 obstack_1grow (&token_obstack, '\"');
79282c5a 380 /* Read up to and including ". */
2648a72d
AD
381 do
382 {
383 code = literalchar ();
384 obstack_1grow (&token_obstack, code);
385 }
386 while (code != '\"');
f17bcd1f
AD
387 obstack_1grow (&token_obstack, '\0');
388 token_buffer = obstack_finish (&token_obstack);
a44c2277 389
abadc117 390 symval = getsym (token_buffer);
5e424082 391 symbol_class_set (symval, token_sym);
511e79b3 392 return tok_identifier;
40675e7c
DM
393 }
394
395 case ',':
342b8b6e 396 token_buffer = ",";
511e79b3 397 return tok_comma;
40675e7c
DM
398
399 case ':':
342b8b6e 400 token_buffer = ":";
511e79b3 401 return tok_colon;
40675e7c
DM
402
403 case ';':
342b8b6e 404 token_buffer = ";";
511e79b3 405 return tok_semicolon;
40675e7c
DM
406
407 case '|':
342b8b6e 408 token_buffer = "|";
511e79b3 409 return tok_bar;
40675e7c
DM
410
411 case '{':
342b8b6e 412 token_buffer = "{";
511e79b3 413 return tok_left_curly;
40675e7c
DM
414
415 case '=':
342b8b6e 416 obstack_1grow (&token_obstack, c);
40675e7c
DM
417 do
418 {
abadc117 419 c = getc (finput);
342b8b6e 420 obstack_1grow (&token_obstack, c);
abadc117
AD
421 if (c == '\n')
422 lineno++;
40675e7c 423 }
abadc117 424 while (c == ' ' || c == '\n' || c == '\t');
342b8b6e
AD
425 obstack_1grow (&token_obstack, '\0');
426 token_buffer = obstack_finish (&token_obstack);
40675e7c
DM
427
428 if (c == '{')
a44c2277 429 {
511e79b3 430 return tok_left_curly;
a44c2277 431 }
40675e7c
DM
432 else
433 {
abadc117 434 ungetc (c, finput);
511e79b3 435 return tok_illegal;
40675e7c
DM
436 }
437
438 case '<':
f282676b 439 read_type_name (finput);
511e79b3 440 return tok_typename;
a083fbbf 441
40675e7c 442 case '%':
abadc117 443 return parse_percent_token ();
40675e7c
DM
444
445 default:
342b8b6e
AD
446 obstack_1grow (&token_obstack, c);
447 obstack_1grow (&token_obstack, '\0');
448 token_buffer = obstack_finish (&token_obstack);
511e79b3 449 return tok_illegal;
40675e7c
DM
450 }
451}
452
82b6d266
PB
/* Compare LEFT and RIGHT like strcmp, except that `-' and `_' are
   considered to be the same character.

   Return 0 if the strings are equal under that equivalence, otherwise
   the difference between the first two characters that differ (taken
   as unsigned char, with `_' mapped to `-').  Neither string is read
   past its terminating null character.  */

static int
option_strcmp (const char *left, const char *right)
{
  const unsigned char *l, *r;

  assert (left);
  assert (right);
  l = (const unsigned char *) left;
  r = (const unsigned char *) right;

  for (;; l++, r++)
    {
      /* Normalize before comparing so that `-' and `_' match.  */
      int lc = (*l == '_') ? '-' : *l;
      int rc = (*r == '_') ? '-' : *r;

      /* Stop on the first difference, or at the end of both strings
         (LC == RC == '\0'), in which case the result is 0.  */
      if (lc != rc || lc == '\0')
        return lc - rc;
    }
}
a44c2277
RS
471
472/* Parse a token which starts with %.
473 Assumes the % has already been read and discarded. */
40675e7c 474
342b8b6e 475token_t
d2729d44 476parse_percent_token (void)
40675e7c 477{
ec3bc396 478 const struct option_table_s *tx = NULL;
09b503c8
AD
479 const char *arg = NULL;
480 /* Where the ARG was found in token_buffer. */
481 size_t arg_offset = 0;
40675e7c 482
342b8b6e 483 int c = getc (finput);
29ae55f1
AD
484 obstack_1grow (&token_obstack, '%');
485 obstack_1grow (&token_obstack, c);
40675e7c 486
55024580 487 if (!isalpha (c))
40675e7c 488 {
55024580 489 obstack_1grow (&token_obstack, '\0');
29ae55f1 490 token_buffer = obstack_finish (&token_obstack);
40675e7c 491
55024580
AD
492 switch (c)
493 {
494 case '%':
495 return tok_two_percents;
40675e7c 496
55024580
AD
497 case '{':
498 return tok_percent_left_curly;
40675e7c 499
55024580
AD
500 /* The following guys are here for backward compatibility with
501 very ancient Yacc versions. The paper of Johnson mentions
502 them (as ancient :). */
503 case '<':
504 return tok_left;
40675e7c 505
55024580
AD
506 case '>':
507 return tok_right;
40675e7c 508
55024580
AD
509 case '2':
510 return tok_nonassoc;
40675e7c 511
55024580
AD
512 case '0':
513 return tok_token;
f17bcd1f 514
55024580
AD
515 case '=':
516 return tok_prec;
517
518 default:
519 return tok_illegal;
520 }
29ae55f1 521 }
40675e7c 522
29ae55f1 523 while (c = getc (finput), isalpha (c) || c == '_' || c == '-')
40675e7c 524 {
6bc35ae5
MA
525 if (c == '_')
526 c = '-';
f17bcd1f 527 obstack_1grow (&token_obstack, c);
40675e7c
DM
528 }
529
09b503c8
AD
530 /* %DIRECTIVE="ARG". Separate into
531 TOKEN_BUFFER = `%DIRECTIVE\0ARG\0'.
532 This is a bit hackish, but once we move to a Bison parser,
533 things will be cleaned up. */
951366c1
AD
534 if (c == '=')
535 {
09b503c8
AD
536 /* End of the directive. We skip the `='. */
537 obstack_1grow (&token_obstack, '\0');
538 /* Fetch the ARG if present. */
951366c1 539 c = getc (finput);
09b503c8 540 if (c == '"')
951366c1 541 {
09b503c8
AD
542 int code;
543 arg_offset = obstack_object_size (&token_obstack);
544 /* Read up to and including `"'. Do not append the closing
545 `"' in the output: it's not part of the ARG. */
2648a72d 546 while ((code = literalchar ()) != '"')
09b503c8 547 obstack_1grow (&token_obstack, code);
951366c1 548 }
09b503c8 549 /* else: should be an error. */
951366c1
AD
550 }
551 else
552 ungetc (c, finput);
553
f17bcd1f
AD
554 obstack_1grow (&token_obstack, '\0');
555 token_buffer = obstack_finish (&token_obstack);
09b503c8
AD
556 if (arg_offset)
557 arg = token_buffer + arg_offset;
40675e7c 558
a44c2277 559 /* table lookup % directive */
82b6d266
PB
560 for (tx = option_table; tx->name; tx++)
561 if ((tx->access == opt_percent || tx->access == opt_both)
562 && option_strcmp (token_buffer + 1, tx->name) == 0)
a44c2277 563 break;
6deb4447 564
65be0866
AD
565 if (arg && tx->ret_val != tok_stropt)
566 fatal (_("`%s' supports no argument: %s"), token_buffer, quote (arg));
951366c1 567
62ab6972 568
82b6d266 569 switch (tx->ret_val)
a44c2277 570 {
951366c1 571 case tok_stropt:
ec3bc396 572 assert (tx->flag);
951366c1
AD
573 if (arg)
574 {
ec3bc396 575 char **flag = (char **) tx->flag;
951366c1
AD
576 /* Keep only the first assignment: command line options have
577 already been processed, and we want them to have
578 precedence. Side effect: if this %-option is used
579 several times, only the first is honored. Bah. */
ec3bc396
AD
580 if (!*flag)
581 *flag = xstrdup (arg);
951366c1
AD
582 }
583 else
584 fatal (_("`%s' requires an argument"), token_buffer);
585 return tok_noop;
586 break;
587
588 case tok_intopt:
ec3bc396
AD
589 assert (tx->flag);
590 *((int *) (tx->flag)) = 1;
511e79b3 591 return tok_noop;
62ab6972
AD
592 break;
593
594 case tok_obsolete:
595 fatal (_("`%s' is no longer supported"), token_buffer);
951366c1 596 return tok_noop;
62ab6972 597 break;
342b8b6e
AD
598
599 default:
951366c1 600 return tx->ret_val;
342b8b6e 601 break;
a44c2277 602 }
951366c1 603 abort ();
40675e7c 604}