]> git.saurik.com Git - bison.git/blame - src/lex.c
Also test parse error messages, including with YYERROR_VERBOSE.
[bison.git] / src / lex.c
CommitLineData
40675e7c 1/* Token-reader for Bison's input parser,
a0f6b076 2 Copyright (C) 1984, 1986, 1989, 1992, 2000 Free Software Foundation, Inc.
40675e7c 3
a0f6b076 4 This file is part of Bison, the GNU Compiler Compiler.
40675e7c 5
a0f6b076
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
40675e7c 10
a0f6b076
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
40675e7c 15
a0f6b076
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
40675e7c 20
#include "system.h"
#include <limits.h> /* for INT_MAX */
#include "getargs.h"
#include "files.h"
#include "getopt.h" /* for optarg */
#include "symtab.h"
#include "lex.h"
#include "xalloc.h"
#include "complain.h"
#include "gram.h"
40675e7c 30
a44c2277 31/* Functions from main.c.  printable_version is used by literalchar
   to show the offending character of an unknown escape sequence. */
abadc117 32extern char *printable_version PARAMS ((int));
40675e7c
DM
33
34/* Buffer for storing the text of the current token.  Allocated by
   init_lex and enlarged by grow_token_buffer. */
35char *token_buffer;
36
37/* Allocated size of token_buffer, not including space for terminator. */
d2729d44 38int maxtoken;
40675e7c
DM
39
/* Symbol table entry of the last symbol-like token: lex stores the
   result of getsym here before returning IDENTIFIER. */
40bucket *symval;
/* Value of the last NUMBER token returned by lex. */
41int numval;
42
/* One-token pushback slot filled by unlex.  unlexed is the pending
   token code, or -1 when nothing is pending; unlexed_symval is the
   symval to restore along with it. */
43static int unlexed; /* these two describe a token to be reread */
44static bucket *unlexed_symval; /* by the next call to lex */
45
46
47void
d2729d44 48init_lex (void)
40675e7c
DM
49{
50 maxtoken = 100;
d7913476 51 token_buffer = XCALLOC (char, maxtoken + 1);
40675e7c
DM
52 unlexed = -1;
53}
54
55
d2729d44
JT
56char *
57grow_token_buffer (char *p)
40675e7c
DM
58{
59 int offset = p - token_buffer;
60 maxtoken *= 2;
d7913476 61 token_buffer = XREALLOC (token_buffer, char, maxtoken + 1);
40675e7c
DM
62 return token_buffer + offset;
63}
64
65
66int
d2729d44 67skip_white_space (void)
40675e7c 68{
abadc117
AD
69 int c;
70 int inside;
40675e7c 71
abadc117 72 c = getc (finput);
40675e7c
DM
73
74 for (;;)
75 {
76 int cplus_comment;
77
78 switch (c)
79 {
80 case '/':
abadc117 81 c = getc (finput);
a083fbbf 82 if (c != '*' && c != '/')
a44c2277 83 {
a0f6b076 84 complain (_("unexpected `/' found and ignored"));
a44c2277
RS
85 break;
86 }
40675e7c
DM
87 cplus_comment = (c == '/');
88
abadc117 89 c = getc (finput);
40675e7c
DM
90
91 inside = 1;
92 while (inside)
93 {
94 if (!cplus_comment && c == '*')
95 {
96 while (c == '*')
abadc117 97 c = getc (finput);
40675e7c
DM
98
99 if (c == '/')
100 {
101 inside = 0;
abadc117 102 c = getc (finput);
40675e7c
DM
103 }
104 }
105 else if (c == '\n')
106 {
107 lineno++;
108 if (cplus_comment)
109 inside = 0;
abadc117 110 c = getc (finput);
40675e7c
DM
111 }
112 else if (c == EOF)
a0f6b076 113 fatal (_("unterminated comment"));
40675e7c 114 else
abadc117 115 c = getc (finput);
40675e7c
DM
116 }
117
118 break;
119
120 case '\n':
121 lineno++;
122
123 case ' ':
124 case '\t':
125 case '\f':
abadc117 126 c = getc (finput);
40675e7c
DM
127 break;
128
129 default:
36281465 130 return c;
40675e7c
DM
131 }
132 }
133}
134
/* Read one character from F like getc, but treat the end of the file
   as an error: it is reported through fatal.  */
static int
xgetc (FILE *f)
{
  int ch = getc (f);

  if (ch != EOF)
    return ch;

  fatal (_("unexpected end of file"));
  return ch;
}
144
abadc117
AD
145
146/*------------------------------------------------------------------.
147| Read one literal character from finput. Process \ escapes. |
148| Append the normalized string version of the char to *PP. Assign |
149| the character code to *PCODE. Return 1 unless the character is an |
150| unescaped `term' or \n report error for \n |
151`------------------------------------------------------------------*/
152
4a120d45 153static int
d2729d44 154literalchar (char **pp, int *pcode, char term)
a44c2277 155{
abadc117
AD
156 int c;
157 char *p;
158 int code;
a44c2277
RS
159 int wasquote = 0;
160
abadc117 161 c = xgetc (finput);
a083fbbf 162 if (c == '\n')
a44c2277 163 {
a0f6b076 164 complain (_("unescaped newline in constant"));
abadc117 165 ungetc (c, finput);
a44c2277
RS
166 code = '?';
167 wasquote = 1;
168 }
169 else if (c != '\\')
170 {
171 code = c;
a083fbbf 172 if (c == term)
a44c2277
RS
173 wasquote = 1;
174 }
175 else
176 {
abadc117
AD
177 c = xgetc (finput);
178 if (c == 't')
179 code = '\t';
180 else if (c == 'n')
181 code = '\n';
182 else if (c == 'a')
183 code = '\007';
184 else if (c == 'r')
185 code = '\r';
186 else if (c == 'f')
187 code = '\f';
188 else if (c == 'b')
189 code = '\b';
190 else if (c == 'v')
191 code = '\013';
192 else if (c == '\\')
193 code = '\\';
194 else if (c == '\'')
195 code = '\'';
196 else if (c == '\"')
197 code = '\"';
a44c2277
RS
198 else if (c <= '7' && c >= '0')
199 {
200 code = 0;
201 while (c <= '7' && c >= '0')
202 {
203 code = (code * 8) + (c - '0');
204 if (code >= 256 || code < 0)
205 {
a0f6b076
AD
206 complain (_("octal value outside range 0...255: `\\%o'"),
207 code);
a44c2277
RS
208 code &= 0xFF;
209 break;
210 }
abadc117 211 c = xgetc (finput);
a44c2277 212 }
abadc117 213 ungetc (c, finput);
a44c2277
RS
214 }
215 else if (c == 'x')
216 {
abadc117 217 c = xgetc (finput);
a44c2277
RS
218 code = 0;
219 while (1)
220 {
221 if (c >= '0' && c <= '9')
abadc117 222 code *= 16, code += c - '0';
a44c2277 223 else if (c >= 'a' && c <= 'f')
abadc117 224 code *= 16, code += c - 'a' + 10;
a44c2277 225 else if (c >= 'A' && c <= 'F')
abadc117 226 code *= 16, code += c - 'A' + 10;
a083fbbf 227 else
a44c2277 228 break;
abadc117 229 if (code >= 256 || code < 0)
a44c2277 230 {
abadc117 231 complain (_("hexadecimal value above 255: `\\x%x'"), code);
a44c2277
RS
232 code &= 0xFF;
233 break;
234 }
abadc117 235 c = xgetc (finput);
a44c2277 236 }
abadc117 237 ungetc (c, finput);
a44c2277
RS
238 }
239 else
240 {
a0f6b076 241 complain (_("unknown escape sequence: `\\' followed by `%s'"),
abadc117 242 printable_version (c));
a44c2277
RS
243 code = '?';
244 }
abadc117 245 } /* has \ */
a44c2277
RS
246
247 /* now fill token_buffer with the canonical name for this character
248 as a literal token. Do not use what the user typed,
249 so that `\012' and `\n' can be interchangeable. */
250
251 p = *pp;
e5335b74
JT
252 if (code == term && wasquote)
253 *p++ = code;
abadc117
AD
254 else if (code == '\\')
255 {
256 *p++ = '\\';
257 *p++ = '\\';
258 }
259 else if (code == '\'')
260 {
261 *p++ = '\\';
262 *p++ = '\'';
263 }
264 else if (code == '\"')
265 {
266 *p++ = '\\';
267 *p++ = '\"';
268 }
5ce94c29
RS
269 else if (code >= 040 && code < 0177)
270 *p++ = code;
abadc117
AD
271 else if (code == '\t')
272 {
273 *p++ = '\\';
274 *p++ = 't';
275 }
276 else if (code == '\n')
277 {
278 *p++ = '\\';
279 *p++ = 'n';
280 }
281 else if (code == '\r')
282 {
283 *p++ = '\\';
284 *p++ = 'r';
285 }
286 else if (code == '\v')
287 {
288 *p++ = '\\';
289 *p++ = 'v';
290 }
291 else if (code == '\b')
292 {
293 *p++ = '\\';
294 *p++ = 'b';
295 }
296 else if (code == '\f')
297 {
298 *p++ = '\\';
299 *p++ = 'f';
300 }
a44c2277
RS
301 else
302 {
303 *p++ = '\\';
304 *p++ = code / 0100 + '0';
305 *p++ = ((code / 010) & 07) + '0';
306 *p++ = (code & 07) + '0';
307 }
308 *pp = p;
309 *pcode = code;
abadc117 310 return !wasquote;
a44c2277
RS
311}
312
40675e7c
DM
313
314void
d2729d44 315unlex (int token)
40675e7c
DM
316{
317 unlexed = token;
318 unlexed_symval = symval;
319}
320
321
40675e7c 322int
d2729d44 323lex (void)
40675e7c 324{
abadc117 325 int c;
a44c2277 326 char *p;
40675e7c
DM
327
328 if (unlexed >= 0)
329 {
330 symval = unlexed_symval;
331 c = unlexed;
332 unlexed = -1;
36281465 333 return c;
40675e7c
DM
334 }
335
abadc117
AD
336 c = skip_white_space ();
337 *token_buffer = c; /* for error messages (token buffer always valid) */
a44c2277 338 token_buffer[1] = 0;
40675e7c
DM
339
340 switch (c)
341 {
342 case EOF:
abadc117 343 strcpy (token_buffer, "EOF");
36281465 344 return ENDFILE;
40675e7c 345
abadc117
AD
346 case 'A': case 'B': case 'C': case 'D': case 'E':
347 case 'F': case 'G': case 'H': case 'I': case 'J':
348 case 'K': case 'L': case 'M': case 'N': case 'O':
349 case 'P': case 'Q': case 'R': case 'S': case 'T':
350 case 'U': case 'V': case 'W': case 'X': case 'Y':
40675e7c 351 case 'Z':
abadc117
AD
352 case 'a': case 'b': case 'c': case 'd': case 'e':
353 case 'f': case 'g': case 'h': case 'i': case 'j':
354 case 'k': case 'l': case 'm': case 'n': case 'o':
355 case 'p': case 'q': case 'r': case 's': case 't':
356 case 'u': case 'v': case 'w': case 'x': case 'y':
40675e7c 357 case 'z':
abadc117
AD
358 case '.': case '_':
359
40675e7c 360 p = token_buffer;
abadc117 361 while (isalnum (c) || c == '_' || c == '.')
40675e7c
DM
362 {
363 if (p == token_buffer + maxtoken)
abadc117 364 p = grow_token_buffer (p);
40675e7c
DM
365
366 *p++ = c;
abadc117 367 c = getc (finput);
40675e7c
DM
368 }
369
370 *p = 0;
abadc117
AD
371 ungetc (c, finput);
372 symval = getsym (token_buffer);
36281465 373 return IDENTIFIER;
40675e7c 374
abadc117
AD
375 case '0': case '1': case '2': case '3': case '4':
376 case '5': case '6': case '7': case '8': case '9':
40675e7c
DM
377 {
378 numval = 0;
379
a44c2277 380 p = token_buffer;
abadc117 381 while (isdigit (c))
40675e7c 382 {
a44c2277 383 if (p == token_buffer + maxtoken)
abadc117 384 p = grow_token_buffer (p);
a44c2277
RS
385
386 *p++ = c;
abadc117
AD
387 numval = numval * 10 + c - '0';
388 c = getc (finput);
40675e7c 389 }
a44c2277 390 *p = 0;
abadc117 391 ungetc (c, finput);
36281465 392 return NUMBER;
40675e7c
DM
393 }
394
395 case '\'':
40675e7c
DM
396 /* parse the literal token and compute character code in code */
397
a44c2277 398 translations = -1;
40675e7c 399 {
a44c2277
RS
400 int code, discode;
401 char discard[10], *dp;
5ce94c29 402
a44c2277
RS
403 p = token_buffer;
404 *p++ = '\'';
abadc117 405 literalchar (&p, &code, '\'');
40675e7c 406
abadc117 407 c = getc (finput);
a44c2277 408 if (c != '\'')
40675e7c 409 {
a0f6b076 410 complain (_("use \"...\" for multi-character literal tokens"));
5ce94c29
RS
411 while (1)
412 {
413 dp = discard;
abadc117 414 if (!literalchar (&dp, &discode, '\''))
5ce94c29
RS
415 break;
416 }
40675e7c 417 }
a44c2277
RS
418 *p++ = '\'';
419 *p = 0;
abadc117 420 symval = getsym (token_buffer);
d7020c20 421 symval->class = token_sym;
abadc117 422 if (!symval->user_token_number)
a44c2277 423 symval->user_token_number = code;
36281465 424 return IDENTIFIER;
a44c2277 425 }
40675e7c 426
a44c2277 427 case '\"':
a44c2277
RS
428 /* parse the literal string token and treat as an identifier */
429
430 translations = -1;
431 {
abadc117 432 int code; /* ignored here */
40675e7c 433 p = token_buffer;
a44c2277 434 *p++ = '\"';
abadc117 435 while (literalchar (&p, &code, '\"')) /* read up to and including " */
40675e7c 436 {
a44c2277 437 if (p >= token_buffer + maxtoken - 4)
abadc117 438 p = grow_token_buffer (p);
40675e7c 439 }
40675e7c 440 *p = 0;
a44c2277 441
abadc117 442 symval = getsym (token_buffer);
d7020c20 443 symval->class = token_sym;
a44c2277 444
36281465 445 return IDENTIFIER;
40675e7c
DM
446 }
447
448 case ',':
36281465 449 return COMMA;
40675e7c
DM
450
451 case ':':
36281465 452 return COLON;
40675e7c
DM
453
454 case ';':
36281465 455 return SEMICOLON;
40675e7c
DM
456
457 case '|':
36281465 458 return BAR;
40675e7c
DM
459
460 case '{':
36281465 461 return LEFT_CURLY;
40675e7c
DM
462
463 case '=':
464 do
465 {
abadc117
AD
466 c = getc (finput);
467 if (c == '\n')
468 lineno++;
40675e7c 469 }
abadc117 470 while (c == ' ' || c == '\n' || c == '\t');
40675e7c
DM
471
472 if (c == '{')
a44c2277 473 {
abadc117 474 strcpy (token_buffer, "={");
36281465 475 return LEFT_CURLY;
a44c2277 476 }
40675e7c
DM
477 else
478 {
abadc117 479 ungetc (c, finput);
36281465 480 return ILLEGAL;
40675e7c
DM
481 }
482
483 case '<':
484 p = token_buffer;
abadc117 485 c = getc (finput);
40675e7c
DM
486 while (c != '>')
487 {
a44c2277 488 if (c == EOF)
a0f6b076 489 fatal (_("unterminated type name at end of file"));
a083fbbf 490 if (c == '\n')
a44c2277 491 {
a0f6b076 492 complain (_("unterminated type name"));
abadc117 493 ungetc (c, finput);
a44c2277
RS
494 break;
495 }
40675e7c
DM
496
497 if (p == token_buffer + maxtoken)
abadc117 498 p = grow_token_buffer (p);
40675e7c
DM
499
500 *p++ = c;
abadc117 501 c = getc (finput);
40675e7c
DM
502 }
503 *p = 0;
36281465 504 return TYPENAME;
a083fbbf 505
40675e7c
DM
506
507 case '%':
abadc117 508 return parse_percent_token ();
40675e7c
DM
509
510 default:
36281465 511 return ILLEGAL;
40675e7c
DM
512 }
513}
514
abadc117
AD
515/* the following table dictates the action taken for the various %
516 directives. A setflag value causes the named flag to be set. A
517 retval action returns the code. */
518struct percent_table_struct
a44c2277 519{
abadc117
AD
520 const char *name;
521 void *setflag;
522 int retval;
523}
524percent_table[] =
525{
526 { "token", NULL, TOKEN },
527 { "term", NULL, TOKEN },
528 { "nterm", NULL, NTERM },
529 { "type", NULL, TYPE },
530 { "guard", NULL, GUARD },
531 { "union", NULL, UNION },
532 { "expect", NULL, EXPECT },
533 { "thong", NULL, THONG },
534 { "start", NULL, START },
535 { "left", NULL, LEFT },
536 { "right", NULL, RIGHT },
537 { "nonassoc", NULL, NONASSOC },
538 { "binary", NULL, NONASSOC },
539 { "semantic_parser", NULL, SEMANTIC_PARSER },
540 { "pure_parser", NULL, PURE_PARSER },
541 { "prec", NULL, PREC },
542 { "no_lines", &nolinesflag, NOOP}, /* -l */
543 { "raw", &rawtoknumflag, NOOP }, /* -r */
544 { "token_table", &toknumflag, NOOP}, /* -k */
a44c2277 545#if 0
abadc117
AD
546 /* These can be utilized after main is reoganized so
547 open_files() is deferred 'til after read_declarations().
548 But %{ and %union both put information into files
549 that have to be opened before read_declarations().
a44c2277 550 */
1916f98e
AD
551 { "yacc", &yaccflag, NOOP}, /* -y */
552 { "fixed_output_files", &yaccflag, NOOP}, /* -y */
abadc117
AD
553 { "defines", &definesflag, NOOP}, /* -d */
554 { "no_parser", &noparserflag, NOOP}, /* -n */
555 { "output_file", &spec_outfile, SETOPT}, /* -o */
556 { "file_prefix", &spec_file_prefix, SETOPT}, /* -b */
557 { "name_prefix", &spec_name_prefix, SETOPT}, /* -p */
558 /* These would be acceptable, but they do not affect processing */
559 { "verbose", &verboseflag, NOOP}, /* -v */
560 { "debug", &debugflag, NOOP}, /* -t */
561/* {"help", <print usage stmt>, NOOP}, *//* -h */
562/* {"version", <print version number> , NOOP}, *//* -V */
a44c2277 563#endif
abadc117 564 { NULL, NULL, ILLEGAL}
a44c2277
RS
565};
566
567/* Parse a token which starts with %.
568 Assumes the % has already been read and discarded. */
40675e7c
DM
569
570int
d2729d44 571parse_percent_token (void)
40675e7c 572{
abadc117
AD
573 int c;
574 char *p;
575 struct percent_table_struct *tx;
40675e7c
DM
576
577 p = token_buffer;
abadc117 578 c = getc (finput);
a44c2277 579 *p++ = '%';
abadc117 580 *p++ = c; /* for error msg */
a44c2277 581 *p = 0;
40675e7c
DM
582
583 switch (c)
584 {
585 case '%':
36281465 586 return TWO_PERCENTS;
40675e7c
DM
587
588 case '{':
36281465 589 return PERCENT_LEFT_CURLY;
40675e7c
DM
590
591 case '<':
36281465 592 return LEFT;
40675e7c
DM
593
594 case '>':
36281465 595 return RIGHT;
40675e7c
DM
596
597 case '2':
36281465 598 return NONASSOC;
40675e7c
DM
599
600 case '0':
36281465 601 return TOKEN;
40675e7c
DM
602
603 case '=':
36281465 604 return PREC;
40675e7c 605 }
abadc117 606 if (!isalpha (c))
36281465 607 return ILLEGAL;
40675e7c 608
a44c2277
RS
609 p = token_buffer;
610 *p++ = '%';
abadc117 611 while (isalpha (c) || c == '_' || c == '-')
40675e7c
DM
612 {
613 if (p == token_buffer + maxtoken)
abadc117 614 p = grow_token_buffer (p);
40675e7c 615
abadc117
AD
616 if (c == '-')
617 c = '_';
40675e7c 618 *p++ = c;
abadc117 619 c = getc (finput);
40675e7c
DM
620 }
621
abadc117 622 ungetc (c, finput);
40675e7c
DM
623
624 *p = 0;
625
a44c2277
RS
626 /* table lookup % directive */
627 for (tx = percent_table; tx->name; tx++)
abadc117 628 if (strcmp (token_buffer + 1, tx->name) == 0)
a44c2277
RS
629 break;
630 if (tx->retval == SETOPT)
631 {
abadc117 632 *((char **) (tx->setflag)) = optarg;
a44c2277
RS
633 return NOOP;
634 }
635 if (tx->setflag)
636 {
abadc117 637 *((int *) (tx->setflag)) = 1;
a44c2277
RS
638 return NOOP;
639 }
640 return tx->retval;
40675e7c 641}