]> git.saurik.com Git - bison.git/blob - src/lex.c
* src/options.c (option_table): Adjust.
[bison.git] / src / lex.c
1 /* Token-reader for Bison's input parser,
2 Copyright 1984, 1986, 1989, 1992, 2000, 2001 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 #include "system.h"
22 #include "getargs.h"
23 #include "files.h"
24 #include "getopt.h" /* for optarg */
25 #include "symtab.h"
26 #include "options.h"
27 #include "lex.h"
28 #include "xalloc.h"
29 #include "complain.h"
30 #include "gram.h"
31 #include "quote.h"
32
/* Buffer for storing the current token.  The text of a token is
   accumulated in TOKEN_OBSTACK; once complete, TOKEN_BUFFER points to
   it.  lex resets TOKEN_BUFFER to NULL on entry, and some tokens
   point it at a constant string instead. */
struct obstack token_obstack;
const char *token_buffer = NULL;

/* Set by lex: SYMVAL is the symbol looked up for an identifier or
   literal token, NUMVAL is the value of a number token. */
bucket *symval;
int numval;

/* UNLEXED is negative when no token is waiting to be reread. */
static int unlexed; /* these two describe a token to be reread */
static bucket *unlexed_symval; /* by the next call to lex */
42
43
44 void
45 init_lex (void)
46 {
47 obstack_init (&token_obstack);
48 unlexed = -1;
49 }
50
51
52 int
53 skip_white_space (void)
54 {
55 int c;
56 int inside;
57
58 c = getc (finput);
59
60 for (;;)
61 {
62 int cplus_comment;
63
64 switch (c)
65 {
66 case '/':
67 /* FIXME: Should probably be merged with copy_comment. */
68 c = getc (finput);
69 if (c != '*' && c != '/')
70 {
71 complain (_("unexpected `/' found and ignored"));
72 break;
73 }
74 cplus_comment = (c == '/');
75
76 c = getc (finput);
77
78 inside = 1;
79 while (inside)
80 {
81 if (!cplus_comment && c == '*')
82 {
83 while (c == '*')
84 c = getc (finput);
85
86 if (c == '/')
87 {
88 inside = 0;
89 c = getc (finput);
90 }
91 }
92 else if (c == '\n')
93 {
94 lineno++;
95 if (cplus_comment)
96 inside = 0;
97 c = getc (finput);
98 }
99 else if (c == EOF)
100 fatal (_("unterminated comment"));
101 else
102 c = getc (finput);
103 }
104
105 break;
106
107 case '\n':
108 lineno++;
109
110 case ' ':
111 case '\t':
112 case '\f':
113 c = getc (finput);
114 break;
115
116 default:
117 return c;
118 }
119 }
120 }
121
122
/*-------------------------------------------------------------.
| Read one character from F.  Hitting end of file is reported  |
| through fatal; otherwise the character read is returned.     |
`-------------------------------------------------------------*/

static int
xgetc (FILE *f)
{
  const int ch = getc (f);

  if (ch != EOF)
    return ch;

  fatal (_("unexpected end of file"));
  return ch;
}
135
136
137 /*------------------------------------------------------------------.
138 | Read one literal character from finput. Process \ escapes. |
139 | Append the normalized string version of the char to OUT. Assign |
140 | the character code to *PCODE. Return 1 unless the character is an |
141 | unescaped `term' or \n report error for \n. |
142 `------------------------------------------------------------------*/
143
144 /* FIXME: We could directly work in the obstack, but that would make
145 it more difficult to move to quotearg some day. So for the time
146 being, I prefer have literalchar behave like quotearg, and change
147 my mind later if I was wrong. */
148
149 static int
150 literalchar (struct obstack *out, int *pcode, char term)
151 {
152 int c;
153 char buf[4096];
154 char *cp;
155 int code;
156 int wasquote = 0;
157
158 c = xgetc (finput);
159 if (c == '\n')
160 {
161 complain (_("unescaped newline in constant"));
162 ungetc (c, finput);
163 code = '?';
164 wasquote = 1;
165 }
166 else if (c != '\\')
167 {
168 code = c;
169 if (c == term)
170 wasquote = 1;
171 }
172 else
173 {
174 c = xgetc (finput);
175 if (c == 't')
176 code = '\t';
177 else if (c == 'n')
178 code = '\n';
179 else if (c == 'a')
180 code = '\007';
181 else if (c == 'r')
182 code = '\r';
183 else if (c == 'f')
184 code = '\f';
185 else if (c == 'b')
186 code = '\b';
187 else if (c == 'v')
188 code = '\013';
189 else if (c == '\\')
190 code = '\\';
191 else if (c == '\'')
192 code = '\'';
193 else if (c == '\"')
194 code = '\"';
195 else if (c <= '7' && c >= '0')
196 {
197 code = 0;
198 while (c <= '7' && c >= '0')
199 {
200 code = (code * 8) + (c - '0');
201 if (code >= 256 || code < 0)
202 {
203 complain (_("octal value outside range 0...255: `\\%o'"),
204 code);
205 code &= 0xFF;
206 break;
207 }
208 c = xgetc (finput);
209 }
210 ungetc (c, finput);
211 }
212 else if (c == 'x')
213 {
214 c = xgetc (finput);
215 code = 0;
216 while (1)
217 {
218 if (c >= '0' && c <= '9')
219 code *= 16, code += c - '0';
220 else if (c >= 'a' && c <= 'f')
221 code *= 16, code += c - 'a' + 10;
222 else if (c >= 'A' && c <= 'F')
223 code *= 16, code += c - 'A' + 10;
224 else
225 break;
226 if (code >= 256 || code < 0)
227 {
228 complain (_("hexadecimal value above 255: `\\x%x'"), code);
229 code &= 0xFF;
230 break;
231 }
232 c = xgetc (finput);
233 }
234 ungetc (c, finput);
235 }
236 else
237 {
238 char badchar [] = "c";
239 badchar[0] = c;
240 complain (_("unknown escape sequence: `\\' followed by `%s'"),
241 quote (badchar));
242 code = '?';
243 }
244 } /* has \ */
245
246 /* now fill BUF with the canonical name for this character as a
247 literal token. Do not use what the user typed, so that `\012'
248 and `\n' can be interchangeable. */
249
250 cp = buf;
251 if (code == term && wasquote)
252 *cp++ = code;
253 else if (code == '\\')
254 {
255 *cp++ = '\\';
256 *cp++ = '\\';
257 }
258 else if (code == '\'')
259 {
260 *cp++ = '\\';
261 *cp++ = '\'';
262 }
263 else if (code == '\"')
264 {
265 *cp++ = '\\';
266 *cp++ = '\"';
267 }
268 else if (code >= 040 && code < 0177)
269 *cp++ = code;
270 else if (code == '\t')
271 {
272 *cp++ = '\\';
273 *cp++ = 't';
274 }
275 else if (code == '\n')
276 {
277 *cp++ = '\\';
278 *cp++ = 'n';
279 }
280 else if (code == '\r')
281 {
282 *cp++ = '\\';
283 *cp++ = 'r';
284 }
285 else if (code == '\v')
286 {
287 *cp++ = '\\';
288 *cp++ = 'v';
289 }
290 else if (code == '\b')
291 {
292 *cp++ = '\\';
293 *cp++ = 'b';
294 }
295 else if (code == '\f')
296 {
297 *cp++ = '\\';
298 *cp++ = 'f';
299 }
300 else
301 {
302 *cp++ = '\\';
303 *cp++ = code / 0100 + '0';
304 *cp++ = ((code / 010) & 07) + '0';
305 *cp++ = (code & 07) + '0';
306 }
307 *cp = '\0';
308
309 if (out)
310 obstack_sgrow (out, buf);
311 *pcode = code;
312 return !wasquote;
313 }
314
315
316 void
317 unlex (int token)
318 {
319 unlexed = token;
320 unlexed_symval = symval;
321 }
322
323 /*-----------------------------------------------------------------.
324 | We just read `<' from FIN. Store in TOKEN_BUFFER, the type name |
325 | specified between the `<...>'. |
326 `-----------------------------------------------------------------*/
327
328 void
329 read_type_name (FILE *fin)
330 {
331 int c = getc (fin);
332
333 while (c != '>')
334 {
335 if (c == EOF)
336 fatal (_("unterminated type name at end of file"));
337 if (c == '\n')
338 {
339 complain (_("unterminated type name"));
340 ungetc (c, fin);
341 break;
342 }
343
344 obstack_1grow (&token_obstack, c);
345 c = getc (fin);
346 }
347 obstack_1grow (&token_obstack, '\0');
348 token_buffer = obstack_finish (&token_obstack);
349 }
350
351
352 token_t
353 lex (void)
354 {
355 int c;
356
357 /* Just to make sure. */
358 token_buffer = NULL;
359
360 if (unlexed >= 0)
361 {
362 symval = unlexed_symval;
363 c = unlexed;
364 unlexed = -1;
365 return c;
366 }
367
368 c = skip_white_space ();
369
370 switch (c)
371 {
372 case EOF:
373 token_buffer = "EOF";
374 return tok_eof;
375
376 case 'A': case 'B': case 'C': case 'D': case 'E':
377 case 'F': case 'G': case 'H': case 'I': case 'J':
378 case 'K': case 'L': case 'M': case 'N': case 'O':
379 case 'P': case 'Q': case 'R': case 'S': case 'T':
380 case 'U': case 'V': case 'W': case 'X': case 'Y':
381 case 'Z':
382 case 'a': case 'b': case 'c': case 'd': case 'e':
383 case 'f': case 'g': case 'h': case 'i': case 'j':
384 case 'k': case 'l': case 'm': case 'n': case 'o':
385 case 'p': case 'q': case 'r': case 's': case 't':
386 case 'u': case 'v': case 'w': case 'x': case 'y':
387 case 'z':
388 case '.': case '_':
389
390 while (isalnum (c) || c == '_' || c == '.')
391 {
392 obstack_1grow (&token_obstack, c);
393 c = getc (finput);
394 }
395 obstack_1grow (&token_obstack, '\0');
396 token_buffer = obstack_finish (&token_obstack);
397 ungetc (c, finput);
398 symval = getsym (token_buffer);
399 return tok_identifier;
400
401 case '0': case '1': case '2': case '3': case '4':
402 case '5': case '6': case '7': case '8': case '9':
403 {
404 numval = 0;
405
406 while (isdigit (c))
407 {
408 obstack_1grow (&token_obstack, c);
409 numval = numval * 10 + c - '0';
410 c = getc (finput);
411 }
412 obstack_1grow (&token_obstack, '\0');
413 token_buffer = obstack_finish (&token_obstack);
414 ungetc (c, finput);
415 return tok_number;
416 }
417
418 case '\'':
419 /* parse the literal token and compute character code in code */
420
421 translations = -1;
422 {
423 int code, discode;
424
425 obstack_1grow (&token_obstack, '\'');
426 literalchar (&token_obstack, &code, '\'');
427
428 c = getc (finput);
429 if (c != '\'')
430 {
431 complain (_("use \"...\" for multi-character literal tokens"));
432 while (1)
433 if (!literalchar (0, &discode, '\''))
434 break;
435 }
436 obstack_1grow (&token_obstack, '\'');
437 obstack_1grow (&token_obstack, '\0');
438 token_buffer = obstack_finish (&token_obstack);
439 symval = getsym (token_buffer);
440 symval->class = token_sym;
441 if (!symval->user_token_number)
442 symval->user_token_number = code;
443 return tok_identifier;
444 }
445
446 case '\"':
447 /* parse the literal string token and treat as an identifier */
448
449 translations = -1;
450 {
451 int code; /* ignored here */
452
453 obstack_1grow (&token_obstack, '\"');
454 /* Read up to and including ". */
455 while (literalchar (&token_obstack, &code, '\"'))
456 /* nothing */;
457 obstack_1grow (&token_obstack, '\0');
458 token_buffer = obstack_finish (&token_obstack);
459
460 symval = getsym (token_buffer);
461 symval->class = token_sym;
462
463 return tok_identifier;
464 }
465
466 case ',':
467 return tok_comma;
468
469 case ':':
470 return tok_colon;
471
472 case ';':
473 return tok_semicolon;
474
475 case '|':
476 return tok_bar;
477
478 case '{':
479 return tok_left_curly;
480
481 case '=':
482 do
483 {
484 c = getc (finput);
485 if (c == '\n')
486 lineno++;
487 }
488 while (c == ' ' || c == '\n' || c == '\t');
489
490 if (c == '{')
491 {
492 token_buffer = "={";
493 return tok_left_curly;
494 }
495 else
496 {
497 ungetc (c, finput);
498 return tok_illegal;
499 }
500
501 case '<':
502 read_type_name (finput);
503 return tok_typename;
504
505 case '%':
506 return parse_percent_token ();
507
508 default:
509 return tok_illegal;
510 }
511 }
512
/* This function is a strcmp which doesn't differentiate `-' and `_'
   chars: the two are treated as one and the same character.

   Return 0 if LEFT and RIGHT are equal under that equivalence.
   Otherwise return the difference between the first pair of
   characters (taken as unsigned char) that are not equivalent, so
   the sign of the result is that of strcmp.  Neither argument may
   be null. */

static int
option_strcmp (const char *left, const char *right)
{
  const unsigned char *l = (const unsigned char *) left;
  const unsigned char *r = (const unsigned char *) right;

  assert (left != NULL && right != NULL);

  /* Walk both strings in lock step, so that characters at the same
     position are the ones compared, and never look past the
     terminating null of either string. */
  for (;; l++, r++)
    {
      int lc = (*l == '_') ? '-' : *l;
      int rc = (*r == '_') ? '-' : *r;

      /* Folded characters are equal only if both are the null byte
         or both are ordinary equivalent characters, so on a null
         byte here the strings ended together and the result is 0. */
      if (lc != rc || *l == '\0')
        return *l - *r;
    }
}
530
531 /* Parse a token which starts with %.
532 Assumes the % has already been read and discarded. */
533
534 int
535 parse_percent_token (void)
536 {
537 int c;
538 const struct option_table_struct *tx;
539
540 c = getc (finput);
541
542 switch (c)
543 {
544 case '%':
545 return tok_two_percents;
546
547 case '{':
548 return tok_percent_left_curly;
549
550 case '<':
551 return tok_left;
552
553 case '>':
554 return tok_right;
555
556 case '2':
557 return tok_nonassoc;
558
559 case '0':
560 return tok_token;
561
562 case '=':
563 return tok_prec;
564 }
565
566 if (!isalpha (c))
567 return tok_illegal;
568
569 obstack_1grow (&token_obstack, '%');
570 while (isalpha (c) || c == '_' || c == '-')
571 {
572 if (c == '_')
573 c = '-';
574 obstack_1grow (&token_obstack, c);
575 c = getc (finput);
576 }
577
578 ungetc (c, finput);
579 obstack_1grow (&token_obstack, '\0');
580 token_buffer = obstack_finish (&token_obstack);
581
582 /* table lookup % directive */
583 for (tx = option_table; tx->name; tx++)
584 if ((tx->access == opt_percent || tx->access == opt_both)
585 && option_strcmp (token_buffer + 1, tx->name) == 0)
586 break;
587
588 if (tx->set_flag)
589 {
590 *((int *) (tx->set_flag)) = 1;
591 return tok_noop;
592 }
593
594 switch (tx->ret_val)
595 {
596 case tok_setopt:
597 *((char **) (tx->set_flag)) = optarg;
598 return tok_noop;
599 break;
600
601 case tok_obsolete:
602 fatal (_("`%s' is no longer supported"), token_buffer);
603 break;
604 }
605
606 return tx->ret_val;
607 }