]> git.saurik.com Git - bison.git/blame - src/reader.c
Try to make the use of the eoftoken valid. Given that its value
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
41 bucket *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
a70083a3 51 bucket *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
1ff442ca 55char **tags;
d019d655 56short *user_toknums;
4a120d45
JT
57static symbol_list *grammar;
58static int start_flag;
59static bucket *startval;
1ff442ca
NF
60
61/* Nonzero if components of semantic values are used, implying
62 they must be unions. */
63static int value_components_used;
64
d7020c20
AD
65/* Nonzero if %union has been seen. */
66static int typed;
1ff442ca 67
d7020c20
AD
68/* Incremented for each %left, %right or %nonassoc seen */
69static int lastprec;
1ff442ca 70
b7c49edf
AD
71static bucket *errtoken = NULL;
72static bucket *undeftoken = NULL;
73static bucket *eoftoken = NULL;
b29b2ed5 74
6255b435 75static symbol_list *
b29b2ed5
AD
76symbol_list_new (bucket *sym)
77{
78 symbol_list *res = XMALLOC (symbol_list, 1);
79 res->next = NULL;
80 res->sym = sym;
81 res->line = lineno;
d945f5cd
AD
82 res->action = NULL;
83 res->action_line = 0;
f499b062
AD
84 res->guard = NULL;
85 res->guard_line = 0;
b29b2ed5
AD
86 res->ruleprec = NULL;
87 return res;
88}
89
0d533154 90\f
a70083a3 91
0d533154
AD
92/*===================\
93| Low level lexing. |
94\===================*/
943819bf
RS
95
96static void
118fb205 97skip_to_char (int target)
943819bf
RS
98{
99 int c;
100 if (target == '\n')
a0f6b076 101 complain (_(" Skipping to next \\n"));
943819bf 102 else
a0f6b076 103 complain (_(" Skipping to next %c"), target);
943819bf
RS
104
105 do
0d533154 106 c = skip_white_space ();
943819bf 107 while (c != target && c != EOF);
a083fbbf 108 if (c != EOF)
0d533154 109 ungetc (c, finput);
943819bf
RS
110}
111
112
0d533154
AD
/*-----------------------------------------------------------------.
| Read a signed decimal integer from STREAM and return its value.  |
|                                                                  |
| The accepted syntax is an optional `-' followed by any number of |
| digits; with no digit at all the result is 0.  The first         |
| character that does not belong to the number is pushed back onto |
| STREAM.  Magnitudes that do not fit in an int saturate at        |
| INT_MAX instead of overflowing (signed overflow is undefined).   |
`-----------------------------------------------------------------*/

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would `10 * n + digit' exceed INT_MAX?  Once saturated, N
	 stays at INT_MAX for every remaining digit.  */
      if (n > (INT_MAX - digit) / 10)
	n = INT_MAX;
      else
	n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a harmless no-op.  */
  ungetc (c, stream);

  return sign * n;
}
140\f
79282c5a
AD
141/*--------------------------------------------------------------.
142| Get the data type (alternative in the union) of the value for |
143| symbol N in rule RULE. |
144`--------------------------------------------------------------*/
145
146static char *
b29b2ed5 147get_type_name (int n, symbol_list *rule)
79282c5a
AD
148{
149 int i;
150 symbol_list *rp;
151
152 if (n < 0)
153 {
154 complain (_("invalid $ value"));
155 return NULL;
156 }
157
158 rp = rule;
159 i = 0;
160
161 while (i < n)
162 {
163 rp = rp->next;
164 if (rp == NULL || rp->sym == NULL)
165 {
166 complain (_("invalid $ value"));
167 return NULL;
168 }
169 i++;
170 }
171
172 return rp->sym->type_name;
173}
174\f
337bab46
AD
175/*------------------------------------------------------------.
176| Dump the string from FIN to OOUT if non null. MATCH is the |
177| delimiter of the string (either ' or "). |
178`------------------------------------------------------------*/
ae3c3164
AD
179
180static inline void
b6610515 181copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
182{
183 int c;
184
b6610515
RA
185 if (store)
186 obstack_1grow (oout, match);
8c7ebe49 187
4a120d45 188 c = getc (fin);
ae3c3164
AD
189
190 while (c != match)
191 {
192 if (c == EOF)
193 fatal (_("unterminated string at end of file"));
194 if (c == '\n')
195 {
a0f6b076 196 complain (_("unterminated string"));
4a120d45 197 ungetc (c, fin);
ae3c3164
AD
198 c = match; /* invent terminator */
199 continue;
200 }
201
337bab46 202 obstack_1grow (oout, c);
ae3c3164
AD
203
204 if (c == '\\')
205 {
4a120d45 206 c = getc (fin);
ae3c3164
AD
207 if (c == EOF)
208 fatal (_("unterminated string at end of file"));
337bab46 209 obstack_1grow (oout, c);
8c7ebe49 210
ae3c3164
AD
211 if (c == '\n')
212 lineno++;
213 }
214
a70083a3 215 c = getc (fin);
ae3c3164
AD
216 }
217
b6610515
RA
218 if (store)
219 obstack_1grow (oout, c);
220}
221
/* Copy the quoted string that follows on FIN to OOUT, delimiters
   MATCH included.  FIXME: thin wrapper around copy_string2 ().  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
229
b6610515
RA
/* Copy to OOUT the longest run of alphanumeric characters and
   underscores found at the current position of FIN.  The character
   that ends the run is pushed back.  FIXME (kept from the original).  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c = getc (fin);

  while (isalnum (c) || c == '_')
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
ae3c3164 242
2666f928
AD
243
244/*------------------------------------------------------------------.
245| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
246| `/', which might or might not be a comment. In any case, copy |
247| what we saw. |
248`------------------------------------------------------------------*/
ae3c3164
AD
249
250static inline void
2666f928 251copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
252{
253 int cplus_comment;
a70083a3 254 int ended;
550a72a3
AD
255 int c;
256
257 /* We read a `/', output it. */
2666f928 258 obstack_1grow (oout, '/');
550a72a3
AD
259
260 switch ((c = getc (fin)))
261 {
262 case '/':
263 cplus_comment = 1;
264 break;
265 case '*':
266 cplus_comment = 0;
267 break;
268 default:
269 ungetc (c, fin);
270 return;
271 }
ae3c3164 272
2666f928 273 obstack_1grow (oout, c);
550a72a3 274 c = getc (fin);
ae3c3164
AD
275
276 ended = 0;
277 while (!ended)
278 {
279 if (!cplus_comment && c == '*')
280 {
281 while (c == '*')
282 {
2666f928 283 obstack_1grow (oout, c);
550a72a3 284 c = getc (fin);
ae3c3164
AD
285 }
286
287 if (c == '/')
288 {
2666f928 289 obstack_1grow (oout, c);
ae3c3164
AD
290 ended = 1;
291 }
292 }
293 else if (c == '\n')
294 {
295 lineno++;
2666f928 296 obstack_1grow (oout, c);
ae3c3164
AD
297 if (cplus_comment)
298 ended = 1;
299 else
550a72a3 300 c = getc (fin);
ae3c3164
AD
301 }
302 else if (c == EOF)
303 fatal (_("unterminated comment"));
304 else
305 {
2666f928 306 obstack_1grow (oout, c);
550a72a3 307 c = getc (fin);
ae3c3164
AD
308 }
309 }
310}
311
312
a70083a3 313/*-----------------------------------------------------------------.
337bab46 314| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
315| reference to this location. STACK_OFFSET is the number of values |
316| in the current rule so far, which says where to find `$0' with |
317| respect to the top of the stack. |
318`-----------------------------------------------------------------*/
1ff442ca 319
a70083a3 320static inline void
337bab46 321copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 322{
a70083a3 323 int c;
1ff442ca 324
a70083a3
AD
325 c = getc (fin);
326 if (c == '$')
1ff442ca 327 {
ff4423cc 328 obstack_sgrow (oout, "yyloc");
89cab50d 329 locations_flag = 1;
a70083a3
AD
330 }
331 else if (isdigit (c) || c == '-')
332 {
333 int n;
1ff442ca 334
a70083a3
AD
335 ungetc (c, fin);
336 n = read_signed_integer (fin);
11e2beca
AD
337 if (n > stack_offset)
338 complain (_("invalid value: %s%d"), "@", n);
339 else
340 {
341 /* Offset is always 0 if parser has already popped the stack
342 pointer. */
343 obstack_fgrow1 (oout, "yylsp[%d]",
344 n - (semantic_parser ? 0 : stack_offset));
345 locations_flag = 1;
346 }
1ff442ca 347 }
a70083a3 348 else
ff4a34be
AD
349 {
350 char buf[] = "@c";
351 buf[1] = c;
352 complain (_("%s is invalid"), quote (buf));
353 }
1ff442ca 354}
79282c5a
AD
355
356
357/*-------------------------------------------------------------------.
358| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
359| |
360| Possible inputs: $[<TYPENAME>]($|integer) |
361| |
337bab46 362| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
363| the number of values in the current rule so far, which says where |
364| to find `$0' with respect to the top of the stack. |
365`-------------------------------------------------------------------*/
366
367static inline void
337bab46 368copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
369 symbol_list *rule, int stack_offset)
370{
371 int c = getc (fin);
b0ce6046 372 const char *type_name = NULL;
79282c5a 373
f282676b 374 /* Get the type name if explicit. */
79282c5a
AD
375 if (c == '<')
376 {
f282676b 377 read_type_name (fin);
79282c5a
AD
378 type_name = token_buffer;
379 value_components_used = 1;
79282c5a
AD
380 c = getc (fin);
381 }
382
383 if (c == '$')
384 {
ff4423cc 385 obstack_sgrow (oout, "yyval");
8c7ebe49 386
79282c5a
AD
387 if (!type_name)
388 type_name = get_type_name (0, rule);
389 if (type_name)
337bab46 390 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
391 if (!type_name && typed)
392 complain (_("$$ of `%s' has no declared type"),
393 rule->sym->tag);
394 }
395 else if (isdigit (c) || c == '-')
396 {
397 int n;
398 ungetc (c, fin);
399 n = read_signed_integer (fin);
400
11e2beca
AD
401 if (n > stack_offset)
402 complain (_("invalid value: %s%d"), "$", n);
403 else
404 {
405 if (!type_name && n > 0)
406 type_name = get_type_name (n, rule);
407
408 /* Offset is always 0 if parser has already popped the stack
409 pointer. */
410 obstack_fgrow1 (oout, "yyvsp[%d]",
411 n - (semantic_parser ? 0 : stack_offset));
412
413 if (type_name)
414 obstack_fgrow1 (oout, ".%s", type_name);
415 if (!type_name && typed)
416 complain (_("$%d of `%s' has no declared type"),
417 n, rule->sym->tag);
418 }
79282c5a
AD
419 }
420 else
421 {
422 char buf[] = "$c";
423 buf[1] = c;
424 complain (_("%s is invalid"), quote (buf));
425 }
426}
a70083a3
AD
427\f
428/*-------------------------------------------------------------------.
429| Copy the contents of a `%{ ... %}' into the definitions file. The |
430| `%{' has already been read. Return after reading the `%}'. |
431`-------------------------------------------------------------------*/
1ff442ca 432
4a120d45 433static void
118fb205 434copy_definition (void)
1ff442ca 435{
a70083a3 436 int c;
ae3c3164 437 /* -1 while reading a character if prev char was %. */
a70083a3 438 int after_percent;
1ff442ca 439
89cab50d 440 if (!no_lines_flag)
25b222fa
MA
441 {
442 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 443 lineno, quotearg_style (c_quoting_style,
b7c49edf 444 muscle_find ("filename")));
25b222fa 445 }
1ff442ca
NF
446
447 after_percent = 0;
448
ae3c3164 449 c = getc (finput);
1ff442ca
NF
450
451 for (;;)
452 {
453 switch (c)
454 {
455 case '\n':
dd60faec 456 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
457 lineno++;
458 break;
459
460 case '%':
a70083a3 461 after_percent = -1;
1ff442ca 462 break;
a083fbbf 463
1ff442ca
NF
464 case '\'':
465 case '"':
337bab46 466 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
467 break;
468
469 case '/':
337bab46 470 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
471 break;
472
473 case EOF:
a70083a3 474 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
475
476 default:
dd60faec 477 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
478 }
479
a70083a3 480 c = getc (finput);
1ff442ca
NF
481
482 if (after_percent)
483 {
484 if (c == '}')
485 return;
dd60faec 486 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
487 }
488 after_percent = 0;
1ff442ca 489 }
1ff442ca
NF
490}
491
492
d7020c20
AD
493/*-------------------------------------------------------------------.
494| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
495| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
496| are reversed. |
497`-------------------------------------------------------------------*/
1ff442ca 498
4a120d45 499static void
d7020c20 500parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 501{
342b8b6e
AD
502 token_t token = tok_undef;
503 char *typename = NULL;
1ff442ca 504
1e9798d5
AD
505 /* The symbol being defined. */
506 struct bucket *symbol = NULL;
507
508 /* After `%token' and `%nterm', any number of symbols maybe be
509 defined. */
1ff442ca
NF
510 for (;;)
511 {
e6011337
JT
512 int tmp_char = ungetc (skip_white_space (), finput);
513
1e9798d5
AD
514 /* `%' (for instance from `%token', or from `%%' etc.) is the
515 only valid means to end this declaration. */
e6011337 516 if (tmp_char == '%')
1ff442ca 517 return;
e6011337 518 if (tmp_char == EOF)
a0f6b076 519 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 520
a70083a3 521 token = lex ();
511e79b3 522 if (token == tok_comma)
943819bf
RS
523 {
524 symbol = NULL;
525 continue;
526 }
511e79b3 527 if (token == tok_typename)
1ff442ca 528 {
95e36146 529 typename = xstrdup (token_buffer);
1ff442ca 530 value_components_used = 1;
943819bf
RS
531 symbol = NULL;
532 }
511e79b3 533 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 534 {
8e03724b
AD
535 if (symval->alias)
536 warn (_("symbol `%s' used more than once as a literal string"),
537 symval->tag);
538 else if (symbol->alias)
539 warn (_("symbol `%s' given more than one literal string"),
540 symbol->tag);
541 else
542 {
543 symval->class = token_sym;
544 symval->type_name = typename;
545 symval->user_token_number = symbol->user_token_number;
546 symbol->user_token_number = SALIAS;
547 symval->alias = symbol;
548 symbol->alias = symval;
549 /* symbol and symval combined are only one symbol */
550 nsyms--;
551 }
8e03724b 552 symbol = NULL;
1ff442ca 553 }
511e79b3 554 else if (token == tok_identifier)
1ff442ca
NF
555 {
556 int oldclass = symval->class;
943819bf 557 symbol = symval;
1ff442ca 558
943819bf 559 if (symbol->class == what_is_not)
a0f6b076 560 complain (_("symbol %s redefined"), symbol->tag);
943819bf 561 symbol->class = what_is;
d7020c20 562 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 563 symbol->value = nvars++;
1ff442ca
NF
564
565 if (typename)
566 {
943819bf
RS
567 if (symbol->type_name == NULL)
568 symbol->type_name = typename;
a70083a3 569 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 570 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
571 }
572 }
511e79b3 573 else if (symbol && token == tok_number)
a70083a3 574 {
943819bf 575 symbol->user_token_number = numval;
b7c49edf
AD
576 /* User defined EOF token? */
577 if (numval == 0)
578 eoftoken = symbol;
a70083a3 579 }
1ff442ca 580 else
943819bf 581 {
a0f6b076 582 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
583 token_buffer,
584 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 585 skip_to_char ('%');
943819bf 586 }
1ff442ca
NF
587 }
588
589}
590
1ff442ca 591
d7020c20
AD
592/*------------------------------.
593| Parse what comes after %start |
594`------------------------------*/
1ff442ca 595
4a120d45 596static void
118fb205 597parse_start_decl (void)
1ff442ca
NF
598{
599 if (start_flag)
27821bff 600 complain (_("multiple %s declarations"), "%start");
511e79b3 601 if (lex () != tok_identifier)
27821bff 602 complain (_("invalid %s declaration"), "%start");
943819bf
RS
603 else
604 {
605 start_flag = 1;
606 startval = symval;
607 }
1ff442ca
NF
608}
609
a70083a3
AD
610/*-----------------------------------------------------------.
611| read in a %type declaration and record its information for |
612| get_type_name to access |
613`-----------------------------------------------------------*/
614
615static void
616parse_type_decl (void)
617{
a70083a3
AD
618 char *name;
619
511e79b3 620 if (lex () != tok_typename)
a70083a3
AD
621 {
622 complain ("%s", _("%type declaration has no <typename>"));
623 skip_to_char ('%');
624 return;
625 }
626
95e36146 627 name = xstrdup (token_buffer);
a70083a3
AD
628
629 for (;;)
630 {
f17bcd1f 631 token_t t;
a70083a3
AD
632 int tmp_char = ungetc (skip_white_space (), finput);
633
634 if (tmp_char == '%')
635 return;
636 if (tmp_char == EOF)
637 fatal (_("Premature EOF after %s"), token_buffer);
638
639 t = lex ();
640
641 switch (t)
1ff442ca
NF
642 {
643
511e79b3
AD
644 case tok_comma:
645 case tok_semicolon:
1ff442ca
NF
646 break;
647
511e79b3 648 case tok_identifier:
1ff442ca
NF
649 if (symval->type_name == NULL)
650 symval->type_name = name;
a70083a3 651 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 652 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
653
654 break;
655
656 default:
a0f6b076
AD
657 complain (_("invalid %%type declaration due to item: %s"),
658 token_buffer);
a70083a3 659 skip_to_char ('%');
1ff442ca
NF
660 }
661 }
662}
663
664
665
d7020c20
AD
666/*----------------------------------------------------------------.
667| Read in a %left, %right or %nonassoc declaration and record its |
668| information. |
669`----------------------------------------------------------------*/
1ff442ca 670
4a120d45 671static void
d7020c20 672parse_assoc_decl (associativity assoc)
1ff442ca 673{
a70083a3
AD
674 char *name = NULL;
675 int prev = 0;
1ff442ca 676
a70083a3 677 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 678
1ff442ca
NF
679 for (;;)
680 {
f17bcd1f 681 token_t t;
e6011337 682 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 683
e6011337 684 if (tmp_char == '%')
1ff442ca 685 return;
e6011337 686 if (tmp_char == EOF)
a0f6b076 687 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 688
a70083a3 689 t = lex ();
1ff442ca
NF
690
691 switch (t)
692 {
511e79b3 693 case tok_typename:
95e36146 694 name = xstrdup (token_buffer);
1ff442ca
NF
695 break;
696
511e79b3 697 case tok_comma:
1ff442ca
NF
698 break;
699
511e79b3 700 case tok_identifier:
1ff442ca 701 if (symval->prec != 0)
a0f6b076 702 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
703 symval->prec = lastprec;
704 symval->assoc = assoc;
d7020c20 705 if (symval->class == nterm_sym)
a0f6b076 706 complain (_("symbol %s redefined"), symval->tag);
d7020c20 707 symval->class = token_sym;
1ff442ca 708 if (name)
a70083a3 709 { /* record the type, if one is specified */
1ff442ca
NF
710 if (symval->type_name == NULL)
711 symval->type_name = name;
a70083a3 712 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 713 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
714 }
715 break;
716
511e79b3
AD
717 case tok_number:
718 if (prev == tok_identifier)
a70083a3 719 {
1ff442ca 720 symval->user_token_number = numval;
a70083a3
AD
721 }
722 else
723 {
724 complain (_
725 ("invalid text (%s) - number should be after identifier"),
726token_buffer);
727 skip_to_char ('%');
728 }
1ff442ca
NF
729 break;
730
511e79b3 731 case tok_semicolon:
1ff442ca
NF
732 return;
733
734 default:
a0f6b076 735 complain (_("unexpected item: %s"), token_buffer);
a70083a3 736 skip_to_char ('%');
1ff442ca
NF
737 }
738
739 prev = t;
1ff442ca
NF
740 }
741}
742
743
744
dd60faec 745/*--------------------------------------------------------------.
180d45ba
PB
746| Copy the union declaration into the stype muscle |
747| (and fdefines), where it is made into the definition of |
748| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 749`--------------------------------------------------------------*/
1ff442ca 750
4a120d45 751static void
118fb205 752parse_union_decl (void)
1ff442ca 753{
a70083a3
AD
754 int c;
755 int count = 0;
428046f8 756 bool done = FALSE;
180d45ba 757 struct obstack union_obstack;
1ff442ca 758 if (typed)
27821bff 759 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
760
761 typed = 1;
762
180d45ba
PB
763 obstack_init (&union_obstack);
764 obstack_sgrow (&union_obstack, "union");
1ff442ca 765
428046f8 766 while (!done)
1ff442ca 767 {
428046f8
AD
768 c = xgetc (finput);
769
342b8b6e
AD
770 /* If C contains '/', it is output by copy_comment (). */
771 if (c != '/')
2666f928 772 obstack_1grow (&union_obstack, c);
1ff442ca
NF
773
774 switch (c)
775 {
776 case '\n':
777 lineno++;
778 break;
779
780 case '/':
2666f928 781 copy_comment (finput, &union_obstack);
1ff442ca
NF
782 break;
783
1ff442ca
NF
784 case '{':
785 count++;
786 break;
787
788 case '}':
428046f8 789 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 790 if (count == 0)
27821bff 791 complain (_("unmatched %s"), "`}'");
1ff442ca 792 count--;
428046f8
AD
793 if (!count)
794 done = TRUE;
795 break;
1ff442ca 796 }
1ff442ca 797 }
180d45ba 798
428046f8
AD
799 /* JF don't choke on trailing semi */
800 c = skip_white_space ();
801 if (c != ';')
802 ungetc (c, finput);
803 obstack_1grow (&union_obstack, 0);
804 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
805}
806
d7020c20
AD
807
808/*-------------------------------------------------------.
809| Parse the declaration %expect N which says to expect N |
810| shift-reduce conflicts. |
811`-------------------------------------------------------*/
1ff442ca 812
4a120d45 813static void
118fb205 814parse_expect_decl (void)
1ff442ca 815{
131e2fef 816 int c = skip_white_space ();
1ff442ca
NF
817 ungetc (c, finput);
818
131e2fef 819 if (!isdigit (c))
79282c5a 820 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
821 else
822 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
823}
824
a70083a3
AD
825
826/*-------------------------------------------------------------------.
827| Parse what comes after %thong. the full syntax is |
828| |
829| %thong <type> token number literal |
830| |
831| the <type> or number may be omitted. The number specifies the |
832| user_token_number. |
833| |
834| Two symbols are entered in the table, one for the token symbol and |
835| one for the literal. Both are given the <type>, if any, from the |
836| declaration. The ->user_token_number of the first is SALIAS and |
837| the ->user_token_number of the second is set to the number, if |
838| any, from the declaration. The two symbols are linked via |
839| pointers in their ->alias fields. |
840| |
841| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
842| only the literal string is retained it is the literal string that |
843| is output to yytname |
844`-------------------------------------------------------------------*/
845
846static void
847parse_thong_decl (void)
7b306f52 848{
f17bcd1f 849 token_t token;
a70083a3
AD
850 struct bucket *symbol;
851 char *typename = 0;
6b7e85b9 852 int usrtoknum = SUNDEF;
7b306f52 853
a70083a3 854 token = lex (); /* fetch typename or first token */
511e79b3 855 if (token == tok_typename)
7b306f52 856 {
95e36146 857 typename = xstrdup (token_buffer);
a70083a3
AD
858 value_components_used = 1;
859 token = lex (); /* fetch first token */
7b306f52 860 }
7b306f52 861
a70083a3 862 /* process first token */
7b306f52 863
511e79b3 864 if (token != tok_identifier)
a70083a3
AD
865 {
866 complain (_("unrecognized item %s, expected an identifier"),
867 token_buffer);
868 skip_to_char ('%');
869 return;
7b306f52 870 }
d7020c20 871 symval->class = token_sym;
a70083a3
AD
872 symval->type_name = typename;
873 symval->user_token_number = SALIAS;
874 symbol = symval;
7b306f52 875
a70083a3 876 token = lex (); /* get number or literal string */
1ff442ca 877
511e79b3 878 if (token == tok_number)
943819bf 879 {
a70083a3
AD
880 usrtoknum = numval;
881 token = lex (); /* okay, did number, now get literal */
943819bf 882 }
1ff442ca 883
a70083a3 884 /* process literal string token */
1ff442ca 885
511e79b3 886 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 887 {
a70083a3
AD
888 complain (_("expected string constant instead of %s"), token_buffer);
889 skip_to_char ('%');
890 return;
1ff442ca 891 }
d7020c20 892 symval->class = token_sym;
a70083a3
AD
893 symval->type_name = typename;
894 symval->user_token_number = usrtoknum;
1ff442ca 895
a70083a3
AD
896 symval->alias = symbol;
897 symbol->alias = symval;
1ff442ca 898
79282c5a
AD
899 /* symbol and symval combined are only one symbol. */
900 nsyms--;
a70083a3 901}
3cef001a 902
11e2beca 903
b6610515 904static void
11d82f03 905parse_muscle_decl (void)
b6610515
RA
906{
907 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
908 char *muscle_key;
909 char *muscle_value;
b6610515
RA
910
911 /* Read key. */
912 if (!isalpha (ch) && ch != '_')
913 {
914 complain (_("invalid %s declaration"), "%define");
915 skip_to_char ('%');
916 return;
917 }
11d82f03
MA
918 copy_identifier (finput, &muscle_obstack);
919 obstack_1grow (&muscle_obstack, 0);
920 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 921
b6610515
RA
922 /* Read value. */
923 ch = skip_white_space ();
924 if (ch != '"')
925 {
926 ungetc (ch, finput);
927 if (ch != EOF)
928 {
929 complain (_("invalid %s declaration"), "%define");
930 skip_to_char ('%');
931 return;
932 }
933 else
934 fatal (_("Premature EOF after %s"), "\"");
935 }
11d82f03
MA
936 copy_string2 (finput, &muscle_obstack, '"', 0);
937 obstack_1grow (&muscle_obstack, 0);
938 muscle_value = obstack_finish (&muscle_obstack);
b6610515 939
b6610515 940 /* Store the (key, value) pair in the environment. */
11d82f03 941 muscle_insert (muscle_key, muscle_value);
b6610515
RA
942}
943
2ba3b73c 944
426cf563
MA
945
946/*---------------------------------.
a870c567 947| Parse a double quoted parameter. |
426cf563
MA
948`---------------------------------*/
949
950static const char *
951parse_dquoted_param (const char *from)
952{
953 struct obstack param_obstack;
954 const char *param = NULL;
955 int c;
956
957 obstack_init (&param_obstack);
958 c = skip_white_space ();
959
960 if (c != '"')
961 {
962 complain (_("invalid %s declaration"), from);
963 ungetc (c, finput);
964 skip_to_char ('%');
965 return NULL;
966 }
967
2648a72d
AD
968 while ((c = literalchar ()) != '"')
969 obstack_1grow (&param_obstack, c);
a870c567 970
426cf563
MA
971 obstack_1grow (&param_obstack, '\0');
972 param = obstack_finish (&param_obstack);
973
974 if (c != '"' || strlen (param) == 0)
975 {
976 complain (_("invalid %s declaration"), from);
977 if (c != '"')
978 ungetc (c, finput);
979 skip_to_char ('%');
980 return NULL;
981 }
982
983 return param;
984}
985
2ba3b73c
MA
986/*----------------------------------.
987| Parse what comes after %skeleton. |
988`----------------------------------*/
989
a870c567 990static void
2ba3b73c
MA
991parse_skel_decl (void)
992{
426cf563 993 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
994}
995
a70083a3
AD
996/*----------------------------------------------------------------.
997| Read from finput until `%%' is seen. Discard the `%%'. Handle |
998| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 999| groups to ATTRS_OBSTACK. |
a70083a3 1000`----------------------------------------------------------------*/
1ff442ca 1001
4a120d45 1002static void
a70083a3 1003read_declarations (void)
1ff442ca 1004{
a70083a3 1005 for (;;)
1ff442ca 1006 {
951366c1 1007 int c = skip_white_space ();
1ff442ca 1008
a70083a3
AD
1009 if (c == '%')
1010 {
951366c1 1011 token_t tok = parse_percent_token ();
1ff442ca 1012
a70083a3 1013 switch (tok)
943819bf 1014 {
511e79b3 1015 case tok_two_percents:
a70083a3 1016 return;
1ff442ca 1017
511e79b3 1018 case tok_percent_left_curly:
a70083a3
AD
1019 copy_definition ();
1020 break;
1ff442ca 1021
511e79b3 1022 case tok_token:
d7020c20 1023 parse_token_decl (token_sym, nterm_sym);
a70083a3 1024 break;
1ff442ca 1025
511e79b3 1026 case tok_nterm:
d7020c20 1027 parse_token_decl (nterm_sym, token_sym);
a70083a3 1028 break;
1ff442ca 1029
511e79b3 1030 case tok_type:
a70083a3
AD
1031 parse_type_decl ();
1032 break;
1ff442ca 1033
511e79b3 1034 case tok_start:
a70083a3
AD
1035 parse_start_decl ();
1036 break;
118fb205 1037
511e79b3 1038 case tok_union:
a70083a3
AD
1039 parse_union_decl ();
1040 break;
1ff442ca 1041
511e79b3 1042 case tok_expect:
a70083a3
AD
1043 parse_expect_decl ();
1044 break;
6deb4447 1045
511e79b3 1046 case tok_thong:
a70083a3
AD
1047 parse_thong_decl ();
1048 break;
d7020c20 1049
511e79b3 1050 case tok_left:
d7020c20 1051 parse_assoc_decl (left_assoc);
a70083a3 1052 break;
1ff442ca 1053
511e79b3 1054 case tok_right:
d7020c20 1055 parse_assoc_decl (right_assoc);
a70083a3 1056 break;
1ff442ca 1057
511e79b3 1058 case tok_nonassoc:
d7020c20 1059 parse_assoc_decl (non_assoc);
a70083a3 1060 break;
1ff442ca 1061
b6610515 1062 case tok_define:
11d82f03 1063 parse_muscle_decl ();
b6610515 1064 break;
342b8b6e 1065
2ba3b73c
MA
1066 case tok_skel:
1067 parse_skel_decl ();
1068 break;
b6610515 1069
511e79b3 1070 case tok_noop:
a70083a3 1071 break;
1ff442ca 1072
951366c1
AD
1073 case tok_stropt:
1074 case tok_intopt:
1075 case tok_obsolete:
951366c1
AD
1076 abort ();
1077 break;
1078
e0c40012 1079 case tok_illegal:
a70083a3
AD
1080 default:
1081 complain (_("unrecognized: %s"), token_buffer);
1082 skip_to_char ('%');
1083 }
1084 }
1085 else if (c == EOF)
1086 fatal (_("no input grammar"));
1087 else
1088 {
ff4a34be
AD
1089 char buf[] = "c";
1090 buf[0] = c;
1091 complain (_("unknown character: %s"), quote (buf));
a70083a3 1092 skip_to_char ('%');
1ff442ca 1093 }
1ff442ca 1094 }
1ff442ca 1095}
a70083a3
AD
1096\f
1097/*-------------------------------------------------------------------.
1098| Assuming that a `{' has just been seen, copy everything up to the |
1099| matching `}' into the actions file. STACK_OFFSET is the number of |
1100| values in the current rule so far, which says where to find `$0' |
1101| with respect to the top of the stack. |
14d293ac 1102| |
11e2beca
AD
1103| This routine is used both for actions and guards. Only |
1104| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1105| pointers to relevant portions inside this obstack. |
a70083a3 1106`-------------------------------------------------------------------*/
1ff442ca 1107
4a120d45 1108static void
14d293ac 1109parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1110{
a70083a3 1111 int c;
a70083a3 1112 int count;
1ff442ca 1113
1ff442ca 1114 count = 1;
1ff442ca
NF
1115 while (count > 0)
1116 {
14d293ac
AD
1117 while ((c = getc (finput)) != '}')
1118 switch (c)
1119 {
1120 case '\n':
1121 obstack_1grow (&action_obstack, c);
1122 lineno++;
1123 break;
1ff442ca 1124
14d293ac
AD
1125 case '{':
1126 obstack_1grow (&action_obstack, c);
1127 count++;
1128 break;
1ff442ca 1129
14d293ac
AD
1130 case '\'':
1131 case '"':
1132 copy_string (finput, &action_obstack, c);
1133 break;
1ff442ca 1134
14d293ac
AD
1135 case '/':
1136 copy_comment (finput, &action_obstack);
1137 break;
1ff442ca 1138
14d293ac
AD
1139 case '$':
1140 copy_dollar (finput, &action_obstack,
1141 rule, stack_offset);
1142 break;
1ff442ca 1143
14d293ac
AD
1144 case '@':
1145 copy_at (finput, &action_obstack,
1146 stack_offset);
1147 break;
a70083a3 1148
14d293ac
AD
1149 case EOF:
1150 fatal (_("unmatched %s"), "`{'");
a70083a3 1151
14d293ac
AD
1152 default:
1153 obstack_1grow (&action_obstack, c);
1154 }
a70083a3 1155
14d293ac 1156 /* Above loop exits when C is '}'. */
a70083a3
AD
1157 if (--count)
1158 {
8c7ebe49 1159 obstack_1grow (&action_obstack, c);
a70083a3
AD
1160 c = getc (finput);
1161 }
1162 }
1163
3f96f4dc 1164 obstack_1grow (&action_obstack, '\0');
a70083a3 1165}
14d293ac 1166
a70083a3
AD
1167
1168static void
14d293ac 1169parse_action (symbol_list *rule, int stack_offset)
a70083a3 1170{
14d293ac
AD
1171 rule->action_line = lineno;
1172 parse_braces (rule, stack_offset);
1173 rule->action = obstack_finish (&action_obstack);
1174}
a70083a3 1175
a70083a3 1176
14d293ac
AD
1177static void
1178parse_guard (symbol_list *rule, int stack_offset)
1179{
1180 token_t t = lex ();
1181 if (t != tok_left_curly)
1182 complain (_("invalid %s declaration"), "%guard");
f499b062 1183 rule->guard_line = lineno;
14d293ac
AD
1184 parse_braces (rule, stack_offset);
1185 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1186}
14d293ac 1187
a70083a3
AD
1188\f
1189
a70083a3
AD
1190/*-------------------------------------------------------------------.
1191| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1192| with the user's names. |
1193`-------------------------------------------------------------------*/
1ff442ca 1194
4a120d45 1195static bucket *
118fb205 1196gensym (void)
1ff442ca 1197{
274d42ce
AD
1198 /* Incremented for each generated symbol */
1199 static int gensym_count = 0;
1200 static char buf[256];
1201
a70083a3 1202 bucket *sym;
1ff442ca 1203
274d42ce
AD
1204 sprintf (buf, "@%d", ++gensym_count);
1205 token_buffer = buf;
a70083a3 1206 sym = getsym (token_buffer);
d7020c20 1207 sym->class = nterm_sym;
1ff442ca 1208 sym->value = nvars++;
36281465 1209 return sym;
1ff442ca 1210}
a70083a3 1211\f
107f7dfb
AD
1212/*-------------------------------------------------------------------.
1213| Parse the input grammar into a one symbol_list structure. Each |
1214| rule is represented by a sequence of symbols: the left hand side |
1215| followed by the contents of the right hand side, followed by a |
1216| null pointer instead of a symbol to terminate the rule. The next |
1217| symbol is the lhs of the following rule. |
1218| |
1219| All guards and actions are copied out to the appropriate files, |
1220| labelled by the rule number they apply to. |
1221| |
1222| Bison used to allow some %directives in the rules sections, but |
1223| this is no longer consider appropriate: (i) the documented grammar |
1224| doesn't claim it, (ii), it would promote bad style, (iii), error |
1225| recovery for %directives consists in skipping the junk until a `%' |
1226| is seen and helrp synchronizing. This scheme is definitely wrong |
1227| in the rules section. |
1228`-------------------------------------------------------------------*/
1ff442ca 1229
4a120d45 1230static void
118fb205 1231readgram (void)
1ff442ca 1232{
f17bcd1f 1233 token_t t;
a70083a3 1234 bucket *lhs = NULL;
107f7dfb
AD
1235 symbol_list *p = NULL;
1236 symbol_list *p1 = NULL;
a70083a3 1237 bucket *bp;
1ff442ca 1238
ff4a34be
AD
1239 /* Points to first symbol_list of current rule. its symbol is the
1240 lhs of the rule. */
107f7dfb 1241 symbol_list *crule = NULL;
ff4a34be 1242 /* Points to the symbol_list preceding crule. */
107f7dfb 1243 symbol_list *crule1 = NULL;
1ff442ca 1244
a70083a3 1245 t = lex ();
1ff442ca 1246
511e79b3 1247 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1248 if (t == tok_identifier || t == tok_bar)
1249 {
1250 int action_flag = 0;
1251 /* Number of symbols in rhs of this rule so far */
1252 int rulelength = 0;
1253 int xactions = 0; /* JF for error checking */
1254 bucket *first_rhs = 0;
1255
1256 if (t == tok_identifier)
1257 {
1258 lhs = symval;
1259
1260 if (!start_flag)
1261 {
1262 startval = lhs;
1263 start_flag = 1;
1264 }
1ff442ca 1265
107f7dfb
AD
1266 t = lex ();
1267 if (t != tok_colon)
1268 {
1269 complain (_("ill-formed rule: initial symbol not followed by colon"));
1270 unlex (t);
1271 }
1272 }
1273
1274 if (nrules == 0 && t == tok_bar)
1275 {
1276 complain (_("grammar starts with vertical bar"));
1277 lhs = symval; /* BOGUS: use a random symval */
1278 }
1279 /* start a new rule and record its lhs. */
1280
1281 nrules++;
1282 nitems++;
1283
1284 p = symbol_list_new (lhs);
1285
1286 crule1 = p1;
1287 if (p1)
1288 p1->next = p;
1289 else
1290 grammar = p;
1ff442ca 1291
107f7dfb
AD
1292 p1 = p;
1293 crule = p;
1ff442ca 1294
107f7dfb 1295 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1296
107f7dfb
AD
1297 if (lhs->class == unknown_sym)
1298 {
1299 lhs->class = nterm_sym;
1300 lhs->value = nvars;
1301 nvars++;
1302 }
1303 else if (lhs->class == token_sym)
1304 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1305
107f7dfb 1306 /* read the rhs of the rule. */
1ff442ca 1307
107f7dfb
AD
1308 for (;;)
1309 {
1310 t = lex ();
1311 if (t == tok_prec)
1312 {
1313 t = lex ();
1314 crule->ruleprec = symval;
1315 t = lex ();
1316 }
1317
1318 if (!(t == tok_identifier || t == tok_left_curly))
1319 break;
1ff442ca 1320
107f7dfb
AD
1321 /* If next token is an identifier, see if a colon follows it.
1322 If one does, exit this rule now. */
1323 if (t == tok_identifier)
1324 {
1325 bucket *ssave;
1326 token_t t1;
1327
1328 ssave = symval;
1329 t1 = lex ();
1330 unlex (t1);
1331 symval = ssave;
1332 if (t1 == tok_colon)
1333 break;
1334
1335 if (!first_rhs) /* JF */
1336 first_rhs = symval;
1337 /* Not followed by colon =>
1338 process as part of this rule's rhs. */
1339 }
1340
1341 /* If we just passed an action, that action was in the middle
1342 of a rule, so make a dummy rule to reduce it to a
1343 non-terminal. */
1344 if (action_flag)
1345 {
1346 /* Since the action was written out with this rule's
1347 number, we must give the new rule this number by
1348 inserting the new rule before it. */
1349
1350 /* Make a dummy nonterminal, a gensym. */
1351 bucket *sdummy = gensym ();
1352
1353 /* Make a new rule, whose body is empty, before the
1354 current one, so that the action just read can
1355 belong to it. */
1356 nrules++;
1357 nitems++;
1358 p = symbol_list_new (sdummy);
1359 /* Attach its lineno to that of the host rule. */
1360 p->line = crule->line;
1361 if (crule1)
1362 crule1->next = p;
1363 else
1364 grammar = p;
1365 /* End of the rule. */
1366 crule1 = symbol_list_new (NULL);
1367 crule1->next = crule;
1368
1369 p->next = crule1;
1370
1371 /* Insert the dummy generated by that rule into this
1372 rule. */
1373 nitems++;
1374 p = symbol_list_new (sdummy);
1375 p1->next = p;
1376 p1 = p;
1377
1378 action_flag = 0;
1379 }
1380
1381 if (t == tok_identifier)
1382 {
1383 nitems++;
1384 p = symbol_list_new (symval);
1385 p1->next = p;
1386 p1 = p;
1387 }
1388 else /* handle an action. */
1389 {
14d293ac 1390 parse_action (crule, rulelength);
107f7dfb
AD
1391 action_flag = 1;
1392 xactions++; /* JF */
1393 }
1394 rulelength++;
1395 } /* end of read rhs of rule */
1396
1397 /* Put an empty link in the list to mark the end of this rule */
1398 p = symbol_list_new (NULL);
1399 p1->next = p;
1400 p1 = p;
1401
1402 if (t == tok_prec)
1403 {
1404 complain (_("two @prec's in a row"));
1405 t = lex ();
1406 crule->ruleprec = symval;
1407 t = lex ();
1408 }
f499b062 1409
107f7dfb
AD
1410 if (t == tok_guard)
1411 {
1412 if (!semantic_parser)
1413 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1414
14d293ac 1415 parse_guard (crule, rulelength);
a70083a3 1416 t = lex ();
107f7dfb 1417 }
f499b062
AD
1418
1419 if (t == tok_left_curly)
107f7dfb
AD
1420 {
1421 /* This case never occurs -wjh */
1422 if (action_flag)
1423 complain (_("two actions at end of one rule"));
14d293ac 1424 parse_action (crule, rulelength);
107f7dfb
AD
1425 action_flag = 1;
1426 xactions++; /* -wjh */
1427 t = lex ();
1428 }
1429 /* If $$ is being set in default way, report if any type
1430 mismatch. */
1431 else if (!xactions
1432 && first_rhs && lhs->type_name != first_rhs->type_name)
1433 {
1434 if (lhs->type_name == 0
1435 || first_rhs->type_name == 0
1436 || strcmp (lhs->type_name, first_rhs->type_name))
1437 complain (_("type clash (`%s' `%s') on default action"),
1438 lhs->type_name ? lhs->type_name : "",
1439 first_rhs->type_name ? first_rhs->type_name : "");
1440 }
1441 /* Warn if there is no default for $$ but we need one. */
1442 else if (!xactions && !first_rhs && lhs->type_name != 0)
1443 complain (_("empty rule for typed nonterminal, and no action"));
1444 if (t == tok_semicolon)
a70083a3 1445 t = lex ();
107f7dfb
AD
1446 }
1447 else
1448 {
1449 complain (_("invalid input: %s"), quote (token_buffer));
1450 t = lex ();
1451 }
943819bf 1452
1ff442ca 1453
943819bf
RS
1454 /* grammar has been read. Do some checking */
1455
1ff442ca 1456 if (nsyms > MAXSHORT)
a0f6b076
AD
1457 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1458 MAXSHORT);
1ff442ca 1459 if (nrules == 0)
a0f6b076 1460 fatal (_("no rules in the input grammar"));
1ff442ca 1461
1ff442ca
NF
1462 /* Report any undefined symbols and consider them nonterminals. */
1463
1464 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1465 if (bp->class == unknown_sym)
1ff442ca 1466 {
a70083a3
AD
1467 complain (_
1468 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1469 bp->tag);
d7020c20 1470 bp->class = nterm_sym;
1ff442ca
NF
1471 bp->value = nvars++;
1472 }
1473
1474 ntokens = nsyms - nvars;
1475}
ff48177d
MA
1476
1477/* At the end of the grammar file, some C source code must
63c2d5de 1478 be stored. It is going to be associated to the epilogue
ff48177d
MA
1479 directive. */
1480static void
1481read_additionnal_code (void)
1482{
1483 char c;
63c2d5de 1484 struct obstack el_obstack;
342b8b6e 1485
63c2d5de 1486 obstack_init (&el_obstack);
ff48177d 1487
710ddc4f
MA
1488 if (!no_lines_flag)
1489 {
1490 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1491 lineno, quotearg_style (c_quoting_style,
b7c49edf 1492 muscle_find ("filename")));
710ddc4f
MA
1493 }
1494
ff48177d 1495 while ((c = getc (finput)) != EOF)
63c2d5de 1496 obstack_1grow (&el_obstack, c);
342b8b6e 1497
63c2d5de 1498 obstack_1grow (&el_obstack, 0);
11d82f03 1499 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1500}
1501
a70083a3 1502\f
037ca2f1
AD
1503/*------------------------------------------------------------------.
1504| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1505| number. |
1506`------------------------------------------------------------------*/
1507
1508static void
1509token_translations_init (void)
1510{
1511 bucket *bp = NULL;
1512 int i;
1513
1514 token_translations = XCALLOC (short, max_user_token_number + 1);
1515
1516 /* Initialize all entries for literal tokens to 2, the internal
1517 token number for $undefined., which represents all invalid
1518 inputs. */
1519 for (i = 0; i <= max_user_token_number; i++)
1520 token_translations[i] = 2;
1521
1522 for (bp = firstsymbol; bp; bp = bp->next)
1523 {
1524 /* Non-terminal? */
1525 if (bp->value >= ntokens)
1526 continue;
1527 /* A token string alias? */
1528 if (bp->user_token_number == SALIAS)
1529 continue;
6b7e85b9
AD
1530
1531 assert (bp->user_token_number != SUNDEF);
1532
037ca2f1
AD
1533 /* A token which translation has already been set? */
1534 if (token_translations[bp->user_token_number] != 2)
1535 complain (_("tokens %s and %s both assigned number %d"),
1536 tags[token_translations[bp->user_token_number]],
1537 bp->tag, bp->user_token_number);
1538 token_translations[bp->user_token_number] = bp->value;
1539 }
1540}
1541
1542
a70083a3
AD
1543/*------------------------------------------------------------------.
1544| Assign symbol numbers, and write definition of token names into |
b2ca4022 1545| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1546| of symbols. |
1547`------------------------------------------------------------------*/
1ff442ca 1548
4a120d45 1549static void
118fb205 1550packsymbols (void)
1ff442ca 1551{
342b8b6e 1552 bucket *bp = NULL;
a70083a3 1553 int tokno = 1;
a70083a3 1554 int last_user_token_number;
1ff442ca 1555
d7913476 1556 tags = XCALLOC (char *, nsyms + 1);
d7913476 1557 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1558
d7913476
AD
1559 sprec = XCALLOC (short, nsyms);
1560 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1561
1562 max_user_token_number = 256;
1563 last_user_token_number = 256;
1564
1565 for (bp = firstsymbol; bp; bp = bp->next)
1566 {
d7020c20 1567 if (bp->class == nterm_sym)
1ff442ca
NF
1568 {
1569 bp->value += ntokens;
1570 }
943819bf
RS
1571 else if (bp->alias)
1572 {
b7c49edf
AD
1573 /* This symbol and its alias are a single token defn.
1574 Allocate a tokno, and assign to both check agreement of
1575 prec and assoc fields and make both the same */
1576 if (bp->value == -1)
1577 {
1578 if (bp == eoftoken || bp->alias == eoftoken)
1579 bp->value = bp->alias->value = 0;
1580 else
1581 {
1582 bp->value = bp->alias->value = tokno++;
1583 }
1584 }
943819bf 1585
0a6384c4
AD
1586 if (bp->prec != bp->alias->prec)
1587 {
1588 if (bp->prec != 0 && bp->alias->prec != 0
1589 && bp->user_token_number == SALIAS)
a0f6b076
AD
1590 complain (_("conflicting precedences for %s and %s"),
1591 bp->tag, bp->alias->tag);
0a6384c4
AD
1592 if (bp->prec != 0)
1593 bp->alias->prec = bp->prec;
1594 else
1595 bp->prec = bp->alias->prec;
1596 }
943819bf 1597
0a6384c4
AD
1598 if (bp->assoc != bp->alias->assoc)
1599 {
a0f6b076
AD
1600 if (bp->assoc != 0 && bp->alias->assoc != 0
1601 && bp->user_token_number == SALIAS)
1602 complain (_("conflicting assoc values for %s and %s"),
1603 bp->tag, bp->alias->tag);
1604 if (bp->assoc != 0)
1605 bp->alias->assoc = bp->assoc;
1606 else
1607 bp->assoc = bp->alias->assoc;
1608 }
0a6384c4 1609
b7c49edf 1610 /* Do not do processing below for SALIASs. */
0a6384c4 1611 if (bp->user_token_number == SALIAS)
b7c49edf 1612 continue;
943819bf 1613
a70083a3 1614 }
b7c49edf 1615 else /* bp->class == token_sym */
943819bf 1616 {
b7c49edf
AD
1617 if (bp == eoftoken)
1618 bp->value = 0;
1619 else
1620 bp->value = tokno++;
943819bf
RS
1621 }
1622
d7020c20 1623 if (bp->class == token_sym)
1ff442ca 1624 {
6b7e85b9 1625 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1626 bp->user_token_number = ++last_user_token_number;
1627 if (bp->user_token_number > max_user_token_number)
1628 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1629 }
1630
1631 tags[bp->value] = bp->tag;
943819bf 1632 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1633 sprec[bp->value] = bp->prec;
1634 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1635 }
1636
037ca2f1 1637 token_translations_init ();
1ff442ca
NF
1638
1639 error_token_number = errtoken->value;
1640
e3f1699f
AD
1641 if (startval->class == unknown_sym)
1642 fatal (_("the start symbol %s is undefined"), startval->tag);
1643 else if (startval->class == token_sym)
1644 fatal (_("the start symbol %s is a token"), startval->tag);
1645
1646 start_symbol = startval->value;
1647}
1648
1649
93ede233
AD
1650/*---------------------------------------------------------------.
1651| Save the definition of token names in the `TOKENDEFS' muscle. |
1652`---------------------------------------------------------------*/
e3f1699f
AD
1653
1654static void
93ede233 1655symbols_save (void)
e3f1699f 1656{
93ede233
AD
1657 struct obstack tokendefs;
1658 bucket *bp;
93ede233
AD
1659 obstack_init (&tokendefs);
1660
1661 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1662 {
ec2da99f 1663 char *symbol = bp->tag; /* get symbol */
1ff442ca 1664
93ede233
AD
1665 if (bp->value >= ntokens)
1666 continue;
1667 if (bp->user_token_number == SALIAS)
1668 continue;
1669 if ('\'' == *symbol)
1670 continue; /* skip literal character */
1671 if (bp == errtoken)
1672 continue; /* skip error token */
1673 if ('\"' == *symbol)
037ca2f1 1674 {
93ede233
AD
1675 /* use literal string only if given a symbol with an alias */
1676 if (bp->alias)
1677 symbol = bp->alias->tag;
1678 else
1679 continue;
037ca2f1 1680 }
93ede233
AD
1681
1682 /* Don't #define nonliteral tokens whose names contain periods. */
ec2da99f 1683 if (strchr (symbol, '.'))
93ede233
AD
1684 continue;
1685
7742ddeb 1686 obstack_fgrow2 (&tokendefs, "# define %s\t%d\n",
93ede233
AD
1687 symbol, bp->user_token_number);
1688 if (semantic_parser)
1689 /* FIXME: This is probably wrong, and should be just as
1690 above. --akim. */
7742ddeb 1691 obstack_fgrow2 (&tokendefs, "# define T%s\t%d\n", symbol, bp->value);
1ff442ca 1692 }
93ede233
AD
1693
1694 obstack_1grow (&tokendefs, 0);
1695 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1696 obstack_free (&tokendefs, NULL);
1ff442ca 1697}
a083fbbf 1698
1ff442ca 1699
a70083a3
AD
1700/*---------------------------------------------------------------.
1701| Convert the rules into the representation using RRHS, RLHS and |
1702| RITEMS. |
1703`---------------------------------------------------------------*/
1ff442ca 1704
4a120d45 1705static void
118fb205 1706packgram (void)
1ff442ca 1707{
a70083a3
AD
1708 int itemno;
1709 int ruleno;
1710 symbol_list *p;
1ff442ca 1711
adc8c848
AD
1712 /* We use short to index items. */
1713 if (nitems >= MAXSHORT)
1714 fatal (_("too many items (max %d)"), MAXSHORT);
1715
d7913476 1716 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1717 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1718
1719 itemno = 0;
1720 ruleno = 1;
1721
1722 p = grammar;
1723 while (p)
1724 {
b29b2ed5 1725 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1726 rule_table[ruleno].lhs = p->sym->value;
1727 rule_table[ruleno].rhs = itemno;
b29b2ed5 1728 rule_table[ruleno].line = p->line;
68f1e3ed 1729 rule_table[ruleno].useful = TRUE;
3f96f4dc
AD
1730 rule_table[ruleno].action = p->action;
1731 rule_table[ruleno].action_line = p->action_line;
f499b062
AD
1732 rule_table[ruleno].guard = p->guard;
1733 rule_table[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1734
1735 p = p->next;
1736 while (p && p->sym)
1737 {
1738 ritem[itemno++] = p->sym->value;
1739 /* A rule gets by default the precedence and associativity
1740 of the last token in it. */
d7020c20 1741 if (p->sym->class == token_sym)
1ff442ca 1742 {
652a871c
AD
1743 rule_table[ruleno].prec = p->sym->prec;
1744 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1745 }
a70083a3
AD
1746 if (p)
1747 p = p->next;
1ff442ca
NF
1748 }
1749
1750 /* If this rule has a %prec,
a70083a3 1751 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1752 if (ruleprec)
1753 {
652a871c
AD
1754 rule_table[ruleno].prec = ruleprec->prec;
1755 rule_table[ruleno].assoc = ruleprec->assoc;
1756 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1757 }
1758
1759 ritem[itemno++] = -ruleno;
1760 ruleno++;
1761
a70083a3
AD
1762 if (p)
1763 p = p->next;
1ff442ca
NF
1764 }
1765
1766 ritem[itemno] = 0;
75142d45
AD
1767 nritems = itemno;
1768 assert (nritems == nitems);
3067fbef
AD
1769
1770 if (trace_flag)
1771 ritem_print (stderr);
1ff442ca 1772}
a70083a3
AD
1773\f
/*-------------------------------------------------------------------.
| Read in the grammar specification and record it in the format      |
| described in gram.h.  This is the entry point of this file: it     |
| resets the reader's state, creates the predefined tokens, opens    |
| INFILE and reads its three parts in turn (declarations, rules,     |
| trailing C code), then numbers the symbols and packs the rules.    |
|                                                                    |
| NOTE(review): this comment used to say that guards go into         |
| GUARD_OBSTACK and that actions and guards form the body of C       |
| functions (YYGUARD, YYACTION); parse_action and parse_guard both   |
| use ACTION_OBSTACK and attach the text to the rules -- confirm     |
| and drop the old wording.                                          |
`-------------------------------------------------------------------*/

void
reader (void)
{
  start_flag = 0;
  startval = NULL;              /* start symbol not specified yet. */

  /* No symbol, no rule, no item yet.  */
  nsyms = 0;
  nvars = 0;
  nrules = 0;
  nitems = 0;

  typed = 0;
  lastprec = 0;

  semantic_parser = 0;
  pure_parser = 0;

  grammar = NULL;

  lex_init ();
  lineno = 1;

  /* Initialize the muscle obstack.  */
  obstack_init (&muscle_obstack);

  /* Initialize the symbol table.  */
  tabinit ();

  /* Construct the error token */
  errtoken = getsym ("error");
  errtoken->class = token_sym;
  errtoken->user_token_number = 256;    /* Value specified by POSIX.  */

  /* Construct a token that represents all undefined literal tokens.
     It is always token number 2.  */
  undeftoken = getsym ("$undefined.");
  undeftoken->class = token_sym;
  undeftoken->user_token_number = 2;

  /* Initialize the obstacks. */
  obstack_init (&action_obstack);
  obstack_init (&attrs_obstack);
  obstack_init (&output_obstack);

  finput = xfopen (infile, "r");

  /* Read the declaration section.  Copy %{ ... %} groups to
     TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
     etc. found there.
     NOTE(review): the destinations named above are not checked
     against read_declarations -- confirm.  */
  read_declarations ();

  /* If the user did not define her EOFTOKEN, do it now.  */
  if (!eoftoken)
    {
      eoftoken = getsym ("$");
      eoftoken->class = token_sym;
      /* Value specified by POSIX.  */
      eoftoken->user_token_number = 0;
    }

  /* Read in the grammar, build grammar in list form.  Actions and
     guards are attached to the rules they belong to.  */
  readgram ();
  /* Some C code is given at the end of the grammar file.  */
  read_additionnal_code ();

  /* The input is no longer needed.  */
  lex_free ();
  xfclose (finput);

  /* Assign the symbols their symbol numbers.  */
  packsymbols ();
  /* Save the token definitions in the `tokendef' muscle.  */
  symbols_save ();

  /* Convert the grammar into the format described in gram.h.  */
  packgram ();
}
a70083a3 1859}