]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/lalr.c (set_goto_map, initialize_F): Use SHIFT_SYMBOL.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
41 bucket *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
a70083a3 51 bucket *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
1ff442ca 55char **tags;
d019d655 56short *user_toknums;
4a120d45
JT
57static symbol_list *grammar;
58static int start_flag;
59static bucket *startval;
1ff442ca
NF
60
61/* Nonzero if components of semantic values are used, implying
62 they must be unions. */
63static int value_components_used;
64
d7020c20
AD
65/* Nonzero if %union has been seen. */
66static int typed;
1ff442ca 67
d7020c20
AD
68/* Incremented for each %left, %right or %nonassoc seen */
69static int lastprec;
1ff442ca 70
1ff442ca 71static bucket *errtoken;
5b2e3c89 72static bucket *undeftoken;
b29b2ed5
AD
73
74
6255b435 75static symbol_list *
b29b2ed5
AD
76symbol_list_new (bucket *sym)
77{
78 symbol_list *res = XMALLOC (symbol_list, 1);
79 res->next = NULL;
80 res->sym = sym;
81 res->line = lineno;
d945f5cd
AD
82 res->action = NULL;
83 res->action_line = 0;
f499b062
AD
84 res->guard = NULL;
85 res->guard_line = 0;
b29b2ed5
AD
86 res->ruleprec = NULL;
87 return res;
88}
89
0d533154 90\f
a70083a3 91
0d533154
AD
92/*===================\
93| Low level lexing. |
94\===================*/
943819bf
RS
95
96static void
118fb205 97skip_to_char (int target)
943819bf
RS
98{
99 int c;
100 if (target == '\n')
a0f6b076 101 complain (_(" Skipping to next \\n"));
943819bf 102 else
a0f6b076 103 complain (_(" Skipping to next %c"), target);
943819bf
RS
104
105 do
0d533154 106 c = skip_white_space ();
943819bf 107 while (c != target && c != EOF);
a083fbbf 108 if (c != EOF)
0d533154 109 ungetc (c, finput);
943819bf
RS
110}
111
112
0d533154
AD
/* Read a signed decimal integer from STREAM and return its value.

   The accepted syntax is an optional `-' followed by decimal digits.
   The first character that is not part of the number is pushed back
   onto STREAM.  If there are no digits the result is 0.

   The magnitude saturates at INT_MAX instead of overflowing (signed
   overflow is undefined behavior), so an absurdly long digit string
   yields INT_MAX or -INT_MAX.  */
static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit would exceed INT_MAX exactly when this holds.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Give back the look-ahead (a no-op when it is EOF).  */
  ungetc (c, stream);

  return sign * n;
}
140\f
79282c5a
AD
141/*--------------------------------------------------------------.
142| Get the data type (alternative in the union) of the value for |
143| symbol N in rule RULE. |
144`--------------------------------------------------------------*/
145
146static char *
b29b2ed5 147get_type_name (int n, symbol_list *rule)
79282c5a
AD
148{
149 int i;
150 symbol_list *rp;
151
152 if (n < 0)
153 {
154 complain (_("invalid $ value"));
155 return NULL;
156 }
157
158 rp = rule;
159 i = 0;
160
161 while (i < n)
162 {
163 rp = rp->next;
164 if (rp == NULL || rp->sym == NULL)
165 {
166 complain (_("invalid $ value"));
167 return NULL;
168 }
169 i++;
170 }
171
172 return rp->sym->type_name;
173}
174\f
337bab46
AD
175/*------------------------------------------------------------.
176| Dump the string from FIN to OOUT if non null. MATCH is the |
177| delimiter of the string (either ' or "). |
178`------------------------------------------------------------*/
ae3c3164
AD
179
180static inline void
b6610515 181copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
182{
183 int c;
184
b6610515
RA
185 if (store)
186 obstack_1grow (oout, match);
8c7ebe49 187
4a120d45 188 c = getc (fin);
ae3c3164
AD
189
190 while (c != match)
191 {
192 if (c == EOF)
193 fatal (_("unterminated string at end of file"));
194 if (c == '\n')
195 {
a0f6b076 196 complain (_("unterminated string"));
4a120d45 197 ungetc (c, fin);
ae3c3164
AD
198 c = match; /* invent terminator */
199 continue;
200 }
201
337bab46 202 obstack_1grow (oout, c);
ae3c3164
AD
203
204 if (c == '\\')
205 {
4a120d45 206 c = getc (fin);
ae3c3164
AD
207 if (c == EOF)
208 fatal (_("unterminated string at end of file"));
337bab46 209 obstack_1grow (oout, c);
8c7ebe49 210
ae3c3164
AD
211 if (c == '\n')
212 lineno++;
213 }
214
a70083a3 215 c = getc (fin);
ae3c3164
AD
216 }
217
b6610515
RA
218 if (store)
219 obstack_1grow (oout, c);
220}
221
/* Copy a string from FIN to OOUT, delimiters included.  MATCH is the
   delimiter; the opening one has already been read.  This is
   copy_string2 with STORE set.  */
static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  enum { keep_delimiters = 1 };

  copy_string2 (fin, oout, match, keep_delimiters);
}
229
b6610515
RA
/* Copy from FIN to OOUT the longest run of alphanumeric characters and
   underscores.  The character that ends the run is pushed back onto
   FIN.  */
static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (isalnum (ch) || ch == '_')
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
ae3c3164 242
2666f928
AD
243
244/*------------------------------------------------------------------.
245| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
246| `/', which might or might not be a comment. In any case, copy |
247| what we saw. |
248`------------------------------------------------------------------*/
ae3c3164
AD
249
250static inline void
2666f928 251copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
252{
253 int cplus_comment;
a70083a3 254 int ended;
550a72a3
AD
255 int c;
256
257 /* We read a `/', output it. */
2666f928 258 obstack_1grow (oout, '/');
550a72a3
AD
259
260 switch ((c = getc (fin)))
261 {
262 case '/':
263 cplus_comment = 1;
264 break;
265 case '*':
266 cplus_comment = 0;
267 break;
268 default:
269 ungetc (c, fin);
270 return;
271 }
ae3c3164 272
2666f928 273 obstack_1grow (oout, c);
550a72a3 274 c = getc (fin);
ae3c3164
AD
275
276 ended = 0;
277 while (!ended)
278 {
279 if (!cplus_comment && c == '*')
280 {
281 while (c == '*')
282 {
2666f928 283 obstack_1grow (oout, c);
550a72a3 284 c = getc (fin);
ae3c3164
AD
285 }
286
287 if (c == '/')
288 {
2666f928 289 obstack_1grow (oout, c);
ae3c3164
AD
290 ended = 1;
291 }
292 }
293 else if (c == '\n')
294 {
295 lineno++;
2666f928 296 obstack_1grow (oout, c);
ae3c3164
AD
297 if (cplus_comment)
298 ended = 1;
299 else
550a72a3 300 c = getc (fin);
ae3c3164
AD
301 }
302 else if (c == EOF)
303 fatal (_("unterminated comment"));
304 else
305 {
2666f928 306 obstack_1grow (oout, c);
550a72a3 307 c = getc (fin);
ae3c3164
AD
308 }
309 }
310}
311
312
a70083a3 313/*-----------------------------------------------------------------.
337bab46 314| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
315| reference to this location. STACK_OFFSET is the number of values |
316| in the current rule so far, which says where to find `$0' with |
317| respect to the top of the stack. |
318`-----------------------------------------------------------------*/
1ff442ca 319
a70083a3 320static inline void
337bab46 321copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 322{
a70083a3 323 int c;
1ff442ca 324
a70083a3
AD
325 c = getc (fin);
326 if (c == '$')
1ff442ca 327 {
ff4423cc 328 obstack_sgrow (oout, "yyloc");
89cab50d 329 locations_flag = 1;
a70083a3
AD
330 }
331 else if (isdigit (c) || c == '-')
332 {
333 int n;
1ff442ca 334
a70083a3
AD
335 ungetc (c, fin);
336 n = read_signed_integer (fin);
11e2beca
AD
337 if (n > stack_offset)
338 complain (_("invalid value: %s%d"), "@", n);
339 else
340 {
341 /* Offset is always 0 if parser has already popped the stack
342 pointer. */
343 obstack_fgrow1 (oout, "yylsp[%d]",
344 n - (semantic_parser ? 0 : stack_offset));
345 locations_flag = 1;
346 }
1ff442ca 347 }
a70083a3 348 else
ff4a34be
AD
349 {
350 char buf[] = "@c";
351 buf[1] = c;
352 complain (_("%s is invalid"), quote (buf));
353 }
1ff442ca 354}
79282c5a
AD
355
356
357/*-------------------------------------------------------------------.
358| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
359| |
360| Possible inputs: $[<TYPENAME>]($|integer) |
361| |
337bab46 362| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
363| the number of values in the current rule so far, which says where |
364| to find `$0' with respect to the top of the stack. |
365`-------------------------------------------------------------------*/
366
367static inline void
337bab46 368copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
369 symbol_list *rule, int stack_offset)
370{
371 int c = getc (fin);
b0ce6046 372 const char *type_name = NULL;
79282c5a 373
f282676b 374 /* Get the type name if explicit. */
79282c5a
AD
375 if (c == '<')
376 {
f282676b 377 read_type_name (fin);
79282c5a
AD
378 type_name = token_buffer;
379 value_components_used = 1;
79282c5a
AD
380 c = getc (fin);
381 }
382
383 if (c == '$')
384 {
ff4423cc 385 obstack_sgrow (oout, "yyval");
8c7ebe49 386
79282c5a
AD
387 if (!type_name)
388 type_name = get_type_name (0, rule);
389 if (type_name)
337bab46 390 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
391 if (!type_name && typed)
392 complain (_("$$ of `%s' has no declared type"),
393 rule->sym->tag);
394 }
395 else if (isdigit (c) || c == '-')
396 {
397 int n;
398 ungetc (c, fin);
399 n = read_signed_integer (fin);
400
11e2beca
AD
401 if (n > stack_offset)
402 complain (_("invalid value: %s%d"), "$", n);
403 else
404 {
405 if (!type_name && n > 0)
406 type_name = get_type_name (n, rule);
407
408 /* Offset is always 0 if parser has already popped the stack
409 pointer. */
410 obstack_fgrow1 (oout, "yyvsp[%d]",
411 n - (semantic_parser ? 0 : stack_offset));
412
413 if (type_name)
414 obstack_fgrow1 (oout, ".%s", type_name);
415 if (!type_name && typed)
416 complain (_("$%d of `%s' has no declared type"),
417 n, rule->sym->tag);
418 }
79282c5a
AD
419 }
420 else
421 {
422 char buf[] = "$c";
423 buf[1] = c;
424 complain (_("%s is invalid"), quote (buf));
425 }
426}
a70083a3
AD
427\f
428/*-------------------------------------------------------------------.
429| Copy the contents of a `%{ ... %}' into the definitions file. The |
430| `%{' has already been read. Return after reading the `%}'. |
431`-------------------------------------------------------------------*/
1ff442ca 432
4a120d45 433static void
118fb205 434copy_definition (void)
1ff442ca 435{
a70083a3 436 int c;
ae3c3164 437 /* -1 while reading a character if prev char was %. */
a70083a3 438 int after_percent;
1ff442ca 439
89cab50d 440 if (!no_lines_flag)
25b222fa
MA
441 {
442 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 443 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
444 muscle_find("filename")));
445 }
1ff442ca
NF
446
447 after_percent = 0;
448
ae3c3164 449 c = getc (finput);
1ff442ca
NF
450
451 for (;;)
452 {
453 switch (c)
454 {
455 case '\n':
dd60faec 456 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
457 lineno++;
458 break;
459
460 case '%':
a70083a3 461 after_percent = -1;
1ff442ca 462 break;
a083fbbf 463
1ff442ca
NF
464 case '\'':
465 case '"':
337bab46 466 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
467 break;
468
469 case '/':
337bab46 470 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
471 break;
472
473 case EOF:
a70083a3 474 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
475
476 default:
dd60faec 477 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
478 }
479
a70083a3 480 c = getc (finput);
1ff442ca
NF
481
482 if (after_percent)
483 {
484 if (c == '}')
485 return;
dd60faec 486 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
487 }
488 after_percent = 0;
1ff442ca 489 }
1ff442ca
NF
490}
491
492
d7020c20
AD
493/*-------------------------------------------------------------------.
494| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
495| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
496| are reversed. |
497`-------------------------------------------------------------------*/
1ff442ca 498
4a120d45 499static void
d7020c20 500parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 501{
342b8b6e
AD
502 token_t token = tok_undef;
503 char *typename = NULL;
1ff442ca 504
1e9798d5
AD
505 /* The symbol being defined. */
506 struct bucket *symbol = NULL;
507
508 /* After `%token' and `%nterm', any number of symbols maybe be
509 defined. */
1ff442ca
NF
510 for (;;)
511 {
e6011337
JT
512 int tmp_char = ungetc (skip_white_space (), finput);
513
1e9798d5
AD
514 /* `%' (for instance from `%token', or from `%%' etc.) is the
515 only valid means to end this declaration. */
e6011337 516 if (tmp_char == '%')
1ff442ca 517 return;
e6011337 518 if (tmp_char == EOF)
a0f6b076 519 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 520
a70083a3 521 token = lex ();
511e79b3 522 if (token == tok_comma)
943819bf
RS
523 {
524 symbol = NULL;
525 continue;
526 }
511e79b3 527 if (token == tok_typename)
1ff442ca 528 {
95e36146 529 typename = xstrdup (token_buffer);
1ff442ca 530 value_components_used = 1;
943819bf
RS
531 symbol = NULL;
532 }
511e79b3 533 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 534 {
8e03724b
AD
535 if (symval->alias)
536 warn (_("symbol `%s' used more than once as a literal string"),
537 symval->tag);
538 else if (symbol->alias)
539 warn (_("symbol `%s' given more than one literal string"),
540 symbol->tag);
541 else
542 {
543 symval->class = token_sym;
544 symval->type_name = typename;
545 symval->user_token_number = symbol->user_token_number;
546 symbol->user_token_number = SALIAS;
547 symval->alias = symbol;
548 symbol->alias = symval;
549 /* symbol and symval combined are only one symbol */
550 nsyms--;
551 }
8e03724b 552 symbol = NULL;
1ff442ca 553 }
511e79b3 554 else if (token == tok_identifier)
1ff442ca
NF
555 {
556 int oldclass = symval->class;
943819bf 557 symbol = symval;
1ff442ca 558
943819bf 559 if (symbol->class == what_is_not)
a0f6b076 560 complain (_("symbol %s redefined"), symbol->tag);
943819bf 561 symbol->class = what_is;
d7020c20 562 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 563 symbol->value = nvars++;
1ff442ca
NF
564
565 if (typename)
566 {
943819bf
RS
567 if (symbol->type_name == NULL)
568 symbol->type_name = typename;
a70083a3 569 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 570 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
571 }
572 }
511e79b3 573 else if (symbol && token == tok_number)
a70083a3 574 {
943819bf 575 symbol->user_token_number = numval;
a70083a3 576 }
1ff442ca 577 else
943819bf 578 {
a0f6b076 579 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
580 token_buffer,
581 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 582 skip_to_char ('%');
943819bf 583 }
1ff442ca
NF
584 }
585
586}
587
1ff442ca 588
d7020c20
AD
589/*------------------------------.
590| Parse what comes after %start |
591`------------------------------*/
1ff442ca 592
4a120d45 593static void
118fb205 594parse_start_decl (void)
1ff442ca
NF
595{
596 if (start_flag)
27821bff 597 complain (_("multiple %s declarations"), "%start");
511e79b3 598 if (lex () != tok_identifier)
27821bff 599 complain (_("invalid %s declaration"), "%start");
943819bf
RS
600 else
601 {
602 start_flag = 1;
603 startval = symval;
604 }
1ff442ca
NF
605}
606
a70083a3
AD
607/*-----------------------------------------------------------.
608| read in a %type declaration and record its information for |
609| get_type_name to access |
610`-----------------------------------------------------------*/
611
612static void
613parse_type_decl (void)
614{
a70083a3
AD
615 char *name;
616
511e79b3 617 if (lex () != tok_typename)
a70083a3
AD
618 {
619 complain ("%s", _("%type declaration has no <typename>"));
620 skip_to_char ('%');
621 return;
622 }
623
95e36146 624 name = xstrdup (token_buffer);
a70083a3
AD
625
626 for (;;)
627 {
f17bcd1f 628 token_t t;
a70083a3
AD
629 int tmp_char = ungetc (skip_white_space (), finput);
630
631 if (tmp_char == '%')
632 return;
633 if (tmp_char == EOF)
634 fatal (_("Premature EOF after %s"), token_buffer);
635
636 t = lex ();
637
638 switch (t)
1ff442ca
NF
639 {
640
511e79b3
AD
641 case tok_comma:
642 case tok_semicolon:
1ff442ca
NF
643 break;
644
511e79b3 645 case tok_identifier:
1ff442ca
NF
646 if (symval->type_name == NULL)
647 symval->type_name = name;
a70083a3 648 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 649 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
650
651 break;
652
653 default:
a0f6b076
AD
654 complain (_("invalid %%type declaration due to item: %s"),
655 token_buffer);
a70083a3 656 skip_to_char ('%');
1ff442ca
NF
657 }
658 }
659}
660
661
662
d7020c20
AD
663/*----------------------------------------------------------------.
664| Read in a %left, %right or %nonassoc declaration and record its |
665| information. |
666`----------------------------------------------------------------*/
1ff442ca 667
4a120d45 668static void
d7020c20 669parse_assoc_decl (associativity assoc)
1ff442ca 670{
a70083a3
AD
671 char *name = NULL;
672 int prev = 0;
1ff442ca 673
a70083a3 674 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 675
1ff442ca
NF
676 for (;;)
677 {
f17bcd1f 678 token_t t;
e6011337 679 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 680
e6011337 681 if (tmp_char == '%')
1ff442ca 682 return;
e6011337 683 if (tmp_char == EOF)
a0f6b076 684 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 685
a70083a3 686 t = lex ();
1ff442ca
NF
687
688 switch (t)
689 {
511e79b3 690 case tok_typename:
95e36146 691 name = xstrdup (token_buffer);
1ff442ca
NF
692 break;
693
511e79b3 694 case tok_comma:
1ff442ca
NF
695 break;
696
511e79b3 697 case tok_identifier:
1ff442ca 698 if (symval->prec != 0)
a0f6b076 699 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
700 symval->prec = lastprec;
701 symval->assoc = assoc;
d7020c20 702 if (symval->class == nterm_sym)
a0f6b076 703 complain (_("symbol %s redefined"), symval->tag);
d7020c20 704 symval->class = token_sym;
1ff442ca 705 if (name)
a70083a3 706 { /* record the type, if one is specified */
1ff442ca
NF
707 if (symval->type_name == NULL)
708 symval->type_name = name;
a70083a3 709 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 710 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
711 }
712 break;
713
511e79b3
AD
714 case tok_number:
715 if (prev == tok_identifier)
a70083a3 716 {
1ff442ca 717 symval->user_token_number = numval;
a70083a3
AD
718 }
719 else
720 {
721 complain (_
722 ("invalid text (%s) - number should be after identifier"),
723token_buffer);
724 skip_to_char ('%');
725 }
1ff442ca
NF
726 break;
727
511e79b3 728 case tok_semicolon:
1ff442ca
NF
729 return;
730
731 default:
a0f6b076 732 complain (_("unexpected item: %s"), token_buffer);
a70083a3 733 skip_to_char ('%');
1ff442ca
NF
734 }
735
736 prev = t;
1ff442ca
NF
737 }
738}
739
740
741
dd60faec 742/*--------------------------------------------------------------.
180d45ba
PB
743| Copy the union declaration into the stype muscle |
744| (and fdefines), where it is made into the definition of |
745| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 746`--------------------------------------------------------------*/
1ff442ca 747
4a120d45 748static void
118fb205 749parse_union_decl (void)
1ff442ca 750{
a70083a3
AD
751 int c;
752 int count = 0;
428046f8 753 bool done = FALSE;
180d45ba 754 struct obstack union_obstack;
1ff442ca 755 if (typed)
27821bff 756 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
757
758 typed = 1;
759
180d45ba
PB
760 obstack_init (&union_obstack);
761 obstack_sgrow (&union_obstack, "union");
1ff442ca 762
428046f8 763 while (!done)
1ff442ca 764 {
428046f8
AD
765 c = xgetc (finput);
766
342b8b6e
AD
767 /* If C contains '/', it is output by copy_comment (). */
768 if (c != '/')
2666f928 769 obstack_1grow (&union_obstack, c);
1ff442ca
NF
770
771 switch (c)
772 {
773 case '\n':
774 lineno++;
775 break;
776
777 case '/':
2666f928 778 copy_comment (finput, &union_obstack);
1ff442ca
NF
779 break;
780
1ff442ca
NF
781 case '{':
782 count++;
783 break;
784
785 case '}':
428046f8 786 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 787 if (count == 0)
27821bff 788 complain (_("unmatched %s"), "`}'");
1ff442ca 789 count--;
428046f8
AD
790 if (!count)
791 done = TRUE;
792 break;
1ff442ca 793 }
1ff442ca 794 }
180d45ba 795
428046f8
AD
796 /* JF don't choke on trailing semi */
797 c = skip_white_space ();
798 if (c != ';')
799 ungetc (c, finput);
800 obstack_1grow (&union_obstack, 0);
801 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
802}
803
d7020c20
AD
804
805/*-------------------------------------------------------.
806| Parse the declaration %expect N which says to expect N |
807| shift-reduce conflicts. |
808`-------------------------------------------------------*/
1ff442ca 809
4a120d45 810static void
118fb205 811parse_expect_decl (void)
1ff442ca 812{
131e2fef 813 int c = skip_white_space ();
1ff442ca
NF
814 ungetc (c, finput);
815
131e2fef 816 if (!isdigit (c))
79282c5a 817 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
818 else
819 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
820}
821
a70083a3
AD
822
823/*-------------------------------------------------------------------.
824| Parse what comes after %thong. the full syntax is |
825| |
826| %thong <type> token number literal |
827| |
828| the <type> or number may be omitted. The number specifies the |
829| user_token_number. |
830| |
831| Two symbols are entered in the table, one for the token symbol and |
832| one for the literal. Both are given the <type>, if any, from the |
833| declaration. The ->user_token_number of the first is SALIAS and |
834| the ->user_token_number of the second is set to the number, if |
835| any, from the declaration. The two symbols are linked via |
836| pointers in their ->alias fields. |
837| |
838| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
839| only the literal string is retained it is the literal string that |
840| is output to yytname |
841`-------------------------------------------------------------------*/
842
843static void
844parse_thong_decl (void)
7b306f52 845{
f17bcd1f 846 token_t token;
a70083a3
AD
847 struct bucket *symbol;
848 char *typename = 0;
6b7e85b9 849 int usrtoknum = SUNDEF;
7b306f52 850
a70083a3 851 token = lex (); /* fetch typename or first token */
511e79b3 852 if (token == tok_typename)
7b306f52 853 {
95e36146 854 typename = xstrdup (token_buffer);
a70083a3
AD
855 value_components_used = 1;
856 token = lex (); /* fetch first token */
7b306f52 857 }
7b306f52 858
a70083a3 859 /* process first token */
7b306f52 860
511e79b3 861 if (token != tok_identifier)
a70083a3
AD
862 {
863 complain (_("unrecognized item %s, expected an identifier"),
864 token_buffer);
865 skip_to_char ('%');
866 return;
7b306f52 867 }
d7020c20 868 symval->class = token_sym;
a70083a3
AD
869 symval->type_name = typename;
870 symval->user_token_number = SALIAS;
871 symbol = symval;
7b306f52 872
a70083a3 873 token = lex (); /* get number or literal string */
1ff442ca 874
511e79b3 875 if (token == tok_number)
943819bf 876 {
a70083a3
AD
877 usrtoknum = numval;
878 token = lex (); /* okay, did number, now get literal */
943819bf 879 }
1ff442ca 880
a70083a3 881 /* process literal string token */
1ff442ca 882
511e79b3 883 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 884 {
a70083a3
AD
885 complain (_("expected string constant instead of %s"), token_buffer);
886 skip_to_char ('%');
887 return;
1ff442ca 888 }
d7020c20 889 symval->class = token_sym;
a70083a3
AD
890 symval->type_name = typename;
891 symval->user_token_number = usrtoknum;
1ff442ca 892
a70083a3
AD
893 symval->alias = symbol;
894 symbol->alias = symval;
1ff442ca 895
79282c5a
AD
896 /* symbol and symval combined are only one symbol. */
897 nsyms--;
a70083a3 898}
3cef001a 899
11e2beca 900
b6610515 901static void
11d82f03 902parse_muscle_decl (void)
b6610515
RA
903{
904 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
905 char* muscle_key;
906 char* muscle_value;
b6610515
RA
907
908 /* Read key. */
909 if (!isalpha (ch) && ch != '_')
910 {
911 complain (_("invalid %s declaration"), "%define");
912 skip_to_char ('%');
913 return;
914 }
11d82f03
MA
915 copy_identifier (finput, &muscle_obstack);
916 obstack_1grow (&muscle_obstack, 0);
917 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 918
b6610515
RA
919 /* Read value. */
920 ch = skip_white_space ();
921 if (ch != '"')
922 {
923 ungetc (ch, finput);
924 if (ch != EOF)
925 {
926 complain (_("invalid %s declaration"), "%define");
927 skip_to_char ('%');
928 return;
929 }
930 else
931 fatal (_("Premature EOF after %s"), "\"");
932 }
11d82f03
MA
933 copy_string2 (finput, &muscle_obstack, '"', 0);
934 obstack_1grow (&muscle_obstack, 0);
935 muscle_value = obstack_finish (&muscle_obstack);
b6610515 936
b6610515 937 /* Store the (key, value) pair in the environment. */
11d82f03 938 muscle_insert (muscle_key, muscle_value);
b6610515
RA
939}
940
2ba3b73c 941
426cf563
MA
942
943/*---------------------------------.
a870c567 944| Parse a double quoted parameter. |
426cf563
MA
945`---------------------------------*/
946
947static const char *
948parse_dquoted_param (const char *from)
949{
950 struct obstack param_obstack;
951 const char *param = NULL;
952 int c;
953
954 obstack_init (&param_obstack);
955 c = skip_white_space ();
956
957 if (c != '"')
958 {
959 complain (_("invalid %s declaration"), from);
960 ungetc (c, finput);
961 skip_to_char ('%');
962 return NULL;
963 }
964
2648a72d
AD
965 while ((c = literalchar ()) != '"')
966 obstack_1grow (&param_obstack, c);
a870c567 967
426cf563
MA
968 obstack_1grow (&param_obstack, '\0');
969 param = obstack_finish (&param_obstack);
970
971 if (c != '"' || strlen (param) == 0)
972 {
973 complain (_("invalid %s declaration"), from);
974 if (c != '"')
975 ungetc (c, finput);
976 skip_to_char ('%');
977 return NULL;
978 }
979
980 return param;
981}
982
2ba3b73c
MA
983/*----------------------------------.
984| Parse what comes after %skeleton. |
985`----------------------------------*/
986
a870c567 987static void
2ba3b73c
MA
988parse_skel_decl (void)
989{
426cf563 990 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
991}
992
a70083a3
AD
993/*----------------------------------------------------------------.
994| Read from finput until `%%' is seen. Discard the `%%'. Handle |
995| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 996| groups to ATTRS_OBSTACK. |
a70083a3 997`----------------------------------------------------------------*/
1ff442ca 998
4a120d45 999static void
a70083a3 1000read_declarations (void)
1ff442ca 1001{
a70083a3 1002 for (;;)
1ff442ca 1003 {
951366c1 1004 int c = skip_white_space ();
1ff442ca 1005
a70083a3
AD
1006 if (c == '%')
1007 {
951366c1 1008 token_t tok = parse_percent_token ();
1ff442ca 1009
a70083a3 1010 switch (tok)
943819bf 1011 {
511e79b3 1012 case tok_two_percents:
a70083a3 1013 return;
1ff442ca 1014
511e79b3 1015 case tok_percent_left_curly:
a70083a3
AD
1016 copy_definition ();
1017 break;
1ff442ca 1018
511e79b3 1019 case tok_token:
d7020c20 1020 parse_token_decl (token_sym, nterm_sym);
a70083a3 1021 break;
1ff442ca 1022
511e79b3 1023 case tok_nterm:
d7020c20 1024 parse_token_decl (nterm_sym, token_sym);
a70083a3 1025 break;
1ff442ca 1026
511e79b3 1027 case tok_type:
a70083a3
AD
1028 parse_type_decl ();
1029 break;
1ff442ca 1030
511e79b3 1031 case tok_start:
a70083a3
AD
1032 parse_start_decl ();
1033 break;
118fb205 1034
511e79b3 1035 case tok_union:
a70083a3
AD
1036 parse_union_decl ();
1037 break;
1ff442ca 1038
511e79b3 1039 case tok_expect:
a70083a3
AD
1040 parse_expect_decl ();
1041 break;
6deb4447 1042
511e79b3 1043 case tok_thong:
a70083a3
AD
1044 parse_thong_decl ();
1045 break;
d7020c20 1046
511e79b3 1047 case tok_left:
d7020c20 1048 parse_assoc_decl (left_assoc);
a70083a3 1049 break;
1ff442ca 1050
511e79b3 1051 case tok_right:
d7020c20 1052 parse_assoc_decl (right_assoc);
a70083a3 1053 break;
1ff442ca 1054
511e79b3 1055 case tok_nonassoc:
d7020c20 1056 parse_assoc_decl (non_assoc);
a70083a3 1057 break;
1ff442ca 1058
b6610515 1059 case tok_define:
11d82f03 1060 parse_muscle_decl ();
b6610515 1061 break;
342b8b6e 1062
2ba3b73c
MA
1063 case tok_skel:
1064 parse_skel_decl ();
1065 break;
b6610515 1066
511e79b3 1067 case tok_noop:
a70083a3 1068 break;
1ff442ca 1069
951366c1
AD
1070 case tok_stropt:
1071 case tok_intopt:
1072 case tok_obsolete:
951366c1
AD
1073 abort ();
1074 break;
1075
e0c40012 1076 case tok_illegal:
a70083a3
AD
1077 default:
1078 complain (_("unrecognized: %s"), token_buffer);
1079 skip_to_char ('%');
1080 }
1081 }
1082 else if (c == EOF)
1083 fatal (_("no input grammar"));
1084 else
1085 {
ff4a34be
AD
1086 char buf[] = "c";
1087 buf[0] = c;
1088 complain (_("unknown character: %s"), quote (buf));
a70083a3 1089 skip_to_char ('%');
1ff442ca 1090 }
1ff442ca 1091 }
1ff442ca 1092}
a70083a3
AD
1093\f
1094/*-------------------------------------------------------------------.
1095| Assuming that a `{' has just been seen, copy everything up to the |
1096| matching `}' into the actions file. STACK_OFFSET is the number of |
1097| values in the current rule so far, which says where to find `$0' |
1098| with respect to the top of the stack. |
14d293ac 1099| |
11e2beca
AD
1100| This routine is used both for actions and guards. Only |
1101| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1102| pointers to relevant portions inside this obstack. |
a70083a3 1103`-------------------------------------------------------------------*/
1ff442ca 1104
4a120d45 1105static void
14d293ac 1106parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1107{
a70083a3 1108 int c;
a70083a3 1109 int count;
1ff442ca 1110
1ff442ca 1111 count = 1;
1ff442ca
NF
1112 while (count > 0)
1113 {
14d293ac
AD
1114 while ((c = getc (finput)) != '}')
1115 switch (c)
1116 {
1117 case '\n':
1118 obstack_1grow (&action_obstack, c);
1119 lineno++;
1120 break;
1ff442ca 1121
14d293ac
AD
1122 case '{':
1123 obstack_1grow (&action_obstack, c);
1124 count++;
1125 break;
1ff442ca 1126
14d293ac
AD
1127 case '\'':
1128 case '"':
1129 copy_string (finput, &action_obstack, c);
1130 break;
1ff442ca 1131
14d293ac
AD
1132 case '/':
1133 copy_comment (finput, &action_obstack);
1134 break;
1ff442ca 1135
14d293ac
AD
1136 case '$':
1137 copy_dollar (finput, &action_obstack,
1138 rule, stack_offset);
1139 break;
1ff442ca 1140
14d293ac
AD
1141 case '@':
1142 copy_at (finput, &action_obstack,
1143 stack_offset);
1144 break;
a70083a3 1145
14d293ac
AD
1146 case EOF:
1147 fatal (_("unmatched %s"), "`{'");
a70083a3 1148
14d293ac
AD
1149 default:
1150 obstack_1grow (&action_obstack, c);
1151 }
a70083a3 1152
14d293ac 1153 /* Above loop exits when C is '}'. */
a70083a3
AD
1154 if (--count)
1155 {
8c7ebe49 1156 obstack_1grow (&action_obstack, c);
a70083a3
AD
1157 c = getc (finput);
1158 }
1159 }
1160
3f96f4dc 1161 obstack_1grow (&action_obstack, '\0');
a70083a3 1162}
14d293ac 1163
a70083a3
AD
1164
1165static void
14d293ac 1166parse_action (symbol_list *rule, int stack_offset)
a70083a3 1167{
14d293ac
AD
1168 rule->action_line = lineno;
1169 parse_braces (rule, stack_offset);
1170 rule->action = obstack_finish (&action_obstack);
1171}
a70083a3 1172
a70083a3 1173
14d293ac
AD
1174static void
1175parse_guard (symbol_list *rule, int stack_offset)
1176{
1177 token_t t = lex ();
1178 if (t != tok_left_curly)
1179 complain (_("invalid %s declaration"), "%guard");
f499b062 1180 rule->guard_line = lineno;
14d293ac
AD
1181 parse_braces (rule, stack_offset);
1182 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1183}
14d293ac 1184
a70083a3
AD
1185\f
1186
a70083a3
AD
1187/*-------------------------------------------------------------------.
1188| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1189| with the user's names. |
1190`-------------------------------------------------------------------*/
1ff442ca 1191
4a120d45 1192static bucket *
118fb205 1193gensym (void)
1ff442ca 1194{
274d42ce
AD
1195 /* Incremented for each generated symbol */
1196 static int gensym_count = 0;
1197 static char buf[256];
1198
a70083a3 1199 bucket *sym;
1ff442ca 1200
274d42ce
AD
1201 sprintf (buf, "@%d", ++gensym_count);
1202 token_buffer = buf;
a70083a3 1203 sym = getsym (token_buffer);
d7020c20 1204 sym->class = nterm_sym;
1ff442ca 1205 sym->value = nvars++;
36281465 1206 return sym;
1ff442ca 1207}
a70083a3 1208\f
107f7dfb
AD
1209/*-------------------------------------------------------------------.
1210| Parse the input grammar into a one symbol_list structure. Each |
1211| rule is represented by a sequence of symbols: the left hand side |
1212| followed by the contents of the right hand side, followed by a |
1213| null pointer instead of a symbol to terminate the rule. The next |
1214| symbol is the lhs of the following rule. |
1215| |
1216| All guards and actions are copied out to the appropriate files, |
1217| labelled by the rule number they apply to. |
1218| |
1219| Bison used to allow some %directives in the rules sections, but |
1220| this is no longer consider appropriate: (i) the documented grammar |
1221| doesn't claim it, (ii), it would promote bad style, (iii), error |
1222| recovery for %directives consists in skipping the junk until a `%' |
1223| is seen and helrp synchronizing. This scheme is definitely wrong |
1224| in the rules section. |
1225`-------------------------------------------------------------------*/
1ff442ca 1226
4a120d45 1227static void
118fb205 1228readgram (void)
1ff442ca 1229{
f17bcd1f 1230 token_t t;
a70083a3 1231 bucket *lhs = NULL;
107f7dfb
AD
1232 symbol_list *p = NULL;
1233 symbol_list *p1 = NULL;
a70083a3 1234 bucket *bp;
1ff442ca 1235
ff4a34be
AD
1236 /* Points to first symbol_list of current rule. its symbol is the
1237 lhs of the rule. */
107f7dfb 1238 symbol_list *crule = NULL;
ff4a34be 1239 /* Points to the symbol_list preceding crule. */
107f7dfb 1240 symbol_list *crule1 = NULL;
1ff442ca 1241
a70083a3 1242 t = lex ();
1ff442ca 1243
511e79b3 1244 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1245 if (t == tok_identifier || t == tok_bar)
1246 {
1247 int action_flag = 0;
1248 /* Number of symbols in rhs of this rule so far */
1249 int rulelength = 0;
1250 int xactions = 0; /* JF for error checking */
1251 bucket *first_rhs = 0;
1252
1253 if (t == tok_identifier)
1254 {
1255 lhs = symval;
1256
1257 if (!start_flag)
1258 {
1259 startval = lhs;
1260 start_flag = 1;
1261 }
1ff442ca 1262
107f7dfb
AD
1263 t = lex ();
1264 if (t != tok_colon)
1265 {
1266 complain (_("ill-formed rule: initial symbol not followed by colon"));
1267 unlex (t);
1268 }
1269 }
1270
1271 if (nrules == 0 && t == tok_bar)
1272 {
1273 complain (_("grammar starts with vertical bar"));
1274 lhs = symval; /* BOGUS: use a random symval */
1275 }
1276 /* start a new rule and record its lhs. */
1277
1278 nrules++;
1279 nitems++;
1280
1281 p = symbol_list_new (lhs);
1282
1283 crule1 = p1;
1284 if (p1)
1285 p1->next = p;
1286 else
1287 grammar = p;
1ff442ca 1288
107f7dfb
AD
1289 p1 = p;
1290 crule = p;
1ff442ca 1291
107f7dfb 1292 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1293
107f7dfb
AD
1294 if (lhs->class == unknown_sym)
1295 {
1296 lhs->class = nterm_sym;
1297 lhs->value = nvars;
1298 nvars++;
1299 }
1300 else if (lhs->class == token_sym)
1301 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1302
107f7dfb 1303 /* read the rhs of the rule. */
1ff442ca 1304
107f7dfb
AD
1305 for (;;)
1306 {
1307 t = lex ();
1308 if (t == tok_prec)
1309 {
1310 t = lex ();
1311 crule->ruleprec = symval;
1312 t = lex ();
1313 }
1314
1315 if (!(t == tok_identifier || t == tok_left_curly))
1316 break;
1ff442ca 1317
107f7dfb
AD
1318 /* If next token is an identifier, see if a colon follows it.
1319 If one does, exit this rule now. */
1320 if (t == tok_identifier)
1321 {
1322 bucket *ssave;
1323 token_t t1;
1324
1325 ssave = symval;
1326 t1 = lex ();
1327 unlex (t1);
1328 symval = ssave;
1329 if (t1 == tok_colon)
1330 break;
1331
1332 if (!first_rhs) /* JF */
1333 first_rhs = symval;
1334 /* Not followed by colon =>
1335 process as part of this rule's rhs. */
1336 }
1337
1338 /* If we just passed an action, that action was in the middle
1339 of a rule, so make a dummy rule to reduce it to a
1340 non-terminal. */
1341 if (action_flag)
1342 {
1343 /* Since the action was written out with this rule's
1344 number, we must give the new rule this number by
1345 inserting the new rule before it. */
1346
1347 /* Make a dummy nonterminal, a gensym. */
1348 bucket *sdummy = gensym ();
1349
1350 /* Make a new rule, whose body is empty, before the
1351 current one, so that the action just read can
1352 belong to it. */
1353 nrules++;
1354 nitems++;
1355 p = symbol_list_new (sdummy);
1356 /* Attach its lineno to that of the host rule. */
1357 p->line = crule->line;
1358 if (crule1)
1359 crule1->next = p;
1360 else
1361 grammar = p;
1362 /* End of the rule. */
1363 crule1 = symbol_list_new (NULL);
1364 crule1->next = crule;
1365
1366 p->next = crule1;
1367
1368 /* Insert the dummy generated by that rule into this
1369 rule. */
1370 nitems++;
1371 p = symbol_list_new (sdummy);
1372 p1->next = p;
1373 p1 = p;
1374
1375 action_flag = 0;
1376 }
1377
1378 if (t == tok_identifier)
1379 {
1380 nitems++;
1381 p = symbol_list_new (symval);
1382 p1->next = p;
1383 p1 = p;
1384 }
1385 else /* handle an action. */
1386 {
14d293ac 1387 parse_action (crule, rulelength);
107f7dfb
AD
1388 action_flag = 1;
1389 xactions++; /* JF */
1390 }
1391 rulelength++;
1392 } /* end of read rhs of rule */
1393
1394 /* Put an empty link in the list to mark the end of this rule */
1395 p = symbol_list_new (NULL);
1396 p1->next = p;
1397 p1 = p;
1398
1399 if (t == tok_prec)
1400 {
1401 complain (_("two @prec's in a row"));
1402 t = lex ();
1403 crule->ruleprec = symval;
1404 t = lex ();
1405 }
f499b062 1406
107f7dfb
AD
1407 if (t == tok_guard)
1408 {
1409 if (!semantic_parser)
1410 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1411
14d293ac 1412 parse_guard (crule, rulelength);
a70083a3 1413 t = lex ();
107f7dfb 1414 }
f499b062
AD
1415
1416 if (t == tok_left_curly)
107f7dfb
AD
1417 {
1418 /* This case never occurs -wjh */
1419 if (action_flag)
1420 complain (_("two actions at end of one rule"));
14d293ac 1421 parse_action (crule, rulelength);
107f7dfb
AD
1422 action_flag = 1;
1423 xactions++; /* -wjh */
1424 t = lex ();
1425 }
1426 /* If $$ is being set in default way, report if any type
1427 mismatch. */
1428 else if (!xactions
1429 && first_rhs && lhs->type_name != first_rhs->type_name)
1430 {
1431 if (lhs->type_name == 0
1432 || first_rhs->type_name == 0
1433 || strcmp (lhs->type_name, first_rhs->type_name))
1434 complain (_("type clash (`%s' `%s') on default action"),
1435 lhs->type_name ? lhs->type_name : "",
1436 first_rhs->type_name ? first_rhs->type_name : "");
1437 }
1438 /* Warn if there is no default for $$ but we need one. */
1439 else if (!xactions && !first_rhs && lhs->type_name != 0)
1440 complain (_("empty rule for typed nonterminal, and no action"));
1441 if (t == tok_semicolon)
a70083a3 1442 t = lex ();
107f7dfb
AD
1443 }
1444 else
1445 {
1446 complain (_("invalid input: %s"), quote (token_buffer));
1447 t = lex ();
1448 }
943819bf 1449
1ff442ca 1450
943819bf
RS
1451 /* grammar has been read. Do some checking */
1452
1ff442ca 1453 if (nsyms > MAXSHORT)
a0f6b076
AD
1454 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1455 MAXSHORT);
1ff442ca 1456 if (nrules == 0)
a0f6b076 1457 fatal (_("no rules in the input grammar"));
1ff442ca 1458
1ff442ca
NF
1459 /* Report any undefined symbols and consider them nonterminals. */
1460
1461 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1462 if (bp->class == unknown_sym)
1ff442ca 1463 {
a70083a3
AD
1464 complain (_
1465 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1466 bp->tag);
d7020c20 1467 bp->class = nterm_sym;
1ff442ca
NF
1468 bp->value = nvars++;
1469 }
1470
1471 ntokens = nsyms - nvars;
1472}
ff48177d
MA
1473
1474/* At the end of the grammar file, some C source code must
63c2d5de 1475 be stored. It is going to be associated to the epilogue
ff48177d
MA
1476 directive. */
1477static void
1478read_additionnal_code (void)
1479{
1480 char c;
63c2d5de 1481 struct obstack el_obstack;
342b8b6e 1482
63c2d5de 1483 obstack_init (&el_obstack);
ff48177d 1484
710ddc4f
MA
1485 if (!no_lines_flag)
1486 {
1487 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1488 lineno, quotearg_style (c_quoting_style,
1489 muscle_find("filename")));
1490 }
1491
ff48177d 1492 while ((c = getc (finput)) != EOF)
63c2d5de 1493 obstack_1grow (&el_obstack, c);
342b8b6e 1494
63c2d5de 1495 obstack_1grow (&el_obstack, 0);
11d82f03 1496 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1497}
1498
a70083a3 1499\f
037ca2f1
AD
1500/*------------------------------------------------------------------.
1501| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1502| number. |
1503`------------------------------------------------------------------*/
1504
1505static void
1506token_translations_init (void)
1507{
1508 bucket *bp = NULL;
1509 int i;
1510
1511 token_translations = XCALLOC (short, max_user_token_number + 1);
1512
1513 /* Initialize all entries for literal tokens to 2, the internal
1514 token number for $undefined., which represents all invalid
1515 inputs. */
1516 for (i = 0; i <= max_user_token_number; i++)
1517 token_translations[i] = 2;
1518
1519 for (bp = firstsymbol; bp; bp = bp->next)
1520 {
1521 /* Non-terminal? */
1522 if (bp->value >= ntokens)
1523 continue;
1524 /* A token string alias? */
1525 if (bp->user_token_number == SALIAS)
1526 continue;
6b7e85b9
AD
1527
1528 assert (bp->user_token_number != SUNDEF);
1529
037ca2f1
AD
1530 /* A token which translation has already been set? */
1531 if (token_translations[bp->user_token_number] != 2)
1532 complain (_("tokens %s and %s both assigned number %d"),
1533 tags[token_translations[bp->user_token_number]],
1534 bp->tag, bp->user_token_number);
1535 token_translations[bp->user_token_number] = bp->value;
1536 }
1537}
1538
1539
a70083a3
AD
1540/*------------------------------------------------------------------.
1541| Assign symbol numbers, and write definition of token names into |
b2ca4022 1542| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1543| of symbols. |
1544`------------------------------------------------------------------*/
1ff442ca 1545
4a120d45 1546static void
118fb205 1547packsymbols (void)
1ff442ca 1548{
342b8b6e 1549 bucket *bp = NULL;
a70083a3 1550 int tokno = 1;
a70083a3 1551 int last_user_token_number;
4a120d45 1552 static char DOLLAR[] = "$";
1ff442ca 1553
d7913476 1554 tags = XCALLOC (char *, nsyms + 1);
d7913476 1555 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1556
d7913476
AD
1557 sprec = XCALLOC (short, nsyms);
1558 sassoc = XCALLOC (short, nsyms);
1ff442ca 1559
037ca2f1
AD
1560 /* The EOF token. */
1561 tags[0] = DOLLAR;
1562 user_toknums[0] = 0;
1563
1ff442ca
NF
1564 max_user_token_number = 256;
1565 last_user_token_number = 256;
1566
1567 for (bp = firstsymbol; bp; bp = bp->next)
1568 {
d7020c20 1569 if (bp->class == nterm_sym)
1ff442ca
NF
1570 {
1571 bp->value += ntokens;
1572 }
943819bf
RS
1573 else if (bp->alias)
1574 {
0a6384c4
AD
1575 /* this symbol and its alias are a single token defn.
1576 allocate a tokno, and assign to both check agreement of
1577 ->prec and ->assoc fields and make both the same */
1578 if (bp->value == 0)
1579 bp->value = bp->alias->value = tokno++;
943819bf 1580
0a6384c4
AD
1581 if (bp->prec != bp->alias->prec)
1582 {
1583 if (bp->prec != 0 && bp->alias->prec != 0
1584 && bp->user_token_number == SALIAS)
a0f6b076
AD
1585 complain (_("conflicting precedences for %s and %s"),
1586 bp->tag, bp->alias->tag);
0a6384c4
AD
1587 if (bp->prec != 0)
1588 bp->alias->prec = bp->prec;
1589 else
1590 bp->prec = bp->alias->prec;
1591 }
943819bf 1592
0a6384c4
AD
1593 if (bp->assoc != bp->alias->assoc)
1594 {
a0f6b076
AD
1595 if (bp->assoc != 0 && bp->alias->assoc != 0
1596 && bp->user_token_number == SALIAS)
1597 complain (_("conflicting assoc values for %s and %s"),
1598 bp->tag, bp->alias->tag);
1599 if (bp->assoc != 0)
1600 bp->alias->assoc = bp->assoc;
1601 else
1602 bp->assoc = bp->alias->assoc;
1603 }
0a6384c4
AD
1604
1605 if (bp->user_token_number == SALIAS)
a70083a3 1606 continue; /* do not do processing below for SALIASs */
943819bf 1607
a70083a3 1608 }
d7020c20 1609 else /* bp->class == token_sym */
943819bf
RS
1610 {
1611 bp->value = tokno++;
1612 }
1613
d7020c20 1614 if (bp->class == token_sym)
1ff442ca 1615 {
6b7e85b9 1616 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1617 bp->user_token_number = ++last_user_token_number;
1618 if (bp->user_token_number > max_user_token_number)
1619 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1620 }
1621
1622 tags[bp->value] = bp->tag;
943819bf 1623 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1624 sprec[bp->value] = bp->prec;
1625 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1626 }
1627
037ca2f1 1628 token_translations_init ();
1ff442ca
NF
1629
1630 error_token_number = errtoken->value;
1631
e3f1699f
AD
1632 if (startval->class == unknown_sym)
1633 fatal (_("the start symbol %s is undefined"), startval->tag);
1634 else if (startval->class == token_sym)
1635 fatal (_("the start symbol %s is a token"), startval->tag);
1636
1637 start_symbol = startval->value;
1638}
1639
1640
93ede233
AD
1641/*---------------------------------------------------------------.
1642| Save the definition of token names in the `TOKENDEFS' muscle. |
1643`---------------------------------------------------------------*/
e3f1699f
AD
1644
1645static void
93ede233 1646symbols_save (void)
e3f1699f 1647{
93ede233
AD
1648 struct obstack tokendefs;
1649 bucket *bp;
1650 char *cp, *symbol;
1651 char c;
1652 obstack_init (&tokendefs);
1653
1654 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1655 {
93ede233 1656 symbol = bp->tag; /* get symbol */
1ff442ca 1657
93ede233
AD
1658 if (bp->value >= ntokens)
1659 continue;
1660 if (bp->user_token_number == SALIAS)
1661 continue;
1662 if ('\'' == *symbol)
1663 continue; /* skip literal character */
1664 if (bp == errtoken)
1665 continue; /* skip error token */
1666 if ('\"' == *symbol)
037ca2f1 1667 {
93ede233
AD
1668 /* use literal string only if given a symbol with an alias */
1669 if (bp->alias)
1670 symbol = bp->alias->tag;
1671 else
1672 continue;
037ca2f1 1673 }
93ede233
AD
1674
1675 /* Don't #define nonliteral tokens whose names contain periods. */
1676 cp = symbol;
1677 while ((c = *cp++) && c != '.');
1678 if (c != '\0')
1679 continue;
1680
7742ddeb 1681 obstack_fgrow2 (&tokendefs, "# define %s\t%d\n",
93ede233
AD
1682 symbol, bp->user_token_number);
1683 if (semantic_parser)
1684 /* FIXME: This is probably wrong, and should be just as
1685 above. --akim. */
7742ddeb 1686 obstack_fgrow2 (&tokendefs, "# define T%s\t%d\n", symbol, bp->value);
1ff442ca 1687 }
93ede233
AD
1688
1689 obstack_1grow (&tokendefs, 0);
1690 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1691 obstack_free (&tokendefs, NULL);
1ff442ca 1692}
a083fbbf 1693
1ff442ca 1694
a70083a3
AD
1695/*---------------------------------------------------------------.
1696| Convert the rules into the representation using RRHS, RLHS and |
1697| RITEMS. |
1698`---------------------------------------------------------------*/
1ff442ca 1699
4a120d45 1700static void
118fb205 1701packgram (void)
1ff442ca 1702{
a70083a3
AD
1703 int itemno;
1704 int ruleno;
1705 symbol_list *p;
1ff442ca 1706
adc8c848
AD
1707 /* We use short to index items. */
1708 if (nitems >= MAXSHORT)
1709 fatal (_("too many items (max %d)"), MAXSHORT);
1710
d7913476 1711 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1712 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1713
1714 itemno = 0;
1715 ruleno = 1;
1716
1717 p = grammar;
1718 while (p)
1719 {
b29b2ed5 1720 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1721 rule_table[ruleno].lhs = p->sym->value;
1722 rule_table[ruleno].rhs = itemno;
b29b2ed5 1723 rule_table[ruleno].line = p->line;
68f1e3ed 1724 rule_table[ruleno].useful = TRUE;
3f96f4dc
AD
1725 rule_table[ruleno].action = p->action;
1726 rule_table[ruleno].action_line = p->action_line;
f499b062
AD
1727 rule_table[ruleno].guard = p->guard;
1728 rule_table[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1729
1730 p = p->next;
1731 while (p && p->sym)
1732 {
1733 ritem[itemno++] = p->sym->value;
1734 /* A rule gets by default the precedence and associativity
1735 of the last token in it. */
d7020c20 1736 if (p->sym->class == token_sym)
1ff442ca 1737 {
652a871c
AD
1738 rule_table[ruleno].prec = p->sym->prec;
1739 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1740 }
a70083a3
AD
1741 if (p)
1742 p = p->next;
1ff442ca
NF
1743 }
1744
1745 /* If this rule has a %prec,
a70083a3 1746 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1747 if (ruleprec)
1748 {
652a871c
AD
1749 rule_table[ruleno].prec = ruleprec->prec;
1750 rule_table[ruleno].assoc = ruleprec->assoc;
1751 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1752 }
1753
1754 ritem[itemno++] = -ruleno;
1755 ruleno++;
1756
a70083a3
AD
1757 if (p)
1758 p = p->next;
1ff442ca
NF
1759 }
1760
1761 ritem[itemno] = 0;
3067fbef
AD
1762
1763 if (trace_flag)
1764 ritem_print (stderr);
1ff442ca 1765}
a70083a3
AD
1766\f
1767/*-------------------------------------------------------------------.
1768| Read in the grammar specification and record it in the format |
ea5607fd 1769| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1770| and all actions into ACTION_OBSTACK, in each case forming the body |
1771| of a C function (YYGUARD or YYACTION) which contains a switch |
1772| statement to decide which guard or action to execute. |
a70083a3
AD
1773`-------------------------------------------------------------------*/
1774
1775void
1776reader (void)
1777{
1778 start_flag = 0;
1779 startval = NULL; /* start symbol not specified yet. */
1780
a70083a3
AD
1781 nsyms = 1;
1782 nvars = 0;
1783 nrules = 0;
1784 nitems = 0;
a70083a3
AD
1785
1786 typed = 0;
1787 lastprec = 0;
1788
a70083a3
AD
1789 semantic_parser = 0;
1790 pure_parser = 0;
a70083a3
AD
1791
1792 grammar = NULL;
1793
342b8b6e 1794 lex_init ();
a70083a3
AD
1795 lineno = 1;
1796
11d82f03
MA
1797 /* Initialize the muscle obstack. */
1798 obstack_init (&muscle_obstack);
82e236e2 1799
a70083a3
AD
1800 /* Initialize the symbol table. */
1801 tabinit ();
b6610515 1802
a70083a3
AD
1803 /* Construct the error token */
1804 errtoken = getsym ("error");
d7020c20 1805 errtoken->class = token_sym;
a70083a3 1806 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1807
a70083a3
AD
1808 /* Construct a token that represents all undefined literal tokens.
1809 It is always token number 2. */
1810 undeftoken = getsym ("$undefined.");
d7020c20 1811 undeftoken->class = token_sym;
a70083a3
AD
1812 undeftoken->user_token_number = 2;
1813
331dbc1b
AD
1814 /* Initialize the obstacks. */
1815 obstack_init (&action_obstack);
1816 obstack_init (&attrs_obstack);
331dbc1b
AD
1817 obstack_init (&output_obstack);
1818
1819 finput = xfopen (infile, "r");
1820
896fe5c1
AD
1821 /* Read the declaration section. Copy %{ ... %} groups to
1822 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1823 etc. found there. */
a70083a3 1824 read_declarations ();
a70083a3
AD
1825 /* Read in the grammar, build grammar in list form. Write out
1826 guards and actions. */
1827 readgram ();
ff48177d
MA
1828 /* Some C code is given at the end of the grammar file. */
1829 read_additionnal_code ();
b0c4483e 1830
331dbc1b
AD
1831 lex_free ();
1832 xfclose (finput);
1833
a70083a3
AD
1834 /* Assign the symbols their symbol numbers. Write #defines for the
1835 token symbols into FDEFINES if requested. */
1836 packsymbols ();
93ede233
AD
1837 /* Save them. */
1838 symbols_save ();
1839
a70083a3
AD
1840 /* Convert the grammar into the format described in gram.h. */
1841 packgram ();
a70083a3 1842}