]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/gram.h, src/gram.c (rules_rhs_length): New.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
41 bucket *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
a70083a3 51 bucket *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
4a120d45
JT
55static symbol_list *grammar;
56static int start_flag;
57static bucket *startval;
1ff442ca
NF
58
59/* Nonzero if components of semantic values are used, implying
60 they must be unions. */
61static int value_components_used;
62
d7020c20
AD
63/* Nonzero if %union has been seen. */
64static int typed;
1ff442ca 65
d7020c20
AD
66/* Incremented for each %left, %right or %nonassoc seen */
67static int lastprec;
1ff442ca 68
b7c49edf
AD
69static bucket *errtoken = NULL;
70static bucket *undeftoken = NULL;
71static bucket *eoftoken = NULL;
30171f79 72static bucket *axiom = NULL;
b29b2ed5 73
6255b435 74static symbol_list *
b29b2ed5
AD
75symbol_list_new (bucket *sym)
76{
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
d945f5cd
AD
81 res->action = NULL;
82 res->action_line = 0;
f499b062
AD
83 res->guard = NULL;
84 res->guard_line = 0;
b29b2ed5
AD
85 res->ruleprec = NULL;
86 return res;
87}
88
0d533154 89\f
a70083a3 90
0d533154
AD
91/*===================\
92| Low level lexing. |
93\===================*/
943819bf
RS
94
95static void
118fb205 96skip_to_char (int target)
943819bf
RS
97{
98 int c;
99 if (target == '\n')
a0f6b076 100 complain (_(" Skipping to next \\n"));
943819bf 101 else
a0f6b076 102 complain (_(" Skipping to next %c"), target);
943819bf
RS
103
104 do
0d533154 105 c = skip_white_space ();
943819bf 106 while (c != target && c != EOF);
a083fbbf 107 if (c != EOF)
0d533154 108 ungetc (c, finput);
943819bf
RS
109}
110
111
/* Read a signed decimal integer from STREAM and return its value.

   An optional leading `-' is accepted, followed by any number of
   decimal digits.  The first character that is not part of the number
   is pushed back onto STREAM.  If no digit is present the result is 0.

   A magnitude too large for an `int' saturates at INT_MAX (so the
   result is INT_MAX or -INT_MAX) rather than overflowing, which would
   be undefined behavior for a signed type.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  if (c == '-')
    {
      negative = 1;
      c = getc (stream);
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would `10 * n + digit' exceed INT_MAX?  Test before computing
         it; once saturated the value stays at INT_MAX.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Give back the character that ended the number.  */
  ungetc (c, stream);

  return negative ? -n : n;
}
139\f
79282c5a
AD
140/*--------------------------------------------------------------.
141| Get the data type (alternative in the union) of the value for |
142| symbol N in rule RULE. |
143`--------------------------------------------------------------*/
144
145static char *
b29b2ed5 146get_type_name (int n, symbol_list *rule)
79282c5a
AD
147{
148 int i;
149 symbol_list *rp;
150
151 if (n < 0)
152 {
153 complain (_("invalid $ value"));
154 return NULL;
155 }
156
157 rp = rule;
158 i = 0;
159
160 while (i < n)
161 {
162 rp = rp->next;
163 if (rp == NULL || rp->sym == NULL)
164 {
165 complain (_("invalid $ value"));
166 return NULL;
167 }
168 i++;
169 }
170
171 return rp->sym->type_name;
172}
173\f
337bab46
AD
174/*------------------------------------------------------------.
175| Dump the string from FIN to OOUT if non null. MATCH is the |
176| delimiter of the string (either ' or "). |
177`------------------------------------------------------------*/
ae3c3164
AD
178
179static inline void
b6610515 180copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
181{
182 int c;
183
b6610515
RA
184 if (store)
185 obstack_1grow (oout, match);
8c7ebe49 186
4a120d45 187 c = getc (fin);
ae3c3164
AD
188
189 while (c != match)
190 {
191 if (c == EOF)
192 fatal (_("unterminated string at end of file"));
193 if (c == '\n')
194 {
a0f6b076 195 complain (_("unterminated string"));
4a120d45 196 ungetc (c, fin);
ae3c3164
AD
197 c = match; /* invent terminator */
198 continue;
199 }
200
337bab46 201 obstack_1grow (oout, c);
ae3c3164
AD
202
203 if (c == '\\')
204 {
4a120d45 205 c = getc (fin);
ae3c3164
AD
206 if (c == EOF)
207 fatal (_("unterminated string at end of file"));
337bab46 208 obstack_1grow (oout, c);
8c7ebe49 209
ae3c3164
AD
210 if (c == '\n')
211 lineno++;
212 }
213
a70083a3 214 c = getc (fin);
ae3c3164
AD
215 }
216
b6610515
RA
217 if (store)
218 obstack_1grow (oout, c);
219}
220
/* Copy from FIN to OOUT a string delimited by MATCH, delimiters
   included.  The opening delimiter has already been read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
228
/* Copy from FIN to OOUT the longest run of alphanumeric characters and
   underscores.  The character that ends the run is pushed back onto
   FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c = getc (fin);

  while (c == '_' || isalnum (c))
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
ae3c3164 241
2666f928
AD
242
243/*------------------------------------------------------------------.
244| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
245| `/', which might or might not be a comment. In any case, copy |
246| what we saw. |
247`------------------------------------------------------------------*/
ae3c3164
AD
248
249static inline void
2666f928 250copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
251{
252 int cplus_comment;
a70083a3 253 int ended;
550a72a3
AD
254 int c;
255
256 /* We read a `/', output it. */
2666f928 257 obstack_1grow (oout, '/');
550a72a3
AD
258
259 switch ((c = getc (fin)))
260 {
261 case '/':
262 cplus_comment = 1;
263 break;
264 case '*':
265 cplus_comment = 0;
266 break;
267 default:
268 ungetc (c, fin);
269 return;
270 }
ae3c3164 271
2666f928 272 obstack_1grow (oout, c);
550a72a3 273 c = getc (fin);
ae3c3164
AD
274
275 ended = 0;
276 while (!ended)
277 {
278 if (!cplus_comment && c == '*')
279 {
280 while (c == '*')
281 {
2666f928 282 obstack_1grow (oout, c);
550a72a3 283 c = getc (fin);
ae3c3164
AD
284 }
285
286 if (c == '/')
287 {
2666f928 288 obstack_1grow (oout, c);
ae3c3164
AD
289 ended = 1;
290 }
291 }
292 else if (c == '\n')
293 {
294 lineno++;
2666f928 295 obstack_1grow (oout, c);
ae3c3164
AD
296 if (cplus_comment)
297 ended = 1;
298 else
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 else if (c == EOF)
302 fatal (_("unterminated comment"));
303 else
304 {
2666f928 305 obstack_1grow (oout, c);
550a72a3 306 c = getc (fin);
ae3c3164
AD
307 }
308 }
309}
310
311
a70083a3 312/*-----------------------------------------------------------------.
337bab46 313| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
314| reference to this location. STACK_OFFSET is the number of values |
315| in the current rule so far, which says where to find `$0' with |
316| respect to the top of the stack. |
317`-----------------------------------------------------------------*/
1ff442ca 318
a70083a3 319static inline void
337bab46 320copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 321{
a70083a3 322 int c;
1ff442ca 323
a70083a3
AD
324 c = getc (fin);
325 if (c == '$')
1ff442ca 326 {
ff4423cc 327 obstack_sgrow (oout, "yyloc");
89cab50d 328 locations_flag = 1;
a70083a3
AD
329 }
330 else if (isdigit (c) || c == '-')
331 {
332 int n;
1ff442ca 333
a70083a3
AD
334 ungetc (c, fin);
335 n = read_signed_integer (fin);
11e2beca
AD
336 if (n > stack_offset)
337 complain (_("invalid value: %s%d"), "@", n);
338 else
339 {
340 /* Offset is always 0 if parser has already popped the stack
341 pointer. */
342 obstack_fgrow1 (oout, "yylsp[%d]",
343 n - (semantic_parser ? 0 : stack_offset));
344 locations_flag = 1;
345 }
1ff442ca 346 }
a70083a3 347 else
ff4a34be
AD
348 {
349 char buf[] = "@c";
350 buf[1] = c;
351 complain (_("%s is invalid"), quote (buf));
352 }
1ff442ca 353}
79282c5a
AD
354
355
356/*-------------------------------------------------------------------.
357| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
358| |
359| Possible inputs: $[<TYPENAME>]($|integer) |
360| |
337bab46 361| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
362| the number of values in the current rule so far, which says where |
363| to find `$0' with respect to the top of the stack. |
364`-------------------------------------------------------------------*/
365
366static inline void
337bab46 367copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
368 symbol_list *rule, int stack_offset)
369{
370 int c = getc (fin);
b0ce6046 371 const char *type_name = NULL;
79282c5a 372
f282676b 373 /* Get the type name if explicit. */
79282c5a
AD
374 if (c == '<')
375 {
f282676b 376 read_type_name (fin);
79282c5a
AD
377 type_name = token_buffer;
378 value_components_used = 1;
79282c5a
AD
379 c = getc (fin);
380 }
381
382 if (c == '$')
383 {
ff4423cc 384 obstack_sgrow (oout, "yyval");
8c7ebe49 385
79282c5a
AD
386 if (!type_name)
387 type_name = get_type_name (0, rule);
388 if (type_name)
337bab46 389 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
390 if (!type_name && typed)
391 complain (_("$$ of `%s' has no declared type"),
392 rule->sym->tag);
393 }
394 else if (isdigit (c) || c == '-')
395 {
396 int n;
397 ungetc (c, fin);
398 n = read_signed_integer (fin);
399
11e2beca
AD
400 if (n > stack_offset)
401 complain (_("invalid value: %s%d"), "$", n);
402 else
403 {
404 if (!type_name && n > 0)
405 type_name = get_type_name (n, rule);
406
407 /* Offset is always 0 if parser has already popped the stack
408 pointer. */
409 obstack_fgrow1 (oout, "yyvsp[%d]",
410 n - (semantic_parser ? 0 : stack_offset));
411
412 if (type_name)
413 obstack_fgrow1 (oout, ".%s", type_name);
414 if (!type_name && typed)
415 complain (_("$%d of `%s' has no declared type"),
416 n, rule->sym->tag);
417 }
79282c5a
AD
418 }
419 else
420 {
421 char buf[] = "$c";
422 buf[1] = c;
423 complain (_("%s is invalid"), quote (buf));
424 }
425}
a70083a3
AD
426\f
427/*-------------------------------------------------------------------.
428| Copy the contents of a `%{ ... %}' into the definitions file. The |
429| `%{' has already been read. Return after reading the `%}'. |
430`-------------------------------------------------------------------*/
1ff442ca 431
4a120d45 432static void
118fb205 433copy_definition (void)
1ff442ca 434{
a70083a3 435 int c;
ae3c3164 436 /* -1 while reading a character if prev char was %. */
a70083a3 437 int after_percent;
1ff442ca 438
89cab50d 439 if (!no_lines_flag)
25b222fa
MA
440 {
441 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 442 lineno, quotearg_style (c_quoting_style,
b7c49edf 443 muscle_find ("filename")));
25b222fa 444 }
1ff442ca
NF
445
446 after_percent = 0;
447
ae3c3164 448 c = getc (finput);
1ff442ca
NF
449
450 for (;;)
451 {
452 switch (c)
453 {
454 case '\n':
dd60faec 455 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
456 lineno++;
457 break;
458
459 case '%':
a70083a3 460 after_percent = -1;
1ff442ca 461 break;
a083fbbf 462
1ff442ca
NF
463 case '\'':
464 case '"':
337bab46 465 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
466 break;
467
468 case '/':
337bab46 469 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
470 break;
471
472 case EOF:
a70083a3 473 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
474
475 default:
dd60faec 476 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
477 }
478
a70083a3 479 c = getc (finput);
1ff442ca
NF
480
481 if (after_percent)
482 {
483 if (c == '}')
484 return;
dd60faec 485 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
486 }
487 after_percent = 0;
1ff442ca 488 }
1ff442ca
NF
489}
490
491
d7020c20
AD
492/*-------------------------------------------------------------------.
493| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
494| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
495| are reversed. |
496`-------------------------------------------------------------------*/
1ff442ca 497
4a120d45 498static void
d7020c20 499parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 500{
342b8b6e
AD
501 token_t token = tok_undef;
502 char *typename = NULL;
1ff442ca 503
1e9798d5
AD
504 /* The symbol being defined. */
505 struct bucket *symbol = NULL;
506
507 /* After `%token' and `%nterm', any number of symbols maybe be
508 defined. */
1ff442ca
NF
509 for (;;)
510 {
e6011337
JT
511 int tmp_char = ungetc (skip_white_space (), finput);
512
1e9798d5
AD
513 /* `%' (for instance from `%token', or from `%%' etc.) is the
514 only valid means to end this declaration. */
e6011337 515 if (tmp_char == '%')
1ff442ca 516 return;
e6011337 517 if (tmp_char == EOF)
a0f6b076 518 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 519
a70083a3 520 token = lex ();
511e79b3 521 if (token == tok_comma)
943819bf
RS
522 {
523 symbol = NULL;
524 continue;
525 }
511e79b3 526 if (token == tok_typename)
1ff442ca 527 {
95e36146 528 typename = xstrdup (token_buffer);
1ff442ca 529 value_components_used = 1;
943819bf
RS
530 symbol = NULL;
531 }
511e79b3 532 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 533 {
8e03724b
AD
534 if (symval->alias)
535 warn (_("symbol `%s' used more than once as a literal string"),
536 symval->tag);
537 else if (symbol->alias)
538 warn (_("symbol `%s' given more than one literal string"),
539 symbol->tag);
540 else
541 {
542 symval->class = token_sym;
543 symval->type_name = typename;
544 symval->user_token_number = symbol->user_token_number;
545 symbol->user_token_number = SALIAS;
546 symval->alias = symbol;
547 symbol->alias = symval;
548 /* symbol and symval combined are only one symbol */
549 nsyms--;
550 }
8e03724b 551 symbol = NULL;
1ff442ca 552 }
511e79b3 553 else if (token == tok_identifier)
1ff442ca
NF
554 {
555 int oldclass = symval->class;
943819bf 556 symbol = symval;
1ff442ca 557
943819bf 558 if (symbol->class == what_is_not)
a0f6b076 559 complain (_("symbol %s redefined"), symbol->tag);
943819bf 560 symbol->class = what_is;
d7020c20 561 if (what_is == nterm_sym && oldclass != nterm_sym)
d9b739c3 562 symbol->number = nvars++;
1ff442ca
NF
563
564 if (typename)
565 {
943819bf
RS
566 if (symbol->type_name == NULL)
567 symbol->type_name = typename;
a70083a3 568 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 569 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
570 }
571 }
511e79b3 572 else if (symbol && token == tok_number)
a70083a3 573 {
943819bf 574 symbol->user_token_number = numval;
b7c49edf
AD
575 /* User defined EOF token? */
576 if (numval == 0)
577 eoftoken = symbol;
a70083a3 578 }
1ff442ca 579 else
943819bf 580 {
a0f6b076 581 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
582 token_buffer,
583 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 584 skip_to_char ('%');
943819bf 585 }
1ff442ca
NF
586 }
587
588}
589
1ff442ca 590
d7020c20
AD
591/*------------------------------.
592| Parse what comes after %start |
593`------------------------------*/
1ff442ca 594
4a120d45 595static void
118fb205 596parse_start_decl (void)
1ff442ca
NF
597{
598 if (start_flag)
27821bff 599 complain (_("multiple %s declarations"), "%start");
511e79b3 600 if (lex () != tok_identifier)
27821bff 601 complain (_("invalid %s declaration"), "%start");
943819bf
RS
602 else
603 {
604 start_flag = 1;
605 startval = symval;
606 }
1ff442ca
NF
607}
608
a70083a3
AD
609/*-----------------------------------------------------------.
610| read in a %type declaration and record its information for |
611| get_type_name to access |
612`-----------------------------------------------------------*/
613
614static void
615parse_type_decl (void)
616{
a70083a3
AD
617 char *name;
618
511e79b3 619 if (lex () != tok_typename)
a70083a3
AD
620 {
621 complain ("%s", _("%type declaration has no <typename>"));
622 skip_to_char ('%');
623 return;
624 }
625
95e36146 626 name = xstrdup (token_buffer);
a70083a3
AD
627
628 for (;;)
629 {
f17bcd1f 630 token_t t;
a70083a3
AD
631 int tmp_char = ungetc (skip_white_space (), finput);
632
633 if (tmp_char == '%')
634 return;
635 if (tmp_char == EOF)
636 fatal (_("Premature EOF after %s"), token_buffer);
637
638 t = lex ();
639
640 switch (t)
1ff442ca
NF
641 {
642
511e79b3
AD
643 case tok_comma:
644 case tok_semicolon:
1ff442ca
NF
645 break;
646
511e79b3 647 case tok_identifier:
1ff442ca
NF
648 if (symval->type_name == NULL)
649 symval->type_name = name;
a70083a3 650 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 651 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
652
653 break;
654
655 default:
a0f6b076
AD
656 complain (_("invalid %%type declaration due to item: %s"),
657 token_buffer);
a70083a3 658 skip_to_char ('%');
1ff442ca
NF
659 }
660 }
661}
662
663
664
d7020c20
AD
665/*----------------------------------------------------------------.
666| Read in a %left, %right or %nonassoc declaration and record its |
667| information. |
668`----------------------------------------------------------------*/
1ff442ca 669
4a120d45 670static void
d7020c20 671parse_assoc_decl (associativity assoc)
1ff442ca 672{
a70083a3
AD
673 char *name = NULL;
674 int prev = 0;
1ff442ca 675
a70083a3 676 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 677
1ff442ca
NF
678 for (;;)
679 {
f17bcd1f 680 token_t t;
e6011337 681 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 682
e6011337 683 if (tmp_char == '%')
1ff442ca 684 return;
e6011337 685 if (tmp_char == EOF)
a0f6b076 686 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 687
a70083a3 688 t = lex ();
1ff442ca
NF
689
690 switch (t)
691 {
511e79b3 692 case tok_typename:
95e36146 693 name = xstrdup (token_buffer);
1ff442ca
NF
694 break;
695
511e79b3 696 case tok_comma:
1ff442ca
NF
697 break;
698
511e79b3 699 case tok_identifier:
1ff442ca 700 if (symval->prec != 0)
a0f6b076 701 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
702 symval->prec = lastprec;
703 symval->assoc = assoc;
d7020c20 704 if (symval->class == nterm_sym)
a0f6b076 705 complain (_("symbol %s redefined"), symval->tag);
d7020c20 706 symval->class = token_sym;
1ff442ca 707 if (name)
a70083a3 708 { /* record the type, if one is specified */
1ff442ca
NF
709 if (symval->type_name == NULL)
710 symval->type_name = name;
a70083a3 711 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 712 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
713 }
714 break;
715
511e79b3
AD
716 case tok_number:
717 if (prev == tok_identifier)
a70083a3 718 {
1ff442ca 719 symval->user_token_number = numval;
a70083a3
AD
720 }
721 else
722 {
723 complain (_
724 ("invalid text (%s) - number should be after identifier"),
725token_buffer);
726 skip_to_char ('%');
727 }
1ff442ca
NF
728 break;
729
511e79b3 730 case tok_semicolon:
1ff442ca
NF
731 return;
732
733 default:
a0f6b076 734 complain (_("unexpected item: %s"), token_buffer);
a70083a3 735 skip_to_char ('%');
1ff442ca
NF
736 }
737
738 prev = t;
1ff442ca
NF
739 }
740}
741
742
743
dd60faec 744/*--------------------------------------------------------------.
180d45ba
PB
745| Copy the union declaration into the stype muscle |
746| (and fdefines), where it is made into the definition of |
747| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 748`--------------------------------------------------------------*/
1ff442ca 749
4a120d45 750static void
118fb205 751parse_union_decl (void)
1ff442ca 752{
a70083a3
AD
753 int c;
754 int count = 0;
428046f8 755 bool done = FALSE;
180d45ba 756 struct obstack union_obstack;
1ff442ca 757 if (typed)
27821bff 758 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
759
760 typed = 1;
761
642cb8f8 762 MUSCLE_INSERT_INT ("stype_line", lineno);
180d45ba
PB
763 obstack_init (&union_obstack);
764 obstack_sgrow (&union_obstack, "union");
1ff442ca 765
428046f8 766 while (!done)
1ff442ca 767 {
428046f8
AD
768 c = xgetc (finput);
769
342b8b6e
AD
770 /* If C contains '/', it is output by copy_comment (). */
771 if (c != '/')
2666f928 772 obstack_1grow (&union_obstack, c);
1ff442ca
NF
773
774 switch (c)
775 {
776 case '\n':
777 lineno++;
778 break;
779
780 case '/':
2666f928 781 copy_comment (finput, &union_obstack);
1ff442ca
NF
782 break;
783
1ff442ca
NF
784 case '{':
785 count++;
786 break;
787
788 case '}':
428046f8 789 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 790 if (count == 0)
27821bff 791 complain (_("unmatched %s"), "`}'");
1ff442ca 792 count--;
428046f8
AD
793 if (!count)
794 done = TRUE;
795 break;
1ff442ca 796 }
1ff442ca 797 }
180d45ba 798
428046f8
AD
799 /* JF don't choke on trailing semi */
800 c = skip_white_space ();
801 if (c != ';')
802 ungetc (c, finput);
803 obstack_1grow (&union_obstack, 0);
804 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
805}
806
d7020c20
AD
807
808/*-------------------------------------------------------.
809| Parse the declaration %expect N which says to expect N |
810| shift-reduce conflicts. |
811`-------------------------------------------------------*/
1ff442ca 812
4a120d45 813static void
118fb205 814parse_expect_decl (void)
1ff442ca 815{
131e2fef 816 int c = skip_white_space ();
1ff442ca
NF
817 ungetc (c, finput);
818
131e2fef 819 if (!isdigit (c))
79282c5a 820 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
821 else
822 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
823}
824
a70083a3
AD
825
826/*-------------------------------------------------------------------.
827| Parse what comes after %thong. the full syntax is |
828| |
829| %thong <type> token number literal |
830| |
831| the <type> or number may be omitted. The number specifies the |
832| user_token_number. |
833| |
834| Two symbols are entered in the table, one for the token symbol and |
835| one for the literal. Both are given the <type>, if any, from the |
836| declaration. The ->user_token_number of the first is SALIAS and |
837| the ->user_token_number of the second is set to the number, if |
838| any, from the declaration. The two symbols are linked via |
839| pointers in their ->alias fields. |
840| |
841| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
842| only the literal string is retained it is the literal string that |
843| is output to yytname |
844`-------------------------------------------------------------------*/
845
846static void
847parse_thong_decl (void)
7b306f52 848{
f17bcd1f 849 token_t token;
a70083a3
AD
850 struct bucket *symbol;
851 char *typename = 0;
6b7e85b9 852 int usrtoknum = SUNDEF;
7b306f52 853
a70083a3 854 token = lex (); /* fetch typename or first token */
511e79b3 855 if (token == tok_typename)
7b306f52 856 {
95e36146 857 typename = xstrdup (token_buffer);
a70083a3
AD
858 value_components_used = 1;
859 token = lex (); /* fetch first token */
7b306f52 860 }
7b306f52 861
a70083a3 862 /* process first token */
7b306f52 863
511e79b3 864 if (token != tok_identifier)
a70083a3
AD
865 {
866 complain (_("unrecognized item %s, expected an identifier"),
867 token_buffer);
868 skip_to_char ('%');
869 return;
7b306f52 870 }
d7020c20 871 symval->class = token_sym;
a70083a3
AD
872 symval->type_name = typename;
873 symval->user_token_number = SALIAS;
874 symbol = symval;
7b306f52 875
a70083a3 876 token = lex (); /* get number or literal string */
1ff442ca 877
511e79b3 878 if (token == tok_number)
943819bf 879 {
a70083a3
AD
880 usrtoknum = numval;
881 token = lex (); /* okay, did number, now get literal */
943819bf 882 }
1ff442ca 883
a70083a3 884 /* process literal string token */
1ff442ca 885
511e79b3 886 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 887 {
a70083a3
AD
888 complain (_("expected string constant instead of %s"), token_buffer);
889 skip_to_char ('%');
890 return;
1ff442ca 891 }
d7020c20 892 symval->class = token_sym;
a70083a3
AD
893 symval->type_name = typename;
894 symval->user_token_number = usrtoknum;
1ff442ca 895
a70083a3
AD
896 symval->alias = symbol;
897 symbol->alias = symval;
1ff442ca 898
79282c5a
AD
899 /* symbol and symval combined are only one symbol. */
900 nsyms--;
a70083a3 901}
3cef001a 902
11e2beca 903
b6610515 904static void
11d82f03 905parse_muscle_decl (void)
b6610515
RA
906{
907 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
908 char *muscle_key;
909 char *muscle_value;
b6610515
RA
910
911 /* Read key. */
912 if (!isalpha (ch) && ch != '_')
913 {
914 complain (_("invalid %s declaration"), "%define");
915 skip_to_char ('%');
916 return;
917 }
11d82f03
MA
918 copy_identifier (finput, &muscle_obstack);
919 obstack_1grow (&muscle_obstack, 0);
920 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 921
b6610515
RA
922 /* Read value. */
923 ch = skip_white_space ();
924 if (ch != '"')
925 {
926 ungetc (ch, finput);
927 if (ch != EOF)
928 {
929 complain (_("invalid %s declaration"), "%define");
930 skip_to_char ('%');
931 return;
932 }
933 else
934 fatal (_("Premature EOF after %s"), "\"");
935 }
11d82f03
MA
936 copy_string2 (finput, &muscle_obstack, '"', 0);
937 obstack_1grow (&muscle_obstack, 0);
938 muscle_value = obstack_finish (&muscle_obstack);
b6610515 939
b6610515 940 /* Store the (key, value) pair in the environment. */
11d82f03 941 muscle_insert (muscle_key, muscle_value);
b6610515
RA
942}
943
2ba3b73c 944
426cf563
MA
945
946/*---------------------------------.
a870c567 947| Parse a double quoted parameter. |
426cf563
MA
948`---------------------------------*/
949
950static const char *
951parse_dquoted_param (const char *from)
952{
953 struct obstack param_obstack;
954 const char *param = NULL;
955 int c;
956
957 obstack_init (&param_obstack);
958 c = skip_white_space ();
959
960 if (c != '"')
961 {
962 complain (_("invalid %s declaration"), from);
963 ungetc (c, finput);
964 skip_to_char ('%');
965 return NULL;
966 }
967
2648a72d
AD
968 while ((c = literalchar ()) != '"')
969 obstack_1grow (&param_obstack, c);
a870c567 970
426cf563
MA
971 obstack_1grow (&param_obstack, '\0');
972 param = obstack_finish (&param_obstack);
973
974 if (c != '"' || strlen (param) == 0)
975 {
976 complain (_("invalid %s declaration"), from);
977 if (c != '"')
978 ungetc (c, finput);
979 skip_to_char ('%');
980 return NULL;
981 }
982
983 return param;
984}
985
2ba3b73c
MA
986/*----------------------------------.
987| Parse what comes after %skeleton. |
988`----------------------------------*/
989
a870c567 990static void
2ba3b73c
MA
991parse_skel_decl (void)
992{
426cf563 993 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
994}
995
a70083a3
AD
996/*----------------------------------------------------------------.
997| Read from finput until `%%' is seen. Discard the `%%'. Handle |
998| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 999| groups to ATTRS_OBSTACK. |
a70083a3 1000`----------------------------------------------------------------*/
1ff442ca 1001
4a120d45 1002static void
a70083a3 1003read_declarations (void)
1ff442ca 1004{
a70083a3 1005 for (;;)
1ff442ca 1006 {
951366c1 1007 int c = skip_white_space ();
1ff442ca 1008
a70083a3
AD
1009 if (c == '%')
1010 {
951366c1 1011 token_t tok = parse_percent_token ();
1ff442ca 1012
a70083a3 1013 switch (tok)
943819bf 1014 {
511e79b3 1015 case tok_two_percents:
a70083a3 1016 return;
1ff442ca 1017
511e79b3 1018 case tok_percent_left_curly:
a70083a3
AD
1019 copy_definition ();
1020 break;
1ff442ca 1021
511e79b3 1022 case tok_token:
d7020c20 1023 parse_token_decl (token_sym, nterm_sym);
a70083a3 1024 break;
1ff442ca 1025
511e79b3 1026 case tok_nterm:
d7020c20 1027 parse_token_decl (nterm_sym, token_sym);
a70083a3 1028 break;
1ff442ca 1029
511e79b3 1030 case tok_type:
a70083a3
AD
1031 parse_type_decl ();
1032 break;
1ff442ca 1033
511e79b3 1034 case tok_start:
a70083a3
AD
1035 parse_start_decl ();
1036 break;
118fb205 1037
511e79b3 1038 case tok_union:
a70083a3
AD
1039 parse_union_decl ();
1040 break;
1ff442ca 1041
511e79b3 1042 case tok_expect:
a70083a3
AD
1043 parse_expect_decl ();
1044 break;
6deb4447 1045
511e79b3 1046 case tok_thong:
a70083a3
AD
1047 parse_thong_decl ();
1048 break;
d7020c20 1049
511e79b3 1050 case tok_left:
d7020c20 1051 parse_assoc_decl (left_assoc);
a70083a3 1052 break;
1ff442ca 1053
511e79b3 1054 case tok_right:
d7020c20 1055 parse_assoc_decl (right_assoc);
a70083a3 1056 break;
1ff442ca 1057
511e79b3 1058 case tok_nonassoc:
d7020c20 1059 parse_assoc_decl (non_assoc);
a70083a3 1060 break;
1ff442ca 1061
b6610515 1062 case tok_define:
11d82f03 1063 parse_muscle_decl ();
b6610515 1064 break;
342b8b6e 1065
2ba3b73c
MA
1066 case tok_skel:
1067 parse_skel_decl ();
1068 break;
b6610515 1069
511e79b3 1070 case tok_noop:
a70083a3 1071 break;
1ff442ca 1072
951366c1
AD
1073 case tok_stropt:
1074 case tok_intopt:
1075 case tok_obsolete:
951366c1
AD
1076 abort ();
1077 break;
1078
e0c40012 1079 case tok_illegal:
a70083a3
AD
1080 default:
1081 complain (_("unrecognized: %s"), token_buffer);
1082 skip_to_char ('%');
1083 }
1084 }
1085 else if (c == EOF)
1086 fatal (_("no input grammar"));
1087 else
1088 {
ff4a34be
AD
1089 char buf[] = "c";
1090 buf[0] = c;
1091 complain (_("unknown character: %s"), quote (buf));
a70083a3 1092 skip_to_char ('%');
1ff442ca 1093 }
1ff442ca 1094 }
1ff442ca 1095}
a70083a3
AD
1096\f
1097/*-------------------------------------------------------------------.
1098| Assuming that a `{' has just been seen, copy everything up to the |
1099| matching `}' into the actions file. STACK_OFFSET is the number of |
1100| values in the current rule so far, which says where to find `$0' |
1101| with respect to the top of the stack. |
14d293ac 1102| |
11e2beca
AD
1103| This routine is used both for actions and guards. Only |
1104| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1105| pointers to relevant portions inside this obstack. |
a70083a3 1106`-------------------------------------------------------------------*/
1ff442ca 1107
4a120d45 1108static void
14d293ac 1109parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1110{
a70083a3 1111 int c;
a70083a3 1112 int count;
1ff442ca 1113
1ff442ca 1114 count = 1;
1ff442ca
NF
1115 while (count > 0)
1116 {
14d293ac
AD
1117 while ((c = getc (finput)) != '}')
1118 switch (c)
1119 {
1120 case '\n':
1121 obstack_1grow (&action_obstack, c);
1122 lineno++;
1123 break;
1ff442ca 1124
14d293ac
AD
1125 case '{':
1126 obstack_1grow (&action_obstack, c);
1127 count++;
1128 break;
1ff442ca 1129
14d293ac
AD
1130 case '\'':
1131 case '"':
1132 copy_string (finput, &action_obstack, c);
1133 break;
1ff442ca 1134
14d293ac
AD
1135 case '/':
1136 copy_comment (finput, &action_obstack);
1137 break;
1ff442ca 1138
14d293ac
AD
1139 case '$':
1140 copy_dollar (finput, &action_obstack,
1141 rule, stack_offset);
1142 break;
1ff442ca 1143
14d293ac
AD
1144 case '@':
1145 copy_at (finput, &action_obstack,
1146 stack_offset);
1147 break;
a70083a3 1148
14d293ac
AD
1149 case EOF:
1150 fatal (_("unmatched %s"), "`{'");
a70083a3 1151
14d293ac
AD
1152 default:
1153 obstack_1grow (&action_obstack, c);
1154 }
a70083a3 1155
14d293ac 1156 /* Above loop exits when C is '}'. */
a70083a3 1157 if (--count)
2b25d624 1158 obstack_1grow (&action_obstack, c);
a70083a3
AD
1159 }
1160
3f96f4dc 1161 obstack_1grow (&action_obstack, '\0');
a70083a3 1162}
14d293ac 1163
a70083a3
AD
1164
1165static void
14d293ac 1166parse_action (symbol_list *rule, int stack_offset)
a70083a3 1167{
14d293ac
AD
1168 rule->action_line = lineno;
1169 parse_braces (rule, stack_offset);
1170 rule->action = obstack_finish (&action_obstack);
1171}
a70083a3 1172
a70083a3 1173
14d293ac
AD
1174static void
1175parse_guard (symbol_list *rule, int stack_offset)
1176{
1177 token_t t = lex ();
1178 if (t != tok_left_curly)
1179 complain (_("invalid %s declaration"), "%guard");
f499b062 1180 rule->guard_line = lineno;
14d293ac
AD
1181 parse_braces (rule, stack_offset);
1182 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1183}
14d293ac 1184
a70083a3
AD
1185\f
1186
a70083a3
AD
1187/*-------------------------------------------------------------------.
1188| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1189| with the user's names. |
1190`-------------------------------------------------------------------*/
1ff442ca 1191
4a120d45 1192static bucket *
118fb205 1193gensym (void)
1ff442ca 1194{
274d42ce
AD
1195 /* Incremented for each generated symbol */
1196 static int gensym_count = 0;
1197 static char buf[256];
1198
a70083a3 1199 bucket *sym;
1ff442ca 1200
274d42ce
AD
1201 sprintf (buf, "@%d", ++gensym_count);
1202 token_buffer = buf;
a70083a3 1203 sym = getsym (token_buffer);
d7020c20 1204 sym->class = nterm_sym;
d9b739c3 1205 sym->number = nvars++;
36281465 1206 return sym;
1ff442ca 1207}
a70083a3 1208\f
107f7dfb
AD
1209/*-------------------------------------------------------------------.
1210| Parse the input grammar into a one symbol_list structure. Each |
1211| rule is represented by a sequence of symbols: the left hand side |
1212| followed by the contents of the right hand side, followed by a |
1213| null pointer instead of a symbol to terminate the rule. The next |
1214| symbol is the lhs of the following rule. |
1215| |
1216| All guards and actions are copied out to the appropriate files, |
1217| labelled by the rule number they apply to. |
1218| |
1219| Bison used to allow some %directives in the rules sections, but |
1220| this is no longer consider appropriate: (i) the documented grammar |
1221| doesn't claim it, (ii), it would promote bad style, (iii), error |
1222| recovery for %directives consists in skipping the junk until a `%' |
1223| is seen and helrp synchronizing. This scheme is definitely wrong |
1224| in the rules section. |
1225`-------------------------------------------------------------------*/
1ff442ca 1226
4a120d45 1227static void
118fb205 1228readgram (void)
1ff442ca 1229{
f17bcd1f 1230 token_t t;
a70083a3 1231 bucket *lhs = NULL;
107f7dfb
AD
1232 symbol_list *p = NULL;
1233 symbol_list *p1 = NULL;
a70083a3 1234 bucket *bp;
1ff442ca 1235
ff4a34be
AD
1236 /* Points to first symbol_list of current rule. its symbol is the
1237 lhs of the rule. */
107f7dfb 1238 symbol_list *crule = NULL;
ff4a34be 1239 /* Points to the symbol_list preceding crule. */
107f7dfb 1240 symbol_list *crule1 = NULL;
1ff442ca 1241
a70083a3 1242 t = lex ();
1ff442ca 1243
511e79b3 1244 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1245 if (t == tok_identifier || t == tok_bar)
1246 {
1247 int action_flag = 0;
1248 /* Number of symbols in rhs of this rule so far */
1249 int rulelength = 0;
1250 int xactions = 0; /* JF for error checking */
1251 bucket *first_rhs = 0;
1252
1253 if (t == tok_identifier)
1254 {
1255 lhs = symval;
1256
1257 if (!start_flag)
1258 {
1259 startval = lhs;
1260 start_flag = 1;
1261 }
1ff442ca 1262
107f7dfb
AD
1263 t = lex ();
1264 if (t != tok_colon)
1265 {
1266 complain (_("ill-formed rule: initial symbol not followed by colon"));
1267 unlex (t);
1268 }
1269 }
1270
1271 if (nrules == 0 && t == tok_bar)
1272 {
1273 complain (_("grammar starts with vertical bar"));
1274 lhs = symval; /* BOGUS: use a random symval */
1275 }
1276 /* start a new rule and record its lhs. */
1277
1278 nrules++;
1279 nitems++;
1280
1281 p = symbol_list_new (lhs);
1282
1283 crule1 = p1;
1284 if (p1)
1285 p1->next = p;
1286 else
1287 grammar = p;
1ff442ca 1288
107f7dfb
AD
1289 p1 = p;
1290 crule = p;
1ff442ca 1291
107f7dfb 1292 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1293
107f7dfb
AD
1294 if (lhs->class == unknown_sym)
1295 {
1296 lhs->class = nterm_sym;
d9b739c3 1297 lhs->number = nvars;
107f7dfb
AD
1298 nvars++;
1299 }
1300 else if (lhs->class == token_sym)
1301 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1302
107f7dfb 1303 /* read the rhs of the rule. */
1ff442ca 1304
107f7dfb
AD
1305 for (;;)
1306 {
1307 t = lex ();
1308 if (t == tok_prec)
1309 {
1310 t = lex ();
1311 crule->ruleprec = symval;
1312 t = lex ();
1313 }
1314
1315 if (!(t == tok_identifier || t == tok_left_curly))
1316 break;
1ff442ca 1317
107f7dfb
AD
1318 /* If next token is an identifier, see if a colon follows it.
1319 If one does, exit this rule now. */
1320 if (t == tok_identifier)
1321 {
1322 bucket *ssave;
1323 token_t t1;
1324
1325 ssave = symval;
1326 t1 = lex ();
1327 unlex (t1);
1328 symval = ssave;
1329 if (t1 == tok_colon)
e5352bc7 1330 {
fff9bf0b 1331 warn (_("previous rule lacks an ending `;'"));
e5352bc7
AD
1332 break;
1333 }
107f7dfb
AD
1334
1335 if (!first_rhs) /* JF */
1336 first_rhs = symval;
1337 /* Not followed by colon =>
1338 process as part of this rule's rhs. */
1339 }
1340
1341 /* If we just passed an action, that action was in the middle
1342 of a rule, so make a dummy rule to reduce it to a
1343 non-terminal. */
1344 if (action_flag)
1345 {
1346 /* Since the action was written out with this rule's
1347 number, we must give the new rule this number by
1348 inserting the new rule before it. */
1349
1350 /* Make a dummy nonterminal, a gensym. */
1351 bucket *sdummy = gensym ();
1352
1353 /* Make a new rule, whose body is empty, before the
1354 current one, so that the action just read can
1355 belong to it. */
1356 nrules++;
1357 nitems++;
1358 p = symbol_list_new (sdummy);
1359 /* Attach its lineno to that of the host rule. */
1360 p->line = crule->line;
82c035a8
AD
1361 /* Move the action from the host rule to this one. */
1362 p->action = crule->action;
1363 p->action_line = crule->action_line;
1364 crule->action = NULL;
1365
107f7dfb
AD
1366 if (crule1)
1367 crule1->next = p;
1368 else
1369 grammar = p;
1370 /* End of the rule. */
1371 crule1 = symbol_list_new (NULL);
1372 crule1->next = crule;
1373
1374 p->next = crule1;
1375
1376 /* Insert the dummy generated by that rule into this
1377 rule. */
1378 nitems++;
1379 p = symbol_list_new (sdummy);
1380 p1->next = p;
1381 p1 = p;
1382
1383 action_flag = 0;
1384 }
1385
1386 if (t == tok_identifier)
1387 {
1388 nitems++;
1389 p = symbol_list_new (symval);
1390 p1->next = p;
1391 p1 = p;
1392 }
1393 else /* handle an action. */
1394 {
14d293ac 1395 parse_action (crule, rulelength);
107f7dfb
AD
1396 action_flag = 1;
1397 xactions++; /* JF */
1398 }
1399 rulelength++;
1400 } /* end of read rhs of rule */
1401
1402 /* Put an empty link in the list to mark the end of this rule */
1403 p = symbol_list_new (NULL);
1404 p1->next = p;
1405 p1 = p;
1406
1407 if (t == tok_prec)
1408 {
1409 complain (_("two @prec's in a row"));
1410 t = lex ();
1411 crule->ruleprec = symval;
1412 t = lex ();
1413 }
f499b062 1414
107f7dfb
AD
1415 if (t == tok_guard)
1416 {
1417 if (!semantic_parser)
1418 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1419
14d293ac 1420 parse_guard (crule, rulelength);
a70083a3 1421 t = lex ();
107f7dfb 1422 }
f499b062
AD
1423
1424 if (t == tok_left_curly)
107f7dfb
AD
1425 {
1426 /* This case never occurs -wjh */
1427 if (action_flag)
1428 complain (_("two actions at end of one rule"));
14d293ac 1429 parse_action (crule, rulelength);
107f7dfb
AD
1430 action_flag = 1;
1431 xactions++; /* -wjh */
1432 t = lex ();
1433 }
1434 /* If $$ is being set in default way, report if any type
1435 mismatch. */
1436 else if (!xactions
1437 && first_rhs && lhs->type_name != first_rhs->type_name)
1438 {
1439 if (lhs->type_name == 0
1440 || first_rhs->type_name == 0
1441 || strcmp (lhs->type_name, first_rhs->type_name))
1442 complain (_("type clash (`%s' `%s') on default action"),
1443 lhs->type_name ? lhs->type_name : "",
1444 first_rhs->type_name ? first_rhs->type_name : "");
1445 }
1446 /* Warn if there is no default for $$ but we need one. */
1447 else if (!xactions && !first_rhs && lhs->type_name != 0)
1448 complain (_("empty rule for typed nonterminal, and no action"));
bfcf1f3a 1449 if (t == tok_two_percents || t == tok_eof)
fff9bf0b 1450 warn (_("previous rule lacks an ending `;'"));
107f7dfb 1451 if (t == tok_semicolon)
a70083a3 1452 t = lex ();
107f7dfb
AD
1453 }
1454 else
1455 {
1456 complain (_("invalid input: %s"), quote (token_buffer));
1457 t = lex ();
1458 }
943819bf 1459
b68e7744
AD
1460 /* grammar has been read. Do some checking */
1461
1462 if (nrules == 0)
1463 fatal (_("no rules in the input grammar"));
1464
1465 /* Report any undefined symbols and consider them nonterminals. */
1466
1467 for (bp = firstsymbol; bp; bp = bp->next)
1468 if (bp->class == unknown_sym)
1469 {
1470 complain (_
1471 ("symbol %s is used, but is not defined as a token and has no rules"),
1472 bp->tag);
1473 bp->class = nterm_sym;
d9b739c3 1474 bp->number = nvars++;
b68e7744
AD
1475 }
1476
ff442794
AD
1477 /* Insert the initial rule, which line is that of the first rule
1478 (not that of the start symbol):
30171f79
AD
1479
1480 axiom: %start EOF. */
1481 p = symbol_list_new (axiom);
ff442794 1482 p->line = grammar->line;
30171f79
AD
1483 p->next = symbol_list_new (startval);
1484 p->next->next = symbol_list_new (eoftoken);
1485 p->next->next->next = symbol_list_new (NULL);
1486 p->next->next->next->next = grammar;
1487 nrules += 1;
1488 nitems += 3;
1489 grammar = p;
1490 startval = axiom;
1ff442ca
NF
1491
1492 if (nsyms > MAXSHORT)
a0f6b076
AD
1493 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1494 MAXSHORT);
1ff442ca
NF
1495
1496 ntokens = nsyms - nvars;
1497}
ff48177d
MA
1498
1499/* At the end of the grammar file, some C source code must
63c2d5de 1500 be stored. It is going to be associated to the epilogue
ff48177d
MA
1501 directive. */
1502static void
1503read_additionnal_code (void)
1504{
9101a310 1505 int c;
63c2d5de 1506 struct obstack el_obstack;
342b8b6e 1507
63c2d5de 1508 obstack_init (&el_obstack);
ff48177d 1509
710ddc4f
MA
1510 if (!no_lines_flag)
1511 {
1512 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1513 lineno, quotearg_style (c_quoting_style,
b7c49edf 1514 muscle_find ("filename")));
710ddc4f
MA
1515 }
1516
ff48177d 1517 while ((c = getc (finput)) != EOF)
63c2d5de 1518 obstack_1grow (&el_obstack, c);
342b8b6e 1519
63c2d5de 1520 obstack_1grow (&el_obstack, 0);
11d82f03 1521 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1522}
1523
a70083a3 1524\f
037ca2f1
AD
1525/*------------------------------------------------------------------.
1526| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1527| number. |
1528`------------------------------------------------------------------*/
1529
1530static void
1531token_translations_init (void)
1532{
1533 bucket *bp = NULL;
1534 int i;
1535
1536 token_translations = XCALLOC (short, max_user_token_number + 1);
1537
1538 /* Initialize all entries for literal tokens to 2, the internal
1539 token number for $undefined., which represents all invalid
1540 inputs. */
18bcecb0 1541 for (i = 0; i < max_user_token_number + 1; i++)
037ca2f1
AD
1542 token_translations[i] = 2;
1543
1544 for (bp = firstsymbol; bp; bp = bp->next)
1545 {
1546 /* Non-terminal? */
d9b739c3 1547 if (bp->number >= ntokens)
037ca2f1
AD
1548 continue;
1549 /* A token string alias? */
1550 if (bp->user_token_number == SALIAS)
1551 continue;
6b7e85b9
AD
1552
1553 assert (bp->user_token_number != SUNDEF);
1554
037ca2f1
AD
1555 /* A token which translation has already been set? */
1556 if (token_translations[bp->user_token_number] != 2)
1557 complain (_("tokens %s and %s both assigned number %d"),
ad949da9 1558 symbols[token_translations[bp->user_token_number]]->tag,
037ca2f1 1559 bp->tag, bp->user_token_number);
d9b739c3 1560 token_translations[bp->user_token_number] = bp->number;
037ca2f1
AD
1561 }
1562}
1563
1564
0e78e603
AD
1565/*----------------------------------------------------------------.
1566| Assign symbol numbers, and write definition of token names into |
1567| FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1568`----------------------------------------------------------------*/
1ff442ca 1569
4a120d45 1570static void
118fb205 1571packsymbols (void)
1ff442ca 1572{
342b8b6e 1573 bucket *bp = NULL;
a70083a3 1574 int tokno = 1;
a70083a3 1575 int last_user_token_number;
1ff442ca 1576
0e78e603 1577 symbols = XCALLOC (bucket *, nsyms);
1ff442ca 1578
1ff442ca
NF
1579 max_user_token_number = 256;
1580 last_user_token_number = 256;
1581
1582 for (bp = firstsymbol; bp; bp = bp->next)
1583 {
d7020c20 1584 if (bp->class == nterm_sym)
1ff442ca 1585 {
d9b739c3 1586 bp->number += ntokens;
1ff442ca 1587 }
943819bf
RS
1588 else if (bp->alias)
1589 {
b7c49edf
AD
1590 /* This symbol and its alias are a single token defn.
1591 Allocate a tokno, and assign to both check agreement of
1592 prec and assoc fields and make both the same */
d9b739c3 1593 if (bp->number == -1)
b7c49edf
AD
1594 {
1595 if (bp == eoftoken || bp->alias == eoftoken)
d9b739c3 1596 bp->number = bp->alias->number = 0;
b7c49edf
AD
1597 else
1598 {
d9b739c3 1599 bp->number = bp->alias->number = tokno++;
b7c49edf
AD
1600 }
1601 }
943819bf 1602
0a6384c4
AD
1603 if (bp->prec != bp->alias->prec)
1604 {
1605 if (bp->prec != 0 && bp->alias->prec != 0
1606 && bp->user_token_number == SALIAS)
a0f6b076
AD
1607 complain (_("conflicting precedences for %s and %s"),
1608 bp->tag, bp->alias->tag);
0a6384c4
AD
1609 if (bp->prec != 0)
1610 bp->alias->prec = bp->prec;
1611 else
1612 bp->prec = bp->alias->prec;
1613 }
943819bf 1614
0a6384c4
AD
1615 if (bp->assoc != bp->alias->assoc)
1616 {
a0f6b076
AD
1617 if (bp->assoc != 0 && bp->alias->assoc != 0
1618 && bp->user_token_number == SALIAS)
1619 complain (_("conflicting assoc values for %s and %s"),
1620 bp->tag, bp->alias->tag);
1621 if (bp->assoc != 0)
1622 bp->alias->assoc = bp->assoc;
1623 else
1624 bp->assoc = bp->alias->assoc;
1625 }
0a6384c4 1626
b7c49edf 1627 /* Do not do processing below for SALIASs. */
0a6384c4 1628 if (bp->user_token_number == SALIAS)
b7c49edf 1629 continue;
943819bf 1630
a70083a3 1631 }
b7c49edf 1632 else /* bp->class == token_sym */
943819bf 1633 {
b7c49edf 1634 if (bp == eoftoken)
d9b739c3 1635 bp->number = 0;
b7c49edf 1636 else
d9b739c3 1637 bp->number = tokno++;
943819bf
RS
1638 }
1639
d7020c20 1640 if (bp->class == token_sym)
1ff442ca 1641 {
6b7e85b9 1642 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1643 bp->user_token_number = ++last_user_token_number;
1644 if (bp->user_token_number > max_user_token_number)
1645 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1646 }
1647
d9b739c3 1648 symbols[bp->number] = bp;
1ff442ca
NF
1649 }
1650
037ca2f1 1651 token_translations_init ();
1ff442ca 1652
d9b739c3 1653 error_token_number = errtoken->number;
1ff442ca 1654
e3f1699f
AD
1655 if (startval->class == unknown_sym)
1656 fatal (_("the start symbol %s is undefined"), startval->tag);
1657 else if (startval->class == token_sym)
1658 fatal (_("the start symbol %s is a token"), startval->tag);
1659
d9b739c3 1660 start_symbol = startval->number;
e3f1699f
AD
1661}
1662
1663
a70083a3
AD
1664/*---------------------------------------------------------------.
1665| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 1666| RITEM. |
a70083a3 1667`---------------------------------------------------------------*/
1ff442ca 1668
4a120d45 1669static void
118fb205 1670packgram (void)
1ff442ca 1671{
a70083a3
AD
1672 int itemno;
1673 int ruleno;
1674 symbol_list *p;
1ff442ca 1675
adc8c848
AD
1676 /* We use short to index items. */
1677 if (nitems >= MAXSHORT)
1678 fatal (_("too many items (max %d)"), MAXSHORT);
1679
d7913476 1680 ritem = XCALLOC (short, nitems + 1);
1a2b5d37 1681 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1682
1683 itemno = 0;
1684 ruleno = 1;
1685
1686 p = grammar;
1687 while (p)
1688 {
b29b2ed5 1689 bucket *ruleprec = p->ruleprec;
c3b407f4 1690 rules[ruleno].number = ruleno;
d9b739c3 1691 rules[ruleno].lhs = p->sym->number;
99013900 1692 rules[ruleno].rhs = ritem + itemno;
1a2b5d37
AD
1693 rules[ruleno].line = p->line;
1694 rules[ruleno].useful = TRUE;
1695 rules[ruleno].action = p->action;
1696 rules[ruleno].action_line = p->action_line;
1697 rules[ruleno].guard = p->guard;
1698 rules[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1699
1700 p = p->next;
1701 while (p && p->sym)
1702 {
d9b739c3 1703 ritem[itemno++] = p->sym->number;
1ff442ca
NF
1704 /* A rule gets by default the precedence and associativity
1705 of the last token in it. */
d7020c20 1706 if (p->sym->class == token_sym)
1ff442ca 1707 {
1a2b5d37
AD
1708 rules[ruleno].prec = p->sym->prec;
1709 rules[ruleno].assoc = p->sym->assoc;
1ff442ca 1710 }
a70083a3
AD
1711 if (p)
1712 p = p->next;
1ff442ca
NF
1713 }
1714
1715 /* If this rule has a %prec,
a70083a3 1716 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1717 if (ruleprec)
1718 {
1a2b5d37
AD
1719 rules[ruleno].prec = ruleprec->prec;
1720 rules[ruleno].assoc = ruleprec->assoc;
d9b739c3 1721 rules[ruleno].precsym = ruleprec->number;
1ff442ca
NF
1722 }
1723
1724 ritem[itemno++] = -ruleno;
1725 ruleno++;
1726
a70083a3
AD
1727 if (p)
1728 p = p->next;
1ff442ca
NF
1729 }
1730
1731 ritem[itemno] = 0;
75142d45
AD
1732 nritems = itemno;
1733 assert (nritems == nitems);
3067fbef
AD
1734
1735 if (trace_flag)
1736 ritem_print (stderr);
1ff442ca 1737}
a70083a3
AD
1738\f
1739/*-------------------------------------------------------------------.
1740| Read in the grammar specification and record it in the format |
ea5607fd 1741| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1742| and all actions into ACTION_OBSTACK, in each case forming the body |
1743| of a C function (YYGUARD or YYACTION) which contains a switch |
1744| statement to decide which guard or action to execute. |
a70083a3
AD
1745`-------------------------------------------------------------------*/
1746
1747void
1748reader (void)
1749{
1750 start_flag = 0;
1751 startval = NULL; /* start symbol not specified yet. */
1752
b7c49edf 1753 nsyms = 0;
a70083a3
AD
1754 nvars = 0;
1755 nrules = 0;
1756 nitems = 0;
a70083a3
AD
1757
1758 typed = 0;
1759 lastprec = 0;
1760
a70083a3
AD
1761 semantic_parser = 0;
1762 pure_parser = 0;
a70083a3
AD
1763
1764 grammar = NULL;
1765
342b8b6e 1766 lex_init ();
a70083a3
AD
1767 lineno = 1;
1768
11d82f03
MA
1769 /* Initialize the muscle obstack. */
1770 obstack_init (&muscle_obstack);
82e236e2 1771
a70083a3
AD
1772 /* Initialize the symbol table. */
1773 tabinit ();
b6610515 1774
30171f79
AD
1775 /* Construct the axiom symbol. */
1776 axiom = getsym ("$axiom");
1777 axiom->class = nterm_sym;
d9b739c3 1778 axiom->number = nvars++;
30171f79 1779
a70083a3
AD
1780 /* Construct the error token */
1781 errtoken = getsym ("error");
d7020c20 1782 errtoken->class = token_sym;
a70083a3 1783 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1784
a70083a3
AD
1785 /* Construct a token that represents all undefined literal tokens.
1786 It is always token number 2. */
1787 undeftoken = getsym ("$undefined.");
d7020c20 1788 undeftoken->class = token_sym;
a70083a3
AD
1789 undeftoken->user_token_number = 2;
1790
331dbc1b
AD
1791 /* Initialize the obstacks. */
1792 obstack_init (&action_obstack);
1793 obstack_init (&attrs_obstack);
331dbc1b
AD
1794 obstack_init (&output_obstack);
1795
1796 finput = xfopen (infile, "r");
1797
896fe5c1
AD
1798 /* Read the declaration section. Copy %{ ... %} groups to
1799 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1800 etc. found there. */
a70083a3 1801 read_declarations ();
b7c49edf
AD
1802
1803 /* If the user did not define her EOFTOKEN, do it now. */
1804 if (!eoftoken)
1805 {
1806 eoftoken = getsym ("$");
1807 eoftoken->class = token_sym;
1808 /* Value specified by POSIX. */
1809 eoftoken->user_token_number = 0;
1810 }
1811
a70083a3
AD
1812 /* Read in the grammar, build grammar in list form. Write out
1813 guards and actions. */
1814 readgram ();
ff48177d
MA
1815 /* Some C code is given at the end of the grammar file. */
1816 read_additionnal_code ();
b0c4483e 1817
331dbc1b
AD
1818 lex_free ();
1819 xfclose (finput);
1820
a70083a3
AD
1821 /* Assign the symbols their symbol numbers. Write #defines for the
1822 token symbols into FDEFINES if requested. */
1823 packsymbols ();
93ede233 1824
a70083a3
AD
1825 /* Convert the grammar into the format described in gram.h. */
1826 packgram ();
8419d367
AD
1827
1828 /* The grammar as a symbol_list is no longer needed. */
1829 LIST_FREE (symbol_list, grammar);
a70083a3 1830}
76514394
AD
1831
1832void
1833grammar_free (void)
1834{
1835 XFREE (ritem);
1836 free (rules + 1);
1837 /* Free the symbol table data structure. */
1838 free_symtab ();
1839}