]> git.saurik.com Git - bison.git/blame - src/reader.c
Give a try to M4 as a back end.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
41 bucket *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
a70083a3 51 bucket *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
4a120d45
JT
55static symbol_list *grammar;
56static int start_flag;
57static bucket *startval;
1ff442ca
NF
58
59/* Nonzero if components of semantic values are used, implying
60 they must be unions. */
61static int value_components_used;
62
d7020c20
AD
63/* Nonzero if %union has been seen. */
64static int typed;
1ff442ca 65
d7020c20
AD
66/* Incremented for each %left, %right or %nonassoc seen */
67static int lastprec;
1ff442ca 68
b7c49edf
AD
69static bucket *errtoken = NULL;
70static bucket *undeftoken = NULL;
71static bucket *eoftoken = NULL;
30171f79 72static bucket *axiom = NULL;
b29b2ed5 73
6255b435 74static symbol_list *
b29b2ed5
AD
75symbol_list_new (bucket *sym)
76{
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
d945f5cd
AD
81 res->action = NULL;
82 res->action_line = 0;
f499b062
AD
83 res->guard = NULL;
84 res->guard_line = 0;
b29b2ed5
AD
85 res->ruleprec = NULL;
86 return res;
87}
88
0d533154 89\f
a70083a3 90
0d533154
AD
91/*===================\
92| Low level lexing. |
93\===================*/
943819bf
RS
94
95static void
118fb205 96skip_to_char (int target)
943819bf
RS
97{
98 int c;
99 if (target == '\n')
a0f6b076 100 complain (_(" Skipping to next \\n"));
943819bf 101 else
a0f6b076 102 complain (_(" Skipping to next %c"), target);
943819bf
RS
103
104 do
0d533154 105 c = skip_white_space ();
943819bf 106 while (c != target && c != EOF);
a083fbbf 107 if (c != EOF)
0d533154 108 ungetc (c, finput);
943819bf
RS
109}
110
111
0d533154
AD
112/*---------------------------------------------------------.
113| Read a signed integer from STREAM and return its value. |
114`---------------------------------------------------------*/
115
static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  /* An optional leading minus sign.  */
  if (c == '-')
    {
      negative = 1;
      c = getc (stream);
    }

  /* Accumulate the decimal digits.  The magnitude saturates at
     INT_MAX instead of overflowing, since signed overflow is
     undefined and the digits come straight from the user's file.  */
  while (isdigit (c))
    {
      int digit = c - '0';

      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;
      c = getc (stream);
    }

  /* Give back the character that ended the number (pushing back EOF
     is a no-op).  */
  ungetc (c, stream);

  return negative ? -n : n;
}
139\f
79282c5a
AD
140/*--------------------------------------------------------------.
141| Get the data type (alternative in the union) of the value for |
142| symbol N in rule RULE. |
143`--------------------------------------------------------------*/
144
145static char *
b29b2ed5 146get_type_name (int n, symbol_list *rule)
79282c5a
AD
147{
148 int i;
149 symbol_list *rp;
150
151 if (n < 0)
152 {
153 complain (_("invalid $ value"));
154 return NULL;
155 }
156
157 rp = rule;
158 i = 0;
159
160 while (i < n)
161 {
162 rp = rp->next;
163 if (rp == NULL || rp->sym == NULL)
164 {
165 complain (_("invalid $ value"));
166 return NULL;
167 }
168 i++;
169 }
170
171 return rp->sym->type_name;
172}
173\f
337bab46
AD
174/*------------------------------------------------------------.
175| Dump the string from FIN to OOUT if non null. MATCH is the |
176| delimiter of the string (either ' or "). |
177`------------------------------------------------------------*/
ae3c3164
AD
178
179static inline void
b6610515 180copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
181{
182 int c;
183
b6610515
RA
184 if (store)
185 obstack_1grow (oout, match);
8c7ebe49 186
4a120d45 187 c = getc (fin);
ae3c3164
AD
188
189 while (c != match)
190 {
191 if (c == EOF)
192 fatal (_("unterminated string at end of file"));
193 if (c == '\n')
194 {
a0f6b076 195 complain (_("unterminated string"));
4a120d45 196 ungetc (c, fin);
ae3c3164
AD
197 c = match; /* invent terminator */
198 continue;
199 }
200
337bab46 201 obstack_1grow (oout, c);
ae3c3164
AD
202
203 if (c == '\\')
204 {
4a120d45 205 c = getc (fin);
ae3c3164
AD
206 if (c == EOF)
207 fatal (_("unterminated string at end of file"));
337bab46 208 obstack_1grow (oout, c);
8c7ebe49 209
ae3c3164
AD
210 if (c == '\n')
211 lineno++;
212 }
213
a70083a3 214 c = getc (fin);
ae3c3164
AD
215 }
216
b6610515
RA
217 if (store)
218 obstack_1grow (oout, c);
219}
220
/* Copy the string delimited by MATCH from FIN to OOUT, delimiters
   included.  FIXME.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
228
b6610515
RA
/* Copy from FIN to OOUT the longest run of letters, digits and
   underscores, and push back the character that ended it.  FIXME.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (ch == '_' || isalnum (ch))
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
ae3c3164 241
2666f928
AD
242
243/*------------------------------------------------------------------.
244| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
245| `/', which might or might not be a comment. In any case, copy |
246| what we saw. |
247`------------------------------------------------------------------*/
ae3c3164
AD
248
249static inline void
2666f928 250copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
251{
252 int cplus_comment;
a70083a3 253 int ended;
550a72a3
AD
254 int c;
255
256 /* We read a `/', output it. */
2666f928 257 obstack_1grow (oout, '/');
550a72a3
AD
258
259 switch ((c = getc (fin)))
260 {
261 case '/':
262 cplus_comment = 1;
263 break;
264 case '*':
265 cplus_comment = 0;
266 break;
267 default:
268 ungetc (c, fin);
269 return;
270 }
ae3c3164 271
2666f928 272 obstack_1grow (oout, c);
550a72a3 273 c = getc (fin);
ae3c3164
AD
274
275 ended = 0;
276 while (!ended)
277 {
278 if (!cplus_comment && c == '*')
279 {
280 while (c == '*')
281 {
2666f928 282 obstack_1grow (oout, c);
550a72a3 283 c = getc (fin);
ae3c3164
AD
284 }
285
286 if (c == '/')
287 {
2666f928 288 obstack_1grow (oout, c);
ae3c3164
AD
289 ended = 1;
290 }
291 }
292 else if (c == '\n')
293 {
294 lineno++;
2666f928 295 obstack_1grow (oout, c);
ae3c3164
AD
296 if (cplus_comment)
297 ended = 1;
298 else
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 else if (c == EOF)
302 fatal (_("unterminated comment"));
303 else
304 {
2666f928 305 obstack_1grow (oout, c);
550a72a3 306 c = getc (fin);
ae3c3164
AD
307 }
308 }
309}
310
311
a70083a3 312/*-----------------------------------------------------------------.
337bab46 313| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
314| reference to this location. STACK_OFFSET is the number of values |
315| in the current rule so far, which says where to find `$0' with |
316| respect to the top of the stack. |
317`-----------------------------------------------------------------*/
1ff442ca 318
a70083a3 319static inline void
337bab46 320copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 321{
a70083a3 322 int c;
1ff442ca 323
a70083a3
AD
324 c = getc (fin);
325 if (c == '$')
1ff442ca 326 {
ff4423cc 327 obstack_sgrow (oout, "yyloc");
89cab50d 328 locations_flag = 1;
a70083a3
AD
329 }
330 else if (isdigit (c) || c == '-')
331 {
332 int n;
1ff442ca 333
a70083a3
AD
334 ungetc (c, fin);
335 n = read_signed_integer (fin);
11e2beca
AD
336 if (n > stack_offset)
337 complain (_("invalid value: %s%d"), "@", n);
338 else
339 {
340 /* Offset is always 0 if parser has already popped the stack
341 pointer. */
342 obstack_fgrow1 (oout, "yylsp[%d]",
343 n - (semantic_parser ? 0 : stack_offset));
344 locations_flag = 1;
345 }
1ff442ca 346 }
a70083a3 347 else
ff4a34be
AD
348 {
349 char buf[] = "@c";
350 buf[1] = c;
351 complain (_("%s is invalid"), quote (buf));
352 }
1ff442ca 353}
79282c5a
AD
354
355
356/*-------------------------------------------------------------------.
357| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
358| |
359| Possible inputs: $[<TYPENAME>]($|integer) |
360| |
337bab46 361| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
362| the number of values in the current rule so far, which says where |
363| to find `$0' with respect to the top of the stack. |
364`-------------------------------------------------------------------*/
365
366static inline void
337bab46 367copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
368 symbol_list *rule, int stack_offset)
369{
370 int c = getc (fin);
b0ce6046 371 const char *type_name = NULL;
79282c5a 372
f282676b 373 /* Get the type name if explicit. */
79282c5a
AD
374 if (c == '<')
375 {
f282676b 376 read_type_name (fin);
79282c5a
AD
377 type_name = token_buffer;
378 value_components_used = 1;
79282c5a
AD
379 c = getc (fin);
380 }
381
382 if (c == '$')
383 {
ff4423cc 384 obstack_sgrow (oout, "yyval");
8c7ebe49 385
79282c5a
AD
386 if (!type_name)
387 type_name = get_type_name (0, rule);
388 if (type_name)
337bab46 389 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
390 if (!type_name && typed)
391 complain (_("$$ of `%s' has no declared type"),
392 rule->sym->tag);
393 }
394 else if (isdigit (c) || c == '-')
395 {
396 int n;
397 ungetc (c, fin);
398 n = read_signed_integer (fin);
399
11e2beca
AD
400 if (n > stack_offset)
401 complain (_("invalid value: %s%d"), "$", n);
402 else
403 {
404 if (!type_name && n > 0)
405 type_name = get_type_name (n, rule);
406
407 /* Offset is always 0 if parser has already popped the stack
408 pointer. */
409 obstack_fgrow1 (oout, "yyvsp[%d]",
410 n - (semantic_parser ? 0 : stack_offset));
411
412 if (type_name)
413 obstack_fgrow1 (oout, ".%s", type_name);
414 if (!type_name && typed)
415 complain (_("$%d of `%s' has no declared type"),
416 n, rule->sym->tag);
417 }
79282c5a
AD
418 }
419 else
420 {
421 char buf[] = "$c";
422 buf[1] = c;
423 complain (_("%s is invalid"), quote (buf));
424 }
425}
a70083a3
AD
426\f
427/*-------------------------------------------------------------------.
428| Copy the contents of a `%{ ... %}' into the definitions file. The |
429| `%{' has already been read. Return after reading the `%}'. |
430`-------------------------------------------------------------------*/
1ff442ca 431
4a120d45 432static void
118fb205 433copy_definition (void)
1ff442ca 434{
a70083a3 435 int c;
ae3c3164 436 /* -1 while reading a character if prev char was %. */
a70083a3 437 int after_percent;
1ff442ca 438
89cab50d 439 if (!no_lines_flag)
25b222fa
MA
440 {
441 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 442 lineno, quotearg_style (c_quoting_style,
b7c49edf 443 muscle_find ("filename")));
25b222fa 444 }
1ff442ca
NF
445
446 after_percent = 0;
447
ae3c3164 448 c = getc (finput);
1ff442ca
NF
449
450 for (;;)
451 {
452 switch (c)
453 {
454 case '\n':
dd60faec 455 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
456 lineno++;
457 break;
458
459 case '%':
a70083a3 460 after_percent = -1;
1ff442ca 461 break;
a083fbbf 462
1ff442ca
NF
463 case '\'':
464 case '"':
337bab46 465 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
466 break;
467
468 case '/':
337bab46 469 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
470 break;
471
472 case EOF:
a70083a3 473 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
474
475 default:
dd60faec 476 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
477 }
478
a70083a3 479 c = getc (finput);
1ff442ca
NF
480
481 if (after_percent)
482 {
483 if (c == '}')
484 return;
dd60faec 485 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
486 }
487 after_percent = 0;
1ff442ca 488 }
1ff442ca
NF
489}
490
491
d7020c20
AD
492/*-------------------------------------------------------------------.
493| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
494| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
495| are reversed. |
496`-------------------------------------------------------------------*/
1ff442ca 497
4a120d45 498static void
d7020c20 499parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 500{
342b8b6e
AD
501 token_t token = tok_undef;
502 char *typename = NULL;
1ff442ca 503
1e9798d5
AD
504 /* The symbol being defined. */
505 struct bucket *symbol = NULL;
506
507 /* After `%token' and `%nterm', any number of symbols maybe be
508 defined. */
1ff442ca
NF
509 for (;;)
510 {
e6011337
JT
511 int tmp_char = ungetc (skip_white_space (), finput);
512
1e9798d5
AD
513 /* `%' (for instance from `%token', or from `%%' etc.) is the
514 only valid means to end this declaration. */
e6011337 515 if (tmp_char == '%')
1ff442ca 516 return;
e6011337 517 if (tmp_char == EOF)
a0f6b076 518 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 519
a70083a3 520 token = lex ();
511e79b3 521 if (token == tok_comma)
943819bf
RS
522 {
523 symbol = NULL;
524 continue;
525 }
511e79b3 526 if (token == tok_typename)
1ff442ca 527 {
95e36146 528 typename = xstrdup (token_buffer);
1ff442ca 529 value_components_used = 1;
943819bf
RS
530 symbol = NULL;
531 }
511e79b3 532 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 533 {
8e03724b
AD
534 if (symval->alias)
535 warn (_("symbol `%s' used more than once as a literal string"),
536 symval->tag);
537 else if (symbol->alias)
538 warn (_("symbol `%s' given more than one literal string"),
539 symbol->tag);
540 else
541 {
542 symval->class = token_sym;
543 symval->type_name = typename;
544 symval->user_token_number = symbol->user_token_number;
545 symbol->user_token_number = SALIAS;
546 symval->alias = symbol;
547 symbol->alias = symval;
548 /* symbol and symval combined are only one symbol */
549 nsyms--;
550 }
8e03724b 551 symbol = NULL;
1ff442ca 552 }
511e79b3 553 else if (token == tok_identifier)
1ff442ca
NF
554 {
555 int oldclass = symval->class;
943819bf 556 symbol = symval;
1ff442ca 557
943819bf 558 if (symbol->class == what_is_not)
a0f6b076 559 complain (_("symbol %s redefined"), symbol->tag);
943819bf 560 symbol->class = what_is;
d7020c20 561 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 562 symbol->value = nvars++;
1ff442ca
NF
563
564 if (typename)
565 {
943819bf
RS
566 if (symbol->type_name == NULL)
567 symbol->type_name = typename;
a70083a3 568 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 569 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
570 }
571 }
511e79b3 572 else if (symbol && token == tok_number)
a70083a3 573 {
943819bf 574 symbol->user_token_number = numval;
b7c49edf
AD
575 /* User defined EOF token? */
576 if (numval == 0)
577 eoftoken = symbol;
a70083a3 578 }
1ff442ca 579 else
943819bf 580 {
a0f6b076 581 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
582 token_buffer,
583 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 584 skip_to_char ('%');
943819bf 585 }
1ff442ca
NF
586 }
587
588}
589
1ff442ca 590
d7020c20
AD
591/*------------------------------.
592| Parse what comes after %start |
593`------------------------------*/
1ff442ca 594
4a120d45 595static void
118fb205 596parse_start_decl (void)
1ff442ca
NF
597{
598 if (start_flag)
27821bff 599 complain (_("multiple %s declarations"), "%start");
511e79b3 600 if (lex () != tok_identifier)
27821bff 601 complain (_("invalid %s declaration"), "%start");
943819bf
RS
602 else
603 {
604 start_flag = 1;
605 startval = symval;
606 }
1ff442ca
NF
607}
608
a70083a3
AD
609/*-----------------------------------------------------------.
610| read in a %type declaration and record its information for |
611| get_type_name to access |
612`-----------------------------------------------------------*/
613
614static void
615parse_type_decl (void)
616{
a70083a3
AD
617 char *name;
618
511e79b3 619 if (lex () != tok_typename)
a70083a3
AD
620 {
621 complain ("%s", _("%type declaration has no <typename>"));
622 skip_to_char ('%');
623 return;
624 }
625
95e36146 626 name = xstrdup (token_buffer);
a70083a3
AD
627
628 for (;;)
629 {
f17bcd1f 630 token_t t;
a70083a3
AD
631 int tmp_char = ungetc (skip_white_space (), finput);
632
633 if (tmp_char == '%')
634 return;
635 if (tmp_char == EOF)
636 fatal (_("Premature EOF after %s"), token_buffer);
637
638 t = lex ();
639
640 switch (t)
1ff442ca
NF
641 {
642
511e79b3
AD
643 case tok_comma:
644 case tok_semicolon:
1ff442ca
NF
645 break;
646
511e79b3 647 case tok_identifier:
1ff442ca
NF
648 if (symval->type_name == NULL)
649 symval->type_name = name;
a70083a3 650 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 651 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
652
653 break;
654
655 default:
a0f6b076
AD
656 complain (_("invalid %%type declaration due to item: %s"),
657 token_buffer);
a70083a3 658 skip_to_char ('%');
1ff442ca
NF
659 }
660 }
661}
662
663
664
d7020c20
AD
665/*----------------------------------------------------------------.
666| Read in a %left, %right or %nonassoc declaration and record its |
667| information. |
668`----------------------------------------------------------------*/
1ff442ca 669
4a120d45 670static void
d7020c20 671parse_assoc_decl (associativity assoc)
1ff442ca 672{
a70083a3
AD
673 char *name = NULL;
674 int prev = 0;
1ff442ca 675
a70083a3 676 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 677
1ff442ca
NF
678 for (;;)
679 {
f17bcd1f 680 token_t t;
e6011337 681 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 682
e6011337 683 if (tmp_char == '%')
1ff442ca 684 return;
e6011337 685 if (tmp_char == EOF)
a0f6b076 686 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 687
a70083a3 688 t = lex ();
1ff442ca
NF
689
690 switch (t)
691 {
511e79b3 692 case tok_typename:
95e36146 693 name = xstrdup (token_buffer);
1ff442ca
NF
694 break;
695
511e79b3 696 case tok_comma:
1ff442ca
NF
697 break;
698
511e79b3 699 case tok_identifier:
1ff442ca 700 if (symval->prec != 0)
a0f6b076 701 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
702 symval->prec = lastprec;
703 symval->assoc = assoc;
d7020c20 704 if (symval->class == nterm_sym)
a0f6b076 705 complain (_("symbol %s redefined"), symval->tag);
d7020c20 706 symval->class = token_sym;
1ff442ca 707 if (name)
a70083a3 708 { /* record the type, if one is specified */
1ff442ca
NF
709 if (symval->type_name == NULL)
710 symval->type_name = name;
a70083a3 711 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 712 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
713 }
714 break;
715
511e79b3
AD
716 case tok_number:
717 if (prev == tok_identifier)
a70083a3 718 {
1ff442ca 719 symval->user_token_number = numval;
a70083a3
AD
720 }
721 else
722 {
723 complain (_
724 ("invalid text (%s) - number should be after identifier"),
725token_buffer);
726 skip_to_char ('%');
727 }
1ff442ca
NF
728 break;
729
511e79b3 730 case tok_semicolon:
1ff442ca
NF
731 return;
732
733 default:
a0f6b076 734 complain (_("unexpected item: %s"), token_buffer);
a70083a3 735 skip_to_char ('%');
1ff442ca
NF
736 }
737
738 prev = t;
1ff442ca
NF
739 }
740}
741
742
743
dd60faec 744/*--------------------------------------------------------------.
180d45ba
PB
745| Copy the union declaration into the stype muscle |
746| (and fdefines), where it is made into the definition of |
747| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 748`--------------------------------------------------------------*/
1ff442ca 749
4a120d45 750static void
118fb205 751parse_union_decl (void)
1ff442ca 752{
a70083a3
AD
753 int c;
754 int count = 0;
428046f8 755 bool done = FALSE;
180d45ba 756 struct obstack union_obstack;
1ff442ca 757 if (typed)
27821bff 758 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
759
760 typed = 1;
761
180d45ba
PB
762 obstack_init (&union_obstack);
763 obstack_sgrow (&union_obstack, "union");
1ff442ca 764
428046f8 765 while (!done)
1ff442ca 766 {
428046f8
AD
767 c = xgetc (finput);
768
342b8b6e
AD
769 /* If C contains '/', it is output by copy_comment (). */
770 if (c != '/')
2666f928 771 obstack_1grow (&union_obstack, c);
1ff442ca
NF
772
773 switch (c)
774 {
775 case '\n':
776 lineno++;
777 break;
778
779 case '/':
2666f928 780 copy_comment (finput, &union_obstack);
1ff442ca
NF
781 break;
782
1ff442ca
NF
783 case '{':
784 count++;
785 break;
786
787 case '}':
428046f8 788 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 789 if (count == 0)
27821bff 790 complain (_("unmatched %s"), "`}'");
1ff442ca 791 count--;
428046f8
AD
792 if (!count)
793 done = TRUE;
794 break;
1ff442ca 795 }
1ff442ca 796 }
180d45ba 797
428046f8
AD
798 /* JF don't choke on trailing semi */
799 c = skip_white_space ();
800 if (c != ';')
801 ungetc (c, finput);
802 obstack_1grow (&union_obstack, 0);
803 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
804}
805
d7020c20
AD
806
807/*-------------------------------------------------------.
808| Parse the declaration %expect N which says to expect N |
809| shift-reduce conflicts. |
810`-------------------------------------------------------*/
1ff442ca 811
4a120d45 812static void
118fb205 813parse_expect_decl (void)
1ff442ca 814{
131e2fef 815 int c = skip_white_space ();
1ff442ca
NF
816 ungetc (c, finput);
817
131e2fef 818 if (!isdigit (c))
79282c5a 819 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
820 else
821 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
822}
823
a70083a3
AD
824
825/*-------------------------------------------------------------------.
826| Parse what comes after %thong. the full syntax is |
827| |
828| %thong <type> token number literal |
829| |
830| the <type> or number may be omitted. The number specifies the |
831| user_token_number. |
832| |
833| Two symbols are entered in the table, one for the token symbol and |
834| one for the literal. Both are given the <type>, if any, from the |
835| declaration. The ->user_token_number of the first is SALIAS and |
836| the ->user_token_number of the second is set to the number, if |
837| any, from the declaration. The two symbols are linked via |
838| pointers in their ->alias fields. |
839| |
840| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
841| only the literal string is retained it is the literal string that |
842| is output to yytname |
843`-------------------------------------------------------------------*/
844
845static void
846parse_thong_decl (void)
7b306f52 847{
f17bcd1f 848 token_t token;
a70083a3
AD
849 struct bucket *symbol;
850 char *typename = 0;
6b7e85b9 851 int usrtoknum = SUNDEF;
7b306f52 852
a70083a3 853 token = lex (); /* fetch typename or first token */
511e79b3 854 if (token == tok_typename)
7b306f52 855 {
95e36146 856 typename = xstrdup (token_buffer);
a70083a3
AD
857 value_components_used = 1;
858 token = lex (); /* fetch first token */
7b306f52 859 }
7b306f52 860
a70083a3 861 /* process first token */
7b306f52 862
511e79b3 863 if (token != tok_identifier)
a70083a3
AD
864 {
865 complain (_("unrecognized item %s, expected an identifier"),
866 token_buffer);
867 skip_to_char ('%');
868 return;
7b306f52 869 }
d7020c20 870 symval->class = token_sym;
a70083a3
AD
871 symval->type_name = typename;
872 symval->user_token_number = SALIAS;
873 symbol = symval;
7b306f52 874
a70083a3 875 token = lex (); /* get number or literal string */
1ff442ca 876
511e79b3 877 if (token == tok_number)
943819bf 878 {
a70083a3
AD
879 usrtoknum = numval;
880 token = lex (); /* okay, did number, now get literal */
943819bf 881 }
1ff442ca 882
a70083a3 883 /* process literal string token */
1ff442ca 884
511e79b3 885 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 886 {
a70083a3
AD
887 complain (_("expected string constant instead of %s"), token_buffer);
888 skip_to_char ('%');
889 return;
1ff442ca 890 }
d7020c20 891 symval->class = token_sym;
a70083a3
AD
892 symval->type_name = typename;
893 symval->user_token_number = usrtoknum;
1ff442ca 894
a70083a3
AD
895 symval->alias = symbol;
896 symbol->alias = symval;
1ff442ca 897
79282c5a
AD
898 /* symbol and symval combined are only one symbol. */
899 nsyms--;
a70083a3 900}
3cef001a 901
11e2beca 902
b6610515 903static void
11d82f03 904parse_muscle_decl (void)
b6610515
RA
905{
906 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
907 char *muscle_key;
908 char *muscle_value;
b6610515
RA
909
910 /* Read key. */
911 if (!isalpha (ch) && ch != '_')
912 {
913 complain (_("invalid %s declaration"), "%define");
914 skip_to_char ('%');
915 return;
916 }
11d82f03
MA
917 copy_identifier (finput, &muscle_obstack);
918 obstack_1grow (&muscle_obstack, 0);
919 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 920
b6610515
RA
921 /* Read value. */
922 ch = skip_white_space ();
923 if (ch != '"')
924 {
925 ungetc (ch, finput);
926 if (ch != EOF)
927 {
928 complain (_("invalid %s declaration"), "%define");
929 skip_to_char ('%');
930 return;
931 }
932 else
933 fatal (_("Premature EOF after %s"), "\"");
934 }
11d82f03
MA
935 copy_string2 (finput, &muscle_obstack, '"', 0);
936 obstack_1grow (&muscle_obstack, 0);
937 muscle_value = obstack_finish (&muscle_obstack);
b6610515 938
b6610515 939 /* Store the (key, value) pair in the environment. */
11d82f03 940 muscle_insert (muscle_key, muscle_value);
b6610515
RA
941}
942
2ba3b73c 943
426cf563
MA
944
945/*---------------------------------.
a870c567 946| Parse a double quoted parameter. |
426cf563
MA
947`---------------------------------*/
948
949static const char *
950parse_dquoted_param (const char *from)
951{
952 struct obstack param_obstack;
953 const char *param = NULL;
954 int c;
955
956 obstack_init (&param_obstack);
957 c = skip_white_space ();
958
959 if (c != '"')
960 {
961 complain (_("invalid %s declaration"), from);
962 ungetc (c, finput);
963 skip_to_char ('%');
964 return NULL;
965 }
966
2648a72d
AD
967 while ((c = literalchar ()) != '"')
968 obstack_1grow (&param_obstack, c);
a870c567 969
426cf563
MA
970 obstack_1grow (&param_obstack, '\0');
971 param = obstack_finish (&param_obstack);
972
973 if (c != '"' || strlen (param) == 0)
974 {
975 complain (_("invalid %s declaration"), from);
976 if (c != '"')
977 ungetc (c, finput);
978 skip_to_char ('%');
979 return NULL;
980 }
981
982 return param;
983}
984
2ba3b73c
MA
985/*----------------------------------.
986| Parse what comes after %skeleton. |
987`----------------------------------*/
988
a870c567 989static void
2ba3b73c
MA
990parse_skel_decl (void)
991{
426cf563 992 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
993}
994
a70083a3
AD
995/*----------------------------------------------------------------.
996| Read from finput until `%%' is seen. Discard the `%%'. Handle |
997| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 998| groups to ATTRS_OBSTACK. |
a70083a3 999`----------------------------------------------------------------*/
1ff442ca 1000
4a120d45 1001static void
a70083a3 1002read_declarations (void)
1ff442ca 1003{
a70083a3 1004 for (;;)
1ff442ca 1005 {
951366c1 1006 int c = skip_white_space ();
1ff442ca 1007
a70083a3
AD
1008 if (c == '%')
1009 {
951366c1 1010 token_t tok = parse_percent_token ();
1ff442ca 1011
a70083a3 1012 switch (tok)
943819bf 1013 {
511e79b3 1014 case tok_two_percents:
a70083a3 1015 return;
1ff442ca 1016
511e79b3 1017 case tok_percent_left_curly:
a70083a3
AD
1018 copy_definition ();
1019 break;
1ff442ca 1020
511e79b3 1021 case tok_token:
d7020c20 1022 parse_token_decl (token_sym, nterm_sym);
a70083a3 1023 break;
1ff442ca 1024
511e79b3 1025 case tok_nterm:
d7020c20 1026 parse_token_decl (nterm_sym, token_sym);
a70083a3 1027 break;
1ff442ca 1028
511e79b3 1029 case tok_type:
a70083a3
AD
1030 parse_type_decl ();
1031 break;
1ff442ca 1032
511e79b3 1033 case tok_start:
a70083a3
AD
1034 parse_start_decl ();
1035 break;
118fb205 1036
511e79b3 1037 case tok_union:
a70083a3
AD
1038 parse_union_decl ();
1039 break;
1ff442ca 1040
511e79b3 1041 case tok_expect:
a70083a3
AD
1042 parse_expect_decl ();
1043 break;
6deb4447 1044
511e79b3 1045 case tok_thong:
a70083a3
AD
1046 parse_thong_decl ();
1047 break;
d7020c20 1048
511e79b3 1049 case tok_left:
d7020c20 1050 parse_assoc_decl (left_assoc);
a70083a3 1051 break;
1ff442ca 1052
511e79b3 1053 case tok_right:
d7020c20 1054 parse_assoc_decl (right_assoc);
a70083a3 1055 break;
1ff442ca 1056
511e79b3 1057 case tok_nonassoc:
d7020c20 1058 parse_assoc_decl (non_assoc);
a70083a3 1059 break;
1ff442ca 1060
b6610515 1061 case tok_define:
11d82f03 1062 parse_muscle_decl ();
b6610515 1063 break;
342b8b6e 1064
2ba3b73c
MA
1065 case tok_skel:
1066 parse_skel_decl ();
1067 break;
b6610515 1068
511e79b3 1069 case tok_noop:
a70083a3 1070 break;
1ff442ca 1071
951366c1
AD
1072 case tok_stropt:
1073 case tok_intopt:
1074 case tok_obsolete:
951366c1
AD
1075 abort ();
1076 break;
1077
e0c40012 1078 case tok_illegal:
a70083a3
AD
1079 default:
1080 complain (_("unrecognized: %s"), token_buffer);
1081 skip_to_char ('%');
1082 }
1083 }
1084 else if (c == EOF)
1085 fatal (_("no input grammar"));
1086 else
1087 {
ff4a34be
AD
1088 char buf[] = "c";
1089 buf[0] = c;
1090 complain (_("unknown character: %s"), quote (buf));
a70083a3 1091 skip_to_char ('%');
1ff442ca 1092 }
1ff442ca 1093 }
1ff442ca 1094}
a70083a3
AD
1095\f
1096/*-------------------------------------------------------------------.
1097| Assuming that a `{' has just been seen, copy everything up to the |
1098| matching `}' into the actions file. STACK_OFFSET is the number of |
1099| values in the current rule so far, which says where to find `$0' |
1100| with respect to the top of the stack. |
14d293ac 1101| |
11e2beca
AD
1102| This routine is used both for actions and guards. Only |
1103| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1104| pointers to relevant portions inside this obstack. |
a70083a3 1105`-------------------------------------------------------------------*/
1ff442ca 1106
4a120d45 1107static void
14d293ac 1108parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1109{
a70083a3 1110 int c;
a70083a3 1111 int count;
1ff442ca 1112
1ff442ca 1113 count = 1;
1ff442ca
NF
1114 while (count > 0)
1115 {
14d293ac
AD
1116 while ((c = getc (finput)) != '}')
1117 switch (c)
1118 {
1119 case '\n':
1120 obstack_1grow (&action_obstack, c);
1121 lineno++;
1122 break;
1ff442ca 1123
14d293ac
AD
1124 case '{':
1125 obstack_1grow (&action_obstack, c);
1126 count++;
1127 break;
1ff442ca 1128
14d293ac
AD
1129 case '\'':
1130 case '"':
1131 copy_string (finput, &action_obstack, c);
1132 break;
1ff442ca 1133
14d293ac
AD
1134 case '/':
1135 copy_comment (finput, &action_obstack);
1136 break;
1ff442ca 1137
14d293ac
AD
1138 case '$':
1139 copy_dollar (finput, &action_obstack,
1140 rule, stack_offset);
1141 break;
1ff442ca 1142
14d293ac
AD
1143 case '@':
1144 copy_at (finput, &action_obstack,
1145 stack_offset);
1146 break;
a70083a3 1147
14d293ac
AD
1148 case EOF:
1149 fatal (_("unmatched %s"), "`{'");
a70083a3 1150
14d293ac
AD
1151 default:
1152 obstack_1grow (&action_obstack, c);
1153 }
a70083a3 1154
14d293ac 1155 /* Above loop exits when C is '}'. */
a70083a3 1156 if (--count)
2b25d624 1157 obstack_1grow (&action_obstack, c);
a70083a3
AD
1158 }
1159
3f96f4dc 1160 obstack_1grow (&action_obstack, '\0');
a70083a3 1161}
14d293ac 1162
a70083a3
AD
1163
/* Read one action for RULE: store the current LINENO in
   RULE->action_line, copy the braced text with parse_braces, and
   store the finished ACTION_OBSTACK object in RULE->action.
   STACK_OFFSET is handed to parse_braces unchanged.  */
static void
parse_action (symbol_list *rule, int stack_offset)
{
  /* Taken before parse_braces runs, since parse_braces increments
     LINENO on each newline it copies.  */
  rule->action_line = lineno;
  parse_braces (rule, stack_offset);
  rule->action = obstack_finish (&action_obstack);
}
a70083a3 1171
a70083a3 1172
14d293ac
AD
1173static void
1174parse_guard (symbol_list *rule, int stack_offset)
1175{
1176 token_t t = lex ();
1177 if (t != tok_left_curly)
1178 complain (_("invalid %s declaration"), "%guard");
f499b062 1179 rule->guard_line = lineno;
14d293ac
AD
1180 parse_braces (rule, stack_offset);
1181 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1182}
14d293ac 1183
a70083a3
AD
1184\f
1185
a70083a3
AD
1186/*-------------------------------------------------------------------.
1187| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1188| with the user's names. |
1189`-------------------------------------------------------------------*/
1ff442ca 1190
4a120d45 1191static bucket *
118fb205 1192gensym (void)
1ff442ca 1193{
274d42ce
AD
1194 /* Incremented for each generated symbol */
1195 static int gensym_count = 0;
1196 static char buf[256];
1197
a70083a3 1198 bucket *sym;
1ff442ca 1199
274d42ce
AD
1200 sprintf (buf, "@%d", ++gensym_count);
1201 token_buffer = buf;
a70083a3 1202 sym = getsym (token_buffer);
d7020c20 1203 sym->class = nterm_sym;
1ff442ca 1204 sym->value = nvars++;
36281465 1205 return sym;
1ff442ca 1206}
a70083a3 1207\f
107f7dfb
AD
1208/*-------------------------------------------------------------------.
1209| Parse the input grammar into one symbol_list structure. Each |
1210| rule is represented by a sequence of symbols: the left hand side |
1211| followed by the contents of the right hand side, followed by a |
1212| null pointer instead of a symbol to terminate the rule. The next |
1213| symbol is the lhs of the following rule. |
1214| |
1215| All guards and actions are copied out to the appropriate files, |
1216| labelled by the rule number they apply to. |
1217| |
1218| Bison used to allow some %directives in the rules sections, but |
1219| this is no longer considered appropriate: (i) the documented grammar |
1220| doesn't claim it, (ii), it would promote bad style, (iii), error |
1221| recovery for %directives consists in skipping the junk until a `%' |
1222| is seen and helps synchronizing. This scheme is definitely wrong |
1223| in the rules section. |
1224`-------------------------------------------------------------------*/
1ff442ca 1225
/* Read the rules section token by token until `%%' or end of file,
   building the GRAMMAR list and counting NRULES, NITEMS and NVARS as
   it goes.  Afterwards, report symbols that were used but never
   defined, prepend the `axiom' rule, and compute NTOKENS.  */
static void
readgram (void)
{
  token_t t;
  bucket *lhs = NULL;
  /* P is the node most recently allocated; P1 is the current tail of
     the whole list, to which new nodes are appended.  */
  symbol_list *p = NULL;
  symbol_list *p1 = NULL;
  bucket *bp;

  /* Points to first symbol_list of current rule. its symbol is the
     lhs of the rule.  */
  symbol_list *crule = NULL;
  /* Points to the symbol_list preceding crule.  */
  symbol_list *crule1 = NULL;

  t = lex ();

  while (t != tok_two_percents && t != tok_eof)
    if (t == tok_identifier || t == tok_bar)
      {
        /* Nonzero when the last thing read in this rule was an
           action that has not yet been followed by anything.  */
        int action_flag = 0;
        /* Number of symbols in rhs of this rule so far.  Note that it
           is incremented for actions as well as for symbols.  */
        int rulelength = 0;
        int xactions = 0;       /* JF for error checking */
        /* First symbol of the rhs, used for the default-action type
           check below.  */
        bucket *first_rhs = 0;

        /* An identifier starts a rule with a new lhs; a bar reuses
           the lhs of the previous rule.  */
        if (t == tok_identifier)
          {
            lhs = symval;

            /* The first lhs seen becomes the start symbol unless one
               has already been chosen.  */
            if (!start_flag)
              {
                startval = lhs;
                start_flag = 1;
              }

            t = lex ();
            if (t != tok_colon)
              {
                complain (_("ill-formed rule: initial symbol not followed by colon"));
                unlex (t);
              }
          }

        if (nrules == 0 && t == tok_bar)
          {
            complain (_("grammar starts with vertical bar"));
            /* NOTE(review): no lhs exists yet, so whatever SYMVAL
               currently holds is used; confirm it cannot be null
               here, since LHS is dereferenced below.  */
            lhs = symval;       /* BOGUS: use a random symval */
          }
        /* start a new rule and record its lhs.  */

        /* One item is counted per rule in addition to its rhs
           symbols (see the axiom rule below: two symbols, three
           items).  */
        nrules++;
        nitems++;

        p = symbol_list_new (lhs);

        /* Remember the node that precedes this rule so that a dummy
           rule can later be spliced in before it.  */
        crule1 = p1;
        if (p1)
          p1->next = p;
        else
          grammar = p;

        p1 = p;
        crule = p;

        /* mark the rule's lhs as a nonterminal if not already so.  */

        if (lhs->class == unknown_sym)
          {
            lhs->class = nterm_sym;
            lhs->value = nvars;
            nvars++;
          }
        else if (lhs->class == token_sym)
          complain (_("rule given for %s, which is a token"), lhs->tag);

        /* read the rhs of the rule.  */

        for (;;)
          {
            t = lex ();
            if (t == tok_prec)
              {
                /* The token after %prec names the symbol whose
                   precedence the rule takes; then move on.  */
                t = lex ();
                crule->ruleprec = symval;
                t = lex ();
              }

            if (!(t == tok_identifier || t == tok_left_curly))
              break;

            /* If next token is an identifier, see if a colon follows it.
               If one does, exit this rule now.  */
            if (t == tok_identifier)
              {
                bucket *ssave;
                token_t t1;

                /* Peek one token ahead; SYMVAL is saved and restored
                   around the look-ahead.  */
                ssave = symval;
                t1 = lex ();
                unlex (t1);
                symval = ssave;
                if (t1 == tok_colon)
                  break;

                if (!first_rhs) /* JF */
                  first_rhs = symval;
                /* Not followed by colon =>
                   process as part of this rule's rhs.  */
              }

            /* If we just passed an action, that action was in the middle
               of a rule, so make a dummy rule to reduce it to a
               non-terminal.  */
            if (action_flag)
              {
                /* Since the action was written out with this rule's
                   number, we must give the new rule this number by
                   inserting the new rule before it.  */

                /* Make a dummy nonterminal, a gensym.  */
                bucket *sdummy = gensym ();

                /* Make a new rule, whose body is empty, before the
                   current one, so that the action just read can
                   belong to it.  */
                nrules++;
                nitems++;
                p = symbol_list_new (sdummy);
                /* Attach its lineno to that of the host rule.  */
                p->line = crule->line;
                /* Move the action from the host rule to this one.  */
                p->action = crule->action;
                p->action_line = crule->action_line;
                crule->action = NULL;

                /* Link the dummy rule after whatever preceded the
                   host rule, or make it the head of the grammar.  */
                if (crule1)
                  crule1->next = p;
                else
                  grammar = p;
                /* End of the rule.  */
                /* This terminator is now the node preceding CRULE,
                   so a later dummy rule goes after this one.  */
                crule1 = symbol_list_new (NULL);
                crule1->next = crule;

                p->next = crule1;

                /* Insert the dummy generated by that rule into this
                   rule.  */
                nitems++;
                p = symbol_list_new (sdummy);
                p1->next = p;
                p1 = p;

                action_flag = 0;
              }

            if (t == tok_identifier)
              {
                nitems++;
                p = symbol_list_new (symval);
                p1->next = p;
                p1 = p;
              }
            else                /* handle an action.  */
              {
                parse_action (crule, rulelength);
                action_flag = 1;
                xactions++;     /* JF */
              }
            rulelength++;
          }                     /* end of read rhs of rule */

        /* Put an empty link in the list to mark the end of this rule  */
        p = symbol_list_new (NULL);
        p1->next = p;
        p1 = p;

        /* A %prec can only still be pending here if it directly
           followed the one handled inside the loop.  */
        if (t == tok_prec)
          {
            complain (_("two @prec's in a row"));
            t = lex ();
            crule->ruleprec = symval;
            t = lex ();
          }

        if (t == tok_guard)
          {
            if (!semantic_parser)
              complain (_("%%guard present but %%semantic_parser not specified"));

            parse_guard (crule, rulelength);
            t = lex ();
          }

        if (t == tok_left_curly)
          {
            /* This case never occurs -wjh */
            if (action_flag)
              complain (_("two actions at end of one rule"));
            parse_action (crule, rulelength);
            action_flag = 1;
            xactions++;         /* -wjh */
            t = lex ();
          }
        /* If $$ is being set in default way, report if any type
           mismatch.  */
        else if (!xactions
                 && first_rhs && lhs->type_name != first_rhs->type_name)
          {
            /* The pointers differ; it is a clash unless both names
               are present and compare equal as strings.  */
            if (lhs->type_name == 0
                || first_rhs->type_name == 0
                || strcmp (lhs->type_name, first_rhs->type_name))
              complain (_("type clash (`%s' `%s') on default action"),
                        lhs->type_name ? lhs->type_name : "",
                        first_rhs->type_name ? first_rhs->type_name : "");
          }
        /* Warn if there is no default for $$ but we need one.  */
        else if (!xactions && !first_rhs && lhs->type_name != 0)
          complain (_("empty rule for typed nonterminal, and no action"));
        /* The semicolon ending a rule is optional.  */
        if (t == tok_semicolon)
          t = lex ();
      }
    else
      {
        /* Neither an identifier nor a bar: report it and skip just
           this one token.  */
        complain (_("invalid input: %s"), quote (token_buffer));
        t = lex ();
      }

  /* grammar has been read.  Do some checking */

  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == unknown_sym)
      {
        complain (_
                  ("symbol %s is used, but is not defined as a token and has no rules"),
                  bp->tag);
        bp->class = nterm_sym;
        bp->value = nvars++;
      }

  /* Insert the initial rule, which line is that of the first rule
     (not that of the start symbol):

     axiom: %start EOF.  */
  p = symbol_list_new (axiom);
  p->line = grammar->line;
  p->next = symbol_list_new (startval);
  p->next->next = symbol_list_new (eoftoken);
  p->next->next->next = symbol_list_new (NULL);
  p->next->next->next->next = grammar;
  nrules += 1;
  nitems += 3;
  grammar = p;
  startval = axiom;

  if (nsyms > MAXSHORT)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           MAXSHORT);

  /* Every symbol that is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
ff48177d
MA
1492
1493/* At the end of the grammar file, some C source code must
63c2d5de 1494 be stored. It is going to be associated to the epilogue
ff48177d
MA
1495 directive. */
1496static void
1497read_additionnal_code (void)
1498{
1499 char c;
63c2d5de 1500 struct obstack el_obstack;
342b8b6e 1501
63c2d5de 1502 obstack_init (&el_obstack);
ff48177d 1503
710ddc4f
MA
1504 if (!no_lines_flag)
1505 {
1506 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1507 lineno, quotearg_style (c_quoting_style,
b7c49edf 1508 muscle_find ("filename")));
710ddc4f
MA
1509 }
1510
ff48177d 1511 while ((c = getc (finput)) != EOF)
63c2d5de 1512 obstack_1grow (&el_obstack, c);
342b8b6e 1513
63c2d5de 1514 obstack_1grow (&el_obstack, 0);
11d82f03 1515 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1516}
1517
a70083a3 1518\f
037ca2f1
AD
1519/*------------------------------------------------------------------.
1520| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1521| number. |
1522`------------------------------------------------------------------*/
1523
1524static void
1525token_translations_init (void)
1526{
1527 bucket *bp = NULL;
1528 int i;
1529
1530 token_translations = XCALLOC (short, max_user_token_number + 1);
1531
1532 /* Initialize all entries for literal tokens to 2, the internal
1533 token number for $undefined., which represents all invalid
1534 inputs. */
1535 for (i = 0; i <= max_user_token_number; i++)
1536 token_translations[i] = 2;
1537
1538 for (bp = firstsymbol; bp; bp = bp->next)
1539 {
1540 /* Non-terminal? */
1541 if (bp->value >= ntokens)
1542 continue;
1543 /* A token string alias? */
1544 if (bp->user_token_number == SALIAS)
1545 continue;
6b7e85b9
AD
1546
1547 assert (bp->user_token_number != SUNDEF);
1548
037ca2f1
AD
1549 /* A token which translation has already been set? */
1550 if (token_translations[bp->user_token_number] != 2)
1551 complain (_("tokens %s and %s both assigned number %d"),
ad949da9 1552 symbols[token_translations[bp->user_token_number]]->tag,
037ca2f1
AD
1553 bp->tag, bp->user_token_number);
1554 token_translations[bp->user_token_number] = bp->value;
1555 }
1556}
1557
1558
0e78e603
AD
1559/*----------------------------------------------------------------.
1560| Assign symbol numbers, and write definition of token names into |
1561| FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1562`----------------------------------------------------------------*/
1ff442ca 1563
4a120d45 1564static void
118fb205 1565packsymbols (void)
1ff442ca 1566{
342b8b6e 1567 bucket *bp = NULL;
a70083a3 1568 int tokno = 1;
a70083a3 1569 int last_user_token_number;
1ff442ca 1570
0e78e603 1571 symbols = XCALLOC (bucket *, nsyms);
1ff442ca 1572
1ff442ca
NF
1573 max_user_token_number = 256;
1574 last_user_token_number = 256;
1575
1576 for (bp = firstsymbol; bp; bp = bp->next)
1577 {
d7020c20 1578 if (bp->class == nterm_sym)
1ff442ca
NF
1579 {
1580 bp->value += ntokens;
1581 }
943819bf
RS
1582 else if (bp->alias)
1583 {
b7c49edf
AD
1584 /* This symbol and its alias are a single token defn.
1585 Allocate a tokno, and assign to both check agreement of
1586 prec and assoc fields and make both the same */
1587 if (bp->value == -1)
1588 {
1589 if (bp == eoftoken || bp->alias == eoftoken)
1590 bp->value = bp->alias->value = 0;
1591 else
1592 {
1593 bp->value = bp->alias->value = tokno++;
1594 }
1595 }
943819bf 1596
0a6384c4
AD
1597 if (bp->prec != bp->alias->prec)
1598 {
1599 if (bp->prec != 0 && bp->alias->prec != 0
1600 && bp->user_token_number == SALIAS)
a0f6b076
AD
1601 complain (_("conflicting precedences for %s and %s"),
1602 bp->tag, bp->alias->tag);
0a6384c4
AD
1603 if (bp->prec != 0)
1604 bp->alias->prec = bp->prec;
1605 else
1606 bp->prec = bp->alias->prec;
1607 }
943819bf 1608
0a6384c4
AD
1609 if (bp->assoc != bp->alias->assoc)
1610 {
a0f6b076
AD
1611 if (bp->assoc != 0 && bp->alias->assoc != 0
1612 && bp->user_token_number == SALIAS)
1613 complain (_("conflicting assoc values for %s and %s"),
1614 bp->tag, bp->alias->tag);
1615 if (bp->assoc != 0)
1616 bp->alias->assoc = bp->assoc;
1617 else
1618 bp->assoc = bp->alias->assoc;
1619 }
0a6384c4 1620
b7c49edf 1621 /* Do not do processing below for SALIASs. */
0a6384c4 1622 if (bp->user_token_number == SALIAS)
b7c49edf 1623 continue;
943819bf 1624
a70083a3 1625 }
b7c49edf 1626 else /* bp->class == token_sym */
943819bf 1627 {
b7c49edf
AD
1628 if (bp == eoftoken)
1629 bp->value = 0;
1630 else
1631 bp->value = tokno++;
943819bf
RS
1632 }
1633
d7020c20 1634 if (bp->class == token_sym)
1ff442ca 1635 {
6b7e85b9 1636 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1637 bp->user_token_number = ++last_user_token_number;
1638 if (bp->user_token_number > max_user_token_number)
1639 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1640 }
1641
0e78e603 1642 symbols[bp->value] = bp;
1ff442ca
NF
1643 }
1644
037ca2f1 1645 token_translations_init ();
1ff442ca
NF
1646
1647 error_token_number = errtoken->value;
1648
e3f1699f
AD
1649 if (startval->class == unknown_sym)
1650 fatal (_("the start symbol %s is undefined"), startval->tag);
1651 else if (startval->class == token_sym)
1652 fatal (_("the start symbol %s is a token"), startval->tag);
1653
1654 start_symbol = startval->value;
1655}
1656
1657
a70083a3
AD
1658/*---------------------------------------------------------------.
1659| Convert the rules into the representation using RRHS, RLHS and |
1660| RITEMS. |
1661`---------------------------------------------------------------*/
1ff442ca 1662
4a120d45 1663static void
118fb205 1664packgram (void)
1ff442ca 1665{
a70083a3
AD
1666 int itemno;
1667 int ruleno;
1668 symbol_list *p;
1ff442ca 1669
adc8c848
AD
1670 /* We use short to index items. */
1671 if (nitems >= MAXSHORT)
1672 fatal (_("too many items (max %d)"), MAXSHORT);
1673
d7913476 1674 ritem = XCALLOC (short, nitems + 1);
1a2b5d37 1675 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1676
1677 itemno = 0;
1678 ruleno = 1;
1679
1680 p = grammar;
1681 while (p)
1682 {
b29b2ed5 1683 bucket *ruleprec = p->ruleprec;
1a2b5d37
AD
1684 rules[ruleno].lhs = p->sym->value;
1685 rules[ruleno].rhs = itemno;
1686 rules[ruleno].line = p->line;
1687 rules[ruleno].useful = TRUE;
1688 rules[ruleno].action = p->action;
1689 rules[ruleno].action_line = p->action_line;
1690 rules[ruleno].guard = p->guard;
1691 rules[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1692
1693 p = p->next;
1694 while (p && p->sym)
1695 {
1696 ritem[itemno++] = p->sym->value;
1697 /* A rule gets by default the precedence and associativity
1698 of the last token in it. */
d7020c20 1699 if (p->sym->class == token_sym)
1ff442ca 1700 {
1a2b5d37
AD
1701 rules[ruleno].prec = p->sym->prec;
1702 rules[ruleno].assoc = p->sym->assoc;
1ff442ca 1703 }
a70083a3
AD
1704 if (p)
1705 p = p->next;
1ff442ca
NF
1706 }
1707
1708 /* If this rule has a %prec,
a70083a3 1709 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1710 if (ruleprec)
1711 {
1a2b5d37
AD
1712 rules[ruleno].prec = ruleprec->prec;
1713 rules[ruleno].assoc = ruleprec->assoc;
1714 rules[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1715 }
1716
1717 ritem[itemno++] = -ruleno;
1718 ruleno++;
1719
a70083a3
AD
1720 if (p)
1721 p = p->next;
1ff442ca
NF
1722 }
1723
1724 ritem[itemno] = 0;
75142d45
AD
1725 nritems = itemno;
1726 assert (nritems == nitems);
3067fbef
AD
1727
1728 if (trace_flag)
1729 ritem_print (stderr);
1ff442ca 1730}
a70083a3
AD
1731\f
1732/*-------------------------------------------------------------------.
1733| Read in the grammar specification and record it in the format |
ea5607fd 1734| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1735| and all actions into ACTION_OBSTACK, in each case forming the body |
1736| of a C function (YYGUARD or YYACTION) which contains a switch |
1737| statement to decide which guard or action to execute. |
a70083a3
AD
1738`-------------------------------------------------------------------*/
1739
1740void
1741reader (void)
1742{
1743 start_flag = 0;
1744 startval = NULL; /* start symbol not specified yet. */
1745
b7c49edf 1746 nsyms = 0;
a70083a3
AD
1747 nvars = 0;
1748 nrules = 0;
1749 nitems = 0;
a70083a3
AD
1750
1751 typed = 0;
1752 lastprec = 0;
1753
a70083a3
AD
1754 semantic_parser = 0;
1755 pure_parser = 0;
a70083a3
AD
1756
1757 grammar = NULL;
1758
342b8b6e 1759 lex_init ();
a70083a3
AD
1760 lineno = 1;
1761
11d82f03
MA
1762 /* Initialize the muscle obstack. */
1763 obstack_init (&muscle_obstack);
82e236e2 1764
a70083a3
AD
1765 /* Initialize the symbol table. */
1766 tabinit ();
b6610515 1767
30171f79
AD
1768 /* Construct the axiom symbol. */
1769 axiom = getsym ("$axiom");
1770 axiom->class = nterm_sym;
1771 axiom->value = nvars++;
1772
a70083a3
AD
1773 /* Construct the error token */
1774 errtoken = getsym ("error");
d7020c20 1775 errtoken->class = token_sym;
a70083a3 1776 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1777
a70083a3
AD
1778 /* Construct a token that represents all undefined literal tokens.
1779 It is always token number 2. */
1780 undeftoken = getsym ("$undefined.");
d7020c20 1781 undeftoken->class = token_sym;
a70083a3
AD
1782 undeftoken->user_token_number = 2;
1783
331dbc1b
AD
1784 /* Initialize the obstacks. */
1785 obstack_init (&action_obstack);
1786 obstack_init (&attrs_obstack);
331dbc1b
AD
1787 obstack_init (&output_obstack);
1788
1789 finput = xfopen (infile, "r");
1790
896fe5c1
AD
1791 /* Read the declaration section. Copy %{ ... %} groups to
1792 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1793 etc. found there. */
a70083a3 1794 read_declarations ();
b7c49edf
AD
1795
1796 /* If the user did not define her EOFTOKEN, do it now. */
1797 if (!eoftoken)
1798 {
1799 eoftoken = getsym ("$");
1800 eoftoken->class = token_sym;
1801 /* Value specified by POSIX. */
1802 eoftoken->user_token_number = 0;
1803 }
1804
a70083a3
AD
1805 /* Read in the grammar, build grammar in list form. Write out
1806 guards and actions. */
1807 readgram ();
ff48177d
MA
1808 /* Some C code is given at the end of the grammar file. */
1809 read_additionnal_code ();
b0c4483e 1810
331dbc1b
AD
1811 lex_free ();
1812 xfclose (finput);
1813
a70083a3
AD
1814 /* Assign the symbols their symbol numbers. Write #defines for the
1815 token symbols into FDEFINES if requested. */
1816 packsymbols ();
93ede233 1817
a70083a3
AD
1818 /* Convert the grammar into the format described in gram.h. */
1819 packgram ();
8419d367
AD
1820
1821 /* The grammar as a symbol_list is no longer needed. */
1822 LIST_FREE (symbol_list, grammar);
a70083a3 1823}