]> git.saurik.com Git - bison.git/blame - src/reader.c
Don't store the token defs in a muscle, just be ready to output it
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
41 bucket *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
a70083a3 51 bucket *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
4a120d45
JT
55static symbol_list *grammar;
56static int start_flag;
57static bucket *startval;
1ff442ca
NF
58
59/* Nonzero if components of semantic values are used, implying
60 they must be unions. */
61static int value_components_used;
62
d7020c20
AD
63/* Nonzero if %union has been seen. */
64static int typed;
1ff442ca 65
d7020c20
AD
66/* Incremented for each %left, %right or %nonassoc seen */
67static int lastprec;
1ff442ca 68
b7c49edf
AD
69static bucket *errtoken = NULL;
70static bucket *undeftoken = NULL;
71static bucket *eoftoken = NULL;
30171f79 72static bucket *axiom = NULL;
b29b2ed5 73
6255b435 74static symbol_list *
b29b2ed5
AD
75symbol_list_new (bucket *sym)
76{
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
d945f5cd
AD
81 res->action = NULL;
82 res->action_line = 0;
f499b062
AD
83 res->guard = NULL;
84 res->guard_line = 0;
b29b2ed5
AD
85 res->ruleprec = NULL;
86 return res;
87}
88
0d533154 89\f
a70083a3 90
0d533154
AD
91/*===================\
92| Low level lexing. |
93\===================*/
943819bf
RS
94
95static void
118fb205 96skip_to_char (int target)
943819bf
RS
97{
98 int c;
99 if (target == '\n')
a0f6b076 100 complain (_(" Skipping to next \\n"));
943819bf 101 else
a0f6b076 102 complain (_(" Skipping to next %c"), target);
943819bf
RS
103
104 do
0d533154 105 c = skip_white_space ();
943819bf 106 while (c != target && c != EOF);
a083fbbf 107 if (c != EOF)
0d533154 108 ungetc (c, finput);
943819bf
RS
109}
110
111
0d533154
AD
/* Read a signed decimal integer from STREAM and return its value.

   The syntax accepted is an optional `-' followed by any number of
   digits; if there are no digits the result is 0.  The first
   character that is not part of the number is pushed back onto
   STREAM.  A magnitude that does not fit in an `int' saturates at
   INT_MAX instead of overflowing (signed overflow is undefined
   behavior); the remaining digits are still consumed.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Equivalent to `10 * n + digit > INT_MAX', computed without
         overflowing.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  ungetc (c, stream);

  return sign * n;
}
139\f
79282c5a
AD
140/*--------------------------------------------------------------.
141| Get the data type (alternative in the union) of the value for |
142| symbol N in rule RULE. |
143`--------------------------------------------------------------*/
144
145static char *
b29b2ed5 146get_type_name (int n, symbol_list *rule)
79282c5a
AD
147{
148 int i;
149 symbol_list *rp;
150
151 if (n < 0)
152 {
153 complain (_("invalid $ value"));
154 return NULL;
155 }
156
157 rp = rule;
158 i = 0;
159
160 while (i < n)
161 {
162 rp = rp->next;
163 if (rp == NULL || rp->sym == NULL)
164 {
165 complain (_("invalid $ value"));
166 return NULL;
167 }
168 i++;
169 }
170
171 return rp->sym->type_name;
172}
173\f
337bab46
AD
174/*------------------------------------------------------------.
175| Dump the string from FIN to OOUT if non null. MATCH is the |
176| delimiter of the string (either ' or "). |
177`------------------------------------------------------------*/
ae3c3164
AD
178
179static inline void
b6610515 180copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
181{
182 int c;
183
b6610515
RA
184 if (store)
185 obstack_1grow (oout, match);
8c7ebe49 186
4a120d45 187 c = getc (fin);
ae3c3164
AD
188
189 while (c != match)
190 {
191 if (c == EOF)
192 fatal (_("unterminated string at end of file"));
193 if (c == '\n')
194 {
a0f6b076 195 complain (_("unterminated string"));
4a120d45 196 ungetc (c, fin);
ae3c3164
AD
197 c = match; /* invent terminator */
198 continue;
199 }
200
337bab46 201 obstack_1grow (oout, c);
ae3c3164
AD
202
203 if (c == '\\')
204 {
4a120d45 205 c = getc (fin);
ae3c3164
AD
206 if (c == EOF)
207 fatal (_("unterminated string at end of file"));
337bab46 208 obstack_1grow (oout, c);
8c7ebe49 209
ae3c3164
AD
210 if (c == '\n')
211 lineno++;
212 }
213
a70083a3 214 c = getc (fin);
ae3c3164
AD
215 }
216
b6610515
RA
217 if (store)
218 obstack_1grow (oout, c);
219}
220
/* Copy a string literal delimited by MATCH from FIN to OOUT,
   delimiters included.  The opening delimiter has already been read.
   FIXME.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
228
b6610515
RA
/* Copy the longest run of alphanumeric characters and underscores
   from FIN to OOUT.  The character that ends the run is pushed back
   onto FIN.  FIXME.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (ch == '_' || isalnum (ch))
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
ae3c3164 241
2666f928
AD
242
243/*------------------------------------------------------------------.
244| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
245| `/', which might or might not be a comment. In any case, copy |
246| what we saw. |
247`------------------------------------------------------------------*/
ae3c3164
AD
248
249static inline void
2666f928 250copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
251{
252 int cplus_comment;
a70083a3 253 int ended;
550a72a3
AD
254 int c;
255
256 /* We read a `/', output it. */
2666f928 257 obstack_1grow (oout, '/');
550a72a3
AD
258
259 switch ((c = getc (fin)))
260 {
261 case '/':
262 cplus_comment = 1;
263 break;
264 case '*':
265 cplus_comment = 0;
266 break;
267 default:
268 ungetc (c, fin);
269 return;
270 }
ae3c3164 271
2666f928 272 obstack_1grow (oout, c);
550a72a3 273 c = getc (fin);
ae3c3164
AD
274
275 ended = 0;
276 while (!ended)
277 {
278 if (!cplus_comment && c == '*')
279 {
280 while (c == '*')
281 {
2666f928 282 obstack_1grow (oout, c);
550a72a3 283 c = getc (fin);
ae3c3164
AD
284 }
285
286 if (c == '/')
287 {
2666f928 288 obstack_1grow (oout, c);
ae3c3164
AD
289 ended = 1;
290 }
291 }
292 else if (c == '\n')
293 {
294 lineno++;
2666f928 295 obstack_1grow (oout, c);
ae3c3164
AD
296 if (cplus_comment)
297 ended = 1;
298 else
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 else if (c == EOF)
302 fatal (_("unterminated comment"));
303 else
304 {
2666f928 305 obstack_1grow (oout, c);
550a72a3 306 c = getc (fin);
ae3c3164
AD
307 }
308 }
309}
310
311
a70083a3 312/*-----------------------------------------------------------------.
337bab46 313| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
314| reference to this location. STACK_OFFSET is the number of values |
315| in the current rule so far, which says where to find `$0' with |
316| respect to the top of the stack. |
317`-----------------------------------------------------------------*/
1ff442ca 318
a70083a3 319static inline void
337bab46 320copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 321{
a70083a3 322 int c;
1ff442ca 323
a70083a3
AD
324 c = getc (fin);
325 if (c == '$')
1ff442ca 326 {
ff4423cc 327 obstack_sgrow (oout, "yyloc");
89cab50d 328 locations_flag = 1;
a70083a3
AD
329 }
330 else if (isdigit (c) || c == '-')
331 {
332 int n;
1ff442ca 333
a70083a3
AD
334 ungetc (c, fin);
335 n = read_signed_integer (fin);
11e2beca
AD
336 if (n > stack_offset)
337 complain (_("invalid value: %s%d"), "@", n);
338 else
339 {
340 /* Offset is always 0 if parser has already popped the stack
341 pointer. */
342 obstack_fgrow1 (oout, "yylsp[%d]",
343 n - (semantic_parser ? 0 : stack_offset));
344 locations_flag = 1;
345 }
1ff442ca 346 }
a70083a3 347 else
ff4a34be
AD
348 {
349 char buf[] = "@c";
350 buf[1] = c;
351 complain (_("%s is invalid"), quote (buf));
352 }
1ff442ca 353}
79282c5a
AD
354
355
356/*-------------------------------------------------------------------.
357| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
358| |
359| Possible inputs: $[<TYPENAME>]($|integer) |
360| |
337bab46 361| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
362| the number of values in the current rule so far, which says where |
363| to find `$0' with respect to the top of the stack. |
364`-------------------------------------------------------------------*/
365
366static inline void
337bab46 367copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
368 symbol_list *rule, int stack_offset)
369{
370 int c = getc (fin);
b0ce6046 371 const char *type_name = NULL;
79282c5a 372
f282676b 373 /* Get the type name if explicit. */
79282c5a
AD
374 if (c == '<')
375 {
f282676b 376 read_type_name (fin);
79282c5a
AD
377 type_name = token_buffer;
378 value_components_used = 1;
79282c5a
AD
379 c = getc (fin);
380 }
381
382 if (c == '$')
383 {
ff4423cc 384 obstack_sgrow (oout, "yyval");
8c7ebe49 385
79282c5a
AD
386 if (!type_name)
387 type_name = get_type_name (0, rule);
388 if (type_name)
337bab46 389 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
390 if (!type_name && typed)
391 complain (_("$$ of `%s' has no declared type"),
392 rule->sym->tag);
393 }
394 else if (isdigit (c) || c == '-')
395 {
396 int n;
397 ungetc (c, fin);
398 n = read_signed_integer (fin);
399
11e2beca
AD
400 if (n > stack_offset)
401 complain (_("invalid value: %s%d"), "$", n);
402 else
403 {
404 if (!type_name && n > 0)
405 type_name = get_type_name (n, rule);
406
407 /* Offset is always 0 if parser has already popped the stack
408 pointer. */
409 obstack_fgrow1 (oout, "yyvsp[%d]",
410 n - (semantic_parser ? 0 : stack_offset));
411
412 if (type_name)
413 obstack_fgrow1 (oout, ".%s", type_name);
414 if (!type_name && typed)
415 complain (_("$%d of `%s' has no declared type"),
416 n, rule->sym->tag);
417 }
79282c5a
AD
418 }
419 else
420 {
421 char buf[] = "$c";
422 buf[1] = c;
423 complain (_("%s is invalid"), quote (buf));
424 }
425}
a70083a3
AD
426\f
427/*-------------------------------------------------------------------.
428| Copy the contents of a `%{ ... %}' into the definitions file. The |
429| `%{' has already been read. Return after reading the `%}'. |
430`-------------------------------------------------------------------*/
1ff442ca 431
4a120d45 432static void
118fb205 433copy_definition (void)
1ff442ca 434{
a70083a3 435 int c;
ae3c3164 436 /* -1 while reading a character if prev char was %. */
a70083a3 437 int after_percent;
1ff442ca 438
89cab50d 439 if (!no_lines_flag)
25b222fa
MA
440 {
441 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 442 lineno, quotearg_style (c_quoting_style,
b7c49edf 443 muscle_find ("filename")));
25b222fa 444 }
1ff442ca
NF
445
446 after_percent = 0;
447
ae3c3164 448 c = getc (finput);
1ff442ca
NF
449
450 for (;;)
451 {
452 switch (c)
453 {
454 case '\n':
dd60faec 455 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
456 lineno++;
457 break;
458
459 case '%':
a70083a3 460 after_percent = -1;
1ff442ca 461 break;
a083fbbf 462
1ff442ca
NF
463 case '\'':
464 case '"':
337bab46 465 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
466 break;
467
468 case '/':
337bab46 469 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
470 break;
471
472 case EOF:
a70083a3 473 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
474
475 default:
dd60faec 476 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
477 }
478
a70083a3 479 c = getc (finput);
1ff442ca
NF
480
481 if (after_percent)
482 {
483 if (c == '}')
484 return;
dd60faec 485 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
486 }
487 after_percent = 0;
1ff442ca 488 }
1ff442ca
NF
489}
490
491
d7020c20
AD
492/*-------------------------------------------------------------------.
493| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
494| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
495| are reversed. |
496`-------------------------------------------------------------------*/
1ff442ca 497
4a120d45 498static void
d7020c20 499parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 500{
342b8b6e
AD
501 token_t token = tok_undef;
502 char *typename = NULL;
1ff442ca 503
1e9798d5
AD
504 /* The symbol being defined. */
505 struct bucket *symbol = NULL;
506
507 /* After `%token' and `%nterm', any number of symbols maybe be
508 defined. */
1ff442ca
NF
509 for (;;)
510 {
e6011337
JT
511 int tmp_char = ungetc (skip_white_space (), finput);
512
1e9798d5
AD
513 /* `%' (for instance from `%token', or from `%%' etc.) is the
514 only valid means to end this declaration. */
e6011337 515 if (tmp_char == '%')
1ff442ca 516 return;
e6011337 517 if (tmp_char == EOF)
a0f6b076 518 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 519
a70083a3 520 token = lex ();
511e79b3 521 if (token == tok_comma)
943819bf
RS
522 {
523 symbol = NULL;
524 continue;
525 }
511e79b3 526 if (token == tok_typename)
1ff442ca 527 {
95e36146 528 typename = xstrdup (token_buffer);
1ff442ca 529 value_components_used = 1;
943819bf
RS
530 symbol = NULL;
531 }
511e79b3 532 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 533 {
8e03724b
AD
534 if (symval->alias)
535 warn (_("symbol `%s' used more than once as a literal string"),
536 symval->tag);
537 else if (symbol->alias)
538 warn (_("symbol `%s' given more than one literal string"),
539 symbol->tag);
540 else
541 {
542 symval->class = token_sym;
543 symval->type_name = typename;
544 symval->user_token_number = symbol->user_token_number;
545 symbol->user_token_number = SALIAS;
546 symval->alias = symbol;
547 symbol->alias = symval;
548 /* symbol and symval combined are only one symbol */
549 nsyms--;
550 }
8e03724b 551 symbol = NULL;
1ff442ca 552 }
511e79b3 553 else if (token == tok_identifier)
1ff442ca
NF
554 {
555 int oldclass = symval->class;
943819bf 556 symbol = symval;
1ff442ca 557
943819bf 558 if (symbol->class == what_is_not)
a0f6b076 559 complain (_("symbol %s redefined"), symbol->tag);
943819bf 560 symbol->class = what_is;
d7020c20 561 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 562 symbol->value = nvars++;
1ff442ca
NF
563
564 if (typename)
565 {
943819bf
RS
566 if (symbol->type_name == NULL)
567 symbol->type_name = typename;
a70083a3 568 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 569 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
570 }
571 }
511e79b3 572 else if (symbol && token == tok_number)
a70083a3 573 {
943819bf 574 symbol->user_token_number = numval;
b7c49edf
AD
575 /* User defined EOF token? */
576 if (numval == 0)
577 eoftoken = symbol;
a70083a3 578 }
1ff442ca 579 else
943819bf 580 {
a0f6b076 581 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
582 token_buffer,
583 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 584 skip_to_char ('%');
943819bf 585 }
1ff442ca
NF
586 }
587
588}
589
1ff442ca 590
d7020c20
AD
591/*------------------------------.
592| Parse what comes after %start |
593`------------------------------*/
1ff442ca 594
4a120d45 595static void
118fb205 596parse_start_decl (void)
1ff442ca
NF
597{
598 if (start_flag)
27821bff 599 complain (_("multiple %s declarations"), "%start");
511e79b3 600 if (lex () != tok_identifier)
27821bff 601 complain (_("invalid %s declaration"), "%start");
943819bf
RS
602 else
603 {
604 start_flag = 1;
605 startval = symval;
606 }
1ff442ca
NF
607}
608
a70083a3
AD
609/*-----------------------------------------------------------.
610| read in a %type declaration and record its information for |
611| get_type_name to access |
612`-----------------------------------------------------------*/
613
614static void
615parse_type_decl (void)
616{
a70083a3
AD
617 char *name;
618
511e79b3 619 if (lex () != tok_typename)
a70083a3
AD
620 {
621 complain ("%s", _("%type declaration has no <typename>"));
622 skip_to_char ('%');
623 return;
624 }
625
95e36146 626 name = xstrdup (token_buffer);
a70083a3
AD
627
628 for (;;)
629 {
f17bcd1f 630 token_t t;
a70083a3
AD
631 int tmp_char = ungetc (skip_white_space (), finput);
632
633 if (tmp_char == '%')
634 return;
635 if (tmp_char == EOF)
636 fatal (_("Premature EOF after %s"), token_buffer);
637
638 t = lex ();
639
640 switch (t)
1ff442ca
NF
641 {
642
511e79b3
AD
643 case tok_comma:
644 case tok_semicolon:
1ff442ca
NF
645 break;
646
511e79b3 647 case tok_identifier:
1ff442ca
NF
648 if (symval->type_name == NULL)
649 symval->type_name = name;
a70083a3 650 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 651 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
652
653 break;
654
655 default:
a0f6b076
AD
656 complain (_("invalid %%type declaration due to item: %s"),
657 token_buffer);
a70083a3 658 skip_to_char ('%');
1ff442ca
NF
659 }
660 }
661}
662
663
664
d7020c20
AD
665/*----------------------------------------------------------------.
666| Read in a %left, %right or %nonassoc declaration and record its |
667| information. |
668`----------------------------------------------------------------*/
1ff442ca 669
4a120d45 670static void
d7020c20 671parse_assoc_decl (associativity assoc)
1ff442ca 672{
a70083a3
AD
673 char *name = NULL;
674 int prev = 0;
1ff442ca 675
a70083a3 676 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 677
1ff442ca
NF
678 for (;;)
679 {
f17bcd1f 680 token_t t;
e6011337 681 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 682
e6011337 683 if (tmp_char == '%')
1ff442ca 684 return;
e6011337 685 if (tmp_char == EOF)
a0f6b076 686 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 687
a70083a3 688 t = lex ();
1ff442ca
NF
689
690 switch (t)
691 {
511e79b3 692 case tok_typename:
95e36146 693 name = xstrdup (token_buffer);
1ff442ca
NF
694 break;
695
511e79b3 696 case tok_comma:
1ff442ca
NF
697 break;
698
511e79b3 699 case tok_identifier:
1ff442ca 700 if (symval->prec != 0)
a0f6b076 701 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
702 symval->prec = lastprec;
703 symval->assoc = assoc;
d7020c20 704 if (symval->class == nterm_sym)
a0f6b076 705 complain (_("symbol %s redefined"), symval->tag);
d7020c20 706 symval->class = token_sym;
1ff442ca 707 if (name)
a70083a3 708 { /* record the type, if one is specified */
1ff442ca
NF
709 if (symval->type_name == NULL)
710 symval->type_name = name;
a70083a3 711 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 712 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
713 }
714 break;
715
511e79b3
AD
716 case tok_number:
717 if (prev == tok_identifier)
a70083a3 718 {
1ff442ca 719 symval->user_token_number = numval;
a70083a3
AD
720 }
721 else
722 {
723 complain (_
724 ("invalid text (%s) - number should be after identifier"),
725token_buffer);
726 skip_to_char ('%');
727 }
1ff442ca
NF
728 break;
729
511e79b3 730 case tok_semicolon:
1ff442ca
NF
731 return;
732
733 default:
a0f6b076 734 complain (_("unexpected item: %s"), token_buffer);
a70083a3 735 skip_to_char ('%');
1ff442ca
NF
736 }
737
738 prev = t;
1ff442ca
NF
739 }
740}
741
742
743
dd60faec 744/*--------------------------------------------------------------.
180d45ba
PB
745| Copy the union declaration into the stype muscle |
746| (and fdefines), where it is made into the definition of |
747| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 748`--------------------------------------------------------------*/
1ff442ca 749
4a120d45 750static void
118fb205 751parse_union_decl (void)
1ff442ca 752{
a70083a3
AD
753 int c;
754 int count = 0;
428046f8 755 bool done = FALSE;
180d45ba 756 struct obstack union_obstack;
1ff442ca 757 if (typed)
27821bff 758 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
759
760 typed = 1;
761
180d45ba
PB
762 obstack_init (&union_obstack);
763 obstack_sgrow (&union_obstack, "union");
1ff442ca 764
428046f8 765 while (!done)
1ff442ca 766 {
428046f8
AD
767 c = xgetc (finput);
768
342b8b6e
AD
769 /* If C contains '/', it is output by copy_comment (). */
770 if (c != '/')
2666f928 771 obstack_1grow (&union_obstack, c);
1ff442ca
NF
772
773 switch (c)
774 {
775 case '\n':
776 lineno++;
777 break;
778
779 case '/':
2666f928 780 copy_comment (finput, &union_obstack);
1ff442ca
NF
781 break;
782
1ff442ca
NF
783 case '{':
784 count++;
785 break;
786
787 case '}':
428046f8 788 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 789 if (count == 0)
27821bff 790 complain (_("unmatched %s"), "`}'");
1ff442ca 791 count--;
428046f8
AD
792 if (!count)
793 done = TRUE;
794 break;
1ff442ca 795 }
1ff442ca 796 }
180d45ba 797
428046f8
AD
798 /* JF don't choke on trailing semi */
799 c = skip_white_space ();
800 if (c != ';')
801 ungetc (c, finput);
802 obstack_1grow (&union_obstack, 0);
803 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
804}
805
d7020c20
AD
806
807/*-------------------------------------------------------.
808| Parse the declaration %expect N which says to expect N |
809| shift-reduce conflicts. |
810`-------------------------------------------------------*/
1ff442ca 811
4a120d45 812static void
118fb205 813parse_expect_decl (void)
1ff442ca 814{
131e2fef 815 int c = skip_white_space ();
1ff442ca
NF
816 ungetc (c, finput);
817
131e2fef 818 if (!isdigit (c))
79282c5a 819 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
820 else
821 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
822}
823
a70083a3
AD
824
825/*-------------------------------------------------------------------.
826| Parse what comes after %thong. the full syntax is |
827| |
828| %thong <type> token number literal |
829| |
830| the <type> or number may be omitted. The number specifies the |
831| user_token_number. |
832| |
833| Two symbols are entered in the table, one for the token symbol and |
834| one for the literal. Both are given the <type>, if any, from the |
835| declaration. The ->user_token_number of the first is SALIAS and |
836| the ->user_token_number of the second is set to the number, if |
837| any, from the declaration. The two symbols are linked via |
838| pointers in their ->alias fields. |
839| |
840| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
841| only the literal string is retained it is the literal string that |
842| is output to yytname |
843`-------------------------------------------------------------------*/
844
845static void
846parse_thong_decl (void)
7b306f52 847{
f17bcd1f 848 token_t token;
a70083a3
AD
849 struct bucket *symbol;
850 char *typename = 0;
6b7e85b9 851 int usrtoknum = SUNDEF;
7b306f52 852
a70083a3 853 token = lex (); /* fetch typename or first token */
511e79b3 854 if (token == tok_typename)
7b306f52 855 {
95e36146 856 typename = xstrdup (token_buffer);
a70083a3
AD
857 value_components_used = 1;
858 token = lex (); /* fetch first token */
7b306f52 859 }
7b306f52 860
a70083a3 861 /* process first token */
7b306f52 862
511e79b3 863 if (token != tok_identifier)
a70083a3
AD
864 {
865 complain (_("unrecognized item %s, expected an identifier"),
866 token_buffer);
867 skip_to_char ('%');
868 return;
7b306f52 869 }
d7020c20 870 symval->class = token_sym;
a70083a3
AD
871 symval->type_name = typename;
872 symval->user_token_number = SALIAS;
873 symbol = symval;
7b306f52 874
a70083a3 875 token = lex (); /* get number or literal string */
1ff442ca 876
511e79b3 877 if (token == tok_number)
943819bf 878 {
a70083a3
AD
879 usrtoknum = numval;
880 token = lex (); /* okay, did number, now get literal */
943819bf 881 }
1ff442ca 882
a70083a3 883 /* process literal string token */
1ff442ca 884
511e79b3 885 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 886 {
a70083a3
AD
887 complain (_("expected string constant instead of %s"), token_buffer);
888 skip_to_char ('%');
889 return;
1ff442ca 890 }
d7020c20 891 symval->class = token_sym;
a70083a3
AD
892 symval->type_name = typename;
893 symval->user_token_number = usrtoknum;
1ff442ca 894
a70083a3
AD
895 symval->alias = symbol;
896 symbol->alias = symval;
1ff442ca 897
79282c5a
AD
898 /* symbol and symval combined are only one symbol. */
899 nsyms--;
a70083a3 900}
3cef001a 901
11e2beca 902
b6610515 903static void
11d82f03 904parse_muscle_decl (void)
b6610515
RA
905{
906 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
907 char *muscle_key;
908 char *muscle_value;
b6610515
RA
909
910 /* Read key. */
911 if (!isalpha (ch) && ch != '_')
912 {
913 complain (_("invalid %s declaration"), "%define");
914 skip_to_char ('%');
915 return;
916 }
11d82f03
MA
917 copy_identifier (finput, &muscle_obstack);
918 obstack_1grow (&muscle_obstack, 0);
919 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 920
b6610515
RA
921 /* Read value. */
922 ch = skip_white_space ();
923 if (ch != '"')
924 {
925 ungetc (ch, finput);
926 if (ch != EOF)
927 {
928 complain (_("invalid %s declaration"), "%define");
929 skip_to_char ('%');
930 return;
931 }
932 else
933 fatal (_("Premature EOF after %s"), "\"");
934 }
11d82f03
MA
935 copy_string2 (finput, &muscle_obstack, '"', 0);
936 obstack_1grow (&muscle_obstack, 0);
937 muscle_value = obstack_finish (&muscle_obstack);
b6610515 938
b6610515 939 /* Store the (key, value) pair in the environment. */
11d82f03 940 muscle_insert (muscle_key, muscle_value);
b6610515
RA
941}
942
2ba3b73c 943
426cf563
MA
944
945/*---------------------------------.
a870c567 946| Parse a double quoted parameter. |
426cf563
MA
947`---------------------------------*/
948
949static const char *
950parse_dquoted_param (const char *from)
951{
952 struct obstack param_obstack;
953 const char *param = NULL;
954 int c;
955
956 obstack_init (&param_obstack);
957 c = skip_white_space ();
958
959 if (c != '"')
960 {
961 complain (_("invalid %s declaration"), from);
962 ungetc (c, finput);
963 skip_to_char ('%');
964 return NULL;
965 }
966
2648a72d
AD
967 while ((c = literalchar ()) != '"')
968 obstack_1grow (&param_obstack, c);
a870c567 969
426cf563
MA
970 obstack_1grow (&param_obstack, '\0');
971 param = obstack_finish (&param_obstack);
972
973 if (c != '"' || strlen (param) == 0)
974 {
975 complain (_("invalid %s declaration"), from);
976 if (c != '"')
977 ungetc (c, finput);
978 skip_to_char ('%');
979 return NULL;
980 }
981
982 return param;
983}
984
2ba3b73c
MA
985/*----------------------------------.
986| Parse what comes after %skeleton. |
987`----------------------------------*/
988
a870c567 989static void
2ba3b73c
MA
990parse_skel_decl (void)
991{
426cf563 992 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
993}
994
a70083a3
AD
995/*----------------------------------------------------------------.
996| Read from finput until `%%' is seen. Discard the `%%'. Handle |
997| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 998| groups to ATTRS_OBSTACK. |
a70083a3 999`----------------------------------------------------------------*/
1ff442ca 1000
4a120d45 1001static void
a70083a3 1002read_declarations (void)
1ff442ca 1003{
a70083a3 1004 for (;;)
1ff442ca 1005 {
951366c1 1006 int c = skip_white_space ();
1ff442ca 1007
a70083a3
AD
1008 if (c == '%')
1009 {
951366c1 1010 token_t tok = parse_percent_token ();
1ff442ca 1011
a70083a3 1012 switch (tok)
943819bf 1013 {
511e79b3 1014 case tok_two_percents:
a70083a3 1015 return;
1ff442ca 1016
511e79b3 1017 case tok_percent_left_curly:
a70083a3
AD
1018 copy_definition ();
1019 break;
1ff442ca 1020
511e79b3 1021 case tok_token:
d7020c20 1022 parse_token_decl (token_sym, nterm_sym);
a70083a3 1023 break;
1ff442ca 1024
511e79b3 1025 case tok_nterm:
d7020c20 1026 parse_token_decl (nterm_sym, token_sym);
a70083a3 1027 break;
1ff442ca 1028
511e79b3 1029 case tok_type:
a70083a3
AD
1030 parse_type_decl ();
1031 break;
1ff442ca 1032
511e79b3 1033 case tok_start:
a70083a3
AD
1034 parse_start_decl ();
1035 break;
118fb205 1036
511e79b3 1037 case tok_union:
a70083a3
AD
1038 parse_union_decl ();
1039 break;
1ff442ca 1040
511e79b3 1041 case tok_expect:
a70083a3
AD
1042 parse_expect_decl ();
1043 break;
6deb4447 1044
511e79b3 1045 case tok_thong:
a70083a3
AD
1046 parse_thong_decl ();
1047 break;
d7020c20 1048
511e79b3 1049 case tok_left:
d7020c20 1050 parse_assoc_decl (left_assoc);
a70083a3 1051 break;
1ff442ca 1052
511e79b3 1053 case tok_right:
d7020c20 1054 parse_assoc_decl (right_assoc);
a70083a3 1055 break;
1ff442ca 1056
511e79b3 1057 case tok_nonassoc:
d7020c20 1058 parse_assoc_decl (non_assoc);
a70083a3 1059 break;
1ff442ca 1060
b6610515 1061 case tok_define:
11d82f03 1062 parse_muscle_decl ();
b6610515 1063 break;
342b8b6e 1064
2ba3b73c
MA
1065 case tok_skel:
1066 parse_skel_decl ();
1067 break;
b6610515 1068
511e79b3 1069 case tok_noop:
a70083a3 1070 break;
1ff442ca 1071
951366c1
AD
1072 case tok_stropt:
1073 case tok_intopt:
1074 case tok_obsolete:
951366c1
AD
1075 abort ();
1076 break;
1077
e0c40012 1078 case tok_illegal:
a70083a3
AD
1079 default:
1080 complain (_("unrecognized: %s"), token_buffer);
1081 skip_to_char ('%');
1082 }
1083 }
1084 else if (c == EOF)
1085 fatal (_("no input grammar"));
1086 else
1087 {
ff4a34be
AD
1088 char buf[] = "c";
1089 buf[0] = c;
1090 complain (_("unknown character: %s"), quote (buf));
a70083a3 1091 skip_to_char ('%');
1ff442ca 1092 }
1ff442ca 1093 }
1ff442ca 1094}
a70083a3
AD
1095\f
1096/*-------------------------------------------------------------------.
1097| Assuming that a `{' has just been seen, copy everything up to the |
1098| matching `}' into the actions file. STACK_OFFSET is the number of |
1099| values in the current rule so far, which says where to find `$0' |
1100| with respect to the top of the stack. |
14d293ac 1101| |
11e2beca
AD
1102| This routine is used both for actions and guards. Only |
1103| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1104| pointers to relevant portions inside this obstack. |
a70083a3 1105`-------------------------------------------------------------------*/
1ff442ca 1106
4a120d45 1107static void
14d293ac 1108parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1109{
a70083a3 1110 int c;
a70083a3 1111 int count;
1ff442ca 1112
1ff442ca 1113 count = 1;
1ff442ca
NF
1114 while (count > 0)
1115 {
14d293ac
AD
1116 while ((c = getc (finput)) != '}')
1117 switch (c)
1118 {
1119 case '\n':
1120 obstack_1grow (&action_obstack, c);
1121 lineno++;
1122 break;
1ff442ca 1123
14d293ac
AD
1124 case '{':
1125 obstack_1grow (&action_obstack, c);
1126 count++;
1127 break;
1ff442ca 1128
14d293ac
AD
1129 case '\'':
1130 case '"':
1131 copy_string (finput, &action_obstack, c);
1132 break;
1ff442ca 1133
14d293ac
AD
1134 case '/':
1135 copy_comment (finput, &action_obstack);
1136 break;
1ff442ca 1137
14d293ac
AD
1138 case '$':
1139 copy_dollar (finput, &action_obstack,
1140 rule, stack_offset);
1141 break;
1ff442ca 1142
14d293ac
AD
1143 case '@':
1144 copy_at (finput, &action_obstack,
1145 stack_offset);
1146 break;
a70083a3 1147
14d293ac
AD
1148 case EOF:
1149 fatal (_("unmatched %s"), "`{'");
a70083a3 1150
14d293ac
AD
1151 default:
1152 obstack_1grow (&action_obstack, c);
1153 }
a70083a3 1154
14d293ac 1155 /* Above loop exits when C is '}'. */
a70083a3
AD
1156 if (--count)
1157 {
8c7ebe49 1158 obstack_1grow (&action_obstack, c);
a70083a3
AD
1159 c = getc (finput);
1160 }
1161 }
1162
3f96f4dc 1163 obstack_1grow (&action_obstack, '\0');
a70083a3 1164}
14d293ac 1165
a70083a3
AD
1166
1167static void
14d293ac 1168parse_action (symbol_list *rule, int stack_offset)
a70083a3 1169{
14d293ac
AD
1170 rule->action_line = lineno;
1171 parse_braces (rule, stack_offset);
1172 rule->action = obstack_finish (&action_obstack);
1173}
a70083a3 1174
a70083a3 1175
14d293ac
AD
1176static void
1177parse_guard (symbol_list *rule, int stack_offset)
1178{
1179 token_t t = lex ();
1180 if (t != tok_left_curly)
1181 complain (_("invalid %s declaration"), "%guard");
f499b062 1182 rule->guard_line = lineno;
14d293ac
AD
1183 parse_braces (rule, stack_offset);
1184 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1185}
14d293ac 1186
a70083a3
AD
1187\f
1188
a70083a3
AD
1189/*-------------------------------------------------------------------.
1190| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1191| with the user's names. |
1192`-------------------------------------------------------------------*/
1ff442ca 1193
4a120d45 1194static bucket *
118fb205 1195gensym (void)
1ff442ca 1196{
274d42ce
AD
1197 /* Incremented for each generated symbol */
1198 static int gensym_count = 0;
1199 static char buf[256];
1200
a70083a3 1201 bucket *sym;
1ff442ca 1202
274d42ce
AD
1203 sprintf (buf, "@%d", ++gensym_count);
1204 token_buffer = buf;
a70083a3 1205 sym = getsym (token_buffer);
d7020c20 1206 sym->class = nterm_sym;
1ff442ca 1207 sym->value = nvars++;
36281465 1208 return sym;
1ff442ca 1209}
a70083a3 1210\f
107f7dfb
AD
1211/*-------------------------------------------------------------------.
1212| Parse the input grammar into a one symbol_list structure. Each |
1213| rule is represented by a sequence of symbols: the left hand side |
1214| followed by the contents of the right hand side, followed by a |
1215| null pointer instead of a symbol to terminate the rule. The next |
1216| symbol is the lhs of the following rule. |
1217| |
1218| All guards and actions are copied out to the appropriate files, |
1219| labelled by the rule number they apply to. |
1220| |
1221| Bison used to allow some %directives in the rules sections, but |
1222| this is no longer consider appropriate: (i) the documented grammar |
1223| doesn't claim it, (ii), it would promote bad style, (iii), error |
1224| recovery for %directives consists in skipping the junk until a `%' |
1225| is seen and helrp synchronizing. This scheme is definitely wrong |
1226| in the rules section. |
1227`-------------------------------------------------------------------*/
1ff442ca 1228
4a120d45 1229static void
118fb205 1230readgram (void)
1ff442ca 1231{
f17bcd1f 1232 token_t t;
a70083a3 1233 bucket *lhs = NULL;
107f7dfb
AD
1234 symbol_list *p = NULL;
1235 symbol_list *p1 = NULL;
a70083a3 1236 bucket *bp;
1ff442ca 1237
ff4a34be
AD
1238 /* Points to first symbol_list of current rule. its symbol is the
1239 lhs of the rule. */
107f7dfb 1240 symbol_list *crule = NULL;
ff4a34be 1241 /* Points to the symbol_list preceding crule. */
107f7dfb 1242 symbol_list *crule1 = NULL;
1ff442ca 1243
a70083a3 1244 t = lex ();
1ff442ca 1245
511e79b3 1246 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1247 if (t == tok_identifier || t == tok_bar)
1248 {
1249 int action_flag = 0;
1250 /* Number of symbols in rhs of this rule so far */
1251 int rulelength = 0;
1252 int xactions = 0; /* JF for error checking */
1253 bucket *first_rhs = 0;
1254
1255 if (t == tok_identifier)
1256 {
1257 lhs = symval;
1258
1259 if (!start_flag)
1260 {
1261 startval = lhs;
1262 start_flag = 1;
1263 }
1ff442ca 1264
107f7dfb
AD
1265 t = lex ();
1266 if (t != tok_colon)
1267 {
1268 complain (_("ill-formed rule: initial symbol not followed by colon"));
1269 unlex (t);
1270 }
1271 }
1272
1273 if (nrules == 0 && t == tok_bar)
1274 {
1275 complain (_("grammar starts with vertical bar"));
1276 lhs = symval; /* BOGUS: use a random symval */
1277 }
1278 /* start a new rule and record its lhs. */
1279
1280 nrules++;
1281 nitems++;
1282
1283 p = symbol_list_new (lhs);
1284
1285 crule1 = p1;
1286 if (p1)
1287 p1->next = p;
1288 else
1289 grammar = p;
1ff442ca 1290
107f7dfb
AD
1291 p1 = p;
1292 crule = p;
1ff442ca 1293
107f7dfb 1294 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1295
107f7dfb
AD
1296 if (lhs->class == unknown_sym)
1297 {
1298 lhs->class = nterm_sym;
1299 lhs->value = nvars;
1300 nvars++;
1301 }
1302 else if (lhs->class == token_sym)
1303 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1304
107f7dfb 1305 /* read the rhs of the rule. */
1ff442ca 1306
107f7dfb
AD
1307 for (;;)
1308 {
1309 t = lex ();
1310 if (t == tok_prec)
1311 {
1312 t = lex ();
1313 crule->ruleprec = symval;
1314 t = lex ();
1315 }
1316
1317 if (!(t == tok_identifier || t == tok_left_curly))
1318 break;
1ff442ca 1319
107f7dfb
AD
1320 /* If next token is an identifier, see if a colon follows it.
1321 If one does, exit this rule now. */
1322 if (t == tok_identifier)
1323 {
1324 bucket *ssave;
1325 token_t t1;
1326
1327 ssave = symval;
1328 t1 = lex ();
1329 unlex (t1);
1330 symval = ssave;
1331 if (t1 == tok_colon)
1332 break;
1333
1334 if (!first_rhs) /* JF */
1335 first_rhs = symval;
1336 /* Not followed by colon =>
1337 process as part of this rule's rhs. */
1338 }
1339
1340 /* If we just passed an action, that action was in the middle
1341 of a rule, so make a dummy rule to reduce it to a
1342 non-terminal. */
1343 if (action_flag)
1344 {
1345 /* Since the action was written out with this rule's
1346 number, we must give the new rule this number by
1347 inserting the new rule before it. */
1348
1349 /* Make a dummy nonterminal, a gensym. */
1350 bucket *sdummy = gensym ();
1351
1352 /* Make a new rule, whose body is empty, before the
1353 current one, so that the action just read can
1354 belong to it. */
1355 nrules++;
1356 nitems++;
1357 p = symbol_list_new (sdummy);
1358 /* Attach its lineno to that of the host rule. */
1359 p->line = crule->line;
82c035a8
AD
1360 /* Move the action from the host rule to this one. */
1361 p->action = crule->action;
1362 p->action_line = crule->action_line;
1363 crule->action = NULL;
1364
107f7dfb
AD
1365 if (crule1)
1366 crule1->next = p;
1367 else
1368 grammar = p;
1369 /* End of the rule. */
1370 crule1 = symbol_list_new (NULL);
1371 crule1->next = crule;
1372
1373 p->next = crule1;
1374
1375 /* Insert the dummy generated by that rule into this
1376 rule. */
1377 nitems++;
1378 p = symbol_list_new (sdummy);
1379 p1->next = p;
1380 p1 = p;
1381
1382 action_flag = 0;
1383 }
1384
1385 if (t == tok_identifier)
1386 {
1387 nitems++;
1388 p = symbol_list_new (symval);
1389 p1->next = p;
1390 p1 = p;
1391 }
1392 else /* handle an action. */
1393 {
14d293ac 1394 parse_action (crule, rulelength);
107f7dfb
AD
1395 action_flag = 1;
1396 xactions++; /* JF */
1397 }
1398 rulelength++;
1399 } /* end of read rhs of rule */
1400
1401 /* Put an empty link in the list to mark the end of this rule */
1402 p = symbol_list_new (NULL);
1403 p1->next = p;
1404 p1 = p;
1405
1406 if (t == tok_prec)
1407 {
1408 complain (_("two @prec's in a row"));
1409 t = lex ();
1410 crule->ruleprec = symval;
1411 t = lex ();
1412 }
f499b062 1413
107f7dfb
AD
1414 if (t == tok_guard)
1415 {
1416 if (!semantic_parser)
1417 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1418
14d293ac 1419 parse_guard (crule, rulelength);
a70083a3 1420 t = lex ();
107f7dfb 1421 }
f499b062
AD
1422
1423 if (t == tok_left_curly)
107f7dfb
AD
1424 {
1425 /* This case never occurs -wjh */
1426 if (action_flag)
1427 complain (_("two actions at end of one rule"));
14d293ac 1428 parse_action (crule, rulelength);
107f7dfb
AD
1429 action_flag = 1;
1430 xactions++; /* -wjh */
1431 t = lex ();
1432 }
1433 /* If $$ is being set in default way, report if any type
1434 mismatch. */
1435 else if (!xactions
1436 && first_rhs && lhs->type_name != first_rhs->type_name)
1437 {
1438 if (lhs->type_name == 0
1439 || first_rhs->type_name == 0
1440 || strcmp (lhs->type_name, first_rhs->type_name))
1441 complain (_("type clash (`%s' `%s') on default action"),
1442 lhs->type_name ? lhs->type_name : "",
1443 first_rhs->type_name ? first_rhs->type_name : "");
1444 }
1445 /* Warn if there is no default for $$ but we need one. */
1446 else if (!xactions && !first_rhs && lhs->type_name != 0)
1447 complain (_("empty rule for typed nonterminal, and no action"));
1448 if (t == tok_semicolon)
a70083a3 1449 t = lex ();
107f7dfb
AD
1450 }
1451 else
1452 {
1453 complain (_("invalid input: %s"), quote (token_buffer));
1454 t = lex ();
1455 }
943819bf 1456
b68e7744
AD
1457 /* grammar has been read. Do some checking */
1458
1459 if (nrules == 0)
1460 fatal (_("no rules in the input grammar"));
1461
1462 /* Report any undefined symbols and consider them nonterminals. */
1463
1464 for (bp = firstsymbol; bp; bp = bp->next)
1465 if (bp->class == unknown_sym)
1466 {
1467 complain (_
1468 ("symbol %s is used, but is not defined as a token and has no rules"),
1469 bp->tag);
1470 bp->class = nterm_sym;
1471 bp->value = nvars++;
1472 }
1473
ff442794
AD
1474 /* Insert the initial rule, which line is that of the first rule
1475 (not that of the start symbol):
30171f79
AD
1476
1477 axiom: %start EOF. */
1478 p = symbol_list_new (axiom);
ff442794 1479 p->line = grammar->line;
30171f79
AD
1480 p->next = symbol_list_new (startval);
1481 p->next->next = symbol_list_new (eoftoken);
1482 p->next->next->next = symbol_list_new (NULL);
1483 p->next->next->next->next = grammar;
1484 nrules += 1;
1485 nitems += 3;
1486 grammar = p;
1487 startval = axiom;
1ff442ca
NF
1488
1489 if (nsyms > MAXSHORT)
a0f6b076
AD
1490 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1491 MAXSHORT);
1ff442ca
NF
1492
1493 ntokens = nsyms - nvars;
1494}
ff48177d
MA
1495
1496/* At the end of the grammar file, some C source code must
63c2d5de 1497 be stored. It is going to be associated to the epilogue
ff48177d
MA
1498 directive. */
1499static void
1500read_additionnal_code (void)
1501{
1502 char c;
63c2d5de 1503 struct obstack el_obstack;
342b8b6e 1504
63c2d5de 1505 obstack_init (&el_obstack);
ff48177d 1506
710ddc4f
MA
1507 if (!no_lines_flag)
1508 {
1509 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1510 lineno, quotearg_style (c_quoting_style,
b7c49edf 1511 muscle_find ("filename")));
710ddc4f
MA
1512 }
1513
ff48177d 1514 while ((c = getc (finput)) != EOF)
63c2d5de 1515 obstack_1grow (&el_obstack, c);
342b8b6e 1516
63c2d5de 1517 obstack_1grow (&el_obstack, 0);
11d82f03 1518 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1519}
1520
a70083a3 1521\f
037ca2f1
AD
1522/*------------------------------------------------------------------.
1523| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1524| number. |
1525`------------------------------------------------------------------*/
1526
1527static void
1528token_translations_init (void)
1529{
1530 bucket *bp = NULL;
1531 int i;
1532
1533 token_translations = XCALLOC (short, max_user_token_number + 1);
1534
1535 /* Initialize all entries for literal tokens to 2, the internal
1536 token number for $undefined., which represents all invalid
1537 inputs. */
1538 for (i = 0; i <= max_user_token_number; i++)
1539 token_translations[i] = 2;
1540
1541 for (bp = firstsymbol; bp; bp = bp->next)
1542 {
1543 /* Non-terminal? */
1544 if (bp->value >= ntokens)
1545 continue;
1546 /* A token string alias? */
1547 if (bp->user_token_number == SALIAS)
1548 continue;
6b7e85b9
AD
1549
1550 assert (bp->user_token_number != SUNDEF);
1551
037ca2f1
AD
1552 /* A token which translation has already been set? */
1553 if (token_translations[bp->user_token_number] != 2)
1554 complain (_("tokens %s and %s both assigned number %d"),
ad949da9 1555 symbols[token_translations[bp->user_token_number]]->tag,
037ca2f1
AD
1556 bp->tag, bp->user_token_number);
1557 token_translations[bp->user_token_number] = bp->value;
1558 }
1559}
1560
1561
0e78e603
AD
1562/*----------------------------------------------------------------.
1563| Assign symbol numbers, and write definition of token names into |
1564| FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1565`----------------------------------------------------------------*/
1ff442ca 1566
4a120d45 1567static void
118fb205 1568packsymbols (void)
1ff442ca 1569{
342b8b6e 1570 bucket *bp = NULL;
a70083a3 1571 int tokno = 1;
a70083a3 1572 int last_user_token_number;
1ff442ca 1573
0e78e603 1574 symbols = XCALLOC (bucket *, nsyms);
1ff442ca 1575
1ff442ca
NF
1576 max_user_token_number = 256;
1577 last_user_token_number = 256;
1578
1579 for (bp = firstsymbol; bp; bp = bp->next)
1580 {
d7020c20 1581 if (bp->class == nterm_sym)
1ff442ca
NF
1582 {
1583 bp->value += ntokens;
1584 }
943819bf
RS
1585 else if (bp->alias)
1586 {
b7c49edf
AD
1587 /* This symbol and its alias are a single token defn.
1588 Allocate a tokno, and assign to both check agreement of
1589 prec and assoc fields and make both the same */
1590 if (bp->value == -1)
1591 {
1592 if (bp == eoftoken || bp->alias == eoftoken)
1593 bp->value = bp->alias->value = 0;
1594 else
1595 {
1596 bp->value = bp->alias->value = tokno++;
1597 }
1598 }
943819bf 1599
0a6384c4
AD
1600 if (bp->prec != bp->alias->prec)
1601 {
1602 if (bp->prec != 0 && bp->alias->prec != 0
1603 && bp->user_token_number == SALIAS)
a0f6b076
AD
1604 complain (_("conflicting precedences for %s and %s"),
1605 bp->tag, bp->alias->tag);
0a6384c4
AD
1606 if (bp->prec != 0)
1607 bp->alias->prec = bp->prec;
1608 else
1609 bp->prec = bp->alias->prec;
1610 }
943819bf 1611
0a6384c4
AD
1612 if (bp->assoc != bp->alias->assoc)
1613 {
a0f6b076
AD
1614 if (bp->assoc != 0 && bp->alias->assoc != 0
1615 && bp->user_token_number == SALIAS)
1616 complain (_("conflicting assoc values for %s and %s"),
1617 bp->tag, bp->alias->tag);
1618 if (bp->assoc != 0)
1619 bp->alias->assoc = bp->assoc;
1620 else
1621 bp->assoc = bp->alias->assoc;
1622 }
0a6384c4 1623
b7c49edf 1624 /* Do not do processing below for SALIASs. */
0a6384c4 1625 if (bp->user_token_number == SALIAS)
b7c49edf 1626 continue;
943819bf 1627
a70083a3 1628 }
b7c49edf 1629 else /* bp->class == token_sym */
943819bf 1630 {
b7c49edf
AD
1631 if (bp == eoftoken)
1632 bp->value = 0;
1633 else
1634 bp->value = tokno++;
943819bf
RS
1635 }
1636
d7020c20 1637 if (bp->class == token_sym)
1ff442ca 1638 {
6b7e85b9 1639 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1640 bp->user_token_number = ++last_user_token_number;
1641 if (bp->user_token_number > max_user_token_number)
1642 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1643 }
1644
0e78e603 1645 symbols[bp->value] = bp;
1ff442ca
NF
1646 }
1647
037ca2f1 1648 token_translations_init ();
1ff442ca
NF
1649
1650 error_token_number = errtoken->value;
1651
e3f1699f
AD
1652 if (startval->class == unknown_sym)
1653 fatal (_("the start symbol %s is undefined"), startval->tag);
1654 else if (startval->class == token_sym)
1655 fatal (_("the start symbol %s is a token"), startval->tag);
1656
1657 start_symbol = startval->value;
1658}
1659
1660
a70083a3
AD
1661/*---------------------------------------------------------------.
1662| Convert the rules into the representation using RRHS, RLHS and |
1663| RITEMS. |
1664`---------------------------------------------------------------*/
1ff442ca 1665
4a120d45 1666static void
118fb205 1667packgram (void)
1ff442ca 1668{
a70083a3
AD
1669 int itemno;
1670 int ruleno;
1671 symbol_list *p;
1ff442ca 1672
adc8c848
AD
1673 /* We use short to index items. */
1674 if (nitems >= MAXSHORT)
1675 fatal (_("too many items (max %d)"), MAXSHORT);
1676
d7913476 1677 ritem = XCALLOC (short, nitems + 1);
1a2b5d37 1678 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1679
1680 itemno = 0;
1681 ruleno = 1;
1682
1683 p = grammar;
1684 while (p)
1685 {
b29b2ed5 1686 bucket *ruleprec = p->ruleprec;
1a2b5d37
AD
1687 rules[ruleno].lhs = p->sym->value;
1688 rules[ruleno].rhs = itemno;
1689 rules[ruleno].line = p->line;
1690 rules[ruleno].useful = TRUE;
1691 rules[ruleno].action = p->action;
1692 rules[ruleno].action_line = p->action_line;
1693 rules[ruleno].guard = p->guard;
1694 rules[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1695
1696 p = p->next;
1697 while (p && p->sym)
1698 {
1699 ritem[itemno++] = p->sym->value;
1700 /* A rule gets by default the precedence and associativity
1701 of the last token in it. */
d7020c20 1702 if (p->sym->class == token_sym)
1ff442ca 1703 {
1a2b5d37
AD
1704 rules[ruleno].prec = p->sym->prec;
1705 rules[ruleno].assoc = p->sym->assoc;
1ff442ca 1706 }
a70083a3
AD
1707 if (p)
1708 p = p->next;
1ff442ca
NF
1709 }
1710
1711 /* If this rule has a %prec,
a70083a3 1712 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1713 if (ruleprec)
1714 {
1a2b5d37
AD
1715 rules[ruleno].prec = ruleprec->prec;
1716 rules[ruleno].assoc = ruleprec->assoc;
1717 rules[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1718 }
1719
1720 ritem[itemno++] = -ruleno;
1721 ruleno++;
1722
a70083a3
AD
1723 if (p)
1724 p = p->next;
1ff442ca
NF
1725 }
1726
1727 ritem[itemno] = 0;
75142d45
AD
1728 nritems = itemno;
1729 assert (nritems == nitems);
3067fbef
AD
1730
1731 if (trace_flag)
1732 ritem_print (stderr);
1ff442ca 1733}
a70083a3
AD
1734\f
1735/*-------------------------------------------------------------------.
1736| Read in the grammar specification and record it in the format |
ea5607fd 1737| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1738| and all actions into ACTION_OBSTACK, in each case forming the body |
1739| of a C function (YYGUARD or YYACTION) which contains a switch |
1740| statement to decide which guard or action to execute. |
a70083a3
AD
1741`-------------------------------------------------------------------*/
1742
1743void
1744reader (void)
1745{
1746 start_flag = 0;
1747 startval = NULL; /* start symbol not specified yet. */
1748
b7c49edf 1749 nsyms = 0;
a70083a3
AD
1750 nvars = 0;
1751 nrules = 0;
1752 nitems = 0;
a70083a3
AD
1753
1754 typed = 0;
1755 lastprec = 0;
1756
a70083a3
AD
1757 semantic_parser = 0;
1758 pure_parser = 0;
a70083a3
AD
1759
1760 grammar = NULL;
1761
342b8b6e 1762 lex_init ();
a70083a3
AD
1763 lineno = 1;
1764
11d82f03
MA
1765 /* Initialize the muscle obstack. */
1766 obstack_init (&muscle_obstack);
82e236e2 1767
a70083a3
AD
1768 /* Initialize the symbol table. */
1769 tabinit ();
b6610515 1770
30171f79
AD
1771 /* Construct the axiom symbol. */
1772 axiom = getsym ("$axiom");
1773 axiom->class = nterm_sym;
1774 axiom->value = nvars++;
1775
a70083a3
AD
1776 /* Construct the error token */
1777 errtoken = getsym ("error");
d7020c20 1778 errtoken->class = token_sym;
a70083a3 1779 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1780
a70083a3
AD
1781 /* Construct a token that represents all undefined literal tokens.
1782 It is always token number 2. */
1783 undeftoken = getsym ("$undefined.");
d7020c20 1784 undeftoken->class = token_sym;
a70083a3
AD
1785 undeftoken->user_token_number = 2;
1786
331dbc1b
AD
1787 /* Initialize the obstacks. */
1788 obstack_init (&action_obstack);
1789 obstack_init (&attrs_obstack);
331dbc1b
AD
1790 obstack_init (&output_obstack);
1791
1792 finput = xfopen (infile, "r");
1793
896fe5c1
AD
1794 /* Read the declaration section. Copy %{ ... %} groups to
1795 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1796 etc. found there. */
a70083a3 1797 read_declarations ();
b7c49edf
AD
1798
1799 /* If the user did not define her EOFTOKEN, do it now. */
1800 if (!eoftoken)
1801 {
1802 eoftoken = getsym ("$");
1803 eoftoken->class = token_sym;
1804 /* Value specified by POSIX. */
1805 eoftoken->user_token_number = 0;
1806 }
1807
a70083a3
AD
1808 /* Read in the grammar, build grammar in list form. Write out
1809 guards and actions. */
1810 readgram ();
ff48177d
MA
1811 /* Some C code is given at the end of the grammar file. */
1812 read_additionnal_code ();
b0c4483e 1813
331dbc1b
AD
1814 lex_free ();
1815 xfclose (finput);
1816
a70083a3
AD
1817 /* Assign the symbols their symbol numbers. Write #defines for the
1818 token symbols into FDEFINES if requested. */
1819 packsymbols ();
93ede233 1820
a70083a3
AD
1821 /* Convert the grammar into the format described in gram.h. */
1822 packgram ();
8419d367
AD
1823
1824 /* The grammar as a symbol_list is no longer needed. */
1825 LIST_FREE (symbol_list, grammar);
a70083a3 1826}