]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/reader.c, src/reader.h (user_toknums): Remove.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
41 bucket *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
a70083a3 51 bucket *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
4a120d45
JT
55static symbol_list *grammar;
56static int start_flag;
57static bucket *startval;
1ff442ca
NF
58
59/* Nonzero if components of semantic values are used, implying
60 they must be unions. */
61static int value_components_used;
62
d7020c20
AD
63/* Nonzero if %union has been seen. */
64static int typed;
1ff442ca 65
d7020c20
AD
66/* Incremented for each %left, %right or %nonassoc seen */
67static int lastprec;
1ff442ca 68
b7c49edf
AD
69static bucket *errtoken = NULL;
70static bucket *undeftoken = NULL;
71static bucket *eoftoken = NULL;
30171f79 72static bucket *axiom = NULL;
b29b2ed5 73
6255b435 74static symbol_list *
b29b2ed5
AD
75symbol_list_new (bucket *sym)
76{
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
d945f5cd
AD
81 res->action = NULL;
82 res->action_line = 0;
f499b062
AD
83 res->guard = NULL;
84 res->guard_line = 0;
b29b2ed5
AD
85 res->ruleprec = NULL;
86 return res;
87}
88
0d533154 89\f
a70083a3 90
0d533154
AD
91/*===================\
92| Low level lexing. |
93\===================*/
943819bf
RS
94
95static void
118fb205 96skip_to_char (int target)
943819bf
RS
97{
98 int c;
99 if (target == '\n')
a0f6b076 100 complain (_(" Skipping to next \\n"));
943819bf 101 else
a0f6b076 102 complain (_(" Skipping to next %c"), target);
943819bf
RS
103
104 do
0d533154 105 c = skip_white_space ();
943819bf 106 while (c != target && c != EOF);
a083fbbf 107 if (c != EOF)
0d533154 108 ungetc (c, finput);
943819bf
RS
109}
110
111
0d533154
AD
112/*---------------------------------------------------------.
113| Read a signed integer from STREAM and return its value. |
114`---------------------------------------------------------*/
115
/* Read an optional `-' followed by a run of decimal digits from
   STREAM and return the corresponding value.  The first character
   that is not part of the number is pushed back onto STREAM.  If
   there is no digit at all the result is 0.

   The magnitude saturates at INT_MAX instead of overflowing: signed
   overflow is undefined behavior, and the input is user supplied.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Once saturated, N stays at INT_MAX, but the remaining digits
         are still consumed.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a no-op.  */
  ungetc (c, stream);

  return sign * n;
}
139\f
79282c5a
AD
140/*--------------------------------------------------------------.
141| Get the data type (alternative in the union) of the value for |
142| symbol N in rule RULE. |
143`--------------------------------------------------------------*/
144
145static char *
b29b2ed5 146get_type_name (int n, symbol_list *rule)
79282c5a
AD
147{
148 int i;
149 symbol_list *rp;
150
151 if (n < 0)
152 {
153 complain (_("invalid $ value"));
154 return NULL;
155 }
156
157 rp = rule;
158 i = 0;
159
160 while (i < n)
161 {
162 rp = rp->next;
163 if (rp == NULL || rp->sym == NULL)
164 {
165 complain (_("invalid $ value"));
166 return NULL;
167 }
168 i++;
169 }
170
171 return rp->sym->type_name;
172}
173\f
337bab46
AD
174/*------------------------------------------------------------.
175| Dump the string from FIN to OOUT if non null. MATCH is the |
176| delimiter of the string (either ' or "). |
177`------------------------------------------------------------*/
ae3c3164
AD
178
179static inline void
b6610515 180copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
181{
182 int c;
183
b6610515
RA
184 if (store)
185 obstack_1grow (oout, match);
8c7ebe49 186
4a120d45 187 c = getc (fin);
ae3c3164
AD
188
189 while (c != match)
190 {
191 if (c == EOF)
192 fatal (_("unterminated string at end of file"));
193 if (c == '\n')
194 {
a0f6b076 195 complain (_("unterminated string"));
4a120d45 196 ungetc (c, fin);
ae3c3164
AD
197 c = match; /* invent terminator */
198 continue;
199 }
200
337bab46 201 obstack_1grow (oout, c);
ae3c3164
AD
202
203 if (c == '\\')
204 {
4a120d45 205 c = getc (fin);
ae3c3164
AD
206 if (c == EOF)
207 fatal (_("unterminated string at end of file"));
337bab46 208 obstack_1grow (oout, c);
8c7ebe49 209
ae3c3164
AD
210 if (c == '\n')
211 lineno++;
212 }
213
a70083a3 214 c = getc (fin);
ae3c3164
AD
215 }
216
b6610515
RA
217 if (store)
218 obstack_1grow (oout, c);
219}
220
221/* FIXME. */
222
/* Copy a string from FIN to OOUT, including its delimiters MATCH.
   The opening delimiter has already been read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
228
b6610515
RA
229/* FIXME. */
230
/* Copy from FIN to OOUT the longest run of letters, digits and
   underscores.  The character that ends the run is pushed back onto
   FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (ch == '_' || isalnum (ch))
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
ae3c3164 241
2666f928
AD
242
243/*------------------------------------------------------------------.
244| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
245| `/', which might or might not be a comment. In any case, copy |
246| what we saw. |
247`------------------------------------------------------------------*/
ae3c3164
AD
248
249static inline void
2666f928 250copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
251{
252 int cplus_comment;
a70083a3 253 int ended;
550a72a3
AD
254 int c;
255
256 /* We read a `/', output it. */
2666f928 257 obstack_1grow (oout, '/');
550a72a3
AD
258
259 switch ((c = getc (fin)))
260 {
261 case '/':
262 cplus_comment = 1;
263 break;
264 case '*':
265 cplus_comment = 0;
266 break;
267 default:
268 ungetc (c, fin);
269 return;
270 }
ae3c3164 271
2666f928 272 obstack_1grow (oout, c);
550a72a3 273 c = getc (fin);
ae3c3164
AD
274
275 ended = 0;
276 while (!ended)
277 {
278 if (!cplus_comment && c == '*')
279 {
280 while (c == '*')
281 {
2666f928 282 obstack_1grow (oout, c);
550a72a3 283 c = getc (fin);
ae3c3164
AD
284 }
285
286 if (c == '/')
287 {
2666f928 288 obstack_1grow (oout, c);
ae3c3164
AD
289 ended = 1;
290 }
291 }
292 else if (c == '\n')
293 {
294 lineno++;
2666f928 295 obstack_1grow (oout, c);
ae3c3164
AD
296 if (cplus_comment)
297 ended = 1;
298 else
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 else if (c == EOF)
302 fatal (_("unterminated comment"));
303 else
304 {
2666f928 305 obstack_1grow (oout, c);
550a72a3 306 c = getc (fin);
ae3c3164
AD
307 }
308 }
309}
310
311
a70083a3 312/*-----------------------------------------------------------------.
337bab46 313| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
314| reference to this location. STACK_OFFSET is the number of values |
315| in the current rule so far, which says where to find `$0' with |
316| respect to the top of the stack. |
317`-----------------------------------------------------------------*/
1ff442ca 318
a70083a3 319static inline void
337bab46 320copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 321{
a70083a3 322 int c;
1ff442ca 323
a70083a3
AD
324 c = getc (fin);
325 if (c == '$')
1ff442ca 326 {
ff4423cc 327 obstack_sgrow (oout, "yyloc");
89cab50d 328 locations_flag = 1;
a70083a3
AD
329 }
330 else if (isdigit (c) || c == '-')
331 {
332 int n;
1ff442ca 333
a70083a3
AD
334 ungetc (c, fin);
335 n = read_signed_integer (fin);
11e2beca
AD
336 if (n > stack_offset)
337 complain (_("invalid value: %s%d"), "@", n);
338 else
339 {
340 /* Offset is always 0 if parser has already popped the stack
341 pointer. */
342 obstack_fgrow1 (oout, "yylsp[%d]",
343 n - (semantic_parser ? 0 : stack_offset));
344 locations_flag = 1;
345 }
1ff442ca 346 }
a70083a3 347 else
ff4a34be
AD
348 {
349 char buf[] = "@c";
350 buf[1] = c;
351 complain (_("%s is invalid"), quote (buf));
352 }
1ff442ca 353}
79282c5a
AD
354
355
356/*-------------------------------------------------------------------.
357| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
358| |
359| Possible inputs: $[<TYPENAME>]($|integer) |
360| |
337bab46 361| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
362| the number of values in the current rule so far, which says where |
363| to find `$0' with respect to the top of the stack. |
364`-------------------------------------------------------------------*/
365
366static inline void
337bab46 367copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
368 symbol_list *rule, int stack_offset)
369{
370 int c = getc (fin);
b0ce6046 371 const char *type_name = NULL;
79282c5a 372
f282676b 373 /* Get the type name if explicit. */
79282c5a
AD
374 if (c == '<')
375 {
f282676b 376 read_type_name (fin);
79282c5a
AD
377 type_name = token_buffer;
378 value_components_used = 1;
79282c5a
AD
379 c = getc (fin);
380 }
381
382 if (c == '$')
383 {
ff4423cc 384 obstack_sgrow (oout, "yyval");
8c7ebe49 385
79282c5a
AD
386 if (!type_name)
387 type_name = get_type_name (0, rule);
388 if (type_name)
337bab46 389 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
390 if (!type_name && typed)
391 complain (_("$$ of `%s' has no declared type"),
392 rule->sym->tag);
393 }
394 else if (isdigit (c) || c == '-')
395 {
396 int n;
397 ungetc (c, fin);
398 n = read_signed_integer (fin);
399
11e2beca
AD
400 if (n > stack_offset)
401 complain (_("invalid value: %s%d"), "$", n);
402 else
403 {
404 if (!type_name && n > 0)
405 type_name = get_type_name (n, rule);
406
407 /* Offset is always 0 if parser has already popped the stack
408 pointer. */
409 obstack_fgrow1 (oout, "yyvsp[%d]",
410 n - (semantic_parser ? 0 : stack_offset));
411
412 if (type_name)
413 obstack_fgrow1 (oout, ".%s", type_name);
414 if (!type_name && typed)
415 complain (_("$%d of `%s' has no declared type"),
416 n, rule->sym->tag);
417 }
79282c5a
AD
418 }
419 else
420 {
421 char buf[] = "$c";
422 buf[1] = c;
423 complain (_("%s is invalid"), quote (buf));
424 }
425}
a70083a3
AD
426\f
427/*-------------------------------------------------------------------.
428| Copy the contents of a `%{ ... %}' into the definitions file. The |
429| `%{' has already been read. Return after reading the `%}'. |
430`-------------------------------------------------------------------*/
1ff442ca 431
4a120d45 432static void
118fb205 433copy_definition (void)
1ff442ca 434{
a70083a3 435 int c;
ae3c3164 436 /* -1 while reading a character if prev char was %. */
a70083a3 437 int after_percent;
1ff442ca 438
89cab50d 439 if (!no_lines_flag)
25b222fa
MA
440 {
441 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 442 lineno, quotearg_style (c_quoting_style,
b7c49edf 443 muscle_find ("filename")));
25b222fa 444 }
1ff442ca
NF
445
446 after_percent = 0;
447
ae3c3164 448 c = getc (finput);
1ff442ca
NF
449
450 for (;;)
451 {
452 switch (c)
453 {
454 case '\n':
dd60faec 455 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
456 lineno++;
457 break;
458
459 case '%':
a70083a3 460 after_percent = -1;
1ff442ca 461 break;
a083fbbf 462
1ff442ca
NF
463 case '\'':
464 case '"':
337bab46 465 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
466 break;
467
468 case '/':
337bab46 469 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
470 break;
471
472 case EOF:
a70083a3 473 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
474
475 default:
dd60faec 476 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
477 }
478
a70083a3 479 c = getc (finput);
1ff442ca
NF
480
481 if (after_percent)
482 {
483 if (c == '}')
484 return;
dd60faec 485 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
486 }
487 after_percent = 0;
1ff442ca 488 }
1ff442ca
NF
489}
490
491
d7020c20
AD
492/*-------------------------------------------------------------------.
493| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
494| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
495| are reversed. |
496`-------------------------------------------------------------------*/
1ff442ca 497
4a120d45 498static void
d7020c20 499parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 500{
342b8b6e
AD
501 token_t token = tok_undef;
502 char *typename = NULL;
1ff442ca 503
1e9798d5
AD
504 /* The symbol being defined. */
505 struct bucket *symbol = NULL;
506
507 /* After `%token' and `%nterm', any number of symbols maybe be
508 defined. */
1ff442ca
NF
509 for (;;)
510 {
e6011337
JT
511 int tmp_char = ungetc (skip_white_space (), finput);
512
1e9798d5
AD
513 /* `%' (for instance from `%token', or from `%%' etc.) is the
514 only valid means to end this declaration. */
e6011337 515 if (tmp_char == '%')
1ff442ca 516 return;
e6011337 517 if (tmp_char == EOF)
a0f6b076 518 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 519
a70083a3 520 token = lex ();
511e79b3 521 if (token == tok_comma)
943819bf
RS
522 {
523 symbol = NULL;
524 continue;
525 }
511e79b3 526 if (token == tok_typename)
1ff442ca 527 {
95e36146 528 typename = xstrdup (token_buffer);
1ff442ca 529 value_components_used = 1;
943819bf
RS
530 symbol = NULL;
531 }
511e79b3 532 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 533 {
8e03724b
AD
534 if (symval->alias)
535 warn (_("symbol `%s' used more than once as a literal string"),
536 symval->tag);
537 else if (symbol->alias)
538 warn (_("symbol `%s' given more than one literal string"),
539 symbol->tag);
540 else
541 {
542 symval->class = token_sym;
543 symval->type_name = typename;
544 symval->user_token_number = symbol->user_token_number;
545 symbol->user_token_number = SALIAS;
546 symval->alias = symbol;
547 symbol->alias = symval;
548 /* symbol and symval combined are only one symbol */
549 nsyms--;
550 }
8e03724b 551 symbol = NULL;
1ff442ca 552 }
511e79b3 553 else if (token == tok_identifier)
1ff442ca
NF
554 {
555 int oldclass = symval->class;
943819bf 556 symbol = symval;
1ff442ca 557
943819bf 558 if (symbol->class == what_is_not)
a0f6b076 559 complain (_("symbol %s redefined"), symbol->tag);
943819bf 560 symbol->class = what_is;
d7020c20 561 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 562 symbol->value = nvars++;
1ff442ca
NF
563
564 if (typename)
565 {
943819bf
RS
566 if (symbol->type_name == NULL)
567 symbol->type_name = typename;
a70083a3 568 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 569 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
570 }
571 }
511e79b3 572 else if (symbol && token == tok_number)
a70083a3 573 {
943819bf 574 symbol->user_token_number = numval;
b7c49edf
AD
575 /* User defined EOF token? */
576 if (numval == 0)
577 eoftoken = symbol;
a70083a3 578 }
1ff442ca 579 else
943819bf 580 {
a0f6b076 581 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
582 token_buffer,
583 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 584 skip_to_char ('%');
943819bf 585 }
1ff442ca
NF
586 }
587
588}
589
1ff442ca 590
d7020c20
AD
591/*------------------------------.
592| Parse what comes after %start |
593`------------------------------*/
1ff442ca 594
4a120d45 595static void
118fb205 596parse_start_decl (void)
1ff442ca
NF
597{
598 if (start_flag)
27821bff 599 complain (_("multiple %s declarations"), "%start");
511e79b3 600 if (lex () != tok_identifier)
27821bff 601 complain (_("invalid %s declaration"), "%start");
943819bf
RS
602 else
603 {
604 start_flag = 1;
605 startval = symval;
606 }
1ff442ca
NF
607}
608
a70083a3
AD
609/*-----------------------------------------------------------.
610| read in a %type declaration and record its information for |
611| get_type_name to access |
612`-----------------------------------------------------------*/
613
614static void
615parse_type_decl (void)
616{
a70083a3
AD
617 char *name;
618
511e79b3 619 if (lex () != tok_typename)
a70083a3
AD
620 {
621 complain ("%s", _("%type declaration has no <typename>"));
622 skip_to_char ('%');
623 return;
624 }
625
95e36146 626 name = xstrdup (token_buffer);
a70083a3
AD
627
628 for (;;)
629 {
f17bcd1f 630 token_t t;
a70083a3
AD
631 int tmp_char = ungetc (skip_white_space (), finput);
632
633 if (tmp_char == '%')
634 return;
635 if (tmp_char == EOF)
636 fatal (_("Premature EOF after %s"), token_buffer);
637
638 t = lex ();
639
640 switch (t)
1ff442ca
NF
641 {
642
511e79b3
AD
643 case tok_comma:
644 case tok_semicolon:
1ff442ca
NF
645 break;
646
511e79b3 647 case tok_identifier:
1ff442ca
NF
648 if (symval->type_name == NULL)
649 symval->type_name = name;
a70083a3 650 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 651 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
652
653 break;
654
655 default:
a0f6b076
AD
656 complain (_("invalid %%type declaration due to item: %s"),
657 token_buffer);
a70083a3 658 skip_to_char ('%');
1ff442ca
NF
659 }
660 }
661}
662
663
664
d7020c20
AD
665/*----------------------------------------------------------------.
666| Read in a %left, %right or %nonassoc declaration and record its |
667| information. |
668`----------------------------------------------------------------*/
1ff442ca 669
4a120d45 670static void
d7020c20 671parse_assoc_decl (associativity assoc)
1ff442ca 672{
a70083a3
AD
673 char *name = NULL;
674 int prev = 0;
1ff442ca 675
a70083a3 676 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 677
1ff442ca
NF
678 for (;;)
679 {
f17bcd1f 680 token_t t;
e6011337 681 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 682
e6011337 683 if (tmp_char == '%')
1ff442ca 684 return;
e6011337 685 if (tmp_char == EOF)
a0f6b076 686 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 687
a70083a3 688 t = lex ();
1ff442ca
NF
689
690 switch (t)
691 {
511e79b3 692 case tok_typename:
95e36146 693 name = xstrdup (token_buffer);
1ff442ca
NF
694 break;
695
511e79b3 696 case tok_comma:
1ff442ca
NF
697 break;
698
511e79b3 699 case tok_identifier:
1ff442ca 700 if (symval->prec != 0)
a0f6b076 701 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
702 symval->prec = lastprec;
703 symval->assoc = assoc;
d7020c20 704 if (symval->class == nterm_sym)
a0f6b076 705 complain (_("symbol %s redefined"), symval->tag);
d7020c20 706 symval->class = token_sym;
1ff442ca 707 if (name)
a70083a3 708 { /* record the type, if one is specified */
1ff442ca
NF
709 if (symval->type_name == NULL)
710 symval->type_name = name;
a70083a3 711 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 712 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
713 }
714 break;
715
511e79b3
AD
716 case tok_number:
717 if (prev == tok_identifier)
a70083a3 718 {
1ff442ca 719 symval->user_token_number = numval;
a70083a3
AD
720 }
721 else
722 {
723 complain (_
724 ("invalid text (%s) - number should be after identifier"),
725token_buffer);
726 skip_to_char ('%');
727 }
1ff442ca
NF
728 break;
729
511e79b3 730 case tok_semicolon:
1ff442ca
NF
731 return;
732
733 default:
a0f6b076 734 complain (_("unexpected item: %s"), token_buffer);
a70083a3 735 skip_to_char ('%');
1ff442ca
NF
736 }
737
738 prev = t;
1ff442ca
NF
739 }
740}
741
742
743
dd60faec 744/*--------------------------------------------------------------.
180d45ba
PB
745| Copy the union declaration into the stype muscle |
746| (and fdefines), where it is made into the definition of |
747| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 748`--------------------------------------------------------------*/
1ff442ca 749
4a120d45 750static void
118fb205 751parse_union_decl (void)
1ff442ca 752{
a70083a3
AD
753 int c;
754 int count = 0;
428046f8 755 bool done = FALSE;
180d45ba 756 struct obstack union_obstack;
1ff442ca 757 if (typed)
27821bff 758 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
759
760 typed = 1;
761
180d45ba
PB
762 obstack_init (&union_obstack);
763 obstack_sgrow (&union_obstack, "union");
1ff442ca 764
428046f8 765 while (!done)
1ff442ca 766 {
428046f8
AD
767 c = xgetc (finput);
768
342b8b6e
AD
769 /* If C contains '/', it is output by copy_comment (). */
770 if (c != '/')
2666f928 771 obstack_1grow (&union_obstack, c);
1ff442ca
NF
772
773 switch (c)
774 {
775 case '\n':
776 lineno++;
777 break;
778
779 case '/':
2666f928 780 copy_comment (finput, &union_obstack);
1ff442ca
NF
781 break;
782
1ff442ca
NF
783 case '{':
784 count++;
785 break;
786
787 case '}':
428046f8 788 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 789 if (count == 0)
27821bff 790 complain (_("unmatched %s"), "`}'");
1ff442ca 791 count--;
428046f8
AD
792 if (!count)
793 done = TRUE;
794 break;
1ff442ca 795 }
1ff442ca 796 }
180d45ba 797
428046f8
AD
798 /* JF don't choke on trailing semi */
799 c = skip_white_space ();
800 if (c != ';')
801 ungetc (c, finput);
802 obstack_1grow (&union_obstack, 0);
803 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
804}
805
d7020c20
AD
806
807/*-------------------------------------------------------.
808| Parse the declaration %expect N which says to expect N |
809| shift-reduce conflicts. |
810`-------------------------------------------------------*/
1ff442ca 811
4a120d45 812static void
118fb205 813parse_expect_decl (void)
1ff442ca 814{
131e2fef 815 int c = skip_white_space ();
1ff442ca
NF
816 ungetc (c, finput);
817
131e2fef 818 if (!isdigit (c))
79282c5a 819 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
820 else
821 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
822}
823
a70083a3
AD
824
825/*-------------------------------------------------------------------.
826| Parse what comes after %thong. the full syntax is |
827| |
828| %thong <type> token number literal |
829| |
830| the <type> or number may be omitted. The number specifies the |
831| user_token_number. |
832| |
833| Two symbols are entered in the table, one for the token symbol and |
834| one for the literal. Both are given the <type>, if any, from the |
835| declaration. The ->user_token_number of the first is SALIAS and |
836| the ->user_token_number of the second is set to the number, if |
837| any, from the declaration. The two symbols are linked via |
838| pointers in their ->alias fields. |
839| |
| During OUTPUT_DEFINES_TABLE, the symbol is reported; thereafter,   |
| only the literal string is retained: it is the literal string that |
| is output to yytname.                                              |
843`-------------------------------------------------------------------*/
844
845static void
846parse_thong_decl (void)
7b306f52 847{
f17bcd1f 848 token_t token;
a70083a3
AD
849 struct bucket *symbol;
850 char *typename = 0;
6b7e85b9 851 int usrtoknum = SUNDEF;
7b306f52 852
a70083a3 853 token = lex (); /* fetch typename or first token */
511e79b3 854 if (token == tok_typename)
7b306f52 855 {
95e36146 856 typename = xstrdup (token_buffer);
a70083a3
AD
857 value_components_used = 1;
858 token = lex (); /* fetch first token */
7b306f52 859 }
7b306f52 860
a70083a3 861 /* process first token */
7b306f52 862
511e79b3 863 if (token != tok_identifier)
a70083a3
AD
864 {
865 complain (_("unrecognized item %s, expected an identifier"),
866 token_buffer);
867 skip_to_char ('%');
868 return;
7b306f52 869 }
d7020c20 870 symval->class = token_sym;
a70083a3
AD
871 symval->type_name = typename;
872 symval->user_token_number = SALIAS;
873 symbol = symval;
7b306f52 874
a70083a3 875 token = lex (); /* get number or literal string */
1ff442ca 876
511e79b3 877 if (token == tok_number)
943819bf 878 {
a70083a3
AD
879 usrtoknum = numval;
880 token = lex (); /* okay, did number, now get literal */
943819bf 881 }
1ff442ca 882
a70083a3 883 /* process literal string token */
1ff442ca 884
511e79b3 885 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 886 {
a70083a3
AD
887 complain (_("expected string constant instead of %s"), token_buffer);
888 skip_to_char ('%');
889 return;
1ff442ca 890 }
d7020c20 891 symval->class = token_sym;
a70083a3
AD
892 symval->type_name = typename;
893 symval->user_token_number = usrtoknum;
1ff442ca 894
a70083a3
AD
895 symval->alias = symbol;
896 symbol->alias = symval;
1ff442ca 897
79282c5a
AD
898 /* symbol and symval combined are only one symbol. */
899 nsyms--;
a70083a3 900}
3cef001a 901
11e2beca 902
b6610515 903static void
11d82f03 904parse_muscle_decl (void)
b6610515
RA
905{
906 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
907 char *muscle_key;
908 char *muscle_value;
b6610515
RA
909
910 /* Read key. */
911 if (!isalpha (ch) && ch != '_')
912 {
913 complain (_("invalid %s declaration"), "%define");
914 skip_to_char ('%');
915 return;
916 }
11d82f03
MA
917 copy_identifier (finput, &muscle_obstack);
918 obstack_1grow (&muscle_obstack, 0);
919 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 920
b6610515
RA
921 /* Read value. */
922 ch = skip_white_space ();
923 if (ch != '"')
924 {
925 ungetc (ch, finput);
926 if (ch != EOF)
927 {
928 complain (_("invalid %s declaration"), "%define");
929 skip_to_char ('%');
930 return;
931 }
932 else
933 fatal (_("Premature EOF after %s"), "\"");
934 }
11d82f03
MA
935 copy_string2 (finput, &muscle_obstack, '"', 0);
936 obstack_1grow (&muscle_obstack, 0);
937 muscle_value = obstack_finish (&muscle_obstack);
b6610515 938
b6610515 939 /* Store the (key, value) pair in the environment. */
11d82f03 940 muscle_insert (muscle_key, muscle_value);
b6610515
RA
941}
942
2ba3b73c 943
426cf563
MA
944
945/*---------------------------------.
a870c567 946| Parse a double quoted parameter. |
426cf563
MA
947`---------------------------------*/
948
949static const char *
950parse_dquoted_param (const char *from)
951{
952 struct obstack param_obstack;
953 const char *param = NULL;
954 int c;
955
956 obstack_init (&param_obstack);
957 c = skip_white_space ();
958
959 if (c != '"')
960 {
961 complain (_("invalid %s declaration"), from);
962 ungetc (c, finput);
963 skip_to_char ('%');
964 return NULL;
965 }
966
2648a72d
AD
967 while ((c = literalchar ()) != '"')
968 obstack_1grow (&param_obstack, c);
a870c567 969
426cf563
MA
970 obstack_1grow (&param_obstack, '\0');
971 param = obstack_finish (&param_obstack);
972
973 if (c != '"' || strlen (param) == 0)
974 {
975 complain (_("invalid %s declaration"), from);
976 if (c != '"')
977 ungetc (c, finput);
978 skip_to_char ('%');
979 return NULL;
980 }
981
982 return param;
983}
984
2ba3b73c
MA
985/*----------------------------------.
986| Parse what comes after %skeleton. |
987`----------------------------------*/
988
a870c567 989static void
2ba3b73c
MA
990parse_skel_decl (void)
991{
426cf563 992 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
993}
994
a70083a3
AD
995/*----------------------------------------------------------------.
996| Read from finput until `%%' is seen. Discard the `%%'. Handle |
997| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 998| groups to ATTRS_OBSTACK. |
a70083a3 999`----------------------------------------------------------------*/
1ff442ca 1000
4a120d45 1001static void
a70083a3 1002read_declarations (void)
1ff442ca 1003{
a70083a3 1004 for (;;)
1ff442ca 1005 {
951366c1 1006 int c = skip_white_space ();
1ff442ca 1007
a70083a3
AD
1008 if (c == '%')
1009 {
951366c1 1010 token_t tok = parse_percent_token ();
1ff442ca 1011
a70083a3 1012 switch (tok)
943819bf 1013 {
511e79b3 1014 case tok_two_percents:
a70083a3 1015 return;
1ff442ca 1016
511e79b3 1017 case tok_percent_left_curly:
a70083a3
AD
1018 copy_definition ();
1019 break;
1ff442ca 1020
511e79b3 1021 case tok_token:
d7020c20 1022 parse_token_decl (token_sym, nterm_sym);
a70083a3 1023 break;
1ff442ca 1024
511e79b3 1025 case tok_nterm:
d7020c20 1026 parse_token_decl (nterm_sym, token_sym);
a70083a3 1027 break;
1ff442ca 1028
511e79b3 1029 case tok_type:
a70083a3
AD
1030 parse_type_decl ();
1031 break;
1ff442ca 1032
511e79b3 1033 case tok_start:
a70083a3
AD
1034 parse_start_decl ();
1035 break;
118fb205 1036
511e79b3 1037 case tok_union:
a70083a3
AD
1038 parse_union_decl ();
1039 break;
1ff442ca 1040
511e79b3 1041 case tok_expect:
a70083a3
AD
1042 parse_expect_decl ();
1043 break;
6deb4447 1044
511e79b3 1045 case tok_thong:
a70083a3
AD
1046 parse_thong_decl ();
1047 break;
d7020c20 1048
511e79b3 1049 case tok_left:
d7020c20 1050 parse_assoc_decl (left_assoc);
a70083a3 1051 break;
1ff442ca 1052
511e79b3 1053 case tok_right:
d7020c20 1054 parse_assoc_decl (right_assoc);
a70083a3 1055 break;
1ff442ca 1056
511e79b3 1057 case tok_nonassoc:
d7020c20 1058 parse_assoc_decl (non_assoc);
a70083a3 1059 break;
1ff442ca 1060
b6610515 1061 case tok_define:
11d82f03 1062 parse_muscle_decl ();
b6610515 1063 break;
342b8b6e 1064
2ba3b73c
MA
1065 case tok_skel:
1066 parse_skel_decl ();
1067 break;
b6610515 1068
511e79b3 1069 case tok_noop:
a70083a3 1070 break;
1ff442ca 1071
951366c1
AD
1072 case tok_stropt:
1073 case tok_intopt:
1074 case tok_obsolete:
951366c1
AD
1075 abort ();
1076 break;
1077
e0c40012 1078 case tok_illegal:
a70083a3
AD
1079 default:
1080 complain (_("unrecognized: %s"), token_buffer);
1081 skip_to_char ('%');
1082 }
1083 }
1084 else if (c == EOF)
1085 fatal (_("no input grammar"));
1086 else
1087 {
ff4a34be
AD
1088 char buf[] = "c";
1089 buf[0] = c;
1090 complain (_("unknown character: %s"), quote (buf));
a70083a3 1091 skip_to_char ('%');
1ff442ca 1092 }
1ff442ca 1093 }
1ff442ca 1094}
a70083a3
AD
1095\f
1096/*-------------------------------------------------------------------.
1097| Assuming that a `{' has just been seen, copy everything up to the |
1098| matching `}' into the actions file. STACK_OFFSET is the number of |
1099| values in the current rule so far, which says where to find `$0' |
1100| with respect to the top of the stack. |
14d293ac 1101| |
11e2beca
AD
1102| This routine is used both for actions and guards. Only |
1103| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1104| pointers to relevant portions inside this obstack. |
a70083a3 1105`-------------------------------------------------------------------*/
1ff442ca 1106
4a120d45 1107static void
14d293ac 1108parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1109{
a70083a3 1110 int c;
a70083a3 1111 int count;
1ff442ca 1112
1ff442ca 1113 count = 1;
1ff442ca
NF
1114 while (count > 0)
1115 {
14d293ac
AD
1116 while ((c = getc (finput)) != '}')
1117 switch (c)
1118 {
1119 case '\n':
1120 obstack_1grow (&action_obstack, c);
1121 lineno++;
1122 break;
1ff442ca 1123
14d293ac
AD
1124 case '{':
1125 obstack_1grow (&action_obstack, c);
1126 count++;
1127 break;
1ff442ca 1128
14d293ac
AD
1129 case '\'':
1130 case '"':
1131 copy_string (finput, &action_obstack, c);
1132 break;
1ff442ca 1133
14d293ac
AD
1134 case '/':
1135 copy_comment (finput, &action_obstack);
1136 break;
1ff442ca 1137
14d293ac
AD
1138 case '$':
1139 copy_dollar (finput, &action_obstack,
1140 rule, stack_offset);
1141 break;
1ff442ca 1142
14d293ac
AD
1143 case '@':
1144 copy_at (finput, &action_obstack,
1145 stack_offset);
1146 break;
a70083a3 1147
14d293ac
AD
1148 case EOF:
1149 fatal (_("unmatched %s"), "`{'");
a70083a3 1150
14d293ac
AD
1151 default:
1152 obstack_1grow (&action_obstack, c);
1153 }
a70083a3 1154
14d293ac 1155 /* Above loop exits when C is '}'. */
a70083a3
AD
1156 if (--count)
1157 {
8c7ebe49 1158 obstack_1grow (&action_obstack, c);
a70083a3
AD
1159 c = getc (finput);
1160 }
1161 }
1162
3f96f4dc 1163 obstack_1grow (&action_obstack, '\0');
a70083a3 1164}
14d293ac 1165
a70083a3
AD
1166
1167static void
14d293ac 1168parse_action (symbol_list *rule, int stack_offset)
a70083a3 1169{
14d293ac
AD
1170 rule->action_line = lineno;
1171 parse_braces (rule, stack_offset);
1172 rule->action = obstack_finish (&action_obstack);
1173}
a70083a3 1174
a70083a3 1175
14d293ac
AD
1176static void
1177parse_guard (symbol_list *rule, int stack_offset)
1178{
1179 token_t t = lex ();
1180 if (t != tok_left_curly)
1181 complain (_("invalid %s declaration"), "%guard");
f499b062 1182 rule->guard_line = lineno;
14d293ac
AD
1183 parse_braces (rule, stack_offset);
1184 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1185}
14d293ac 1186
a70083a3
AD
1187\f
1188
a70083a3
AD
1189/*-------------------------------------------------------------------.
1190| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1191| with the user's names. |
1192`-------------------------------------------------------------------*/
1ff442ca 1193
4a120d45 1194static bucket *
118fb205 1195gensym (void)
1ff442ca 1196{
274d42ce
AD
1197 /* Incremented for each generated symbol */
1198 static int gensym_count = 0;
1199 static char buf[256];
1200
a70083a3 1201 bucket *sym;
1ff442ca 1202
274d42ce
AD
1203 sprintf (buf, "@%d", ++gensym_count);
1204 token_buffer = buf;
a70083a3 1205 sym = getsym (token_buffer);
d7020c20 1206 sym->class = nterm_sym;
1ff442ca 1207 sym->value = nvars++;
36281465 1208 return sym;
1ff442ca 1209}
a70083a3 1210\f
107f7dfb
AD
1211/*-------------------------------------------------------------------.
1212| Parse the input grammar into a one symbol_list structure. Each |
1213| rule is represented by a sequence of symbols: the left hand side |
1214| followed by the contents of the right hand side, followed by a |
1215| null pointer instead of a symbol to terminate the rule. The next |
1216| symbol is the lhs of the following rule. |
1217| |
1218| All guards and actions are copied out to the appropriate files, |
1219| labelled by the rule number they apply to. |
1220| |
1221| Bison used to allow some %directives in the rules sections, but |
1222| this is no longer consider appropriate: (i) the documented grammar |
1223| doesn't claim it, (ii), it would promote bad style, (iii), error |
1224| recovery for %directives consists in skipping the junk until a `%' |
1225| is seen and helrp synchronizing. This scheme is definitely wrong |
1226| in the rules section. |
1227`-------------------------------------------------------------------*/
1ff442ca 1228
4a120d45 1229static void
118fb205 1230readgram (void)
1ff442ca 1231{
f17bcd1f 1232 token_t t;
a70083a3 1233 bucket *lhs = NULL;
107f7dfb
AD
1234 symbol_list *p = NULL;
1235 symbol_list *p1 = NULL;
a70083a3 1236 bucket *bp;
1ff442ca 1237
ff4a34be
AD
1238 /* Points to first symbol_list of current rule. its symbol is the
1239 lhs of the rule. */
107f7dfb 1240 symbol_list *crule = NULL;
ff4a34be 1241 /* Points to the symbol_list preceding crule. */
107f7dfb 1242 symbol_list *crule1 = NULL;
1ff442ca 1243
a70083a3 1244 t = lex ();
1ff442ca 1245
511e79b3 1246 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1247 if (t == tok_identifier || t == tok_bar)
1248 {
1249 int action_flag = 0;
1250 /* Number of symbols in rhs of this rule so far */
1251 int rulelength = 0;
1252 int xactions = 0; /* JF for error checking */
1253 bucket *first_rhs = 0;
1254
1255 if (t == tok_identifier)
1256 {
1257 lhs = symval;
1258
1259 if (!start_flag)
1260 {
1261 startval = lhs;
1262 start_flag = 1;
1263 }
1ff442ca 1264
107f7dfb
AD
1265 t = lex ();
1266 if (t != tok_colon)
1267 {
1268 complain (_("ill-formed rule: initial symbol not followed by colon"));
1269 unlex (t);
1270 }
1271 }
1272
1273 if (nrules == 0 && t == tok_bar)
1274 {
1275 complain (_("grammar starts with vertical bar"));
1276 lhs = symval; /* BOGUS: use a random symval */
1277 }
1278 /* start a new rule and record its lhs. */
1279
1280 nrules++;
1281 nitems++;
1282
1283 p = symbol_list_new (lhs);
1284
1285 crule1 = p1;
1286 if (p1)
1287 p1->next = p;
1288 else
1289 grammar = p;
1ff442ca 1290
107f7dfb
AD
1291 p1 = p;
1292 crule = p;
1ff442ca 1293
107f7dfb 1294 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1295
107f7dfb
AD
1296 if (lhs->class == unknown_sym)
1297 {
1298 lhs->class = nterm_sym;
1299 lhs->value = nvars;
1300 nvars++;
1301 }
1302 else if (lhs->class == token_sym)
1303 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1304
107f7dfb 1305 /* read the rhs of the rule. */
1ff442ca 1306
107f7dfb
AD
1307 for (;;)
1308 {
1309 t = lex ();
1310 if (t == tok_prec)
1311 {
1312 t = lex ();
1313 crule->ruleprec = symval;
1314 t = lex ();
1315 }
1316
1317 if (!(t == tok_identifier || t == tok_left_curly))
1318 break;
1ff442ca 1319
107f7dfb
AD
1320 /* If next token is an identifier, see if a colon follows it.
1321 If one does, exit this rule now. */
1322 if (t == tok_identifier)
1323 {
1324 bucket *ssave;
1325 token_t t1;
1326
1327 ssave = symval;
1328 t1 = lex ();
1329 unlex (t1);
1330 symval = ssave;
1331 if (t1 == tok_colon)
1332 break;
1333
1334 if (!first_rhs) /* JF */
1335 first_rhs = symval;
1336 /* Not followed by colon =>
1337 process as part of this rule's rhs. */
1338 }
1339
1340 /* If we just passed an action, that action was in the middle
1341 of a rule, so make a dummy rule to reduce it to a
1342 non-terminal. */
1343 if (action_flag)
1344 {
1345 /* Since the action was written out with this rule's
1346 number, we must give the new rule this number by
1347 inserting the new rule before it. */
1348
1349 /* Make a dummy nonterminal, a gensym. */
1350 bucket *sdummy = gensym ();
1351
1352 /* Make a new rule, whose body is empty, before the
1353 current one, so that the action just read can
1354 belong to it. */
1355 nrules++;
1356 nitems++;
1357 p = symbol_list_new (sdummy);
1358 /* Attach its lineno to that of the host rule. */
1359 p->line = crule->line;
1360 if (crule1)
1361 crule1->next = p;
1362 else
1363 grammar = p;
1364 /* End of the rule. */
1365 crule1 = symbol_list_new (NULL);
1366 crule1->next = crule;
1367
1368 p->next = crule1;
1369
1370 /* Insert the dummy generated by that rule into this
1371 rule. */
1372 nitems++;
1373 p = symbol_list_new (sdummy);
1374 p1->next = p;
1375 p1 = p;
1376
1377 action_flag = 0;
1378 }
1379
1380 if (t == tok_identifier)
1381 {
1382 nitems++;
1383 p = symbol_list_new (symval);
1384 p1->next = p;
1385 p1 = p;
1386 }
1387 else /* handle an action. */
1388 {
14d293ac 1389 parse_action (crule, rulelength);
107f7dfb
AD
1390 action_flag = 1;
1391 xactions++; /* JF */
1392 }
1393 rulelength++;
1394 } /* end of read rhs of rule */
1395
1396 /* Put an empty link in the list to mark the end of this rule */
1397 p = symbol_list_new (NULL);
1398 p1->next = p;
1399 p1 = p;
1400
1401 if (t == tok_prec)
1402 {
1403 complain (_("two @prec's in a row"));
1404 t = lex ();
1405 crule->ruleprec = symval;
1406 t = lex ();
1407 }
f499b062 1408
107f7dfb
AD
1409 if (t == tok_guard)
1410 {
1411 if (!semantic_parser)
1412 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1413
14d293ac 1414 parse_guard (crule, rulelength);
a70083a3 1415 t = lex ();
107f7dfb 1416 }
f499b062
AD
1417
1418 if (t == tok_left_curly)
107f7dfb
AD
1419 {
1420 /* This case never occurs -wjh */
1421 if (action_flag)
1422 complain (_("two actions at end of one rule"));
14d293ac 1423 parse_action (crule, rulelength);
107f7dfb
AD
1424 action_flag = 1;
1425 xactions++; /* -wjh */
1426 t = lex ();
1427 }
1428 /* If $$ is being set in default way, report if any type
1429 mismatch. */
1430 else if (!xactions
1431 && first_rhs && lhs->type_name != first_rhs->type_name)
1432 {
1433 if (lhs->type_name == 0
1434 || first_rhs->type_name == 0
1435 || strcmp (lhs->type_name, first_rhs->type_name))
1436 complain (_("type clash (`%s' `%s') on default action"),
1437 lhs->type_name ? lhs->type_name : "",
1438 first_rhs->type_name ? first_rhs->type_name : "");
1439 }
1440 /* Warn if there is no default for $$ but we need one. */
1441 else if (!xactions && !first_rhs && lhs->type_name != 0)
1442 complain (_("empty rule for typed nonterminal, and no action"));
1443 if (t == tok_semicolon)
a70083a3 1444 t = lex ();
107f7dfb
AD
1445 }
1446 else
1447 {
1448 complain (_("invalid input: %s"), quote (token_buffer));
1449 t = lex ();
1450 }
943819bf 1451
b68e7744
AD
1452 /* grammar has been read. Do some checking */
1453
1454 if (nrules == 0)
1455 fatal (_("no rules in the input grammar"));
1456
1457 /* Report any undefined symbols and consider them nonterminals. */
1458
1459 for (bp = firstsymbol; bp; bp = bp->next)
1460 if (bp->class == unknown_sym)
1461 {
1462 complain (_
1463 ("symbol %s is used, but is not defined as a token and has no rules"),
1464 bp->tag);
1465 bp->class = nterm_sym;
1466 bp->value = nvars++;
1467 }
1468
ff442794
AD
1469 /* Insert the initial rule, which line is that of the first rule
1470 (not that of the start symbol):
30171f79
AD
1471
1472 axiom: %start EOF. */
1473 p = symbol_list_new (axiom);
ff442794 1474 p->line = grammar->line;
30171f79
AD
1475 p->next = symbol_list_new (startval);
1476 p->next->next = symbol_list_new (eoftoken);
1477 p->next->next->next = symbol_list_new (NULL);
1478 p->next->next->next->next = grammar;
1479 nrules += 1;
1480 nitems += 3;
1481 grammar = p;
1482 startval = axiom;
1ff442ca
NF
1483
1484 if (nsyms > MAXSHORT)
a0f6b076
AD
1485 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1486 MAXSHORT);
1ff442ca
NF
1487
1488 ntokens = nsyms - nvars;
1489}
ff48177d
MA
1490
1491/* At the end of the grammar file, some C source code must
63c2d5de 1492 be stored. It is going to be associated to the epilogue
ff48177d
MA
1493 directive. */
1494static void
1495read_additionnal_code (void)
1496{
1497 char c;
63c2d5de 1498 struct obstack el_obstack;
342b8b6e 1499
63c2d5de 1500 obstack_init (&el_obstack);
ff48177d 1501
710ddc4f
MA
1502 if (!no_lines_flag)
1503 {
1504 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1505 lineno, quotearg_style (c_quoting_style,
b7c49edf 1506 muscle_find ("filename")));
710ddc4f
MA
1507 }
1508
ff48177d 1509 while ((c = getc (finput)) != EOF)
63c2d5de 1510 obstack_1grow (&el_obstack, c);
342b8b6e 1511
63c2d5de 1512 obstack_1grow (&el_obstack, 0);
11d82f03 1513 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1514}
1515
a70083a3 1516\f
037ca2f1
AD
1517/*------------------------------------------------------------------.
1518| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1519| number. |
1520`------------------------------------------------------------------*/
1521
1522static void
1523token_translations_init (void)
1524{
1525 bucket *bp = NULL;
1526 int i;
1527
1528 token_translations = XCALLOC (short, max_user_token_number + 1);
1529
1530 /* Initialize all entries for literal tokens to 2, the internal
1531 token number for $undefined., which represents all invalid
1532 inputs. */
1533 for (i = 0; i <= max_user_token_number; i++)
1534 token_translations[i] = 2;
1535
1536 for (bp = firstsymbol; bp; bp = bp->next)
1537 {
1538 /* Non-terminal? */
1539 if (bp->value >= ntokens)
1540 continue;
1541 /* A token string alias? */
1542 if (bp->user_token_number == SALIAS)
1543 continue;
6b7e85b9
AD
1544
1545 assert (bp->user_token_number != SUNDEF);
1546
037ca2f1
AD
1547 /* A token which translation has already been set? */
1548 if (token_translations[bp->user_token_number] != 2)
1549 complain (_("tokens %s and %s both assigned number %d"),
ad949da9 1550 symbols[token_translations[bp->user_token_number]]->tag,
037ca2f1
AD
1551 bp->tag, bp->user_token_number);
1552 token_translations[bp->user_token_number] = bp->value;
1553 }
1554}
1555
1556
0e78e603
AD
1557/*----------------------------------------------------------------.
1558| Assign symbol numbers, and write definition of token names into |
1559| FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1560`----------------------------------------------------------------*/
1ff442ca 1561
4a120d45 1562static void
118fb205 1563packsymbols (void)
1ff442ca 1564{
342b8b6e 1565 bucket *bp = NULL;
a70083a3 1566 int tokno = 1;
a70083a3 1567 int last_user_token_number;
1ff442ca 1568
0e78e603 1569 symbols = XCALLOC (bucket *, nsyms);
1ff442ca 1570
1ff442ca
NF
1571 max_user_token_number = 256;
1572 last_user_token_number = 256;
1573
1574 for (bp = firstsymbol; bp; bp = bp->next)
1575 {
d7020c20 1576 if (bp->class == nterm_sym)
1ff442ca
NF
1577 {
1578 bp->value += ntokens;
1579 }
943819bf
RS
1580 else if (bp->alias)
1581 {
b7c49edf
AD
1582 /* This symbol and its alias are a single token defn.
1583 Allocate a tokno, and assign to both check agreement of
1584 prec and assoc fields and make both the same */
1585 if (bp->value == -1)
1586 {
1587 if (bp == eoftoken || bp->alias == eoftoken)
1588 bp->value = bp->alias->value = 0;
1589 else
1590 {
1591 bp->value = bp->alias->value = tokno++;
1592 }
1593 }
943819bf 1594
0a6384c4
AD
1595 if (bp->prec != bp->alias->prec)
1596 {
1597 if (bp->prec != 0 && bp->alias->prec != 0
1598 && bp->user_token_number == SALIAS)
a0f6b076
AD
1599 complain (_("conflicting precedences for %s and %s"),
1600 bp->tag, bp->alias->tag);
0a6384c4
AD
1601 if (bp->prec != 0)
1602 bp->alias->prec = bp->prec;
1603 else
1604 bp->prec = bp->alias->prec;
1605 }
943819bf 1606
0a6384c4
AD
1607 if (bp->assoc != bp->alias->assoc)
1608 {
a0f6b076
AD
1609 if (bp->assoc != 0 && bp->alias->assoc != 0
1610 && bp->user_token_number == SALIAS)
1611 complain (_("conflicting assoc values for %s and %s"),
1612 bp->tag, bp->alias->tag);
1613 if (bp->assoc != 0)
1614 bp->alias->assoc = bp->assoc;
1615 else
1616 bp->assoc = bp->alias->assoc;
1617 }
0a6384c4 1618
b7c49edf 1619 /* Do not do processing below for SALIASs. */
0a6384c4 1620 if (bp->user_token_number == SALIAS)
b7c49edf 1621 continue;
943819bf 1622
a70083a3 1623 }
b7c49edf 1624 else /* bp->class == token_sym */
943819bf 1625 {
b7c49edf
AD
1626 if (bp == eoftoken)
1627 bp->value = 0;
1628 else
1629 bp->value = tokno++;
943819bf
RS
1630 }
1631
d7020c20 1632 if (bp->class == token_sym)
1ff442ca 1633 {
6b7e85b9 1634 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1635 bp->user_token_number = ++last_user_token_number;
1636 if (bp->user_token_number > max_user_token_number)
1637 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1638 }
1639
0e78e603 1640 symbols[bp->value] = bp;
1ff442ca
NF
1641 }
1642
037ca2f1 1643 token_translations_init ();
1ff442ca
NF
1644
1645 error_token_number = errtoken->value;
1646
e3f1699f
AD
1647 if (startval->class == unknown_sym)
1648 fatal (_("the start symbol %s is undefined"), startval->tag);
1649 else if (startval->class == token_sym)
1650 fatal (_("the start symbol %s is a token"), startval->tag);
1651
1652 start_symbol = startval->value;
1653}
1654
1655
93ede233
AD
1656/*---------------------------------------------------------------.
1657| Save the definition of token names in the `TOKENDEFS' muscle. |
1658`---------------------------------------------------------------*/
e3f1699f
AD
1659
1660static void
93ede233 1661symbols_save (void)
e3f1699f 1662{
93ede233
AD
1663 struct obstack tokendefs;
1664 bucket *bp;
93ede233
AD
1665 obstack_init (&tokendefs);
1666
1667 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1668 {
ec2da99f 1669 char *symbol = bp->tag; /* get symbol */
1ff442ca 1670
93ede233
AD
1671 if (bp->value >= ntokens)
1672 continue;
1673 if (bp->user_token_number == SALIAS)
1674 continue;
1675 if ('\'' == *symbol)
1676 continue; /* skip literal character */
1677 if (bp == errtoken)
1678 continue; /* skip error token */
1679 if ('\"' == *symbol)
037ca2f1 1680 {
93ede233
AD
1681 /* use literal string only if given a symbol with an alias */
1682 if (bp->alias)
1683 symbol = bp->alias->tag;
1684 else
1685 continue;
037ca2f1 1686 }
93ede233
AD
1687
1688 /* Don't #define nonliteral tokens whose names contain periods. */
ec2da99f 1689 if (strchr (symbol, '.'))
93ede233
AD
1690 continue;
1691
7742ddeb 1692 obstack_fgrow2 (&tokendefs, "# define %s\t%d\n",
93ede233
AD
1693 symbol, bp->user_token_number);
1694 if (semantic_parser)
1695 /* FIXME: This is probably wrong, and should be just as
1696 above. --akim. */
7742ddeb 1697 obstack_fgrow2 (&tokendefs, "# define T%s\t%d\n", symbol, bp->value);
1ff442ca 1698 }
93ede233
AD
1699
1700 obstack_1grow (&tokendefs, 0);
1701 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1702 obstack_free (&tokendefs, NULL);
1ff442ca 1703}
a083fbbf 1704
1ff442ca 1705
a70083a3
AD
1706/*---------------------------------------------------------------.
1707| Convert the rules into the representation using RRHS, RLHS and |
1708| RITEMS. |
1709`---------------------------------------------------------------*/
1ff442ca 1710
4a120d45 1711static void
118fb205 1712packgram (void)
1ff442ca 1713{
a70083a3
AD
1714 int itemno;
1715 int ruleno;
1716 symbol_list *p;
1ff442ca 1717
adc8c848
AD
1718 /* We use short to index items. */
1719 if (nitems >= MAXSHORT)
1720 fatal (_("too many items (max %d)"), MAXSHORT);
1721
d7913476 1722 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1723 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1724
1725 itemno = 0;
1726 ruleno = 1;
1727
1728 p = grammar;
1729 while (p)
1730 {
b29b2ed5 1731 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1732 rule_table[ruleno].lhs = p->sym->value;
1733 rule_table[ruleno].rhs = itemno;
b29b2ed5 1734 rule_table[ruleno].line = p->line;
68f1e3ed 1735 rule_table[ruleno].useful = TRUE;
3f96f4dc
AD
1736 rule_table[ruleno].action = p->action;
1737 rule_table[ruleno].action_line = p->action_line;
f499b062
AD
1738 rule_table[ruleno].guard = p->guard;
1739 rule_table[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1740
1741 p = p->next;
1742 while (p && p->sym)
1743 {
1744 ritem[itemno++] = p->sym->value;
1745 /* A rule gets by default the precedence and associativity
1746 of the last token in it. */
d7020c20 1747 if (p->sym->class == token_sym)
1ff442ca 1748 {
652a871c
AD
1749 rule_table[ruleno].prec = p->sym->prec;
1750 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1751 }
a70083a3
AD
1752 if (p)
1753 p = p->next;
1ff442ca
NF
1754 }
1755
1756 /* If this rule has a %prec,
a70083a3 1757 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1758 if (ruleprec)
1759 {
652a871c
AD
1760 rule_table[ruleno].prec = ruleprec->prec;
1761 rule_table[ruleno].assoc = ruleprec->assoc;
1762 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1763 }
1764
1765 ritem[itemno++] = -ruleno;
1766 ruleno++;
1767
a70083a3
AD
1768 if (p)
1769 p = p->next;
1ff442ca
NF
1770 }
1771
1772 ritem[itemno] = 0;
75142d45
AD
1773 nritems = itemno;
1774 assert (nritems == nitems);
3067fbef
AD
1775
1776 if (trace_flag)
1777 ritem_print (stderr);
1ff442ca 1778}
a70083a3
AD
1779\f
1780/*-------------------------------------------------------------------.
1781| Read in the grammar specification and record it in the format |
ea5607fd 1782| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1783| and all actions into ACTION_OBSTACK, in each case forming the body |
1784| of a C function (YYGUARD or YYACTION) which contains a switch |
1785| statement to decide which guard or action to execute. |
a70083a3
AD
1786`-------------------------------------------------------------------*/
1787
1788void
1789reader (void)
1790{
1791 start_flag = 0;
1792 startval = NULL; /* start symbol not specified yet. */
1793
b7c49edf 1794 nsyms = 0;
a70083a3
AD
1795 nvars = 0;
1796 nrules = 0;
1797 nitems = 0;
a70083a3
AD
1798
1799 typed = 0;
1800 lastprec = 0;
1801
a70083a3
AD
1802 semantic_parser = 0;
1803 pure_parser = 0;
a70083a3
AD
1804
1805 grammar = NULL;
1806
342b8b6e 1807 lex_init ();
a70083a3
AD
1808 lineno = 1;
1809
11d82f03
MA
1810 /* Initialize the muscle obstack. */
1811 obstack_init (&muscle_obstack);
82e236e2 1812
a70083a3
AD
1813 /* Initialize the symbol table. */
1814 tabinit ();
b6610515 1815
30171f79
AD
1816 /* Construct the axiom symbol. */
1817 axiom = getsym ("$axiom");
1818 axiom->class = nterm_sym;
1819 axiom->value = nvars++;
1820
a70083a3
AD
1821 /* Construct the error token */
1822 errtoken = getsym ("error");
d7020c20 1823 errtoken->class = token_sym;
a70083a3 1824 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1825
a70083a3
AD
1826 /* Construct a token that represents all undefined literal tokens.
1827 It is always token number 2. */
1828 undeftoken = getsym ("$undefined.");
d7020c20 1829 undeftoken->class = token_sym;
a70083a3
AD
1830 undeftoken->user_token_number = 2;
1831
331dbc1b
AD
1832 /* Initialize the obstacks. */
1833 obstack_init (&action_obstack);
1834 obstack_init (&attrs_obstack);
331dbc1b
AD
1835 obstack_init (&output_obstack);
1836
1837 finput = xfopen (infile, "r");
1838
896fe5c1
AD
1839 /* Read the declaration section. Copy %{ ... %} groups to
1840 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1841 etc. found there. */
a70083a3 1842 read_declarations ();
b7c49edf
AD
1843
1844 /* If the user did not define her EOFTOKEN, do it now. */
1845 if (!eoftoken)
1846 {
1847 eoftoken = getsym ("$");
1848 eoftoken->class = token_sym;
1849 /* Value specified by POSIX. */
1850 eoftoken->user_token_number = 0;
1851 }
1852
a70083a3
AD
1853 /* Read in the grammar, build grammar in list form. Write out
1854 guards and actions. */
1855 readgram ();
ff48177d
MA
1856 /* Some C code is given at the end of the grammar file. */
1857 read_additionnal_code ();
b0c4483e 1858
331dbc1b
AD
1859 lex_free ();
1860 xfclose (finput);
1861
a70083a3
AD
1862 /* Assign the symbols their symbol numbers. Write #defines for the
1863 token symbols into FDEFINES if requested. */
1864 packsymbols ();
93ede233
AD
1865 /* Save them. */
1866 symbols_save ();
1867
a70083a3
AD
1868 /* Convert the grammar into the format described in gram.h. */
1869 packgram ();
a70083a3 1870}