]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/reader.c (readgram): Bind the initial rule's lineno to that
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
41 bucket *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
a70083a3 51 bucket *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
1ff442ca 55char **tags;
d019d655 56short *user_toknums;
4a120d45
JT
57static symbol_list *grammar;
58static int start_flag;
59static bucket *startval;
1ff442ca
NF
60
61/* Nonzero if components of semantic values are used, implying
62 they must be unions. */
63static int value_components_used;
64
d7020c20
AD
65/* Nonzero if %union has been seen. */
66static int typed;
1ff442ca 67
d7020c20
AD
68/* Incremented for each %left, %right or %nonassoc seen */
69static int lastprec;
1ff442ca 70
b7c49edf
AD
71static bucket *errtoken = NULL;
72static bucket *undeftoken = NULL;
73static bucket *eoftoken = NULL;
30171f79 74static bucket *axiom = NULL;
b29b2ed5 75
6255b435 76static symbol_list *
b29b2ed5
AD
77symbol_list_new (bucket *sym)
78{
79 symbol_list *res = XMALLOC (symbol_list, 1);
80 res->next = NULL;
81 res->sym = sym;
82 res->line = lineno;
d945f5cd
AD
83 res->action = NULL;
84 res->action_line = 0;
f499b062
AD
85 res->guard = NULL;
86 res->guard_line = 0;
b29b2ed5
AD
87 res->ruleprec = NULL;
88 return res;
89}
90
0d533154 91\f
a70083a3 92
0d533154
AD
93/*===================\
94| Low level lexing. |
95\===================*/
943819bf
RS
96
97static void
118fb205 98skip_to_char (int target)
943819bf
RS
99{
100 int c;
101 if (target == '\n')
a0f6b076 102 complain (_(" Skipping to next \\n"));
943819bf 103 else
a0f6b076 104 complain (_(" Skipping to next %c"), target);
943819bf
RS
105
106 do
0d533154 107 c = skip_white_space ();
943819bf 108 while (c != target && c != EOF);
a083fbbf 109 if (c != EOF)
0d533154 110 ungetc (c, finput);
943819bf
RS
111}
112
113
/* Read an optionally negative decimal integer from STREAM and return
   its value.  An optional leading `-' is accepted, followed by zero
   or more digits; the first character that is not part of the number
   is pushed back onto STREAM.  If there are no digits the result is
   0.  A magnitude that does not fit in an `int' saturates at INT_MAX
   (so the result is INT_MAX or -INT_MAX) instead of overflowing,
   which would be undefined behavior.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would `10 * n + digit' exceed INT_MAX?  Once saturated, N
	 stays at INT_MAX for the remaining digits.  */
      if (n > (INT_MAX - digit) / 10)
	n = INT_MAX;
      else
	n = 10 * n + digit;
      c = getc (stream);
    }

  /* Pushing back EOF is a no-op.  */
  ungetc (c, stream);

  return sign * n;
}
141\f
79282c5a
AD
142/*--------------------------------------------------------------.
143| Get the data type (alternative in the union) of the value for |
144| symbol N in rule RULE. |
145`--------------------------------------------------------------*/
146
147static char *
b29b2ed5 148get_type_name (int n, symbol_list *rule)
79282c5a
AD
149{
150 int i;
151 symbol_list *rp;
152
153 if (n < 0)
154 {
155 complain (_("invalid $ value"));
156 return NULL;
157 }
158
159 rp = rule;
160 i = 0;
161
162 while (i < n)
163 {
164 rp = rp->next;
165 if (rp == NULL || rp->sym == NULL)
166 {
167 complain (_("invalid $ value"));
168 return NULL;
169 }
170 i++;
171 }
172
173 return rp->sym->type_name;
174}
175\f
337bab46
AD
176/*------------------------------------------------------------.
177| Dump the string from FIN to OOUT if non null. MATCH is the |
178| delimiter of the string (either ' or "). |
179`------------------------------------------------------------*/
ae3c3164
AD
180
181static inline void
b6610515 182copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
183{
184 int c;
185
b6610515
RA
186 if (store)
187 obstack_1grow (oout, match);
8c7ebe49 188
4a120d45 189 c = getc (fin);
ae3c3164
AD
190
191 while (c != match)
192 {
193 if (c == EOF)
194 fatal (_("unterminated string at end of file"));
195 if (c == '\n')
196 {
a0f6b076 197 complain (_("unterminated string"));
4a120d45 198 ungetc (c, fin);
ae3c3164
AD
199 c = match; /* invent terminator */
200 continue;
201 }
202
337bab46 203 obstack_1grow (oout, c);
ae3c3164
AD
204
205 if (c == '\\')
206 {
4a120d45 207 c = getc (fin);
ae3c3164
AD
208 if (c == EOF)
209 fatal (_("unterminated string at end of file"));
337bab46 210 obstack_1grow (oout, c);
8c7ebe49 211
ae3c3164
AD
212 if (c == '\n')
213 lineno++;
214 }
215
a70083a3 216 c = getc (fin);
ae3c3164
AD
217 }
218
b6610515
RA
219 if (store)
220 obstack_1grow (oout, c);
221}
222
/* Copy a literal delimited by MATCH from FIN to OOUT, delimiters
   included.  This is copy_string2 with STORE set.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  copy_string2 (fin, oout, match, 1);
}
230
/* Copy the longest run of alphanumeric characters and underscores
   from FIN to OOUT.  The character that ends the run is pushed back
   onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch;

  for (;;)
    {
      ch = getc (fin);
      if (!isalnum (ch) && ch != '_')
	break;
      obstack_1grow (oout, ch);
    }

  ungetc (ch, fin);
}
ae3c3164 243
2666f928
AD
244
245/*------------------------------------------------------------------.
246| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
247| `/', which might or might not be a comment. In any case, copy |
248| what we saw. |
249`------------------------------------------------------------------*/
ae3c3164
AD
250
251static inline void
2666f928 252copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
253{
254 int cplus_comment;
a70083a3 255 int ended;
550a72a3
AD
256 int c;
257
258 /* We read a `/', output it. */
2666f928 259 obstack_1grow (oout, '/');
550a72a3
AD
260
261 switch ((c = getc (fin)))
262 {
263 case '/':
264 cplus_comment = 1;
265 break;
266 case '*':
267 cplus_comment = 0;
268 break;
269 default:
270 ungetc (c, fin);
271 return;
272 }
ae3c3164 273
2666f928 274 obstack_1grow (oout, c);
550a72a3 275 c = getc (fin);
ae3c3164
AD
276
277 ended = 0;
278 while (!ended)
279 {
280 if (!cplus_comment && c == '*')
281 {
282 while (c == '*')
283 {
2666f928 284 obstack_1grow (oout, c);
550a72a3 285 c = getc (fin);
ae3c3164
AD
286 }
287
288 if (c == '/')
289 {
2666f928 290 obstack_1grow (oout, c);
ae3c3164
AD
291 ended = 1;
292 }
293 }
294 else if (c == '\n')
295 {
296 lineno++;
2666f928 297 obstack_1grow (oout, c);
ae3c3164
AD
298 if (cplus_comment)
299 ended = 1;
300 else
550a72a3 301 c = getc (fin);
ae3c3164
AD
302 }
303 else if (c == EOF)
304 fatal (_("unterminated comment"));
305 else
306 {
2666f928 307 obstack_1grow (oout, c);
550a72a3 308 c = getc (fin);
ae3c3164
AD
309 }
310 }
311}
312
313
a70083a3 314/*-----------------------------------------------------------------.
337bab46 315| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
316| reference to this location. STACK_OFFSET is the number of values |
317| in the current rule so far, which says where to find `$0' with |
318| respect to the top of the stack. |
319`-----------------------------------------------------------------*/
1ff442ca 320
a70083a3 321static inline void
337bab46 322copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 323{
a70083a3 324 int c;
1ff442ca 325
a70083a3
AD
326 c = getc (fin);
327 if (c == '$')
1ff442ca 328 {
ff4423cc 329 obstack_sgrow (oout, "yyloc");
89cab50d 330 locations_flag = 1;
a70083a3
AD
331 }
332 else if (isdigit (c) || c == '-')
333 {
334 int n;
1ff442ca 335
a70083a3
AD
336 ungetc (c, fin);
337 n = read_signed_integer (fin);
11e2beca
AD
338 if (n > stack_offset)
339 complain (_("invalid value: %s%d"), "@", n);
340 else
341 {
342 /* Offset is always 0 if parser has already popped the stack
343 pointer. */
344 obstack_fgrow1 (oout, "yylsp[%d]",
345 n - (semantic_parser ? 0 : stack_offset));
346 locations_flag = 1;
347 }
1ff442ca 348 }
a70083a3 349 else
ff4a34be
AD
350 {
351 char buf[] = "@c";
352 buf[1] = c;
353 complain (_("%s is invalid"), quote (buf));
354 }
1ff442ca 355}
79282c5a
AD
356
357
358/*-------------------------------------------------------------------.
359| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
360| |
361| Possible inputs: $[<TYPENAME>]($|integer) |
362| |
337bab46 363| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
364| the number of values in the current rule so far, which says where |
365| to find `$0' with respect to the top of the stack. |
366`-------------------------------------------------------------------*/
367
368static inline void
337bab46 369copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
370 symbol_list *rule, int stack_offset)
371{
372 int c = getc (fin);
b0ce6046 373 const char *type_name = NULL;
79282c5a 374
f282676b 375 /* Get the type name if explicit. */
79282c5a
AD
376 if (c == '<')
377 {
f282676b 378 read_type_name (fin);
79282c5a
AD
379 type_name = token_buffer;
380 value_components_used = 1;
79282c5a
AD
381 c = getc (fin);
382 }
383
384 if (c == '$')
385 {
ff4423cc 386 obstack_sgrow (oout, "yyval");
8c7ebe49 387
79282c5a
AD
388 if (!type_name)
389 type_name = get_type_name (0, rule);
390 if (type_name)
337bab46 391 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
392 if (!type_name && typed)
393 complain (_("$$ of `%s' has no declared type"),
394 rule->sym->tag);
395 }
396 else if (isdigit (c) || c == '-')
397 {
398 int n;
399 ungetc (c, fin);
400 n = read_signed_integer (fin);
401
11e2beca
AD
402 if (n > stack_offset)
403 complain (_("invalid value: %s%d"), "$", n);
404 else
405 {
406 if (!type_name && n > 0)
407 type_name = get_type_name (n, rule);
408
409 /* Offset is always 0 if parser has already popped the stack
410 pointer. */
411 obstack_fgrow1 (oout, "yyvsp[%d]",
412 n - (semantic_parser ? 0 : stack_offset));
413
414 if (type_name)
415 obstack_fgrow1 (oout, ".%s", type_name);
416 if (!type_name && typed)
417 complain (_("$%d of `%s' has no declared type"),
418 n, rule->sym->tag);
419 }
79282c5a
AD
420 }
421 else
422 {
423 char buf[] = "$c";
424 buf[1] = c;
425 complain (_("%s is invalid"), quote (buf));
426 }
427}
a70083a3
AD
428\f
429/*-------------------------------------------------------------------.
430| Copy the contents of a `%{ ... %}' into the definitions file. The |
431| `%{' has already been read. Return after reading the `%}'. |
432`-------------------------------------------------------------------*/
1ff442ca 433
4a120d45 434static void
118fb205 435copy_definition (void)
1ff442ca 436{
a70083a3 437 int c;
ae3c3164 438 /* -1 while reading a character if prev char was %. */
a70083a3 439 int after_percent;
1ff442ca 440
89cab50d 441 if (!no_lines_flag)
25b222fa
MA
442 {
443 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 444 lineno, quotearg_style (c_quoting_style,
b7c49edf 445 muscle_find ("filename")));
25b222fa 446 }
1ff442ca
NF
447
448 after_percent = 0;
449
ae3c3164 450 c = getc (finput);
1ff442ca
NF
451
452 for (;;)
453 {
454 switch (c)
455 {
456 case '\n':
dd60faec 457 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
458 lineno++;
459 break;
460
461 case '%':
a70083a3 462 after_percent = -1;
1ff442ca 463 break;
a083fbbf 464
1ff442ca
NF
465 case '\'':
466 case '"':
337bab46 467 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
468 break;
469
470 case '/':
337bab46 471 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
472 break;
473
474 case EOF:
a70083a3 475 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
476
477 default:
dd60faec 478 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
479 }
480
a70083a3 481 c = getc (finput);
1ff442ca
NF
482
483 if (after_percent)
484 {
485 if (c == '}')
486 return;
dd60faec 487 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
488 }
489 after_percent = 0;
1ff442ca 490 }
1ff442ca
NF
491}
492
493
d7020c20
AD
494/*-------------------------------------------------------------------.
495| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
496| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
497| are reversed. |
498`-------------------------------------------------------------------*/
1ff442ca 499
4a120d45 500static void
d7020c20 501parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 502{
342b8b6e
AD
503 token_t token = tok_undef;
504 char *typename = NULL;
1ff442ca 505
1e9798d5
AD
506 /* The symbol being defined. */
507 struct bucket *symbol = NULL;
508
509 /* After `%token' and `%nterm', any number of symbols maybe be
510 defined. */
1ff442ca
NF
511 for (;;)
512 {
e6011337
JT
513 int tmp_char = ungetc (skip_white_space (), finput);
514
1e9798d5
AD
515 /* `%' (for instance from `%token', or from `%%' etc.) is the
516 only valid means to end this declaration. */
e6011337 517 if (tmp_char == '%')
1ff442ca 518 return;
e6011337 519 if (tmp_char == EOF)
a0f6b076 520 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 521
a70083a3 522 token = lex ();
511e79b3 523 if (token == tok_comma)
943819bf
RS
524 {
525 symbol = NULL;
526 continue;
527 }
511e79b3 528 if (token == tok_typename)
1ff442ca 529 {
95e36146 530 typename = xstrdup (token_buffer);
1ff442ca 531 value_components_used = 1;
943819bf
RS
532 symbol = NULL;
533 }
511e79b3 534 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 535 {
8e03724b
AD
536 if (symval->alias)
537 warn (_("symbol `%s' used more than once as a literal string"),
538 symval->tag);
539 else if (symbol->alias)
540 warn (_("symbol `%s' given more than one literal string"),
541 symbol->tag);
542 else
543 {
544 symval->class = token_sym;
545 symval->type_name = typename;
546 symval->user_token_number = symbol->user_token_number;
547 symbol->user_token_number = SALIAS;
548 symval->alias = symbol;
549 symbol->alias = symval;
550 /* symbol and symval combined are only one symbol */
551 nsyms--;
552 }
8e03724b 553 symbol = NULL;
1ff442ca 554 }
511e79b3 555 else if (token == tok_identifier)
1ff442ca
NF
556 {
557 int oldclass = symval->class;
943819bf 558 symbol = symval;
1ff442ca 559
943819bf 560 if (symbol->class == what_is_not)
a0f6b076 561 complain (_("symbol %s redefined"), symbol->tag);
943819bf 562 symbol->class = what_is;
d7020c20 563 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 564 symbol->value = nvars++;
1ff442ca
NF
565
566 if (typename)
567 {
943819bf
RS
568 if (symbol->type_name == NULL)
569 symbol->type_name = typename;
a70083a3 570 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 571 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
572 }
573 }
511e79b3 574 else if (symbol && token == tok_number)
a70083a3 575 {
943819bf 576 symbol->user_token_number = numval;
b7c49edf
AD
577 /* User defined EOF token? */
578 if (numval == 0)
579 eoftoken = symbol;
a70083a3 580 }
1ff442ca 581 else
943819bf 582 {
a0f6b076 583 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
584 token_buffer,
585 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 586 skip_to_char ('%');
943819bf 587 }
1ff442ca
NF
588 }
589
590}
591
1ff442ca 592
d7020c20
AD
593/*------------------------------.
594| Parse what comes after %start |
595`------------------------------*/
1ff442ca 596
4a120d45 597static void
118fb205 598parse_start_decl (void)
1ff442ca
NF
599{
600 if (start_flag)
27821bff 601 complain (_("multiple %s declarations"), "%start");
511e79b3 602 if (lex () != tok_identifier)
27821bff 603 complain (_("invalid %s declaration"), "%start");
943819bf
RS
604 else
605 {
606 start_flag = 1;
607 startval = symval;
608 }
1ff442ca
NF
609}
610
a70083a3
AD
611/*-----------------------------------------------------------.
612| read in a %type declaration and record its information for |
613| get_type_name to access |
614`-----------------------------------------------------------*/
615
616static void
617parse_type_decl (void)
618{
a70083a3
AD
619 char *name;
620
511e79b3 621 if (lex () != tok_typename)
a70083a3
AD
622 {
623 complain ("%s", _("%type declaration has no <typename>"));
624 skip_to_char ('%');
625 return;
626 }
627
95e36146 628 name = xstrdup (token_buffer);
a70083a3
AD
629
630 for (;;)
631 {
f17bcd1f 632 token_t t;
a70083a3
AD
633 int tmp_char = ungetc (skip_white_space (), finput);
634
635 if (tmp_char == '%')
636 return;
637 if (tmp_char == EOF)
638 fatal (_("Premature EOF after %s"), token_buffer);
639
640 t = lex ();
641
642 switch (t)
1ff442ca
NF
643 {
644
511e79b3
AD
645 case tok_comma:
646 case tok_semicolon:
1ff442ca
NF
647 break;
648
511e79b3 649 case tok_identifier:
1ff442ca
NF
650 if (symval->type_name == NULL)
651 symval->type_name = name;
a70083a3 652 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 653 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
654
655 break;
656
657 default:
a0f6b076
AD
658 complain (_("invalid %%type declaration due to item: %s"),
659 token_buffer);
a70083a3 660 skip_to_char ('%');
1ff442ca
NF
661 }
662 }
663}
664
665
666
d7020c20
AD
667/*----------------------------------------------------------------.
668| Read in a %left, %right or %nonassoc declaration and record its |
669| information. |
670`----------------------------------------------------------------*/
1ff442ca 671
4a120d45 672static void
d7020c20 673parse_assoc_decl (associativity assoc)
1ff442ca 674{
a70083a3
AD
675 char *name = NULL;
676 int prev = 0;
1ff442ca 677
a70083a3 678 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 679
1ff442ca
NF
680 for (;;)
681 {
f17bcd1f 682 token_t t;
e6011337 683 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 684
e6011337 685 if (tmp_char == '%')
1ff442ca 686 return;
e6011337 687 if (tmp_char == EOF)
a0f6b076 688 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 689
a70083a3 690 t = lex ();
1ff442ca
NF
691
692 switch (t)
693 {
511e79b3 694 case tok_typename:
95e36146 695 name = xstrdup (token_buffer);
1ff442ca
NF
696 break;
697
511e79b3 698 case tok_comma:
1ff442ca
NF
699 break;
700
511e79b3 701 case tok_identifier:
1ff442ca 702 if (symval->prec != 0)
a0f6b076 703 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
704 symval->prec = lastprec;
705 symval->assoc = assoc;
d7020c20 706 if (symval->class == nterm_sym)
a0f6b076 707 complain (_("symbol %s redefined"), symval->tag);
d7020c20 708 symval->class = token_sym;
1ff442ca 709 if (name)
a70083a3 710 { /* record the type, if one is specified */
1ff442ca
NF
711 if (symval->type_name == NULL)
712 symval->type_name = name;
a70083a3 713 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 714 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
715 }
716 break;
717
511e79b3
AD
718 case tok_number:
719 if (prev == tok_identifier)
a70083a3 720 {
1ff442ca 721 symval->user_token_number = numval;
a70083a3
AD
722 }
723 else
724 {
725 complain (_
726 ("invalid text (%s) - number should be after identifier"),
727token_buffer);
728 skip_to_char ('%');
729 }
1ff442ca
NF
730 break;
731
511e79b3 732 case tok_semicolon:
1ff442ca
NF
733 return;
734
735 default:
a0f6b076 736 complain (_("unexpected item: %s"), token_buffer);
a70083a3 737 skip_to_char ('%');
1ff442ca
NF
738 }
739
740 prev = t;
1ff442ca
NF
741 }
742}
743
744
745
dd60faec 746/*--------------------------------------------------------------.
180d45ba
PB
747| Copy the union declaration into the stype muscle |
748| (and fdefines), where it is made into the definition of |
749| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 750`--------------------------------------------------------------*/
1ff442ca 751
4a120d45 752static void
118fb205 753parse_union_decl (void)
1ff442ca 754{
a70083a3
AD
755 int c;
756 int count = 0;
428046f8 757 bool done = FALSE;
180d45ba 758 struct obstack union_obstack;
1ff442ca 759 if (typed)
27821bff 760 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
761
762 typed = 1;
763
180d45ba
PB
764 obstack_init (&union_obstack);
765 obstack_sgrow (&union_obstack, "union");
1ff442ca 766
428046f8 767 while (!done)
1ff442ca 768 {
428046f8
AD
769 c = xgetc (finput);
770
342b8b6e
AD
771 /* If C contains '/', it is output by copy_comment (). */
772 if (c != '/')
2666f928 773 obstack_1grow (&union_obstack, c);
1ff442ca
NF
774
775 switch (c)
776 {
777 case '\n':
778 lineno++;
779 break;
780
781 case '/':
2666f928 782 copy_comment (finput, &union_obstack);
1ff442ca
NF
783 break;
784
1ff442ca
NF
785 case '{':
786 count++;
787 break;
788
789 case '}':
428046f8 790 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 791 if (count == 0)
27821bff 792 complain (_("unmatched %s"), "`}'");
1ff442ca 793 count--;
428046f8
AD
794 if (!count)
795 done = TRUE;
796 break;
1ff442ca 797 }
1ff442ca 798 }
180d45ba 799
428046f8
AD
800 /* JF don't choke on trailing semi */
801 c = skip_white_space ();
802 if (c != ';')
803 ungetc (c, finput);
804 obstack_1grow (&union_obstack, 0);
805 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
806}
807
d7020c20
AD
808
809/*-------------------------------------------------------.
810| Parse the declaration %expect N which says to expect N |
811| shift-reduce conflicts. |
812`-------------------------------------------------------*/
1ff442ca 813
4a120d45 814static void
118fb205 815parse_expect_decl (void)
1ff442ca 816{
131e2fef 817 int c = skip_white_space ();
1ff442ca
NF
818 ungetc (c, finput);
819
131e2fef 820 if (!isdigit (c))
79282c5a 821 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
822 else
823 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
824}
825
a70083a3
AD
826
827/*-------------------------------------------------------------------.
828| Parse what comes after %thong. the full syntax is |
829| |
830| %thong <type> token number literal |
831| |
832| the <type> or number may be omitted. The number specifies the |
833| user_token_number. |
834| |
835| Two symbols are entered in the table, one for the token symbol and |
836| one for the literal. Both are given the <type>, if any, from the |
837| declaration. The ->user_token_number of the first is SALIAS and |
838| the ->user_token_number of the second is set to the number, if |
839| any, from the declaration. The two symbols are linked via |
840| pointers in their ->alias fields. |
841| |
842| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
843| only the literal string is retained it is the literal string that |
844| is output to yytname |
845`-------------------------------------------------------------------*/
846
847static void
848parse_thong_decl (void)
7b306f52 849{
f17bcd1f 850 token_t token;
a70083a3
AD
851 struct bucket *symbol;
852 char *typename = 0;
6b7e85b9 853 int usrtoknum = SUNDEF;
7b306f52 854
a70083a3 855 token = lex (); /* fetch typename or first token */
511e79b3 856 if (token == tok_typename)
7b306f52 857 {
95e36146 858 typename = xstrdup (token_buffer);
a70083a3
AD
859 value_components_used = 1;
860 token = lex (); /* fetch first token */
7b306f52 861 }
7b306f52 862
a70083a3 863 /* process first token */
7b306f52 864
511e79b3 865 if (token != tok_identifier)
a70083a3
AD
866 {
867 complain (_("unrecognized item %s, expected an identifier"),
868 token_buffer);
869 skip_to_char ('%');
870 return;
7b306f52 871 }
d7020c20 872 symval->class = token_sym;
a70083a3
AD
873 symval->type_name = typename;
874 symval->user_token_number = SALIAS;
875 symbol = symval;
7b306f52 876
a70083a3 877 token = lex (); /* get number or literal string */
1ff442ca 878
511e79b3 879 if (token == tok_number)
943819bf 880 {
a70083a3
AD
881 usrtoknum = numval;
882 token = lex (); /* okay, did number, now get literal */
943819bf 883 }
1ff442ca 884
a70083a3 885 /* process literal string token */
1ff442ca 886
511e79b3 887 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 888 {
a70083a3
AD
889 complain (_("expected string constant instead of %s"), token_buffer);
890 skip_to_char ('%');
891 return;
1ff442ca 892 }
d7020c20 893 symval->class = token_sym;
a70083a3
AD
894 symval->type_name = typename;
895 symval->user_token_number = usrtoknum;
1ff442ca 896
a70083a3
AD
897 symval->alias = symbol;
898 symbol->alias = symval;
1ff442ca 899
79282c5a
AD
900 /* symbol and symval combined are only one symbol. */
901 nsyms--;
a70083a3 902}
3cef001a 903
11e2beca 904
b6610515 905static void
11d82f03 906parse_muscle_decl (void)
b6610515
RA
907{
908 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
909 char *muscle_key;
910 char *muscle_value;
b6610515
RA
911
912 /* Read key. */
913 if (!isalpha (ch) && ch != '_')
914 {
915 complain (_("invalid %s declaration"), "%define");
916 skip_to_char ('%');
917 return;
918 }
11d82f03
MA
919 copy_identifier (finput, &muscle_obstack);
920 obstack_1grow (&muscle_obstack, 0);
921 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 922
b6610515
RA
923 /* Read value. */
924 ch = skip_white_space ();
925 if (ch != '"')
926 {
927 ungetc (ch, finput);
928 if (ch != EOF)
929 {
930 complain (_("invalid %s declaration"), "%define");
931 skip_to_char ('%');
932 return;
933 }
934 else
935 fatal (_("Premature EOF after %s"), "\"");
936 }
11d82f03
MA
937 copy_string2 (finput, &muscle_obstack, '"', 0);
938 obstack_1grow (&muscle_obstack, 0);
939 muscle_value = obstack_finish (&muscle_obstack);
b6610515 940
b6610515 941 /* Store the (key, value) pair in the environment. */
11d82f03 942 muscle_insert (muscle_key, muscle_value);
b6610515
RA
943}
944
2ba3b73c 945
426cf563
MA
946
947/*---------------------------------.
a870c567 948| Parse a double quoted parameter. |
426cf563
MA
949`---------------------------------*/
950
951static const char *
952parse_dquoted_param (const char *from)
953{
954 struct obstack param_obstack;
955 const char *param = NULL;
956 int c;
957
958 obstack_init (&param_obstack);
959 c = skip_white_space ();
960
961 if (c != '"')
962 {
963 complain (_("invalid %s declaration"), from);
964 ungetc (c, finput);
965 skip_to_char ('%');
966 return NULL;
967 }
968
2648a72d
AD
969 while ((c = literalchar ()) != '"')
970 obstack_1grow (&param_obstack, c);
a870c567 971
426cf563
MA
972 obstack_1grow (&param_obstack, '\0');
973 param = obstack_finish (&param_obstack);
974
975 if (c != '"' || strlen (param) == 0)
976 {
977 complain (_("invalid %s declaration"), from);
978 if (c != '"')
979 ungetc (c, finput);
980 skip_to_char ('%');
981 return NULL;
982 }
983
984 return param;
985}
986
2ba3b73c
MA
987/*----------------------------------.
988| Parse what comes after %skeleton. |
989`----------------------------------*/
990
a870c567 991static void
2ba3b73c
MA
992parse_skel_decl (void)
993{
426cf563 994 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
995}
996
a70083a3
AD
997/*----------------------------------------------------------------.
998| Read from finput until `%%' is seen. Discard the `%%'. Handle |
999| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 1000| groups to ATTRS_OBSTACK. |
a70083a3 1001`----------------------------------------------------------------*/
1ff442ca 1002
4a120d45 1003static void
a70083a3 1004read_declarations (void)
1ff442ca 1005{
a70083a3 1006 for (;;)
1ff442ca 1007 {
951366c1 1008 int c = skip_white_space ();
1ff442ca 1009
a70083a3
AD
1010 if (c == '%')
1011 {
951366c1 1012 token_t tok = parse_percent_token ();
1ff442ca 1013
a70083a3 1014 switch (tok)
943819bf 1015 {
511e79b3 1016 case tok_two_percents:
a70083a3 1017 return;
1ff442ca 1018
511e79b3 1019 case tok_percent_left_curly:
a70083a3
AD
1020 copy_definition ();
1021 break;
1ff442ca 1022
511e79b3 1023 case tok_token:
d7020c20 1024 parse_token_decl (token_sym, nterm_sym);
a70083a3 1025 break;
1ff442ca 1026
511e79b3 1027 case tok_nterm:
d7020c20 1028 parse_token_decl (nterm_sym, token_sym);
a70083a3 1029 break;
1ff442ca 1030
511e79b3 1031 case tok_type:
a70083a3
AD
1032 parse_type_decl ();
1033 break;
1ff442ca 1034
511e79b3 1035 case tok_start:
a70083a3
AD
1036 parse_start_decl ();
1037 break;
118fb205 1038
511e79b3 1039 case tok_union:
a70083a3
AD
1040 parse_union_decl ();
1041 break;
1ff442ca 1042
511e79b3 1043 case tok_expect:
a70083a3
AD
1044 parse_expect_decl ();
1045 break;
6deb4447 1046
511e79b3 1047 case tok_thong:
a70083a3
AD
1048 parse_thong_decl ();
1049 break;
d7020c20 1050
511e79b3 1051 case tok_left:
d7020c20 1052 parse_assoc_decl (left_assoc);
a70083a3 1053 break;
1ff442ca 1054
511e79b3 1055 case tok_right:
d7020c20 1056 parse_assoc_decl (right_assoc);
a70083a3 1057 break;
1ff442ca 1058
511e79b3 1059 case tok_nonassoc:
d7020c20 1060 parse_assoc_decl (non_assoc);
a70083a3 1061 break;
1ff442ca 1062
b6610515 1063 case tok_define:
11d82f03 1064 parse_muscle_decl ();
b6610515 1065 break;
342b8b6e 1066
2ba3b73c
MA
1067 case tok_skel:
1068 parse_skel_decl ();
1069 break;
b6610515 1070
511e79b3 1071 case tok_noop:
a70083a3 1072 break;
1ff442ca 1073
951366c1
AD
1074 case tok_stropt:
1075 case tok_intopt:
1076 case tok_obsolete:
951366c1
AD
1077 abort ();
1078 break;
1079
e0c40012 1080 case tok_illegal:
a70083a3
AD
1081 default:
1082 complain (_("unrecognized: %s"), token_buffer);
1083 skip_to_char ('%');
1084 }
1085 }
1086 else if (c == EOF)
1087 fatal (_("no input grammar"));
1088 else
1089 {
ff4a34be
AD
1090 char buf[] = "c";
1091 buf[0] = c;
1092 complain (_("unknown character: %s"), quote (buf));
a70083a3 1093 skip_to_char ('%');
1ff442ca 1094 }
1ff442ca 1095 }
1ff442ca 1096}
a70083a3
AD
1097\f
1098/*-------------------------------------------------------------------.
1099| Assuming that a `{' has just been seen, copy everything up to the |
1100| matching `}' into the actions file. STACK_OFFSET is the number of |
1101| values in the current rule so far, which says where to find `$0' |
1102| with respect to the top of the stack. |
14d293ac 1103| |
11e2beca
AD
1104| This routine is used both for actions and guards. Only |
1105| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1106| pointers to relevant portions inside this obstack. |
a70083a3 1107`-------------------------------------------------------------------*/
1ff442ca 1108
4a120d45 1109static void
14d293ac 1110parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1111{
a70083a3 1112 int c;
a70083a3 1113 int count;
1ff442ca 1114
1ff442ca 1115 count = 1;
1ff442ca
NF
1116 while (count > 0)
1117 {
14d293ac
AD
1118 while ((c = getc (finput)) != '}')
1119 switch (c)
1120 {
1121 case '\n':
1122 obstack_1grow (&action_obstack, c);
1123 lineno++;
1124 break;
1ff442ca 1125
14d293ac
AD
1126 case '{':
1127 obstack_1grow (&action_obstack, c);
1128 count++;
1129 break;
1ff442ca 1130
14d293ac
AD
1131 case '\'':
1132 case '"':
1133 copy_string (finput, &action_obstack, c);
1134 break;
1ff442ca 1135
14d293ac
AD
1136 case '/':
1137 copy_comment (finput, &action_obstack);
1138 break;
1ff442ca 1139
14d293ac
AD
1140 case '$':
1141 copy_dollar (finput, &action_obstack,
1142 rule, stack_offset);
1143 break;
1ff442ca 1144
14d293ac
AD
1145 case '@':
1146 copy_at (finput, &action_obstack,
1147 stack_offset);
1148 break;
a70083a3 1149
14d293ac
AD
1150 case EOF:
1151 fatal (_("unmatched %s"), "`{'");
a70083a3 1152
14d293ac
AD
1153 default:
1154 obstack_1grow (&action_obstack, c);
1155 }
a70083a3 1156
14d293ac 1157 /* Above loop exits when C is '}'. */
a70083a3
AD
1158 if (--count)
1159 {
8c7ebe49 1160 obstack_1grow (&action_obstack, c);
a70083a3
AD
1161 c = getc (finput);
1162 }
1163 }
1164
3f96f4dc 1165 obstack_1grow (&action_obstack, '\0');
a70083a3 1166}
14d293ac 1167
a70083a3
AD
1168
1169static void
14d293ac 1170parse_action (symbol_list *rule, int stack_offset)
a70083a3 1171{
14d293ac
AD
1172 rule->action_line = lineno;
1173 parse_braces (rule, stack_offset);
1174 rule->action = obstack_finish (&action_obstack);
1175}
a70083a3 1176
a70083a3 1177
14d293ac
AD
1178static void
1179parse_guard (symbol_list *rule, int stack_offset)
1180{
1181 token_t t = lex ();
1182 if (t != tok_left_curly)
1183 complain (_("invalid %s declaration"), "%guard");
f499b062 1184 rule->guard_line = lineno;
14d293ac
AD
1185 parse_braces (rule, stack_offset);
1186 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1187}
14d293ac 1188
a70083a3
AD
1189\f
1190
a70083a3
AD
1191/*-------------------------------------------------------------------.
1192| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1193| with the user's names. |
1194`-------------------------------------------------------------------*/
1ff442ca 1195
4a120d45 1196static bucket *
118fb205 1197gensym (void)
1ff442ca 1198{
274d42ce
AD
1199 /* Incremented for each generated symbol */
1200 static int gensym_count = 0;
1201 static char buf[256];
1202
a70083a3 1203 bucket *sym;
1ff442ca 1204
274d42ce
AD
1205 sprintf (buf, "@%d", ++gensym_count);
1206 token_buffer = buf;
a70083a3 1207 sym = getsym (token_buffer);
d7020c20 1208 sym->class = nterm_sym;
1ff442ca 1209 sym->value = nvars++;
36281465 1210 return sym;
1ff442ca 1211}
a70083a3 1212\f
107f7dfb
AD
1213/*-------------------------------------------------------------------.
1214| Parse the input grammar into a one symbol_list structure. Each |
1215| rule is represented by a sequence of symbols: the left hand side |
1216| followed by the contents of the right hand side, followed by a |
1217| null pointer instead of a symbol to terminate the rule. The next |
1218| symbol is the lhs of the following rule. |
1219| |
1220| All guards and actions are copied out to the appropriate files, |
1221| labelled by the rule number they apply to. |
1222| |
1223| Bison used to allow some %directives in the rules sections, but |
1224| this is no longer consider appropriate: (i) the documented grammar |
1225| doesn't claim it, (ii), it would promote bad style, (iii), error |
1226| recovery for %directives consists in skipping the junk until a `%' |
1227| is seen and helrp synchronizing. This scheme is definitely wrong |
1228| in the rules section. |
1229`-------------------------------------------------------------------*/
1ff442ca 1230
4a120d45 1231static void
118fb205 1232readgram (void)
1ff442ca 1233{
f17bcd1f 1234 token_t t;
a70083a3 1235 bucket *lhs = NULL;
107f7dfb
AD
1236 symbol_list *p = NULL;
1237 symbol_list *p1 = NULL;
a70083a3 1238 bucket *bp;
1ff442ca 1239
ff4a34be
AD
1240 /* Points to first symbol_list of current rule. its symbol is the
1241 lhs of the rule. */
107f7dfb 1242 symbol_list *crule = NULL;
ff4a34be 1243 /* Points to the symbol_list preceding crule. */
107f7dfb 1244 symbol_list *crule1 = NULL;
1ff442ca 1245
a70083a3 1246 t = lex ();
1ff442ca 1247
511e79b3 1248 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1249 if (t == tok_identifier || t == tok_bar)
1250 {
1251 int action_flag = 0;
1252 /* Number of symbols in rhs of this rule so far */
1253 int rulelength = 0;
1254 int xactions = 0; /* JF for error checking */
1255 bucket *first_rhs = 0;
1256
1257 if (t == tok_identifier)
1258 {
1259 lhs = symval;
1260
1261 if (!start_flag)
1262 {
1263 startval = lhs;
1264 start_flag = 1;
1265 }
1ff442ca 1266
107f7dfb
AD
1267 t = lex ();
1268 if (t != tok_colon)
1269 {
1270 complain (_("ill-formed rule: initial symbol not followed by colon"));
1271 unlex (t);
1272 }
1273 }
1274
1275 if (nrules == 0 && t == tok_bar)
1276 {
1277 complain (_("grammar starts with vertical bar"));
1278 lhs = symval; /* BOGUS: use a random symval */
1279 }
1280 /* start a new rule and record its lhs. */
1281
1282 nrules++;
1283 nitems++;
1284
1285 p = symbol_list_new (lhs);
1286
1287 crule1 = p1;
1288 if (p1)
1289 p1->next = p;
1290 else
1291 grammar = p;
1ff442ca 1292
107f7dfb
AD
1293 p1 = p;
1294 crule = p;
1ff442ca 1295
107f7dfb 1296 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1297
107f7dfb
AD
1298 if (lhs->class == unknown_sym)
1299 {
1300 lhs->class = nterm_sym;
1301 lhs->value = nvars;
1302 nvars++;
1303 }
1304 else if (lhs->class == token_sym)
1305 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1306
107f7dfb 1307 /* read the rhs of the rule. */
1ff442ca 1308
107f7dfb
AD
1309 for (;;)
1310 {
1311 t = lex ();
1312 if (t == tok_prec)
1313 {
1314 t = lex ();
1315 crule->ruleprec = symval;
1316 t = lex ();
1317 }
1318
1319 if (!(t == tok_identifier || t == tok_left_curly))
1320 break;
1ff442ca 1321
107f7dfb
AD
1322 /* If next token is an identifier, see if a colon follows it.
1323 If one does, exit this rule now. */
1324 if (t == tok_identifier)
1325 {
1326 bucket *ssave;
1327 token_t t1;
1328
1329 ssave = symval;
1330 t1 = lex ();
1331 unlex (t1);
1332 symval = ssave;
1333 if (t1 == tok_colon)
1334 break;
1335
1336 if (!first_rhs) /* JF */
1337 first_rhs = symval;
1338 /* Not followed by colon =>
1339 process as part of this rule's rhs. */
1340 }
1341
1342 /* If we just passed an action, that action was in the middle
1343 of a rule, so make a dummy rule to reduce it to a
1344 non-terminal. */
1345 if (action_flag)
1346 {
1347 /* Since the action was written out with this rule's
1348 number, we must give the new rule this number by
1349 inserting the new rule before it. */
1350
1351 /* Make a dummy nonterminal, a gensym. */
1352 bucket *sdummy = gensym ();
1353
1354 /* Make a new rule, whose body is empty, before the
1355 current one, so that the action just read can
1356 belong to it. */
1357 nrules++;
1358 nitems++;
1359 p = symbol_list_new (sdummy);
1360 /* Attach its lineno to that of the host rule. */
1361 p->line = crule->line;
1362 if (crule1)
1363 crule1->next = p;
1364 else
1365 grammar = p;
1366 /* End of the rule. */
1367 crule1 = symbol_list_new (NULL);
1368 crule1->next = crule;
1369
1370 p->next = crule1;
1371
1372 /* Insert the dummy generated by that rule into this
1373 rule. */
1374 nitems++;
1375 p = symbol_list_new (sdummy);
1376 p1->next = p;
1377 p1 = p;
1378
1379 action_flag = 0;
1380 }
1381
1382 if (t == tok_identifier)
1383 {
1384 nitems++;
1385 p = symbol_list_new (symval);
1386 p1->next = p;
1387 p1 = p;
1388 }
1389 else /* handle an action. */
1390 {
14d293ac 1391 parse_action (crule, rulelength);
107f7dfb
AD
1392 action_flag = 1;
1393 xactions++; /* JF */
1394 }
1395 rulelength++;
1396 } /* end of read rhs of rule */
1397
1398 /* Put an empty link in the list to mark the end of this rule */
1399 p = symbol_list_new (NULL);
1400 p1->next = p;
1401 p1 = p;
1402
1403 if (t == tok_prec)
1404 {
1405 complain (_("two @prec's in a row"));
1406 t = lex ();
1407 crule->ruleprec = symval;
1408 t = lex ();
1409 }
f499b062 1410
107f7dfb
AD
1411 if (t == tok_guard)
1412 {
1413 if (!semantic_parser)
1414 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1415
14d293ac 1416 parse_guard (crule, rulelength);
a70083a3 1417 t = lex ();
107f7dfb 1418 }
f499b062
AD
1419
1420 if (t == tok_left_curly)
107f7dfb
AD
1421 {
1422 /* This case never occurs -wjh */
1423 if (action_flag)
1424 complain (_("two actions at end of one rule"));
14d293ac 1425 parse_action (crule, rulelength);
107f7dfb
AD
1426 action_flag = 1;
1427 xactions++; /* -wjh */
1428 t = lex ();
1429 }
1430 /* If $$ is being set in default way, report if any type
1431 mismatch. */
1432 else if (!xactions
1433 && first_rhs && lhs->type_name != first_rhs->type_name)
1434 {
1435 if (lhs->type_name == 0
1436 || first_rhs->type_name == 0
1437 || strcmp (lhs->type_name, first_rhs->type_name))
1438 complain (_("type clash (`%s' `%s') on default action"),
1439 lhs->type_name ? lhs->type_name : "",
1440 first_rhs->type_name ? first_rhs->type_name : "");
1441 }
1442 /* Warn if there is no default for $$ but we need one. */
1443 else if (!xactions && !first_rhs && lhs->type_name != 0)
1444 complain (_("empty rule for typed nonterminal, and no action"));
1445 if (t == tok_semicolon)
a70083a3 1446 t = lex ();
107f7dfb
AD
1447 }
1448 else
1449 {
1450 complain (_("invalid input: %s"), quote (token_buffer));
1451 t = lex ();
1452 }
943819bf 1453
ff442794
AD
1454 /* Insert the initial rule, which line is that of the first rule
1455 (not that of the start symbol):
30171f79
AD
1456
1457 axiom: %start EOF. */
1458 p = symbol_list_new (axiom);
ff442794 1459 p->line = grammar->line;
30171f79
AD
1460 p->next = symbol_list_new (startval);
1461 p->next->next = symbol_list_new (eoftoken);
1462 p->next->next->next = symbol_list_new (NULL);
1463 p->next->next->next->next = grammar;
1464 nrules += 1;
1465 nitems += 3;
1466 grammar = p;
1467 startval = axiom;
1ff442ca 1468
943819bf
RS
1469 /* grammar has been read. Do some checking */
1470
1ff442ca 1471 if (nsyms > MAXSHORT)
a0f6b076
AD
1472 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1473 MAXSHORT);
1ff442ca 1474 if (nrules == 0)
a0f6b076 1475 fatal (_("no rules in the input grammar"));
1ff442ca 1476
1ff442ca
NF
1477 /* Report any undefined symbols and consider them nonterminals. */
1478
1479 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1480 if (bp->class == unknown_sym)
1ff442ca 1481 {
a70083a3
AD
1482 complain (_
1483 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1484 bp->tag);
d7020c20 1485 bp->class = nterm_sym;
1ff442ca
NF
1486 bp->value = nvars++;
1487 }
1488
1489 ntokens = nsyms - nvars;
1490}
ff48177d
MA
1491
1492/* At the end of the grammar file, some C source code must
63c2d5de 1493 be stored. It is going to be associated to the epilogue
ff48177d
MA
1494 directive. */
1495static void
1496read_additionnal_code (void)
1497{
1498 char c;
63c2d5de 1499 struct obstack el_obstack;
342b8b6e 1500
63c2d5de 1501 obstack_init (&el_obstack);
ff48177d 1502
710ddc4f
MA
1503 if (!no_lines_flag)
1504 {
1505 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1506 lineno, quotearg_style (c_quoting_style,
b7c49edf 1507 muscle_find ("filename")));
710ddc4f
MA
1508 }
1509
ff48177d 1510 while ((c = getc (finput)) != EOF)
63c2d5de 1511 obstack_1grow (&el_obstack, c);
342b8b6e 1512
63c2d5de 1513 obstack_1grow (&el_obstack, 0);
11d82f03 1514 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1515}
1516
a70083a3 1517\f
037ca2f1
AD
1518/*------------------------------------------------------------------.
1519| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1520| number. |
1521`------------------------------------------------------------------*/
1522
1523static void
1524token_translations_init (void)
1525{
1526 bucket *bp = NULL;
1527 int i;
1528
1529 token_translations = XCALLOC (short, max_user_token_number + 1);
1530
1531 /* Initialize all entries for literal tokens to 2, the internal
1532 token number for $undefined., which represents all invalid
1533 inputs. */
1534 for (i = 0; i <= max_user_token_number; i++)
1535 token_translations[i] = 2;
1536
1537 for (bp = firstsymbol; bp; bp = bp->next)
1538 {
1539 /* Non-terminal? */
1540 if (bp->value >= ntokens)
1541 continue;
1542 /* A token string alias? */
1543 if (bp->user_token_number == SALIAS)
1544 continue;
6b7e85b9
AD
1545
1546 assert (bp->user_token_number != SUNDEF);
1547
037ca2f1
AD
1548 /* A token which translation has already been set? */
1549 if (token_translations[bp->user_token_number] != 2)
1550 complain (_("tokens %s and %s both assigned number %d"),
1551 tags[token_translations[bp->user_token_number]],
1552 bp->tag, bp->user_token_number);
1553 token_translations[bp->user_token_number] = bp->value;
1554 }
1555}
1556
1557
a70083a3
AD
1558/*------------------------------------------------------------------.
1559| Assign symbol numbers, and write definition of token names into |
b2ca4022 1560| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1561| of symbols. |
1562`------------------------------------------------------------------*/
1ff442ca 1563
4a120d45 1564static void
118fb205 1565packsymbols (void)
1ff442ca 1566{
342b8b6e 1567 bucket *bp = NULL;
a70083a3 1568 int tokno = 1;
a70083a3 1569 int last_user_token_number;
1ff442ca 1570
d7913476 1571 tags = XCALLOC (char *, nsyms + 1);
d7913476 1572 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1573
d7913476
AD
1574 sprec = XCALLOC (short, nsyms);
1575 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1576
1577 max_user_token_number = 256;
1578 last_user_token_number = 256;
1579
1580 for (bp = firstsymbol; bp; bp = bp->next)
1581 {
d7020c20 1582 if (bp->class == nterm_sym)
1ff442ca
NF
1583 {
1584 bp->value += ntokens;
1585 }
943819bf
RS
1586 else if (bp->alias)
1587 {
b7c49edf
AD
1588 /* This symbol and its alias are a single token defn.
1589 Allocate a tokno, and assign to both check agreement of
1590 prec and assoc fields and make both the same */
1591 if (bp->value == -1)
1592 {
1593 if (bp == eoftoken || bp->alias == eoftoken)
1594 bp->value = bp->alias->value = 0;
1595 else
1596 {
1597 bp->value = bp->alias->value = tokno++;
1598 }
1599 }
943819bf 1600
0a6384c4
AD
1601 if (bp->prec != bp->alias->prec)
1602 {
1603 if (bp->prec != 0 && bp->alias->prec != 0
1604 && bp->user_token_number == SALIAS)
a0f6b076
AD
1605 complain (_("conflicting precedences for %s and %s"),
1606 bp->tag, bp->alias->tag);
0a6384c4
AD
1607 if (bp->prec != 0)
1608 bp->alias->prec = bp->prec;
1609 else
1610 bp->prec = bp->alias->prec;
1611 }
943819bf 1612
0a6384c4
AD
1613 if (bp->assoc != bp->alias->assoc)
1614 {
a0f6b076
AD
1615 if (bp->assoc != 0 && bp->alias->assoc != 0
1616 && bp->user_token_number == SALIAS)
1617 complain (_("conflicting assoc values for %s and %s"),
1618 bp->tag, bp->alias->tag);
1619 if (bp->assoc != 0)
1620 bp->alias->assoc = bp->assoc;
1621 else
1622 bp->assoc = bp->alias->assoc;
1623 }
0a6384c4 1624
b7c49edf 1625 /* Do not do processing below for SALIASs. */
0a6384c4 1626 if (bp->user_token_number == SALIAS)
b7c49edf 1627 continue;
943819bf 1628
a70083a3 1629 }
b7c49edf 1630 else /* bp->class == token_sym */
943819bf 1631 {
b7c49edf
AD
1632 if (bp == eoftoken)
1633 bp->value = 0;
1634 else
1635 bp->value = tokno++;
943819bf
RS
1636 }
1637
d7020c20 1638 if (bp->class == token_sym)
1ff442ca 1639 {
6b7e85b9 1640 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1641 bp->user_token_number = ++last_user_token_number;
1642 if (bp->user_token_number > max_user_token_number)
1643 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1644 }
1645
1646 tags[bp->value] = bp->tag;
943819bf 1647 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1648 sprec[bp->value] = bp->prec;
1649 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1650 }
1651
037ca2f1 1652 token_translations_init ();
1ff442ca
NF
1653
1654 error_token_number = errtoken->value;
1655
e3f1699f
AD
1656 if (startval->class == unknown_sym)
1657 fatal (_("the start symbol %s is undefined"), startval->tag);
1658 else if (startval->class == token_sym)
1659 fatal (_("the start symbol %s is a token"), startval->tag);
1660
1661 start_symbol = startval->value;
1662}
1663
1664
93ede233
AD
1665/*---------------------------------------------------------------.
1666| Save the definition of token names in the `TOKENDEFS' muscle. |
1667`---------------------------------------------------------------*/
e3f1699f
AD
1668
1669static void
93ede233 1670symbols_save (void)
e3f1699f 1671{
93ede233
AD
1672 struct obstack tokendefs;
1673 bucket *bp;
93ede233
AD
1674 obstack_init (&tokendefs);
1675
1676 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1677 {
ec2da99f 1678 char *symbol = bp->tag; /* get symbol */
1ff442ca 1679
93ede233
AD
1680 if (bp->value >= ntokens)
1681 continue;
1682 if (bp->user_token_number == SALIAS)
1683 continue;
1684 if ('\'' == *symbol)
1685 continue; /* skip literal character */
1686 if (bp == errtoken)
1687 continue; /* skip error token */
1688 if ('\"' == *symbol)
037ca2f1 1689 {
93ede233
AD
1690 /* use literal string only if given a symbol with an alias */
1691 if (bp->alias)
1692 symbol = bp->alias->tag;
1693 else
1694 continue;
037ca2f1 1695 }
93ede233
AD
1696
1697 /* Don't #define nonliteral tokens whose names contain periods. */
ec2da99f 1698 if (strchr (symbol, '.'))
93ede233
AD
1699 continue;
1700
7742ddeb 1701 obstack_fgrow2 (&tokendefs, "# define %s\t%d\n",
93ede233
AD
1702 symbol, bp->user_token_number);
1703 if (semantic_parser)
1704 /* FIXME: This is probably wrong, and should be just as
1705 above. --akim. */
7742ddeb 1706 obstack_fgrow2 (&tokendefs, "# define T%s\t%d\n", symbol, bp->value);
1ff442ca 1707 }
93ede233
AD
1708
1709 obstack_1grow (&tokendefs, 0);
1710 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1711 obstack_free (&tokendefs, NULL);
1ff442ca 1712}
a083fbbf 1713
1ff442ca 1714
a70083a3
AD
1715/*---------------------------------------------------------------.
1716| Convert the rules into the representation using RRHS, RLHS and |
1717| RITEMS. |
1718`---------------------------------------------------------------*/
1ff442ca 1719
4a120d45 1720static void
118fb205 1721packgram (void)
1ff442ca 1722{
a70083a3
AD
1723 int itemno;
1724 int ruleno;
1725 symbol_list *p;
1ff442ca 1726
adc8c848
AD
1727 /* We use short to index items. */
1728 if (nitems >= MAXSHORT)
1729 fatal (_("too many items (max %d)"), MAXSHORT);
1730
d7913476 1731 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1732 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1733
1734 itemno = 0;
1735 ruleno = 1;
1736
1737 p = grammar;
1738 while (p)
1739 {
b29b2ed5 1740 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1741 rule_table[ruleno].lhs = p->sym->value;
1742 rule_table[ruleno].rhs = itemno;
b29b2ed5 1743 rule_table[ruleno].line = p->line;
68f1e3ed 1744 rule_table[ruleno].useful = TRUE;
3f96f4dc
AD
1745 rule_table[ruleno].action = p->action;
1746 rule_table[ruleno].action_line = p->action_line;
f499b062
AD
1747 rule_table[ruleno].guard = p->guard;
1748 rule_table[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1749
1750 p = p->next;
1751 while (p && p->sym)
1752 {
1753 ritem[itemno++] = p->sym->value;
1754 /* A rule gets by default the precedence and associativity
1755 of the last token in it. */
d7020c20 1756 if (p->sym->class == token_sym)
1ff442ca 1757 {
652a871c
AD
1758 rule_table[ruleno].prec = p->sym->prec;
1759 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1760 }
a70083a3
AD
1761 if (p)
1762 p = p->next;
1ff442ca
NF
1763 }
1764
1765 /* If this rule has a %prec,
a70083a3 1766 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1767 if (ruleprec)
1768 {
652a871c
AD
1769 rule_table[ruleno].prec = ruleprec->prec;
1770 rule_table[ruleno].assoc = ruleprec->assoc;
1771 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1772 }
1773
1774 ritem[itemno++] = -ruleno;
1775 ruleno++;
1776
a70083a3
AD
1777 if (p)
1778 p = p->next;
1ff442ca
NF
1779 }
1780
1781 ritem[itemno] = 0;
75142d45
AD
1782 nritems = itemno;
1783 assert (nritems == nitems);
3067fbef
AD
1784
1785 if (trace_flag)
1786 ritem_print (stderr);
1ff442ca 1787}
a70083a3
AD
1788\f
1789/*-------------------------------------------------------------------.
1790| Read in the grammar specification and record it in the format |
ea5607fd 1791| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1792| and all actions into ACTION_OBSTACK, in each case forming the body |
1793| of a C function (YYGUARD or YYACTION) which contains a switch |
1794| statement to decide which guard or action to execute. |
a70083a3
AD
1795`-------------------------------------------------------------------*/
1796
1797void
1798reader (void)
1799{
1800 start_flag = 0;
1801 startval = NULL; /* start symbol not specified yet. */
1802
b7c49edf 1803 nsyms = 0;
a70083a3
AD
1804 nvars = 0;
1805 nrules = 0;
1806 nitems = 0;
a70083a3
AD
1807
1808 typed = 0;
1809 lastprec = 0;
1810
a70083a3
AD
1811 semantic_parser = 0;
1812 pure_parser = 0;
a70083a3
AD
1813
1814 grammar = NULL;
1815
342b8b6e 1816 lex_init ();
a70083a3
AD
1817 lineno = 1;
1818
11d82f03
MA
1819 /* Initialize the muscle obstack. */
1820 obstack_init (&muscle_obstack);
82e236e2 1821
a70083a3
AD
1822 /* Initialize the symbol table. */
1823 tabinit ();
b6610515 1824
30171f79
AD
1825 /* Construct the axiom symbol. */
1826 axiom = getsym ("$axiom");
1827 axiom->class = nterm_sym;
1828 axiom->value = nvars++;
1829
a70083a3
AD
1830 /* Construct the error token */
1831 errtoken = getsym ("error");
d7020c20 1832 errtoken->class = token_sym;
a70083a3 1833 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1834
a70083a3
AD
1835 /* Construct a token that represents all undefined literal tokens.
1836 It is always token number 2. */
1837 undeftoken = getsym ("$undefined.");
d7020c20 1838 undeftoken->class = token_sym;
a70083a3
AD
1839 undeftoken->user_token_number = 2;
1840
331dbc1b
AD
1841 /* Initialize the obstacks. */
1842 obstack_init (&action_obstack);
1843 obstack_init (&attrs_obstack);
331dbc1b
AD
1844 obstack_init (&output_obstack);
1845
1846 finput = xfopen (infile, "r");
1847
896fe5c1
AD
1848 /* Read the declaration section. Copy %{ ... %} groups to
1849 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1850 etc. found there. */
a70083a3 1851 read_declarations ();
b7c49edf
AD
1852
1853 /* If the user did not define her EOFTOKEN, do it now. */
1854 if (!eoftoken)
1855 {
1856 eoftoken = getsym ("$");
1857 eoftoken->class = token_sym;
1858 /* Value specified by POSIX. */
1859 eoftoken->user_token_number = 0;
1860 }
1861
a70083a3
AD
1862 /* Read in the grammar, build grammar in list form. Write out
1863 guards and actions. */
1864 readgram ();
ff48177d
MA
1865 /* Some C code is given at the end of the grammar file. */
1866 read_additionnal_code ();
b0c4483e 1867
331dbc1b
AD
1868 lex_free ();
1869 xfclose (finput);
1870
a70083a3
AD
1871 /* Assign the symbols their symbol numbers. Write #defines for the
1872 token symbols into FDEFINES if requested. */
1873 packsymbols ();
93ede233
AD
1874 /* Save them. */
1875 symbols_save ();
1876
a70083a3
AD
1877 /* Convert the grammar into the format described in gram.h. */
1878 packgram ();
a70083a3 1879}