]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/reader.c (symbol_list_new): Be sure to initialize all the
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
41 bucket *sym;
b29b2ed5 42 int line;
3f96f4dc
AD
43 /* The action is attached to the LHS of a rule. */
44 const char *action;
45 int action_line;
a70083a3 46 bucket *ruleprec;
d945f5cd 47} symbol_list;
118fb205 48
1ff442ca 49int lineno;
1ff442ca 50char **tags;
d019d655 51short *user_toknums;
4a120d45
JT
52static symbol_list *grammar;
53static int start_flag;
54static bucket *startval;
1ff442ca
NF
55
56/* Nonzero if components of semantic values are used, implying
57 they must be unions. */
58static int value_components_used;
59
d7020c20
AD
60/* Nonzero if %union has been seen. */
61static int typed;
1ff442ca 62
d7020c20
AD
63/* Incremented for each %left, %right or %nonassoc seen */
64static int lastprec;
1ff442ca 65
1ff442ca 66static bucket *errtoken;
5b2e3c89 67static bucket *undeftoken;
b29b2ed5
AD
68
69
6255b435 70static symbol_list *
b29b2ed5
AD
71symbol_list_new (bucket *sym)
72{
73 symbol_list *res = XMALLOC (symbol_list, 1);
74 res->next = NULL;
75 res->sym = sym;
76 res->line = lineno;
d945f5cd
AD
77 res->action = NULL;
78 res->action_line = 0;
b29b2ed5
AD
79 res->ruleprec = NULL;
80 return res;
81}
82
0d533154 83\f
a70083a3 84
0d533154
AD
85/*===================\
86| Low level lexing. |
87\===================*/
943819bf
RS
88
89static void
118fb205 90skip_to_char (int target)
943819bf
RS
91{
92 int c;
93 if (target == '\n')
a0f6b076 94 complain (_(" Skipping to next \\n"));
943819bf 95 else
a0f6b076 96 complain (_(" Skipping to next %c"), target);
943819bf
RS
97
98 do
0d533154 99 c = skip_white_space ();
943819bf 100 while (c != target && c != EOF);
a083fbbf 101 if (c != EOF)
0d533154 102 ungetc (c, finput);
943819bf
RS
103}
104
105
0d533154
AD
/*-------------------------------------------------------------------.
| Read a decimal integer, optionally preceded by `-', from STREAM    |
| and return its value.  The first character that is not part of     |
| the number is pushed back onto STREAM.  If no digit is found the   |
| result is 0.  A magnitude that does not fit in an int saturates    |
| at INT_MAX instead of overflowing (signed overflow is undefined).  |
`-------------------------------------------------------------------*/

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would 10 * n + digit exceed INT_MAX?  Once saturated, N stays
         at INT_MAX while the remaining digits are consumed.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  ungetc (c, stream);

  /* N is in [0, INT_MAX], so the product cannot overflow.  */
  return sign * n;
}
133\f
79282c5a
AD
134/*--------------------------------------------------------------.
135| Get the data type (alternative in the union) of the value for |
136| symbol N in rule RULE. |
137`--------------------------------------------------------------*/
138
139static char *
b29b2ed5 140get_type_name (int n, symbol_list *rule)
79282c5a
AD
141{
142 int i;
143 symbol_list *rp;
144
145 if (n < 0)
146 {
147 complain (_("invalid $ value"));
148 return NULL;
149 }
150
151 rp = rule;
152 i = 0;
153
154 while (i < n)
155 {
156 rp = rp->next;
157 if (rp == NULL || rp->sym == NULL)
158 {
159 complain (_("invalid $ value"));
160 return NULL;
161 }
162 i++;
163 }
164
165 return rp->sym->type_name;
166}
167\f
337bab46
AD
168/*------------------------------------------------------------.
169| Dump the string from FIN to OOUT if non null. MATCH is the |
170| delimiter of the string (either ' or "). |
171`------------------------------------------------------------*/
ae3c3164
AD
172
173static inline void
b6610515 174copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
175{
176 int c;
177
b6610515
RA
178 if (store)
179 obstack_1grow (oout, match);
8c7ebe49 180
4a120d45 181 c = getc (fin);
ae3c3164
AD
182
183 while (c != match)
184 {
185 if (c == EOF)
186 fatal (_("unterminated string at end of file"));
187 if (c == '\n')
188 {
a0f6b076 189 complain (_("unterminated string"));
4a120d45 190 ungetc (c, fin);
ae3c3164
AD
191 c = match; /* invent terminator */
192 continue;
193 }
194
337bab46 195 obstack_1grow (oout, c);
ae3c3164
AD
196
197 if (c == '\\')
198 {
4a120d45 199 c = getc (fin);
ae3c3164
AD
200 if (c == EOF)
201 fatal (_("unterminated string at end of file"));
337bab46 202 obstack_1grow (oout, c);
8c7ebe49 203
ae3c3164
AD
204 if (c == '\n')
205 lineno++;
206 }
207
a70083a3 208 c = getc (fin);
ae3c3164
AD
209 }
210
b6610515
RA
211 if (store)
212 obstack_1grow (oout, c);
213}
214
/* Copy a string delimited by MATCH from FIN to OOUT, delimiters
   included.  The opening delimiter has already been read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int store_delimiters = 1;

  copy_string2 (fin, oout, match, store_delimiters);
}
222
b6610515
RA
/* Copy the longest run of alphanumeric characters and underscores
   from FIN to OOUT.  The character that ends the run is pushed back
   onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c = getc (fin);

  while (c == '_' || isalnum (c))
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
ae3c3164 235
2666f928
AD
236
237/*------------------------------------------------------------------.
238| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
239| `/', which might or might not be a comment. In any case, copy |
240| what we saw. |
241`------------------------------------------------------------------*/
ae3c3164
AD
242
243static inline void
2666f928 244copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
245{
246 int cplus_comment;
a70083a3 247 int ended;
550a72a3
AD
248 int c;
249
250 /* We read a `/', output it. */
2666f928 251 obstack_1grow (oout, '/');
550a72a3
AD
252
253 switch ((c = getc (fin)))
254 {
255 case '/':
256 cplus_comment = 1;
257 break;
258 case '*':
259 cplus_comment = 0;
260 break;
261 default:
262 ungetc (c, fin);
263 return;
264 }
ae3c3164 265
2666f928 266 obstack_1grow (oout, c);
550a72a3 267 c = getc (fin);
ae3c3164
AD
268
269 ended = 0;
270 while (!ended)
271 {
272 if (!cplus_comment && c == '*')
273 {
274 while (c == '*')
275 {
2666f928 276 obstack_1grow (oout, c);
550a72a3 277 c = getc (fin);
ae3c3164
AD
278 }
279
280 if (c == '/')
281 {
2666f928 282 obstack_1grow (oout, c);
ae3c3164
AD
283 ended = 1;
284 }
285 }
286 else if (c == '\n')
287 {
288 lineno++;
2666f928 289 obstack_1grow (oout, c);
ae3c3164
AD
290 if (cplus_comment)
291 ended = 1;
292 else
550a72a3 293 c = getc (fin);
ae3c3164
AD
294 }
295 else if (c == EOF)
296 fatal (_("unterminated comment"));
297 else
298 {
2666f928 299 obstack_1grow (oout, c);
550a72a3 300 c = getc (fin);
ae3c3164
AD
301 }
302 }
303}
304
305
a70083a3 306/*-----------------------------------------------------------------.
337bab46 307| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
308| reference to this location. STACK_OFFSET is the number of values |
309| in the current rule so far, which says where to find `$0' with |
310| respect to the top of the stack. |
311`-----------------------------------------------------------------*/
1ff442ca 312
a70083a3 313static inline void
337bab46 314copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 315{
a70083a3 316 int c;
1ff442ca 317
a70083a3
AD
318 c = getc (fin);
319 if (c == '$')
1ff442ca 320 {
ff4423cc 321 obstack_sgrow (oout, "yyloc");
89cab50d 322 locations_flag = 1;
a70083a3
AD
323 }
324 else if (isdigit (c) || c == '-')
325 {
326 int n;
1ff442ca 327
a70083a3
AD
328 ungetc (c, fin);
329 n = read_signed_integer (fin);
943819bf 330
337bab46 331 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 332 locations_flag = 1;
1ff442ca 333 }
a70083a3 334 else
ff4a34be
AD
335 {
336 char buf[] = "@c";
337 buf[1] = c;
338 complain (_("%s is invalid"), quote (buf));
339 }
1ff442ca 340}
79282c5a
AD
341
342
343/*-------------------------------------------------------------------.
344| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
345| |
346| Possible inputs: $[<TYPENAME>]($|integer) |
347| |
337bab46 348| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
349| the number of values in the current rule so far, which says where |
350| to find `$0' with respect to the top of the stack. |
351`-------------------------------------------------------------------*/
352
353static inline void
337bab46 354copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
355 symbol_list *rule, int stack_offset)
356{
357 int c = getc (fin);
b0ce6046 358 const char *type_name = NULL;
79282c5a 359
f282676b 360 /* Get the type name if explicit. */
79282c5a
AD
361 if (c == '<')
362 {
f282676b 363 read_type_name (fin);
79282c5a
AD
364 type_name = token_buffer;
365 value_components_used = 1;
79282c5a
AD
366 c = getc (fin);
367 }
368
369 if (c == '$')
370 {
ff4423cc 371 obstack_sgrow (oout, "yyval");
8c7ebe49 372
79282c5a
AD
373 if (!type_name)
374 type_name = get_type_name (0, rule);
375 if (type_name)
337bab46 376 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
377 if (!type_name && typed)
378 complain (_("$$ of `%s' has no declared type"),
379 rule->sym->tag);
380 }
381 else if (isdigit (c) || c == '-')
382 {
383 int n;
384 ungetc (c, fin);
385 n = read_signed_integer (fin);
386
387 if (!type_name && n > 0)
388 type_name = get_type_name (n, rule);
389
337bab46 390 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 391
79282c5a 392 if (type_name)
337bab46 393 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
394 if (!type_name && typed)
395 complain (_("$%d of `%s' has no declared type"),
396 n, rule->sym->tag);
397 }
398 else
399 {
400 char buf[] = "$c";
401 buf[1] = c;
402 complain (_("%s is invalid"), quote (buf));
403 }
404}
a70083a3
AD
405\f
406/*-------------------------------------------------------------------.
407| Copy the contents of a `%{ ... %}' into the definitions file. The |
408| `%{' has already been read. Return after reading the `%}'. |
409`-------------------------------------------------------------------*/
1ff442ca 410
4a120d45 411static void
118fb205 412copy_definition (void)
1ff442ca 413{
a70083a3 414 int c;
ae3c3164 415 /* -1 while reading a character if prev char was %. */
a70083a3 416 int after_percent;
1ff442ca 417
89cab50d 418 if (!no_lines_flag)
25b222fa
MA
419 {
420 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 421 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
422 muscle_find("filename")));
423 }
1ff442ca
NF
424
425 after_percent = 0;
426
ae3c3164 427 c = getc (finput);
1ff442ca
NF
428
429 for (;;)
430 {
431 switch (c)
432 {
433 case '\n':
dd60faec 434 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
435 lineno++;
436 break;
437
438 case '%':
a70083a3 439 after_percent = -1;
1ff442ca 440 break;
a083fbbf 441
1ff442ca
NF
442 case '\'':
443 case '"':
337bab46 444 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
445 break;
446
447 case '/':
337bab46 448 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
449 break;
450
451 case EOF:
a70083a3 452 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
453
454 default:
dd60faec 455 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
456 }
457
a70083a3 458 c = getc (finput);
1ff442ca
NF
459
460 if (after_percent)
461 {
462 if (c == '}')
463 return;
dd60faec 464 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
465 }
466 after_percent = 0;
1ff442ca 467 }
1ff442ca
NF
468}
469
470
d7020c20
AD
471/*-------------------------------------------------------------------.
472| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
473| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
474| are reversed. |
475`-------------------------------------------------------------------*/
1ff442ca 476
4a120d45 477static void
d7020c20 478parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 479{
342b8b6e
AD
480 token_t token = tok_undef;
481 char *typename = NULL;
1ff442ca 482
1e9798d5
AD
483 /* The symbol being defined. */
484 struct bucket *symbol = NULL;
485
486 /* After `%token' and `%nterm', any number of symbols maybe be
487 defined. */
1ff442ca
NF
488 for (;;)
489 {
e6011337
JT
490 int tmp_char = ungetc (skip_white_space (), finput);
491
1e9798d5
AD
492 /* `%' (for instance from `%token', or from `%%' etc.) is the
493 only valid means to end this declaration. */
e6011337 494 if (tmp_char == '%')
1ff442ca 495 return;
e6011337 496 if (tmp_char == EOF)
a0f6b076 497 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 498
a70083a3 499 token = lex ();
511e79b3 500 if (token == tok_comma)
943819bf
RS
501 {
502 symbol = NULL;
503 continue;
504 }
511e79b3 505 if (token == tok_typename)
1ff442ca 506 {
95e36146 507 typename = xstrdup (token_buffer);
1ff442ca 508 value_components_used = 1;
943819bf
RS
509 symbol = NULL;
510 }
511e79b3 511 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 512 {
8e03724b
AD
513 if (symval->alias)
514 warn (_("symbol `%s' used more than once as a literal string"),
515 symval->tag);
516 else if (symbol->alias)
517 warn (_("symbol `%s' given more than one literal string"),
518 symbol->tag);
519 else
520 {
521 symval->class = token_sym;
522 symval->type_name = typename;
523 symval->user_token_number = symbol->user_token_number;
524 symbol->user_token_number = SALIAS;
525 symval->alias = symbol;
526 symbol->alias = symval;
527 /* symbol and symval combined are only one symbol */
528 nsyms--;
529 }
8e03724b 530 symbol = NULL;
1ff442ca 531 }
511e79b3 532 else if (token == tok_identifier)
1ff442ca
NF
533 {
534 int oldclass = symval->class;
943819bf 535 symbol = symval;
1ff442ca 536
943819bf 537 if (symbol->class == what_is_not)
a0f6b076 538 complain (_("symbol %s redefined"), symbol->tag);
943819bf 539 symbol->class = what_is;
d7020c20 540 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 541 symbol->value = nvars++;
1ff442ca
NF
542
543 if (typename)
544 {
943819bf
RS
545 if (symbol->type_name == NULL)
546 symbol->type_name = typename;
a70083a3 547 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 548 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
549 }
550 }
511e79b3 551 else if (symbol && token == tok_number)
a70083a3 552 {
943819bf 553 symbol->user_token_number = numval;
a70083a3 554 }
1ff442ca 555 else
943819bf 556 {
a0f6b076 557 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
558 token_buffer,
559 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 560 skip_to_char ('%');
943819bf 561 }
1ff442ca
NF
562 }
563
564}
565
1ff442ca 566
d7020c20
AD
567/*------------------------------.
568| Parse what comes after %start |
569`------------------------------*/
1ff442ca 570
4a120d45 571static void
118fb205 572parse_start_decl (void)
1ff442ca
NF
573{
574 if (start_flag)
27821bff 575 complain (_("multiple %s declarations"), "%start");
511e79b3 576 if (lex () != tok_identifier)
27821bff 577 complain (_("invalid %s declaration"), "%start");
943819bf
RS
578 else
579 {
580 start_flag = 1;
581 startval = symval;
582 }
1ff442ca
NF
583}
584
a70083a3
AD
585/*-----------------------------------------------------------.
586| read in a %type declaration and record its information for |
587| get_type_name to access |
588`-----------------------------------------------------------*/
589
590static void
591parse_type_decl (void)
592{
a70083a3
AD
593 char *name;
594
511e79b3 595 if (lex () != tok_typename)
a70083a3
AD
596 {
597 complain ("%s", _("%type declaration has no <typename>"));
598 skip_to_char ('%');
599 return;
600 }
601
95e36146 602 name = xstrdup (token_buffer);
a70083a3
AD
603
604 for (;;)
605 {
f17bcd1f 606 token_t t;
a70083a3
AD
607 int tmp_char = ungetc (skip_white_space (), finput);
608
609 if (tmp_char == '%')
610 return;
611 if (tmp_char == EOF)
612 fatal (_("Premature EOF after %s"), token_buffer);
613
614 t = lex ();
615
616 switch (t)
1ff442ca
NF
617 {
618
511e79b3
AD
619 case tok_comma:
620 case tok_semicolon:
1ff442ca
NF
621 break;
622
511e79b3 623 case tok_identifier:
1ff442ca
NF
624 if (symval->type_name == NULL)
625 symval->type_name = name;
a70083a3 626 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 627 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
628
629 break;
630
631 default:
a0f6b076
AD
632 complain (_("invalid %%type declaration due to item: %s"),
633 token_buffer);
a70083a3 634 skip_to_char ('%');
1ff442ca
NF
635 }
636 }
637}
638
639
640
d7020c20
AD
641/*----------------------------------------------------------------.
642| Read in a %left, %right or %nonassoc declaration and record its |
643| information. |
644`----------------------------------------------------------------*/
1ff442ca 645
4a120d45 646static void
d7020c20 647parse_assoc_decl (associativity assoc)
1ff442ca 648{
a70083a3
AD
649 char *name = NULL;
650 int prev = 0;
1ff442ca 651
a70083a3 652 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 653
1ff442ca
NF
654 for (;;)
655 {
f17bcd1f 656 token_t t;
e6011337 657 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 658
e6011337 659 if (tmp_char == '%')
1ff442ca 660 return;
e6011337 661 if (tmp_char == EOF)
a0f6b076 662 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 663
a70083a3 664 t = lex ();
1ff442ca
NF
665
666 switch (t)
667 {
511e79b3 668 case tok_typename:
95e36146 669 name = xstrdup (token_buffer);
1ff442ca
NF
670 break;
671
511e79b3 672 case tok_comma:
1ff442ca
NF
673 break;
674
511e79b3 675 case tok_identifier:
1ff442ca 676 if (symval->prec != 0)
a0f6b076 677 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
678 symval->prec = lastprec;
679 symval->assoc = assoc;
d7020c20 680 if (symval->class == nterm_sym)
a0f6b076 681 complain (_("symbol %s redefined"), symval->tag);
d7020c20 682 symval->class = token_sym;
1ff442ca 683 if (name)
a70083a3 684 { /* record the type, if one is specified */
1ff442ca
NF
685 if (symval->type_name == NULL)
686 symval->type_name = name;
a70083a3 687 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 688 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
689 }
690 break;
691
511e79b3
AD
692 case tok_number:
693 if (prev == tok_identifier)
a70083a3 694 {
1ff442ca 695 symval->user_token_number = numval;
a70083a3
AD
696 }
697 else
698 {
699 complain (_
700 ("invalid text (%s) - number should be after identifier"),
701token_buffer);
702 skip_to_char ('%');
703 }
1ff442ca
NF
704 break;
705
511e79b3 706 case tok_semicolon:
1ff442ca
NF
707 return;
708
709 default:
a0f6b076 710 complain (_("unexpected item: %s"), token_buffer);
a70083a3 711 skip_to_char ('%');
1ff442ca
NF
712 }
713
714 prev = t;
1ff442ca
NF
715 }
716}
717
718
719
dd60faec 720/*--------------------------------------------------------------.
180d45ba
PB
721| Copy the union declaration into the stype muscle |
722| (and fdefines), where it is made into the definition of |
723| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 724`--------------------------------------------------------------*/
1ff442ca 725
4a120d45 726static void
118fb205 727parse_union_decl (void)
1ff442ca 728{
a70083a3
AD
729 int c;
730 int count = 0;
428046f8 731 bool done = FALSE;
180d45ba 732 struct obstack union_obstack;
1ff442ca 733 if (typed)
27821bff 734 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
735
736 typed = 1;
737
180d45ba
PB
738 obstack_init (&union_obstack);
739 obstack_sgrow (&union_obstack, "union");
1ff442ca 740
428046f8 741 while (!done)
1ff442ca 742 {
428046f8
AD
743 c = xgetc (finput);
744
342b8b6e
AD
745 /* If C contains '/', it is output by copy_comment (). */
746 if (c != '/')
2666f928 747 obstack_1grow (&union_obstack, c);
1ff442ca
NF
748
749 switch (c)
750 {
751 case '\n':
752 lineno++;
753 break;
754
755 case '/':
2666f928 756 copy_comment (finput, &union_obstack);
1ff442ca
NF
757 break;
758
1ff442ca
NF
759 case '{':
760 count++;
761 break;
762
763 case '}':
428046f8 764 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 765 if (count == 0)
27821bff 766 complain (_("unmatched %s"), "`}'");
1ff442ca 767 count--;
428046f8
AD
768 if (!count)
769 done = TRUE;
770 break;
1ff442ca 771 }
1ff442ca 772 }
180d45ba 773
428046f8
AD
774 /* JF don't choke on trailing semi */
775 c = skip_white_space ();
776 if (c != ';')
777 ungetc (c, finput);
778 obstack_1grow (&union_obstack, 0);
779 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
780}
781
d7020c20
AD
782
783/*-------------------------------------------------------.
784| Parse the declaration %expect N which says to expect N |
785| shift-reduce conflicts. |
786`-------------------------------------------------------*/
1ff442ca 787
4a120d45 788static void
118fb205 789parse_expect_decl (void)
1ff442ca 790{
131e2fef 791 int c = skip_white_space ();
1ff442ca
NF
792 ungetc (c, finput);
793
131e2fef 794 if (!isdigit (c))
79282c5a 795 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
796 else
797 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
798}
799
a70083a3
AD
800
801/*-------------------------------------------------------------------.
802| Parse what comes after %thong. the full syntax is |
803| |
804| %thong <type> token number literal |
805| |
806| the <type> or number may be omitted. The number specifies the |
807| user_token_number. |
808| |
809| Two symbols are entered in the table, one for the token symbol and |
810| one for the literal. Both are given the <type>, if any, from the |
811| declaration. The ->user_token_number of the first is SALIAS and |
812| the ->user_token_number of the second is set to the number, if |
813| any, from the declaration. The two symbols are linked via |
814| pointers in their ->alias fields. |
815| |
816| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
817| only the literal string is retained it is the literal string that |
818| is output to yytname |
819`-------------------------------------------------------------------*/
820
821static void
822parse_thong_decl (void)
7b306f52 823{
f17bcd1f 824 token_t token;
a70083a3
AD
825 struct bucket *symbol;
826 char *typename = 0;
6b7e85b9 827 int usrtoknum = SUNDEF;
7b306f52 828
a70083a3 829 token = lex (); /* fetch typename or first token */
511e79b3 830 if (token == tok_typename)
7b306f52 831 {
95e36146 832 typename = xstrdup (token_buffer);
a70083a3
AD
833 value_components_used = 1;
834 token = lex (); /* fetch first token */
7b306f52 835 }
7b306f52 836
a70083a3 837 /* process first token */
7b306f52 838
511e79b3 839 if (token != tok_identifier)
a70083a3
AD
840 {
841 complain (_("unrecognized item %s, expected an identifier"),
842 token_buffer);
843 skip_to_char ('%');
844 return;
7b306f52 845 }
d7020c20 846 symval->class = token_sym;
a70083a3
AD
847 symval->type_name = typename;
848 symval->user_token_number = SALIAS;
849 symbol = symval;
7b306f52 850
a70083a3 851 token = lex (); /* get number or literal string */
1ff442ca 852
511e79b3 853 if (token == tok_number)
943819bf 854 {
a70083a3
AD
855 usrtoknum = numval;
856 token = lex (); /* okay, did number, now get literal */
943819bf 857 }
1ff442ca 858
a70083a3 859 /* process literal string token */
1ff442ca 860
511e79b3 861 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 862 {
a70083a3
AD
863 complain (_("expected string constant instead of %s"), token_buffer);
864 skip_to_char ('%');
865 return;
1ff442ca 866 }
d7020c20 867 symval->class = token_sym;
a70083a3
AD
868 symval->type_name = typename;
869 symval->user_token_number = usrtoknum;
1ff442ca 870
a70083a3
AD
871 symval->alias = symbol;
872 symbol->alias = symval;
1ff442ca 873
79282c5a
AD
874 /* symbol and symval combined are only one symbol. */
875 nsyms--;
a70083a3 876}
3cef001a 877
b6610515 878static void
11d82f03 879parse_muscle_decl (void)
b6610515
RA
880{
881 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
882 char* muscle_key;
883 char* muscle_value;
b6610515
RA
884
885 /* Read key. */
886 if (!isalpha (ch) && ch != '_')
887 {
888 complain (_("invalid %s declaration"), "%define");
889 skip_to_char ('%');
890 return;
891 }
11d82f03
MA
892 copy_identifier (finput, &muscle_obstack);
893 obstack_1grow (&muscle_obstack, 0);
894 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 895
b6610515
RA
896 /* Read value. */
897 ch = skip_white_space ();
898 if (ch != '"')
899 {
900 ungetc (ch, finput);
901 if (ch != EOF)
902 {
903 complain (_("invalid %s declaration"), "%define");
904 skip_to_char ('%');
905 return;
906 }
907 else
908 fatal (_("Premature EOF after %s"), "\"");
909 }
11d82f03
MA
910 copy_string2 (finput, &muscle_obstack, '"', 0);
911 obstack_1grow (&muscle_obstack, 0);
912 muscle_value = obstack_finish (&muscle_obstack);
b6610515 913
b6610515 914 /* Store the (key, value) pair in the environment. */
11d82f03 915 muscle_insert (muscle_key, muscle_value);
b6610515
RA
916}
917
2ba3b73c 918
426cf563
MA
919
920/*---------------------------------.
a870c567 921| Parse a double quoted parameter. |
426cf563
MA
922`---------------------------------*/
923
924static const char *
925parse_dquoted_param (const char *from)
926{
927 struct obstack param_obstack;
928 const char *param = NULL;
929 int c;
930
931 obstack_init (&param_obstack);
932 c = skip_white_space ();
933
934 if (c != '"')
935 {
936 complain (_("invalid %s declaration"), from);
937 ungetc (c, finput);
938 skip_to_char ('%');
939 return NULL;
940 }
941
2648a72d
AD
942 while ((c = literalchar ()) != '"')
943 obstack_1grow (&param_obstack, c);
a870c567 944
426cf563
MA
945 obstack_1grow (&param_obstack, '\0');
946 param = obstack_finish (&param_obstack);
947
948 if (c != '"' || strlen (param) == 0)
949 {
950 complain (_("invalid %s declaration"), from);
951 if (c != '"')
952 ungetc (c, finput);
953 skip_to_char ('%');
954 return NULL;
955 }
956
957 return param;
958}
959
2ba3b73c
MA
960/*----------------------------------.
961| Parse what comes after %skeleton. |
962`----------------------------------*/
963
a870c567 964static void
2ba3b73c
MA
965parse_skel_decl (void)
966{
426cf563 967 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
968}
969
a70083a3
AD
970/*----------------------------------------------------------------.
971| Read from finput until `%%' is seen. Discard the `%%'. Handle |
972| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 973| groups to ATTRS_OBSTACK. |
a70083a3 974`----------------------------------------------------------------*/
1ff442ca 975
4a120d45 976static void
a70083a3 977read_declarations (void)
1ff442ca 978{
a70083a3 979 for (;;)
1ff442ca 980 {
951366c1 981 int c = skip_white_space ();
1ff442ca 982
a70083a3
AD
983 if (c == '%')
984 {
951366c1 985 token_t tok = parse_percent_token ();
1ff442ca 986
a70083a3 987 switch (tok)
943819bf 988 {
511e79b3 989 case tok_two_percents:
a70083a3 990 return;
1ff442ca 991
511e79b3 992 case tok_percent_left_curly:
a70083a3
AD
993 copy_definition ();
994 break;
1ff442ca 995
511e79b3 996 case tok_token:
d7020c20 997 parse_token_decl (token_sym, nterm_sym);
a70083a3 998 break;
1ff442ca 999
511e79b3 1000 case tok_nterm:
d7020c20 1001 parse_token_decl (nterm_sym, token_sym);
a70083a3 1002 break;
1ff442ca 1003
511e79b3 1004 case tok_type:
a70083a3
AD
1005 parse_type_decl ();
1006 break;
1ff442ca 1007
511e79b3 1008 case tok_start:
a70083a3
AD
1009 parse_start_decl ();
1010 break;
118fb205 1011
511e79b3 1012 case tok_union:
a70083a3
AD
1013 parse_union_decl ();
1014 break;
1ff442ca 1015
511e79b3 1016 case tok_expect:
a70083a3
AD
1017 parse_expect_decl ();
1018 break;
6deb4447 1019
511e79b3 1020 case tok_thong:
a70083a3
AD
1021 parse_thong_decl ();
1022 break;
d7020c20 1023
511e79b3 1024 case tok_left:
d7020c20 1025 parse_assoc_decl (left_assoc);
a70083a3 1026 break;
1ff442ca 1027
511e79b3 1028 case tok_right:
d7020c20 1029 parse_assoc_decl (right_assoc);
a70083a3 1030 break;
1ff442ca 1031
511e79b3 1032 case tok_nonassoc:
d7020c20 1033 parse_assoc_decl (non_assoc);
a70083a3 1034 break;
1ff442ca 1035
b6610515 1036 case tok_define:
11d82f03 1037 parse_muscle_decl ();
b6610515 1038 break;
342b8b6e 1039
2ba3b73c
MA
1040 case tok_skel:
1041 parse_skel_decl ();
1042 break;
b6610515 1043
511e79b3 1044 case tok_noop:
a70083a3 1045 break;
1ff442ca 1046
951366c1
AD
1047 case tok_stropt:
1048 case tok_intopt:
1049 case tok_obsolete:
951366c1
AD
1050 abort ();
1051 break;
1052
e0c40012 1053 case tok_illegal:
a70083a3
AD
1054 default:
1055 complain (_("unrecognized: %s"), token_buffer);
1056 skip_to_char ('%');
1057 }
1058 }
1059 else if (c == EOF)
1060 fatal (_("no input grammar"));
1061 else
1062 {
ff4a34be
AD
1063 char buf[] = "c";
1064 buf[0] = c;
1065 complain (_("unknown character: %s"), quote (buf));
a70083a3 1066 skip_to_char ('%');
1ff442ca 1067 }
1ff442ca 1068 }
1ff442ca 1069}
a70083a3
AD
1070\f
1071/*-------------------------------------------------------------------.
1072| Assuming that a `{' has just been seen, copy everything up to the |
1073| matching `}' into the actions file. STACK_OFFSET is the number of |
1074| values in the current rule so far, which says where to find `$0' |
1075| with respect to the top of the stack. |
1076`-------------------------------------------------------------------*/
1ff442ca 1077
4a120d45 1078static void
79282c5a 1079copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1080{
a70083a3 1081 int c;
a70083a3 1082 int count;
1ff442ca
NF
1083
1084 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1085 if (semantic_parser)
1086 stack_offset = 0;
1ff442ca 1087
1ff442ca 1088 count = 1;
a70083a3 1089 c = getc (finput);
1ff442ca
NF
1090
1091 while (count > 0)
1092 {
1093 while (c != '}')
a70083a3
AD
1094 {
1095 switch (c)
1ff442ca
NF
1096 {
1097 case '\n':
8c7ebe49 1098 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1099 lineno++;
1100 break;
1101
1102 case '{':
8c7ebe49 1103 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1104 count++;
1105 break;
1106
1107 case '\'':
1108 case '"':
337bab46 1109 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1110 break;
1111
1112 case '/':
337bab46 1113 copy_comment (finput, &action_obstack);
1ff442ca
NF
1114 break;
1115
1116 case '$':
337bab46 1117 copy_dollar (finput, &action_obstack,
8c7ebe49 1118 rule, stack_offset);
1ff442ca
NF
1119 break;
1120
1121 case '@':
337bab46 1122 copy_at (finput, &action_obstack,
8c7ebe49 1123 stack_offset);
6666f98f 1124 break;
1ff442ca
NF
1125
1126 case EOF:
27821bff 1127 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1128
1129 default:
8c7ebe49 1130 obstack_1grow (&action_obstack, c);
a70083a3
AD
1131 }
1132
1133 c = getc (finput);
1134 }
1135
1136 /* above loop exits when c is '}' */
1137
1138 if (--count)
1139 {
8c7ebe49 1140 obstack_1grow (&action_obstack, c);
a70083a3
AD
1141 c = getc (finput);
1142 }
1143 }
1144
3f96f4dc
AD
1145 obstack_1grow (&action_obstack, '\0');
1146 rule->action = obstack_finish (&action_obstack);
1147 rule->action_line = lineno;
a70083a3
AD
1148}
1149\f
1150/*-------------------------------------------------------------------.
1151| After `%guard' is seen in the input file, copy the actual guard |
1152| into the guards file. If the guard is followed by an action, copy |
1153| that into the actions file. STACK_OFFSET is the number of values |
1154| in the current rule so far, which says where to find `$0' with |
1155| respect to the top of the stack, for the simple parser in which |
1156| the stack is not popped until after the guard is run. |
1157`-------------------------------------------------------------------*/
1158
1159static void
79282c5a 1160copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1161{
1162 int c;
a70083a3 1163 int count;
a70083a3
AD
1164 int brace_flag = 0;
1165
1166 /* offset is always 0 if parser has already popped the stack pointer */
1167 if (semantic_parser)
1168 stack_offset = 0;
1169
ea5607fd 1170 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1171 if (!no_lines_flag)
25b222fa 1172 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1173 lineno, quotearg_style (c_quoting_style,
11d82f03 1174 muscle_find ("filename")));
ea5607fd 1175 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1176
1177 count = 0;
1178 c = getc (finput);
1179
1180 while (brace_flag ? (count > 0) : (c != ';'))
1181 {
1182 switch (c)
1183 {
1184 case '\n':
ea5607fd 1185 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1186 lineno++;
1187 break;
1188
1189 case '{':
ea5607fd 1190 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1191 brace_flag = 1;
1192 count++;
1193 break;
1194
1195 case '}':
ea5607fd 1196 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1197 if (count > 0)
1198 count--;
1199 else
1200 {
1201 complain (_("unmatched %s"), "`}'");
1202 c = getc (finput); /* skip it */
1203 }
1204 break;
1205
1206 case '\'':
1207 case '"':
337bab46 1208 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1209 break;
1210
1211 case '/':
337bab46 1212 copy_comment (finput, &guard_obstack);
a70083a3
AD
1213 break;
1214
1215 case '$':
337bab46 1216 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1217 break;
1ff442ca 1218
a70083a3 1219 case '@':
337bab46 1220 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1221 break;
1ff442ca 1222
a70083a3
AD
1223 case EOF:
1224 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1225
a70083a3 1226 default:
ea5607fd 1227 obstack_1grow (&guard_obstack, c);
1ff442ca 1228 }
a70083a3
AD
1229
1230 if (c != '}' || count != 0)
1231 c = getc (finput);
1ff442ca
NF
1232 }
1233
a70083a3
AD
1234 c = skip_white_space ();
1235
ff4423cc 1236 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1237 if (c == '{')
1238 copy_action (rule, stack_offset);
1239 else if (c == '=')
1240 {
1241 c = getc (finput); /* why not skip_white_space -wjh */
1242 if (c == '{')
1243 copy_action (rule, stack_offset);
1244 }
1245 else
1246 ungetc (c, finput);
1ff442ca 1247}
a70083a3
AD
1248\f
1249
a70083a3
AD
1250/*-------------------------------------------------------------------.
1251| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1252| with the user's names. |
1253`-------------------------------------------------------------------*/
1ff442ca 1254
4a120d45 1255static bucket *
118fb205 1256gensym (void)
1ff442ca 1257{
274d42ce
AD
1258 /* Incremented for each generated symbol */
1259 static int gensym_count = 0;
1260 static char buf[256];
1261
a70083a3 1262 bucket *sym;
1ff442ca 1263
274d42ce
AD
1264 sprintf (buf, "@%d", ++gensym_count);
1265 token_buffer = buf;
a70083a3 1266 sym = getsym (token_buffer);
d7020c20 1267 sym->class = nterm_sym;
1ff442ca 1268 sym->value = nvars++;
36281465 1269 return sym;
1ff442ca 1270}
a70083a3 1271\f
107f7dfb
AD
1272/*-------------------------------------------------------------------.
1273| Parse the input grammar into a one symbol_list structure. Each |
1274| rule is represented by a sequence of symbols: the left hand side |
1275| followed by the contents of the right hand side, followed by a |
1276| null pointer instead of a symbol to terminate the rule. The next |
1277| symbol is the lhs of the following rule. |
1278| |
1279| All guards and actions are copied out to the appropriate files, |
1280| labelled by the rule number they apply to. |
1281| |
1282| Bison used to allow some %directives in the rules sections, but |
1283| this is no longer consider appropriate: (i) the documented grammar |
1284| doesn't claim it, (ii), it would promote bad style, (iii), error |
1285| recovery for %directives consists in skipping the junk until a `%' |
1286| is seen and helrp synchronizing. This scheme is definitely wrong |
1287| in the rules section. |
1288`-------------------------------------------------------------------*/
1ff442ca 1289
4a120d45 1290static void
118fb205 1291readgram (void)
1ff442ca 1292{
f17bcd1f 1293 token_t t;
a70083a3 1294 bucket *lhs = NULL;
107f7dfb
AD
1295 symbol_list *p = NULL;
1296 symbol_list *p1 = NULL;
a70083a3 1297 bucket *bp;
1ff442ca 1298
ff4a34be
AD
1299 /* Points to first symbol_list of current rule. its symbol is the
1300 lhs of the rule. */
107f7dfb 1301 symbol_list *crule = NULL;
ff4a34be 1302 /* Points to the symbol_list preceding crule. */
107f7dfb 1303 symbol_list *crule1 = NULL;
1ff442ca 1304
a70083a3 1305 t = lex ();
1ff442ca 1306
511e79b3 1307 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1308 if (t == tok_identifier || t == tok_bar)
1309 {
1310 int action_flag = 0;
1311 /* Number of symbols in rhs of this rule so far */
1312 int rulelength = 0;
1313 int xactions = 0; /* JF for error checking */
1314 bucket *first_rhs = 0;
1315
1316 if (t == tok_identifier)
1317 {
1318 lhs = symval;
1319
1320 if (!start_flag)
1321 {
1322 startval = lhs;
1323 start_flag = 1;
1324 }
1ff442ca 1325
107f7dfb
AD
1326 t = lex ();
1327 if (t != tok_colon)
1328 {
1329 complain (_("ill-formed rule: initial symbol not followed by colon"));
1330 unlex (t);
1331 }
1332 }
1333
1334 if (nrules == 0 && t == tok_bar)
1335 {
1336 complain (_("grammar starts with vertical bar"));
1337 lhs = symval; /* BOGUS: use a random symval */
1338 }
1339 /* start a new rule and record its lhs. */
1340
1341 nrules++;
1342 nitems++;
1343
1344 p = symbol_list_new (lhs);
1345
1346 crule1 = p1;
1347 if (p1)
1348 p1->next = p;
1349 else
1350 grammar = p;
1ff442ca 1351
107f7dfb
AD
1352 p1 = p;
1353 crule = p;
1ff442ca 1354
107f7dfb 1355 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1356
107f7dfb
AD
1357 if (lhs->class == unknown_sym)
1358 {
1359 lhs->class = nterm_sym;
1360 lhs->value = nvars;
1361 nvars++;
1362 }
1363 else if (lhs->class == token_sym)
1364 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1365
107f7dfb 1366 /* read the rhs of the rule. */
1ff442ca 1367
107f7dfb
AD
1368 for (;;)
1369 {
1370 t = lex ();
1371 if (t == tok_prec)
1372 {
1373 t = lex ();
1374 crule->ruleprec = symval;
1375 t = lex ();
1376 }
1377
1378 if (!(t == tok_identifier || t == tok_left_curly))
1379 break;
1ff442ca 1380
107f7dfb
AD
1381 /* If next token is an identifier, see if a colon follows it.
1382 If one does, exit this rule now. */
1383 if (t == tok_identifier)
1384 {
1385 bucket *ssave;
1386 token_t t1;
1387
1388 ssave = symval;
1389 t1 = lex ();
1390 unlex (t1);
1391 symval = ssave;
1392 if (t1 == tok_colon)
1393 break;
1394
1395 if (!first_rhs) /* JF */
1396 first_rhs = symval;
1397 /* Not followed by colon =>
1398 process as part of this rule's rhs. */
1399 }
1400
1401 /* If we just passed an action, that action was in the middle
1402 of a rule, so make a dummy rule to reduce it to a
1403 non-terminal. */
1404 if (action_flag)
1405 {
1406 /* Since the action was written out with this rule's
1407 number, we must give the new rule this number by
1408 inserting the new rule before it. */
1409
1410 /* Make a dummy nonterminal, a gensym. */
1411 bucket *sdummy = gensym ();
1412
1413 /* Make a new rule, whose body is empty, before the
1414 current one, so that the action just read can
1415 belong to it. */
1416 nrules++;
1417 nitems++;
1418 p = symbol_list_new (sdummy);
1419 /* Attach its lineno to that of the host rule. */
1420 p->line = crule->line;
1421 if (crule1)
1422 crule1->next = p;
1423 else
1424 grammar = p;
1425 /* End of the rule. */
1426 crule1 = symbol_list_new (NULL);
1427 crule1->next = crule;
1428
1429 p->next = crule1;
1430
1431 /* Insert the dummy generated by that rule into this
1432 rule. */
1433 nitems++;
1434 p = symbol_list_new (sdummy);
1435 p1->next = p;
1436 p1 = p;
1437
1438 action_flag = 0;
1439 }
1440
1441 if (t == tok_identifier)
1442 {
1443 nitems++;
1444 p = symbol_list_new (symval);
1445 p1->next = p;
1446 p1 = p;
1447 }
1448 else /* handle an action. */
1449 {
1450 copy_action (crule, rulelength);
1451 action_flag = 1;
1452 xactions++; /* JF */
1453 }
1454 rulelength++;
1455 } /* end of read rhs of rule */
1456
1457 /* Put an empty link in the list to mark the end of this rule */
1458 p = symbol_list_new (NULL);
1459 p1->next = p;
1460 p1 = p;
1461
1462 if (t == tok_prec)
1463 {
1464 complain (_("two @prec's in a row"));
1465 t = lex ();
1466 crule->ruleprec = symval;
1467 t = lex ();
1468 }
1469 if (t == tok_guard)
1470 {
1471 if (!semantic_parser)
1472 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1473
107f7dfb 1474 copy_guard (crule, rulelength);
a70083a3 1475 t = lex ();
107f7dfb
AD
1476 }
1477 else if (t == tok_left_curly)
1478 {
1479 /* This case never occurs -wjh */
1480 if (action_flag)
1481 complain (_("two actions at end of one rule"));
1482 copy_action (crule, rulelength);
1483 action_flag = 1;
1484 xactions++; /* -wjh */
1485 t = lex ();
1486 }
1487 /* If $$ is being set in default way, report if any type
1488 mismatch. */
1489 else if (!xactions
1490 && first_rhs && lhs->type_name != first_rhs->type_name)
1491 {
1492 if (lhs->type_name == 0
1493 || first_rhs->type_name == 0
1494 || strcmp (lhs->type_name, first_rhs->type_name))
1495 complain (_("type clash (`%s' `%s') on default action"),
1496 lhs->type_name ? lhs->type_name : "",
1497 first_rhs->type_name ? first_rhs->type_name : "");
1498 }
1499 /* Warn if there is no default for $$ but we need one. */
1500 else if (!xactions && !first_rhs && lhs->type_name != 0)
1501 complain (_("empty rule for typed nonterminal, and no action"));
1502 if (t == tok_semicolon)
a70083a3 1503 t = lex ();
107f7dfb
AD
1504 }
1505 else
1506 {
1507 complain (_("invalid input: %s"), quote (token_buffer));
1508 t = lex ();
1509 }
943819bf 1510
1ff442ca 1511
943819bf
RS
1512 /* grammar has been read. Do some checking */
1513
1ff442ca 1514 if (nsyms > MAXSHORT)
a0f6b076
AD
1515 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1516 MAXSHORT);
1ff442ca 1517 if (nrules == 0)
a0f6b076 1518 fatal (_("no rules in the input grammar"));
1ff442ca 1519
1ff442ca
NF
1520 /* Report any undefined symbols and consider them nonterminals. */
1521
1522 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1523 if (bp->class == unknown_sym)
1ff442ca 1524 {
a70083a3
AD
1525 complain (_
1526 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1527 bp->tag);
d7020c20 1528 bp->class = nterm_sym;
1ff442ca
NF
1529 bp->value = nvars++;
1530 }
1531
1532 ntokens = nsyms - nvars;
1533}
ff48177d
MA
1534
1535/* At the end of the grammar file, some C source code must
63c2d5de 1536 be stored. It is going to be associated to the epilogue
ff48177d
MA
1537 directive. */
1538static void
1539read_additionnal_code (void)
1540{
1541 char c;
63c2d5de 1542 struct obstack el_obstack;
342b8b6e 1543
63c2d5de 1544 obstack_init (&el_obstack);
ff48177d 1545
710ddc4f
MA
1546 if (!no_lines_flag)
1547 {
1548 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1549 lineno, quotearg_style (c_quoting_style,
1550 muscle_find("filename")));
1551 }
1552
ff48177d 1553 while ((c = getc (finput)) != EOF)
63c2d5de 1554 obstack_1grow (&el_obstack, c);
342b8b6e 1555
63c2d5de 1556 obstack_1grow (&el_obstack, 0);
11d82f03 1557 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1558}
1559
a70083a3 1560\f
037ca2f1
AD
1561/*------------------------------------------------------------------.
1562| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1563| number. |
1564`------------------------------------------------------------------*/
1565
1566static void
1567token_translations_init (void)
1568{
1569 bucket *bp = NULL;
1570 int i;
1571
1572 token_translations = XCALLOC (short, max_user_token_number + 1);
1573
1574 /* Initialize all entries for literal tokens to 2, the internal
1575 token number for $undefined., which represents all invalid
1576 inputs. */
1577 for (i = 0; i <= max_user_token_number; i++)
1578 token_translations[i] = 2;
1579
1580 for (bp = firstsymbol; bp; bp = bp->next)
1581 {
1582 /* Non-terminal? */
1583 if (bp->value >= ntokens)
1584 continue;
1585 /* A token string alias? */
1586 if (bp->user_token_number == SALIAS)
1587 continue;
6b7e85b9
AD
1588
1589 assert (bp->user_token_number != SUNDEF);
1590
037ca2f1
AD
1591 /* A token which translation has already been set? */
1592 if (token_translations[bp->user_token_number] != 2)
1593 complain (_("tokens %s and %s both assigned number %d"),
1594 tags[token_translations[bp->user_token_number]],
1595 bp->tag, bp->user_token_number);
1596 token_translations[bp->user_token_number] = bp->value;
1597 }
1598}
1599
1600
a70083a3
AD
1601/*------------------------------------------------------------------.
1602| Assign symbol numbers, and write definition of token names into |
b2ca4022 1603| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1604| of symbols. |
1605`------------------------------------------------------------------*/
1ff442ca 1606
4a120d45 1607static void
118fb205 1608packsymbols (void)
1ff442ca 1609{
342b8b6e 1610 bucket *bp = NULL;
a70083a3 1611 int tokno = 1;
a70083a3 1612 int last_user_token_number;
4a120d45 1613 static char DOLLAR[] = "$";
1ff442ca 1614
d7913476 1615 tags = XCALLOC (char *, nsyms + 1);
d7913476 1616 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1617
d7913476
AD
1618 sprec = XCALLOC (short, nsyms);
1619 sassoc = XCALLOC (short, nsyms);
1ff442ca 1620
037ca2f1
AD
1621 /* The EOF token. */
1622 tags[0] = DOLLAR;
1623 user_toknums[0] = 0;
1624
1ff442ca
NF
1625 max_user_token_number = 256;
1626 last_user_token_number = 256;
1627
1628 for (bp = firstsymbol; bp; bp = bp->next)
1629 {
d7020c20 1630 if (bp->class == nterm_sym)
1ff442ca
NF
1631 {
1632 bp->value += ntokens;
1633 }
943819bf
RS
1634 else if (bp->alias)
1635 {
0a6384c4
AD
1636 /* this symbol and its alias are a single token defn.
1637 allocate a tokno, and assign to both check agreement of
1638 ->prec and ->assoc fields and make both the same */
1639 if (bp->value == 0)
1640 bp->value = bp->alias->value = tokno++;
943819bf 1641
0a6384c4
AD
1642 if (bp->prec != bp->alias->prec)
1643 {
1644 if (bp->prec != 0 && bp->alias->prec != 0
1645 && bp->user_token_number == SALIAS)
a0f6b076
AD
1646 complain (_("conflicting precedences for %s and %s"),
1647 bp->tag, bp->alias->tag);
0a6384c4
AD
1648 if (bp->prec != 0)
1649 bp->alias->prec = bp->prec;
1650 else
1651 bp->prec = bp->alias->prec;
1652 }
943819bf 1653
0a6384c4
AD
1654 if (bp->assoc != bp->alias->assoc)
1655 {
a0f6b076
AD
1656 if (bp->assoc != 0 && bp->alias->assoc != 0
1657 && bp->user_token_number == SALIAS)
1658 complain (_("conflicting assoc values for %s and %s"),
1659 bp->tag, bp->alias->tag);
1660 if (bp->assoc != 0)
1661 bp->alias->assoc = bp->assoc;
1662 else
1663 bp->assoc = bp->alias->assoc;
1664 }
0a6384c4
AD
1665
1666 if (bp->user_token_number == SALIAS)
a70083a3 1667 continue; /* do not do processing below for SALIASs */
943819bf 1668
a70083a3 1669 }
d7020c20 1670 else /* bp->class == token_sym */
943819bf
RS
1671 {
1672 bp->value = tokno++;
1673 }
1674
d7020c20 1675 if (bp->class == token_sym)
1ff442ca 1676 {
6b7e85b9 1677 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1678 bp->user_token_number = ++last_user_token_number;
1679 if (bp->user_token_number > max_user_token_number)
1680 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1681 }
1682
1683 tags[bp->value] = bp->tag;
943819bf 1684 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1685 sprec[bp->value] = bp->prec;
1686 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1687 }
1688
037ca2f1 1689 token_translations_init ();
1ff442ca
NF
1690
1691 error_token_number = errtoken->value;
1692
e3f1699f
AD
1693 if (startval->class == unknown_sym)
1694 fatal (_("the start symbol %s is undefined"), startval->tag);
1695 else if (startval->class == token_sym)
1696 fatal (_("the start symbol %s is a token"), startval->tag);
1697
1698 start_symbol = startval->value;
1699}
1700
1701
93ede233
AD
1702/*---------------------------------------------------------------.
1703| Save the definition of token names in the `TOKENDEFS' muscle. |
1704`---------------------------------------------------------------*/
e3f1699f
AD
1705
1706static void
93ede233 1707symbols_save (void)
e3f1699f 1708{
93ede233
AD
1709 struct obstack tokendefs;
1710 bucket *bp;
1711 char *cp, *symbol;
1712 char c;
1713 obstack_init (&tokendefs);
1714
1715 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1716 {
93ede233 1717 symbol = bp->tag; /* get symbol */
1ff442ca 1718
93ede233
AD
1719 if (bp->value >= ntokens)
1720 continue;
1721 if (bp->user_token_number == SALIAS)
1722 continue;
1723 if ('\'' == *symbol)
1724 continue; /* skip literal character */
1725 if (bp == errtoken)
1726 continue; /* skip error token */
1727 if ('\"' == *symbol)
037ca2f1 1728 {
93ede233
AD
1729 /* use literal string only if given a symbol with an alias */
1730 if (bp->alias)
1731 symbol = bp->alias->tag;
1732 else
1733 continue;
037ca2f1 1734 }
93ede233
AD
1735
1736 /* Don't #define nonliteral tokens whose names contain periods. */
1737 cp = symbol;
1738 while ((c = *cp++) && c != '.');
1739 if (c != '\0')
1740 continue;
1741
7742ddeb 1742 obstack_fgrow2 (&tokendefs, "# define %s\t%d\n",
93ede233
AD
1743 symbol, bp->user_token_number);
1744 if (semantic_parser)
1745 /* FIXME: This is probably wrong, and should be just as
1746 above. --akim. */
7742ddeb 1747 obstack_fgrow2 (&tokendefs, "# define T%s\t%d\n", symbol, bp->value);
1ff442ca 1748 }
93ede233
AD
1749
1750 obstack_1grow (&tokendefs, 0);
1751 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1752 obstack_free (&tokendefs, NULL);
1ff442ca 1753}
a083fbbf 1754
1ff442ca 1755
a70083a3
AD
1756/*---------------------------------------------------------------.
1757| Convert the rules into the representation using RRHS, RLHS and |
1758| RITEMS. |
1759`---------------------------------------------------------------*/
1ff442ca 1760
4a120d45 1761static void
118fb205 1762packgram (void)
1ff442ca 1763{
a70083a3
AD
1764 int itemno;
1765 int ruleno;
1766 symbol_list *p;
1ff442ca 1767
d7913476 1768 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1769 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1770
1771 itemno = 0;
1772 ruleno = 1;
1773
1774 p = grammar;
1775 while (p)
1776 {
b29b2ed5 1777 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1778 rule_table[ruleno].lhs = p->sym->value;
1779 rule_table[ruleno].rhs = itemno;
b29b2ed5 1780 rule_table[ruleno].line = p->line;
68f1e3ed 1781 rule_table[ruleno].useful = TRUE;
3f96f4dc
AD
1782 rule_table[ruleno].action = p->action;
1783 rule_table[ruleno].action_line = p->action_line;
1ff442ca
NF
1784
1785 p = p->next;
1786 while (p && p->sym)
1787 {
1788 ritem[itemno++] = p->sym->value;
1789 /* A rule gets by default the precedence and associativity
1790 of the last token in it. */
d7020c20 1791 if (p->sym->class == token_sym)
1ff442ca 1792 {
652a871c
AD
1793 rule_table[ruleno].prec = p->sym->prec;
1794 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1795 }
a70083a3
AD
1796 if (p)
1797 p = p->next;
1ff442ca
NF
1798 }
1799
1800 /* If this rule has a %prec,
a70083a3 1801 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1802 if (ruleprec)
1803 {
652a871c
AD
1804 rule_table[ruleno].prec = ruleprec->prec;
1805 rule_table[ruleno].assoc = ruleprec->assoc;
1806 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1807 }
1808
1809 ritem[itemno++] = -ruleno;
1810 ruleno++;
1811
a70083a3
AD
1812 if (p)
1813 p = p->next;
1ff442ca
NF
1814 }
1815
1816 ritem[itemno] = 0;
3067fbef
AD
1817
1818 if (trace_flag)
1819 ritem_print (stderr);
1ff442ca 1820}
a70083a3
AD
1821\f
1822/*-------------------------------------------------------------------.
1823| Read in the grammar specification and record it in the format |
ea5607fd 1824| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1825| and all actions into ACTION_OBSTACK, in each case forming the body |
1826| of a C function (YYGUARD or YYACTION) which contains a switch |
1827| statement to decide which guard or action to execute. |
a70083a3
AD
1828`-------------------------------------------------------------------*/
1829
1830void
1831reader (void)
1832{
1833 start_flag = 0;
1834 startval = NULL; /* start symbol not specified yet. */
1835
a70083a3
AD
1836 nsyms = 1;
1837 nvars = 0;
1838 nrules = 0;
1839 nitems = 0;
a70083a3
AD
1840
1841 typed = 0;
1842 lastprec = 0;
1843
a70083a3
AD
1844 semantic_parser = 0;
1845 pure_parser = 0;
a70083a3
AD
1846
1847 grammar = NULL;
1848
342b8b6e 1849 lex_init ();
a70083a3
AD
1850 lineno = 1;
1851
11d82f03
MA
1852 /* Initialize the muscle obstack. */
1853 obstack_init (&muscle_obstack);
82e236e2 1854
a70083a3
AD
1855 /* Initialize the symbol table. */
1856 tabinit ();
b6610515 1857
a70083a3
AD
1858 /* Construct the error token */
1859 errtoken = getsym ("error");
d7020c20 1860 errtoken->class = token_sym;
a70083a3 1861 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1862
a70083a3
AD
1863 /* Construct a token that represents all undefined literal tokens.
1864 It is always token number 2. */
1865 undeftoken = getsym ("$undefined.");
d7020c20 1866 undeftoken->class = token_sym;
a70083a3
AD
1867 undeftoken->user_token_number = 2;
1868
331dbc1b
AD
1869 /* Initialize the obstacks. */
1870 obstack_init (&action_obstack);
1871 obstack_init (&attrs_obstack);
1872 obstack_init (&guard_obstack);
1873 obstack_init (&output_obstack);
1874
1875 finput = xfopen (infile, "r");
1876
896fe5c1
AD
1877 /* Read the declaration section. Copy %{ ... %} groups to
1878 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1879 etc. found there. */
a70083a3 1880 read_declarations ();
a70083a3
AD
1881 /* Read in the grammar, build grammar in list form. Write out
1882 guards and actions. */
1883 readgram ();
ff48177d
MA
1884 /* Some C code is given at the end of the grammar file. */
1885 read_additionnal_code ();
b0c4483e 1886
331dbc1b
AD
1887 lex_free ();
1888 xfclose (finput);
1889
a70083a3
AD
1890 /* Assign the symbols their symbol numbers. Write #defines for the
1891 token symbols into FDEFINES if requested. */
1892 packsymbols ();
93ede233
AD
1893 /* Save them. */
1894 symbols_save ();
1895
a70083a3
AD
1896 /* Convert the grammar into the format described in gram.h. */
1897 packgram ();
a70083a3 1898}