]> git.saurik.com Git - bison.git/blame - src/reader.c
Attach actions to rules, instead of pre-outputting them to
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 38
a70083a3
AD
39typedef struct symbol_list
40{
41 struct symbol_list *next;
42 bucket *sym;
b29b2ed5 43 int line;
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
a70083a3
AD
47 bucket *ruleprec;
48}
49symbol_list;
118fb205 50
1ff442ca 51int lineno;
1ff442ca 52char **tags;
d019d655 53short *user_toknums;
4a120d45
JT
54static symbol_list *grammar;
55static int start_flag;
56static bucket *startval;
1ff442ca
NF
57
58/* Nonzero if components of semantic values are used, implying
59 they must be unions. */
60static int value_components_used;
61
d7020c20
AD
62/* Nonzero if %union has been seen. */
63static int typed;
1ff442ca 64
d7020c20
AD
65/* Incremented for each %left, %right or %nonassoc seen */
66static int lastprec;
1ff442ca 67
1ff442ca 68static bucket *errtoken;
5b2e3c89 69static bucket *undeftoken;
b29b2ed5
AD
70
71
6255b435 72static symbol_list *
b29b2ed5
AD
73symbol_list_new (bucket *sym)
74{
75 symbol_list *res = XMALLOC (symbol_list, 1);
76 res->next = NULL;
77 res->sym = sym;
78 res->line = lineno;
79 res->ruleprec = NULL;
80 return res;
81}
82
0d533154 83\f
a70083a3 84
0d533154
AD
85/*===================\
86| Low level lexing. |
87\===================*/
943819bf
RS
88
89static void
118fb205 90skip_to_char (int target)
943819bf
RS
91{
92 int c;
93 if (target == '\n')
a0f6b076 94 complain (_(" Skipping to next \\n"));
943819bf 95 else
a0f6b076 96 complain (_(" Skipping to next %c"), target);
943819bf
RS
97
98 do
0d533154 99 c = skip_white_space ();
943819bf 100 while (c != target && c != EOF);
a083fbbf 101 if (c != EOF)
0d533154 102 ungetc (c, finput);
943819bf
RS
103}
104
105
0d533154
AD
106/*---------------------------------------------------------.
107| Read a signed integer from STREAM and return its value. |
108`---------------------------------------------------------*/
109
/* Read an optionally negative decimal integer from STREAM and return
   its value.  The first character that is not part of the number is
   pushed back onto STREAM.  A lone `-' (or no digit at all) yields 0.
   The magnitude saturates at INT_MAX instead of overflowing, so an
   absurdly long run of digits in the input cannot trigger undefined
   behavior.  */
static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* True exactly when 10 * n + digit would exceed INT_MAX.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;
      c = getc (stream);
    }

  /* Pushing back EOF is a harmless no-op.  */
  ungetc (c, stream);

  return sign * n;
}
133\f
79282c5a
AD
134/*--------------------------------------------------------------.
135| Get the data type (alternative in the union) of the value for |
136| symbol N in rule RULE. |
137`--------------------------------------------------------------*/
138
139static char *
b29b2ed5 140get_type_name (int n, symbol_list *rule)
79282c5a
AD
141{
142 int i;
143 symbol_list *rp;
144
145 if (n < 0)
146 {
147 complain (_("invalid $ value"));
148 return NULL;
149 }
150
151 rp = rule;
152 i = 0;
153
154 while (i < n)
155 {
156 rp = rp->next;
157 if (rp == NULL || rp->sym == NULL)
158 {
159 complain (_("invalid $ value"));
160 return NULL;
161 }
162 i++;
163 }
164
165 return rp->sym->type_name;
166}
167\f
337bab46
AD
168/*------------------------------------------------------------.
169| Copy the string from FIN to OOUT; its delimiters are copied |
170| only if STORE. MATCH is the delimiter (either ' or "). |
171`------------------------------------------------------------*/
ae3c3164
AD
172
173static inline void
b6610515 174copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
175{
176 int c;
177
b6610515
RA
178 if (store)
179 obstack_1grow (oout, match);
8c7ebe49 180
4a120d45 181 c = getc (fin);
ae3c3164
AD
182
183 while (c != match)
184 {
185 if (c == EOF)
186 fatal (_("unterminated string at end of file"));
187 if (c == '\n')
188 {
a0f6b076 189 complain (_("unterminated string"));
4a120d45 190 ungetc (c, fin);
ae3c3164
AD
191 c = match; /* invent terminator */
192 continue;
193 }
194
337bab46 195 obstack_1grow (oout, c);
ae3c3164
AD
196
197 if (c == '\\')
198 {
4a120d45 199 c = getc (fin);
ae3c3164
AD
200 if (c == EOF)
201 fatal (_("unterminated string at end of file"));
337bab46 202 obstack_1grow (oout, c);
8c7ebe49 203
ae3c3164
AD
204 if (c == '\n')
205 lineno++;
206 }
207
a70083a3 208 c = getc (fin);
ae3c3164
AD
209 }
210
b6610515
RA
211 if (store)
212 obstack_1grow (oout, c);
213}
214
/* Copy the string delimited by MATCH from FIN to OOUT, delimiters
   included.  The opening delimiter has already been read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  /* A nonzero STORE asks for the delimiters to be copied too.  */
  copy_string2 (fin, oout, match, 1);
}
222
b6610515
RA
/* Copy from FIN to OOUT the longest run of letters, digits and
   underscores.  The character ending the run is pushed back.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c = getc (fin);

  while (c == '_' || isalnum (c))
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
ae3c3164 235
337bab46
AD
236/*-----------------------------------------------------------------.
237| Dump the would-be comment from FIN to OOUT1 and OOUT2 (which can |
238| be NULL). In fact we just saw a `/', which might or might not |
239| start a comment. In any case, copy what we saw. |
240| |
241| OOUT2 might be NULL. |
242`-----------------------------------------------------------------*/
ae3c3164
AD
243
244static inline void
337bab46 245copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
246{
247 int cplus_comment;
a70083a3 248 int ended;
550a72a3
AD
249 int c;
250
251 /* We read a `/', output it. */
337bab46 252 obstack_1grow (oout1, '/');
896fe5c1
AD
253 if (oout2)
254 obstack_1grow (oout2, '/');
550a72a3
AD
255
256 switch ((c = getc (fin)))
257 {
258 case '/':
259 cplus_comment = 1;
260 break;
261 case '*':
262 cplus_comment = 0;
263 break;
264 default:
265 ungetc (c, fin);
266 return;
267 }
ae3c3164 268
337bab46 269 obstack_1grow (oout1, c);
896fe5c1
AD
270 if (oout2)
271 obstack_1grow (oout2, c);
550a72a3 272 c = getc (fin);
ae3c3164
AD
273
274 ended = 0;
275 while (!ended)
276 {
277 if (!cplus_comment && c == '*')
278 {
279 while (c == '*')
280 {
337bab46 281 obstack_1grow (oout1, c);
896fe5c1
AD
282 if (oout2)
283 obstack_1grow (oout2, c);
550a72a3 284 c = getc (fin);
ae3c3164
AD
285 }
286
287 if (c == '/')
288 {
337bab46 289 obstack_1grow (oout1, c);
896fe5c1
AD
290 if (oout2)
291 obstack_1grow (oout2, c);
ae3c3164
AD
292 ended = 1;
293 }
294 }
295 else if (c == '\n')
296 {
297 lineno++;
337bab46 298 obstack_1grow (oout1, c);
896fe5c1
AD
299 if (oout2)
300 obstack_1grow (oout2, c);
ae3c3164
AD
301 if (cplus_comment)
302 ended = 1;
303 else
550a72a3 304 c = getc (fin);
ae3c3164
AD
305 }
306 else if (c == EOF)
307 fatal (_("unterminated comment"));
308 else
309 {
337bab46 310 obstack_1grow (oout1, c);
896fe5c1
AD
311 if (oout2)
312 obstack_1grow (oout2, c);
550a72a3 313 c = getc (fin);
ae3c3164
AD
314 }
315 }
316}
317
318
550a72a3
AD
319/*-------------------------------------------------------------------.
320| Dump the comment (actually the current string starting with a `/') |
337bab46 321| from FIN to OOUT. |
550a72a3 322`-------------------------------------------------------------------*/
27821bff
AD
323
static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  /* Single destination: there is no second obstack to mirror into.  */
  copy_comment2 (fin, oout, NULL);
}
329
330
a70083a3 331/*-----------------------------------------------------------------.
337bab46 332| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
333| reference to this location. STACK_OFFSET is the number of values |
334| in the current rule so far, which says where to find `$0' with |
335| respect to the top of the stack. |
336`-----------------------------------------------------------------*/
1ff442ca 337
a70083a3 338static inline void
337bab46 339copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 340{
a70083a3 341 int c;
1ff442ca 342
a70083a3
AD
343 c = getc (fin);
344 if (c == '$')
1ff442ca 345 {
ff4423cc 346 obstack_sgrow (oout, "yyloc");
89cab50d 347 locations_flag = 1;
a70083a3
AD
348 }
349 else if (isdigit (c) || c == '-')
350 {
351 int n;
1ff442ca 352
a70083a3
AD
353 ungetc (c, fin);
354 n = read_signed_integer (fin);
943819bf 355
337bab46 356 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 357 locations_flag = 1;
1ff442ca 358 }
a70083a3 359 else
ff4a34be
AD
360 {
361 char buf[] = "@c";
362 buf[1] = c;
363 complain (_("%s is invalid"), quote (buf));
364 }
1ff442ca 365}
79282c5a
AD
366
367
368/*-------------------------------------------------------------------.
369| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
370| |
371| Possible inputs: $[<TYPENAME>]($|integer) |
372| |
337bab46 373| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
374| the number of values in the current rule so far, which says where |
375| to find `$0' with respect to the top of the stack. |
376`-------------------------------------------------------------------*/
377
378static inline void
337bab46 379copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
380 symbol_list *rule, int stack_offset)
381{
382 int c = getc (fin);
b0ce6046 383 const char *type_name = NULL;
79282c5a 384
f282676b 385 /* Get the type name if explicit. */
79282c5a
AD
386 if (c == '<')
387 {
f282676b 388 read_type_name (fin);
79282c5a
AD
389 type_name = token_buffer;
390 value_components_used = 1;
79282c5a
AD
391 c = getc (fin);
392 }
393
394 if (c == '$')
395 {
ff4423cc 396 obstack_sgrow (oout, "yyval");
8c7ebe49 397
79282c5a
AD
398 if (!type_name)
399 type_name = get_type_name (0, rule);
400 if (type_name)
337bab46 401 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
402 if (!type_name && typed)
403 complain (_("$$ of `%s' has no declared type"),
404 rule->sym->tag);
405 }
406 else if (isdigit (c) || c == '-')
407 {
408 int n;
409 ungetc (c, fin);
410 n = read_signed_integer (fin);
411
412 if (!type_name && n > 0)
413 type_name = get_type_name (n, rule);
414
337bab46 415 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 416
79282c5a 417 if (type_name)
337bab46 418 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
419 if (!type_name && typed)
420 complain (_("$%d of `%s' has no declared type"),
421 n, rule->sym->tag);
422 }
423 else
424 {
425 char buf[] = "$c";
426 buf[1] = c;
427 complain (_("%s is invalid"), quote (buf));
428 }
429}
a70083a3
AD
430\f
431/*-------------------------------------------------------------------.
432| Copy the contents of a `%{ ... %}' into the definitions file. The |
433| `%{' has already been read. Return after reading the `%}'. |
434`-------------------------------------------------------------------*/
1ff442ca 435
4a120d45 436static void
118fb205 437copy_definition (void)
1ff442ca 438{
a70083a3 439 int c;
ae3c3164 440 /* -1 while reading a character if prev char was %. */
a70083a3 441 int after_percent;
1ff442ca 442
89cab50d 443 if (!no_lines_flag)
25b222fa
MA
444 {
445 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 446 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
447 muscle_find("filename")));
448 }
1ff442ca
NF
449
450 after_percent = 0;
451
ae3c3164 452 c = getc (finput);
1ff442ca
NF
453
454 for (;;)
455 {
456 switch (c)
457 {
458 case '\n':
dd60faec 459 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
460 lineno++;
461 break;
462
463 case '%':
a70083a3 464 after_percent = -1;
1ff442ca 465 break;
a083fbbf 466
1ff442ca
NF
467 case '\'':
468 case '"':
337bab46 469 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
470 break;
471
472 case '/':
337bab46 473 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
474 break;
475
476 case EOF:
a70083a3 477 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
478
479 default:
dd60faec 480 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
481 }
482
a70083a3 483 c = getc (finput);
1ff442ca
NF
484
485 if (after_percent)
486 {
487 if (c == '}')
488 return;
dd60faec 489 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
490 }
491 after_percent = 0;
1ff442ca 492 }
1ff442ca
NF
493}
494
495
d7020c20
AD
496/*-------------------------------------------------------------------.
497| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
498| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
499| are reversed. |
500`-------------------------------------------------------------------*/
1ff442ca 501
4a120d45 502static void
d7020c20 503parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 504{
342b8b6e
AD
505 token_t token = tok_undef;
506 char *typename = NULL;
1ff442ca 507
1e9798d5
AD
508 /* The symbol being defined. */
509 struct bucket *symbol = NULL;
510
511 /* After `%token' and `%nterm', any number of symbols maybe be
512 defined. */
1ff442ca
NF
513 for (;;)
514 {
e6011337
JT
515 int tmp_char = ungetc (skip_white_space (), finput);
516
1e9798d5
AD
517 /* `%' (for instance from `%token', or from `%%' etc.) is the
518 only valid means to end this declaration. */
e6011337 519 if (tmp_char == '%')
1ff442ca 520 return;
e6011337 521 if (tmp_char == EOF)
a0f6b076 522 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 523
a70083a3 524 token = lex ();
511e79b3 525 if (token == tok_comma)
943819bf
RS
526 {
527 symbol = NULL;
528 continue;
529 }
511e79b3 530 if (token == tok_typename)
1ff442ca 531 {
95e36146 532 typename = xstrdup (token_buffer);
1ff442ca 533 value_components_used = 1;
943819bf
RS
534 symbol = NULL;
535 }
511e79b3 536 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 537 {
8e03724b
AD
538 if (symval->alias)
539 warn (_("symbol `%s' used more than once as a literal string"),
540 symval->tag);
541 else if (symbol->alias)
542 warn (_("symbol `%s' given more than one literal string"),
543 symbol->tag);
544 else
545 {
546 symval->class = token_sym;
547 symval->type_name = typename;
548 symval->user_token_number = symbol->user_token_number;
549 symbol->user_token_number = SALIAS;
550 symval->alias = symbol;
551 symbol->alias = symval;
552 /* symbol and symval combined are only one symbol */
553 nsyms--;
554 }
8e03724b 555 symbol = NULL;
1ff442ca 556 }
511e79b3 557 else if (token == tok_identifier)
1ff442ca
NF
558 {
559 int oldclass = symval->class;
943819bf 560 symbol = symval;
1ff442ca 561
943819bf 562 if (symbol->class == what_is_not)
a0f6b076 563 complain (_("symbol %s redefined"), symbol->tag);
943819bf 564 symbol->class = what_is;
d7020c20 565 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 566 symbol->value = nvars++;
1ff442ca
NF
567
568 if (typename)
569 {
943819bf
RS
570 if (symbol->type_name == NULL)
571 symbol->type_name = typename;
a70083a3 572 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 573 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
574 }
575 }
511e79b3 576 else if (symbol && token == tok_number)
a70083a3 577 {
943819bf 578 symbol->user_token_number = numval;
a70083a3 579 }
1ff442ca 580 else
943819bf 581 {
a0f6b076 582 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
583 token_buffer,
584 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 585 skip_to_char ('%');
943819bf 586 }
1ff442ca
NF
587 }
588
589}
590
1ff442ca 591
d7020c20
AD
592/*------------------------------.
593| Parse what comes after %start |
594`------------------------------*/
1ff442ca 595
4a120d45 596static void
118fb205 597parse_start_decl (void)
1ff442ca
NF
598{
599 if (start_flag)
27821bff 600 complain (_("multiple %s declarations"), "%start");
511e79b3 601 if (lex () != tok_identifier)
27821bff 602 complain (_("invalid %s declaration"), "%start");
943819bf
RS
603 else
604 {
605 start_flag = 1;
606 startval = symval;
607 }
1ff442ca
NF
608}
609
a70083a3
AD
610/*-----------------------------------------------------------.
611| read in a %type declaration and record its information for |
612| get_type_name to access |
613`-----------------------------------------------------------*/
614
615static void
616parse_type_decl (void)
617{
a70083a3
AD
618 char *name;
619
511e79b3 620 if (lex () != tok_typename)
a70083a3
AD
621 {
622 complain ("%s", _("%type declaration has no <typename>"));
623 skip_to_char ('%');
624 return;
625 }
626
95e36146 627 name = xstrdup (token_buffer);
a70083a3
AD
628
629 for (;;)
630 {
f17bcd1f 631 token_t t;
a70083a3
AD
632 int tmp_char = ungetc (skip_white_space (), finput);
633
634 if (tmp_char == '%')
635 return;
636 if (tmp_char == EOF)
637 fatal (_("Premature EOF after %s"), token_buffer);
638
639 t = lex ();
640
641 switch (t)
1ff442ca
NF
642 {
643
511e79b3
AD
644 case tok_comma:
645 case tok_semicolon:
1ff442ca
NF
646 break;
647
511e79b3 648 case tok_identifier:
1ff442ca
NF
649 if (symval->type_name == NULL)
650 symval->type_name = name;
a70083a3 651 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 652 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
653
654 break;
655
656 default:
a0f6b076
AD
657 complain (_("invalid %%type declaration due to item: %s"),
658 token_buffer);
a70083a3 659 skip_to_char ('%');
1ff442ca
NF
660 }
661 }
662}
663
664
665
d7020c20
AD
666/*----------------------------------------------------------------.
667| Read in a %left, %right or %nonassoc declaration and record its |
668| information. |
669`----------------------------------------------------------------*/
1ff442ca 670
4a120d45 671static void
d7020c20 672parse_assoc_decl (associativity assoc)
1ff442ca 673{
a70083a3
AD
674 char *name = NULL;
675 int prev = 0;
1ff442ca 676
a70083a3 677 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 678
1ff442ca
NF
679 for (;;)
680 {
f17bcd1f 681 token_t t;
e6011337 682 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 683
e6011337 684 if (tmp_char == '%')
1ff442ca 685 return;
e6011337 686 if (tmp_char == EOF)
a0f6b076 687 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 688
a70083a3 689 t = lex ();
1ff442ca
NF
690
691 switch (t)
692 {
511e79b3 693 case tok_typename:
95e36146 694 name = xstrdup (token_buffer);
1ff442ca
NF
695 break;
696
511e79b3 697 case tok_comma:
1ff442ca
NF
698 break;
699
511e79b3 700 case tok_identifier:
1ff442ca 701 if (symval->prec != 0)
a0f6b076 702 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
703 symval->prec = lastprec;
704 symval->assoc = assoc;
d7020c20 705 if (symval->class == nterm_sym)
a0f6b076 706 complain (_("symbol %s redefined"), symval->tag);
d7020c20 707 symval->class = token_sym;
1ff442ca 708 if (name)
a70083a3 709 { /* record the type, if one is specified */
1ff442ca
NF
710 if (symval->type_name == NULL)
711 symval->type_name = name;
a70083a3 712 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 713 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
714 }
715 break;
716
511e79b3
AD
717 case tok_number:
718 if (prev == tok_identifier)
a70083a3 719 {
1ff442ca 720 symval->user_token_number = numval;
a70083a3
AD
721 }
722 else
723 {
724 complain (_
725 ("invalid text (%s) - number should be after identifier"),
726token_buffer);
727 skip_to_char ('%');
728 }
1ff442ca
NF
729 break;
730
511e79b3 731 case tok_semicolon:
1ff442ca
NF
732 return;
733
734 default:
a0f6b076 735 complain (_("unexpected item: %s"), token_buffer);
a70083a3 736 skip_to_char ('%');
1ff442ca
NF
737 }
738
739 prev = t;
1ff442ca
NF
740 }
741}
742
743
744
dd60faec 745/*--------------------------------------------------------------.
180d45ba
PB
746| Copy the union declaration into the stype muscle |
747| (and fdefines), where it is made into the definition of |
748| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 749`--------------------------------------------------------------*/
1ff442ca 750
4a120d45 751static void
118fb205 752parse_union_decl (void)
1ff442ca 753{
a70083a3
AD
754 int c;
755 int count = 0;
180d45ba 756 struct obstack union_obstack;
5f7e0832
AD
757 const char *prologue = "\
758#ifndef YYSTYPE\n\
759typedef union";
760 const char *epilogue = "\
761 yystype;\n\
762# define YYSTYPE yystype\n\
763#endif\n";
1ff442ca
NF
764
765 if (typed)
27821bff 766 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
767
768 typed = 1;
769
f6ec6d13
AD
770 /* FIXME: I'm worried: are you sure attrs_obstack is properly
771 filled? */
5f7e0832
AD
772 /* I don't see any reasons to keep this line, because we should
773 create a special skeleton for this option. */
180d45ba 774 if (no_lines_flag)
dd60faec 775 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 776
180d45ba
PB
777 obstack_init (&union_obstack);
778 obstack_sgrow (&union_obstack, "union");
896fe5c1 779 if (defines_flag)
5f7e0832 780 obstack_sgrow (&defines_obstack, prologue);
1ff442ca 781
27821bff 782 c = getc (finput);
1ff442ca
NF
783
784 while (c != EOF)
785 {
342b8b6e
AD
786 /* If C contains '/', it is output by copy_comment (). */
787 if (c != '/')
788 {
f6ec6d13 789 obstack_1grow (&union_obstack, c);
342b8b6e
AD
790 if (defines_flag)
791 obstack_1grow (&defines_obstack, c);
792 }
1ff442ca
NF
793
794 switch (c)
795 {
796 case '\n':
797 lineno++;
798 break;
799
800 case '/':
180d45ba 801 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
802 break;
803
1ff442ca
NF
804 case '{':
805 count++;
806 break;
807
808 case '}':
809 if (count == 0)
27821bff 810 complain (_("unmatched %s"), "`}'");
1ff442ca 811 count--;
943819bf 812 if (count <= 0)
1ff442ca 813 {
896fe5c1 814 if (defines_flag)
5f7e0832 815 obstack_sgrow (&defines_obstack, epilogue);
1ff442ca 816 /* JF don't choke on trailing semi */
27821bff
AD
817 c = skip_white_space ();
818 if (c != ';')
a70083a3 819 ungetc (c, finput);
180d45ba
PB
820 obstack_1grow (&union_obstack, 0);
821 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
822 return;
823 }
824 }
825
27821bff 826 c = getc (finput);
1ff442ca 827 }
180d45ba 828
1ff442ca
NF
829}
830
d7020c20
AD
831
832/*-------------------------------------------------------.
833| Parse the declaration %expect N which says to expect N |
834| shift-reduce conflicts. |
835`-------------------------------------------------------*/
1ff442ca 836
4a120d45 837static void
118fb205 838parse_expect_decl (void)
1ff442ca 839{
131e2fef 840 int c = skip_white_space ();
1ff442ca
NF
841 ungetc (c, finput);
842
131e2fef 843 if (!isdigit (c))
79282c5a 844 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
845 else
846 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
847}
848
a70083a3
AD
849
850/*-------------------------------------------------------------------.
851| Parse what comes after %thong. the full syntax is |
852| |
853| %thong <type> token number literal |
854| |
855| the <type> or number may be omitted. The number specifies the |
856| user_token_number. |
857| |
858| Two symbols are entered in the table, one for the token symbol and |
859| one for the literal. Both are given the <type>, if any, from the |
860| declaration. The ->user_token_number of the first is SALIAS and |
861| the ->user_token_number of the second is set to the number, if |
862| any, from the declaration. The two symbols are linked via |
863| pointers in their ->alias fields. |
864| |
865| During OUTPUT_DEFINES_TABLE, the symbol is reported; thereafter, |
866| only the literal string is retained: it is the literal string |
867| that is output to yytname. |
868`-------------------------------------------------------------------*/
869
870static void
871parse_thong_decl (void)
7b306f52 872{
f17bcd1f 873 token_t token;
a70083a3
AD
874 struct bucket *symbol;
875 char *typename = 0;
6b7e85b9 876 int usrtoknum = SUNDEF;
7b306f52 877
a70083a3 878 token = lex (); /* fetch typename or first token */
511e79b3 879 if (token == tok_typename)
7b306f52 880 {
95e36146 881 typename = xstrdup (token_buffer);
a70083a3
AD
882 value_components_used = 1;
883 token = lex (); /* fetch first token */
7b306f52 884 }
7b306f52 885
a70083a3 886 /* process first token */
7b306f52 887
511e79b3 888 if (token != tok_identifier)
a70083a3
AD
889 {
890 complain (_("unrecognized item %s, expected an identifier"),
891 token_buffer);
892 skip_to_char ('%');
893 return;
7b306f52 894 }
d7020c20 895 symval->class = token_sym;
a70083a3
AD
896 symval->type_name = typename;
897 symval->user_token_number = SALIAS;
898 symbol = symval;
7b306f52 899
a70083a3 900 token = lex (); /* get number or literal string */
1ff442ca 901
511e79b3 902 if (token == tok_number)
943819bf 903 {
a70083a3
AD
904 usrtoknum = numval;
905 token = lex (); /* okay, did number, now get literal */
943819bf 906 }
1ff442ca 907
a70083a3 908 /* process literal string token */
1ff442ca 909
511e79b3 910 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 911 {
a70083a3
AD
912 complain (_("expected string constant instead of %s"), token_buffer);
913 skip_to_char ('%');
914 return;
1ff442ca 915 }
d7020c20 916 symval->class = token_sym;
a70083a3
AD
917 symval->type_name = typename;
918 symval->user_token_number = usrtoknum;
1ff442ca 919
a70083a3
AD
920 symval->alias = symbol;
921 symbol->alias = symval;
1ff442ca 922
79282c5a
AD
923 /* symbol and symval combined are only one symbol. */
924 nsyms--;
a70083a3 925}
3cef001a 926
b6610515 927static void
11d82f03 928parse_muscle_decl (void)
b6610515
RA
929{
930 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
931 char* muscle_key;
932 char* muscle_value;
b6610515
RA
933
934 /* Read key. */
935 if (!isalpha (ch) && ch != '_')
936 {
937 complain (_("invalid %s declaration"), "%define");
938 skip_to_char ('%');
939 return;
940 }
11d82f03
MA
941 copy_identifier (finput, &muscle_obstack);
942 obstack_1grow (&muscle_obstack, 0);
943 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 944
b6610515
RA
945 /* Read value. */
946 ch = skip_white_space ();
947 if (ch != '"')
948 {
949 ungetc (ch, finput);
950 if (ch != EOF)
951 {
952 complain (_("invalid %s declaration"), "%define");
953 skip_to_char ('%');
954 return;
955 }
956 else
957 fatal (_("Premature EOF after %s"), "\"");
958 }
11d82f03
MA
959 copy_string2 (finput, &muscle_obstack, '"', 0);
960 obstack_1grow (&muscle_obstack, 0);
961 muscle_value = obstack_finish (&muscle_obstack);
b6610515 962
b6610515 963 /* Store the (key, value) pair in the environment. */
11d82f03 964 muscle_insert (muscle_key, muscle_value);
b6610515
RA
965}
966
2ba3b73c 967
426cf563
MA
968
969/*---------------------------------.
a870c567 970| Parse a double quoted parameter. |
426cf563
MA
971`---------------------------------*/
972
973static const char *
974parse_dquoted_param (const char *from)
975{
976 struct obstack param_obstack;
977 const char *param = NULL;
978 int c;
979
980 obstack_init (&param_obstack);
981 c = skip_white_space ();
982
983 if (c != '"')
984 {
985 complain (_("invalid %s declaration"), from);
986 ungetc (c, finput);
987 skip_to_char ('%');
988 return NULL;
989 }
990
2648a72d
AD
991 while ((c = literalchar ()) != '"')
992 obstack_1grow (&param_obstack, c);
a870c567 993
426cf563
MA
994 obstack_1grow (&param_obstack, '\0');
995 param = obstack_finish (&param_obstack);
996
997 if (c != '"' || strlen (param) == 0)
998 {
999 complain (_("invalid %s declaration"), from);
1000 if (c != '"')
1001 ungetc (c, finput);
1002 skip_to_char ('%');
1003 return NULL;
1004 }
1005
1006 return param;
1007}
1008
2ba3b73c
MA
1009/*----------------------------------.
1010| Parse what comes after %skeleton. |
1011`----------------------------------*/
1012
a870c567 1013static void
2ba3b73c
MA
1014parse_skel_decl (void)
1015{
426cf563 1016 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
1017}
1018
a70083a3
AD
1019/*----------------------------------------------------------------.
1020| Read from finput until `%%' is seen. Discard the `%%'. Handle |
1021| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 1022| groups to ATTRS_OBSTACK. |
a70083a3 1023`----------------------------------------------------------------*/
1ff442ca 1024
4a120d45 1025static void
a70083a3 1026read_declarations (void)
1ff442ca 1027{
a70083a3 1028 for (;;)
1ff442ca 1029 {
951366c1 1030 int c = skip_white_space ();
1ff442ca 1031
a70083a3
AD
1032 if (c == '%')
1033 {
951366c1 1034 token_t tok = parse_percent_token ();
1ff442ca 1035
a70083a3 1036 switch (tok)
943819bf 1037 {
511e79b3 1038 case tok_two_percents:
a70083a3 1039 return;
1ff442ca 1040
511e79b3 1041 case tok_percent_left_curly:
a70083a3
AD
1042 copy_definition ();
1043 break;
1ff442ca 1044
511e79b3 1045 case tok_token:
d7020c20 1046 parse_token_decl (token_sym, nterm_sym);
a70083a3 1047 break;
1ff442ca 1048
511e79b3 1049 case tok_nterm:
d7020c20 1050 parse_token_decl (nterm_sym, token_sym);
a70083a3 1051 break;
1ff442ca 1052
511e79b3 1053 case tok_type:
a70083a3
AD
1054 parse_type_decl ();
1055 break;
1ff442ca 1056
511e79b3 1057 case tok_start:
a70083a3
AD
1058 parse_start_decl ();
1059 break;
118fb205 1060
511e79b3 1061 case tok_union:
a70083a3
AD
1062 parse_union_decl ();
1063 break;
1ff442ca 1064
511e79b3 1065 case tok_expect:
a70083a3
AD
1066 parse_expect_decl ();
1067 break;
6deb4447 1068
511e79b3 1069 case tok_thong:
a70083a3
AD
1070 parse_thong_decl ();
1071 break;
d7020c20 1072
511e79b3 1073 case tok_left:
d7020c20 1074 parse_assoc_decl (left_assoc);
a70083a3 1075 break;
1ff442ca 1076
511e79b3 1077 case tok_right:
d7020c20 1078 parse_assoc_decl (right_assoc);
a70083a3 1079 break;
1ff442ca 1080
511e79b3 1081 case tok_nonassoc:
d7020c20 1082 parse_assoc_decl (non_assoc);
a70083a3 1083 break;
1ff442ca 1084
b6610515 1085 case tok_define:
11d82f03 1086 parse_muscle_decl ();
b6610515 1087 break;
342b8b6e 1088
2ba3b73c
MA
1089 case tok_skel:
1090 parse_skel_decl ();
1091 break;
b6610515 1092
511e79b3 1093 case tok_noop:
a70083a3 1094 break;
1ff442ca 1095
951366c1
AD
1096 case tok_stropt:
1097 case tok_intopt:
1098 case tok_obsolete:
951366c1
AD
1099 abort ();
1100 break;
1101
e0c40012 1102 case tok_illegal:
a70083a3
AD
1103 default:
1104 complain (_("unrecognized: %s"), token_buffer);
1105 skip_to_char ('%');
1106 }
1107 }
1108 else if (c == EOF)
1109 fatal (_("no input grammar"));
1110 else
1111 {
ff4a34be
AD
1112 char buf[] = "c";
1113 buf[0] = c;
1114 complain (_("unknown character: %s"), quote (buf));
a70083a3 1115 skip_to_char ('%');
1ff442ca 1116 }
1ff442ca 1117 }
1ff442ca 1118}
a70083a3
AD
1119\f
1120/*-------------------------------------------------------------------.
1121| Assuming that a `{' has just been seen, copy everything up to the |
1122| matching `}' into the actions file. STACK_OFFSET is the number of |
1123| values in the current rule so far, which says where to find `$0' |
1124| with respect to the top of the stack. |
1125`-------------------------------------------------------------------*/
1ff442ca 1126
4a120d45 1127static void
79282c5a 1128copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1129{
a70083a3 1130 int c;
a70083a3 1131 int count;
1ff442ca
NF
1132
1133 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1134 if (semantic_parser)
1135 stack_offset = 0;
1ff442ca 1136
1ff442ca 1137 count = 1;
a70083a3 1138 c = getc (finput);
1ff442ca
NF
1139
1140 while (count > 0)
1141 {
1142 while (c != '}')
a70083a3
AD
1143 {
1144 switch (c)
1ff442ca
NF
1145 {
1146 case '\n':
8c7ebe49 1147 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1148 lineno++;
1149 break;
1150
1151 case '{':
8c7ebe49 1152 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1153 count++;
1154 break;
1155
1156 case '\'':
1157 case '"':
337bab46 1158 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1159 break;
1160
1161 case '/':
337bab46 1162 copy_comment (finput, &action_obstack);
1ff442ca
NF
1163 break;
1164
1165 case '$':
337bab46 1166 copy_dollar (finput, &action_obstack,
8c7ebe49 1167 rule, stack_offset);
1ff442ca
NF
1168 break;
1169
1170 case '@':
337bab46 1171 copy_at (finput, &action_obstack,
8c7ebe49 1172 stack_offset);
6666f98f 1173 break;
1ff442ca
NF
1174
1175 case EOF:
27821bff 1176 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1177
1178 default:
8c7ebe49 1179 obstack_1grow (&action_obstack, c);
a70083a3
AD
1180 }
1181
1182 c = getc (finput);
1183 }
1184
1185 /* above loop exits when c is '}' */
1186
1187 if (--count)
1188 {
8c7ebe49 1189 obstack_1grow (&action_obstack, c);
a70083a3
AD
1190 c = getc (finput);
1191 }
1192 }
1193
3f96f4dc
AD
1194 obstack_1grow (&action_obstack, '\0');
1195 rule->action = obstack_finish (&action_obstack);
1196 rule->action_line = lineno;
a70083a3
AD
1197}
1198\f
1199/*-------------------------------------------------------------------.
1200| After `%guard' is seen in the input file, copy the actual guard |
1201| into the guards file. If the guard is followed by an action, copy |
1202| that into the actions file. STACK_OFFSET is the number of values |
1203| in the current rule so far, which says where to find `$0' with |
1204| respect to the top of the stack, for the simple parser in which |
1205| the stack is not popped until after the guard is run. |
1206`-------------------------------------------------------------------*/
1207
1208static void
79282c5a 1209copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1210{
1211 int c;
a70083a3 1212 int count;
a70083a3
AD
1213 int brace_flag = 0;
1214
1215 /* offset is always 0 if parser has already popped the stack pointer */
1216 if (semantic_parser)
1217 stack_offset = 0;
1218
ea5607fd 1219 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1220 if (!no_lines_flag)
25b222fa 1221 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1222 lineno, quotearg_style (c_quoting_style,
11d82f03 1223 muscle_find ("filename")));
ea5607fd 1224 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1225
1226 count = 0;
1227 c = getc (finput);
1228
1229 while (brace_flag ? (count > 0) : (c != ';'))
1230 {
1231 switch (c)
1232 {
1233 case '\n':
ea5607fd 1234 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1235 lineno++;
1236 break;
1237
1238 case '{':
ea5607fd 1239 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1240 brace_flag = 1;
1241 count++;
1242 break;
1243
1244 case '}':
ea5607fd 1245 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1246 if (count > 0)
1247 count--;
1248 else
1249 {
1250 complain (_("unmatched %s"), "`}'");
1251 c = getc (finput); /* skip it */
1252 }
1253 break;
1254
1255 case '\'':
1256 case '"':
337bab46 1257 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1258 break;
1259
1260 case '/':
337bab46 1261 copy_comment (finput, &guard_obstack);
a70083a3
AD
1262 break;
1263
1264 case '$':
337bab46 1265 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1266 break;
1ff442ca 1267
a70083a3 1268 case '@':
337bab46 1269 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1270 break;
1ff442ca 1271
a70083a3
AD
1272 case EOF:
1273 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1274
a70083a3 1275 default:
ea5607fd 1276 obstack_1grow (&guard_obstack, c);
1ff442ca 1277 }
a70083a3
AD
1278
1279 if (c != '}' || count != 0)
1280 c = getc (finput);
1ff442ca
NF
1281 }
1282
a70083a3
AD
1283 c = skip_white_space ();
1284
ff4423cc 1285 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1286 if (c == '{')
1287 copy_action (rule, stack_offset);
1288 else if (c == '=')
1289 {
1290 c = getc (finput); /* why not skip_white_space -wjh */
1291 if (c == '{')
1292 copy_action (rule, stack_offset);
1293 }
1294 else
1295 ungetc (c, finput);
1ff442ca 1296}
a70083a3
AD
1297\f
1298
a70083a3
AD
1299/*-------------------------------------------------------------------.
1300| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1301| with the user's names. |
1302`-------------------------------------------------------------------*/
1ff442ca 1303
4a120d45 1304static bucket *
118fb205 1305gensym (void)
1ff442ca 1306{
274d42ce
AD
1307 /* Incremented for each generated symbol */
1308 static int gensym_count = 0;
1309 static char buf[256];
1310
a70083a3 1311 bucket *sym;
1ff442ca 1312
274d42ce
AD
1313 sprintf (buf, "@%d", ++gensym_count);
1314 token_buffer = buf;
a70083a3 1315 sym = getsym (token_buffer);
d7020c20 1316 sym->class = nterm_sym;
1ff442ca 1317 sym->value = nvars++;
36281465 1318 return sym;
1ff442ca 1319}
a70083a3 1320\f
107f7dfb
AD
1321/*-------------------------------------------------------------------.
1322| Parse the input grammar into a one symbol_list structure. Each |
1323| rule is represented by a sequence of symbols: the left hand side |
1324| followed by the contents of the right hand side, followed by a |
1325| null pointer instead of a symbol to terminate the rule. The next |
1326| symbol is the lhs of the following rule. |
1327| |
1328| All guards and actions are copied out to the appropriate files, |
1329| labelled by the rule number they apply to. |
1330| |
1331| Bison used to allow some %directives in the rules sections, but |
1332| this is no longer consider appropriate: (i) the documented grammar |
1333| doesn't claim it, (ii), it would promote bad style, (iii), error |
1334| recovery for %directives consists in skipping the junk until a `%' |
1335| is seen and helrp synchronizing. This scheme is definitely wrong |
1336| in the rules section. |
1337`-------------------------------------------------------------------*/
1ff442ca 1338
4a120d45 1339static void
118fb205 1340readgram (void)
1ff442ca 1341{
f17bcd1f 1342 token_t t;
a70083a3 1343 bucket *lhs = NULL;
107f7dfb
AD
1344 symbol_list *p = NULL;
1345 symbol_list *p1 = NULL;
a70083a3 1346 bucket *bp;
1ff442ca 1347
ff4a34be
AD
1348 /* Points to first symbol_list of current rule. its symbol is the
1349 lhs of the rule. */
107f7dfb 1350 symbol_list *crule = NULL;
ff4a34be 1351 /* Points to the symbol_list preceding crule. */
107f7dfb 1352 symbol_list *crule1 = NULL;
1ff442ca 1353
a70083a3 1354 t = lex ();
1ff442ca 1355
511e79b3 1356 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1357 if (t == tok_identifier || t == tok_bar)
1358 {
1359 int action_flag = 0;
1360 /* Number of symbols in rhs of this rule so far */
1361 int rulelength = 0;
1362 int xactions = 0; /* JF for error checking */
1363 bucket *first_rhs = 0;
1364
1365 if (t == tok_identifier)
1366 {
1367 lhs = symval;
1368
1369 if (!start_flag)
1370 {
1371 startval = lhs;
1372 start_flag = 1;
1373 }
1ff442ca 1374
107f7dfb
AD
1375 t = lex ();
1376 if (t != tok_colon)
1377 {
1378 complain (_("ill-formed rule: initial symbol not followed by colon"));
1379 unlex (t);
1380 }
1381 }
1382
1383 if (nrules == 0 && t == tok_bar)
1384 {
1385 complain (_("grammar starts with vertical bar"));
1386 lhs = symval; /* BOGUS: use a random symval */
1387 }
1388 /* start a new rule and record its lhs. */
1389
1390 nrules++;
1391 nitems++;
1392
1393 p = symbol_list_new (lhs);
1394
1395 crule1 = p1;
1396 if (p1)
1397 p1->next = p;
1398 else
1399 grammar = p;
1ff442ca 1400
107f7dfb
AD
1401 p1 = p;
1402 crule = p;
1ff442ca 1403
107f7dfb 1404 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1405
107f7dfb
AD
1406 if (lhs->class == unknown_sym)
1407 {
1408 lhs->class = nterm_sym;
1409 lhs->value = nvars;
1410 nvars++;
1411 }
1412 else if (lhs->class == token_sym)
1413 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1414
107f7dfb 1415 /* read the rhs of the rule. */
1ff442ca 1416
107f7dfb
AD
1417 for (;;)
1418 {
1419 t = lex ();
1420 if (t == tok_prec)
1421 {
1422 t = lex ();
1423 crule->ruleprec = symval;
1424 t = lex ();
1425 }
1426
1427 if (!(t == tok_identifier || t == tok_left_curly))
1428 break;
1ff442ca 1429
107f7dfb
AD
1430 /* If next token is an identifier, see if a colon follows it.
1431 If one does, exit this rule now. */
1432 if (t == tok_identifier)
1433 {
1434 bucket *ssave;
1435 token_t t1;
1436
1437 ssave = symval;
1438 t1 = lex ();
1439 unlex (t1);
1440 symval = ssave;
1441 if (t1 == tok_colon)
1442 break;
1443
1444 if (!first_rhs) /* JF */
1445 first_rhs = symval;
1446 /* Not followed by colon =>
1447 process as part of this rule's rhs. */
1448 }
1449
1450 /* If we just passed an action, that action was in the middle
1451 of a rule, so make a dummy rule to reduce it to a
1452 non-terminal. */
1453 if (action_flag)
1454 {
1455 /* Since the action was written out with this rule's
1456 number, we must give the new rule this number by
1457 inserting the new rule before it. */
1458
1459 /* Make a dummy nonterminal, a gensym. */
1460 bucket *sdummy = gensym ();
1461
1462 /* Make a new rule, whose body is empty, before the
1463 current one, so that the action just read can
1464 belong to it. */
1465 nrules++;
1466 nitems++;
1467 p = symbol_list_new (sdummy);
1468 /* Attach its lineno to that of the host rule. */
1469 p->line = crule->line;
1470 if (crule1)
1471 crule1->next = p;
1472 else
1473 grammar = p;
1474 /* End of the rule. */
1475 crule1 = symbol_list_new (NULL);
1476 crule1->next = crule;
1477
1478 p->next = crule1;
1479
1480 /* Insert the dummy generated by that rule into this
1481 rule. */
1482 nitems++;
1483 p = symbol_list_new (sdummy);
1484 p1->next = p;
1485 p1 = p;
1486
1487 action_flag = 0;
1488 }
1489
1490 if (t == tok_identifier)
1491 {
1492 nitems++;
1493 p = symbol_list_new (symval);
1494 p1->next = p;
1495 p1 = p;
1496 }
1497 else /* handle an action. */
1498 {
1499 copy_action (crule, rulelength);
1500 action_flag = 1;
1501 xactions++; /* JF */
1502 }
1503 rulelength++;
1504 } /* end of read rhs of rule */
1505
1506 /* Put an empty link in the list to mark the end of this rule */
1507 p = symbol_list_new (NULL);
1508 p1->next = p;
1509 p1 = p;
1510
1511 if (t == tok_prec)
1512 {
1513 complain (_("two @prec's in a row"));
1514 t = lex ();
1515 crule->ruleprec = symval;
1516 t = lex ();
1517 }
1518 if (t == tok_guard)
1519 {
1520 if (!semantic_parser)
1521 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1522
107f7dfb 1523 copy_guard (crule, rulelength);
a70083a3 1524 t = lex ();
107f7dfb
AD
1525 }
1526 else if (t == tok_left_curly)
1527 {
1528 /* This case never occurs -wjh */
1529 if (action_flag)
1530 complain (_("two actions at end of one rule"));
1531 copy_action (crule, rulelength);
1532 action_flag = 1;
1533 xactions++; /* -wjh */
1534 t = lex ();
1535 }
1536 /* If $$ is being set in default way, report if any type
1537 mismatch. */
1538 else if (!xactions
1539 && first_rhs && lhs->type_name != first_rhs->type_name)
1540 {
1541 if (lhs->type_name == 0
1542 || first_rhs->type_name == 0
1543 || strcmp (lhs->type_name, first_rhs->type_name))
1544 complain (_("type clash (`%s' `%s') on default action"),
1545 lhs->type_name ? lhs->type_name : "",
1546 first_rhs->type_name ? first_rhs->type_name : "");
1547 }
1548 /* Warn if there is no default for $$ but we need one. */
1549 else if (!xactions && !first_rhs && lhs->type_name != 0)
1550 complain (_("empty rule for typed nonterminal, and no action"));
1551 if (t == tok_semicolon)
a70083a3 1552 t = lex ();
107f7dfb
AD
1553 }
1554 else
1555 {
1556 complain (_("invalid input: %s"), quote (token_buffer));
1557 t = lex ();
1558 }
943819bf 1559
1ff442ca 1560
943819bf
RS
1561 /* grammar has been read. Do some checking */
1562
1ff442ca 1563 if (nsyms > MAXSHORT)
a0f6b076
AD
1564 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1565 MAXSHORT);
1ff442ca 1566 if (nrules == 0)
a0f6b076 1567 fatal (_("no rules in the input grammar"));
1ff442ca 1568
1ff442ca
NF
1569 /* Report any undefined symbols and consider them nonterminals. */
1570
1571 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1572 if (bp->class == unknown_sym)
1ff442ca 1573 {
a70083a3
AD
1574 complain (_
1575 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1576 bp->tag);
d7020c20 1577 bp->class = nterm_sym;
1ff442ca
NF
1578 bp->value = nvars++;
1579 }
1580
1581 ntokens = nsyms - nvars;
1582}
ff48177d
MA
1583
1584/* At the end of the grammar file, some C source code must
63c2d5de 1585 be stored. It is going to be associated to the epilogue
ff48177d
MA
1586 directive. */
1587static void
1588read_additionnal_code (void)
1589{
1590 char c;
63c2d5de 1591 struct obstack el_obstack;
342b8b6e 1592
63c2d5de 1593 obstack_init (&el_obstack);
ff48177d 1594
710ddc4f
MA
1595 if (!no_lines_flag)
1596 {
1597 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1598 lineno, quotearg_style (c_quoting_style,
1599 muscle_find("filename")));
1600 }
1601
ff48177d 1602 while ((c = getc (finput)) != EOF)
63c2d5de 1603 obstack_1grow (&el_obstack, c);
342b8b6e 1604
63c2d5de 1605 obstack_1grow (&el_obstack, 0);
11d82f03 1606 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1607}
1608
a70083a3
AD
1609\f
1610/*--------------------------------------------------------------.
1611| For named tokens, but not literal ones, define the name. The |
1612| value is the user token number. |
1613`--------------------------------------------------------------*/
1ff442ca 1614
4a120d45 1615static void
896fe5c1 1616output_token_defines (struct obstack *oout)
1ff442ca 1617{
a70083a3
AD
1618 bucket *bp;
1619 char *cp, *symbol;
1620 char c;
1ff442ca 1621
a70083a3 1622 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1623 {
a70083a3
AD
1624 symbol = bp->tag; /* get symbol */
1625
1626 if (bp->value >= ntokens)
1627 continue;
1628 if (bp->user_token_number == SALIAS)
1629 continue;
1630 if ('\'' == *symbol)
1631 continue; /* skip literal character */
1632 if (bp == errtoken)
1633 continue; /* skip error token */
1634 if ('\"' == *symbol)
1ff442ca 1635 {
a70083a3
AD
1636 /* use literal string only if given a symbol with an alias */
1637 if (bp->alias)
1638 symbol = bp->alias->tag;
1639 else
1640 continue;
1641 }
1ff442ca 1642
a70083a3
AD
1643 /* Don't #define nonliteral tokens whose names contain periods. */
1644 cp = symbol;
1645 while ((c = *cp++) && c != '.');
1646 if (c != '\0')
1647 continue;
1ff442ca 1648
0b8afb77 1649 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1650 symbol, bp->user_token_number);
a70083a3 1651 if (semantic_parser)
342b8b6e
AD
1652 /* FIXME: This is certainly dead wrong, and should be just as
1653 above. --akim. */
0b8afb77 1654 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1655 }
1656}
1ff442ca
NF
1657
1658
037ca2f1
AD
1659/*------------------------------------------------------------------.
1660| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1661| number. |
1662`------------------------------------------------------------------*/
1663
1664static void
1665token_translations_init (void)
1666{
1667 bucket *bp = NULL;
1668 int i;
1669
1670 token_translations = XCALLOC (short, max_user_token_number + 1);
1671
1672 /* Initialize all entries for literal tokens to 2, the internal
1673 token number for $undefined., which represents all invalid
1674 inputs. */
1675 for (i = 0; i <= max_user_token_number; i++)
1676 token_translations[i] = 2;
1677
1678 for (bp = firstsymbol; bp; bp = bp->next)
1679 {
1680 /* Non-terminal? */
1681 if (bp->value >= ntokens)
1682 continue;
1683 /* A token string alias? */
1684 if (bp->user_token_number == SALIAS)
1685 continue;
6b7e85b9
AD
1686
1687 assert (bp->user_token_number != SUNDEF);
1688
037ca2f1
AD
1689 /* A token which translation has already been set? */
1690 if (token_translations[bp->user_token_number] != 2)
1691 complain (_("tokens %s and %s both assigned number %d"),
1692 tags[token_translations[bp->user_token_number]],
1693 bp->tag, bp->user_token_number);
1694 token_translations[bp->user_token_number] = bp->value;
1695 }
1696}
1697
1698
a70083a3
AD
1699/*------------------------------------------------------------------.
1700| Assign symbol numbers, and write definition of token names into |
b2ca4022 1701| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1702| of symbols. |
1703`------------------------------------------------------------------*/
1ff442ca 1704
4a120d45 1705static void
118fb205 1706packsymbols (void)
1ff442ca 1707{
342b8b6e 1708 bucket *bp = NULL;
a70083a3 1709 int tokno = 1;
a70083a3 1710 int last_user_token_number;
4a120d45 1711 static char DOLLAR[] = "$";
1ff442ca 1712
d7913476 1713 tags = XCALLOC (char *, nsyms + 1);
d7913476 1714 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1715
d7913476
AD
1716 sprec = XCALLOC (short, nsyms);
1717 sassoc = XCALLOC (short, nsyms);
1ff442ca 1718
037ca2f1
AD
1719 /* The EOF token. */
1720 tags[0] = DOLLAR;
1721 user_toknums[0] = 0;
1722
1ff442ca
NF
1723 max_user_token_number = 256;
1724 last_user_token_number = 256;
1725
1726 for (bp = firstsymbol; bp; bp = bp->next)
1727 {
d7020c20 1728 if (bp->class == nterm_sym)
1ff442ca
NF
1729 {
1730 bp->value += ntokens;
1731 }
943819bf
RS
1732 else if (bp->alias)
1733 {
0a6384c4
AD
1734 /* this symbol and its alias are a single token defn.
1735 allocate a tokno, and assign to both check agreement of
1736 ->prec and ->assoc fields and make both the same */
1737 if (bp->value == 0)
1738 bp->value = bp->alias->value = tokno++;
943819bf 1739
0a6384c4
AD
1740 if (bp->prec != bp->alias->prec)
1741 {
1742 if (bp->prec != 0 && bp->alias->prec != 0
1743 && bp->user_token_number == SALIAS)
a0f6b076
AD
1744 complain (_("conflicting precedences for %s and %s"),
1745 bp->tag, bp->alias->tag);
0a6384c4
AD
1746 if (bp->prec != 0)
1747 bp->alias->prec = bp->prec;
1748 else
1749 bp->prec = bp->alias->prec;
1750 }
943819bf 1751
0a6384c4
AD
1752 if (bp->assoc != bp->alias->assoc)
1753 {
a0f6b076
AD
1754 if (bp->assoc != 0 && bp->alias->assoc != 0
1755 && bp->user_token_number == SALIAS)
1756 complain (_("conflicting assoc values for %s and %s"),
1757 bp->tag, bp->alias->tag);
1758 if (bp->assoc != 0)
1759 bp->alias->assoc = bp->assoc;
1760 else
1761 bp->assoc = bp->alias->assoc;
1762 }
0a6384c4
AD
1763
1764 if (bp->user_token_number == SALIAS)
a70083a3 1765 continue; /* do not do processing below for SALIASs */
943819bf 1766
a70083a3 1767 }
d7020c20 1768 else /* bp->class == token_sym */
943819bf
RS
1769 {
1770 bp->value = tokno++;
1771 }
1772
d7020c20 1773 if (bp->class == token_sym)
1ff442ca 1774 {
6b7e85b9 1775 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1776 bp->user_token_number = ++last_user_token_number;
1777 if (bp->user_token_number > max_user_token_number)
1778 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1779 }
1780
1781 tags[bp->value] = bp->tag;
943819bf 1782 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1783 sprec[bp->value] = bp->prec;
1784 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1785 }
1786
037ca2f1 1787 token_translations_init ();
1ff442ca
NF
1788
1789 error_token_number = errtoken->value;
1790
e3f1699f
AD
1791 if (startval->class == unknown_sym)
1792 fatal (_("the start symbol %s is undefined"), startval->tag);
1793 else if (startval->class == token_sym)
1794 fatal (_("the start symbol %s is a token"), startval->tag);
1795
1796 start_symbol = startval->value;
1797}
1798
1799
1800/*-----------------------------------.
1801| Output definition of token names. |
1802`-----------------------------------*/
1803
1804static void
1805symbols_output (void)
1806{
342b8b6e
AD
1807 {
1808 struct obstack tokendefs;
1809 obstack_init (&tokendefs);
1810 output_token_defines (&tokendefs);
1811 obstack_1grow (&tokendefs, 0);
1812 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1813 obstack_free (&tokendefs, NULL);
1814 }
b6610515 1815
89cab50d 1816 if (defines_flag)
1ff442ca 1817 {
896fe5c1 1818 output_token_defines (&defines_obstack);
1ff442ca
NF
1819
1820 if (!pure_parser)
78af9bbc
AD
1821 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1822 spec_name_prefix);
1ff442ca 1823 if (semantic_parser)
037ca2f1
AD
1824 {
1825 int i;
1826
1827 for (i = ntokens; i < nsyms; i++)
1828 {
1829 /* don't make these for dummy nonterminals made by gensym. */
1830 if (*tags[i] != '@')
1831 obstack_fgrow2 (&defines_obstack,
1832 "# define\tNT%s\t%d\n", tags[i], i);
1833 }
1ff442ca 1834#if 0
037ca2f1
AD
1835 /* `fdefines' is now a temporary file, so we need to copy its
1836 contents in `done', so we can't close it here. */
1837 fclose (fdefines);
1838 fdefines = NULL;
1ff442ca 1839#endif
037ca2f1 1840 }
1ff442ca
NF
1841 }
1842}
a083fbbf 1843
1ff442ca 1844
a70083a3
AD
1845/*---------------------------------------------------------------.
1846| Convert the rules into the representation using RRHS, RLHS and |
1847| RITEMS. |
1848`---------------------------------------------------------------*/
1ff442ca 1849
4a120d45 1850static void
118fb205 1851packgram (void)
1ff442ca 1852{
a70083a3
AD
1853 int itemno;
1854 int ruleno;
1855 symbol_list *p;
1ff442ca 1856
d7913476 1857 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1858 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1859
1860 itemno = 0;
1861 ruleno = 1;
1862
1863 p = grammar;
1864 while (p)
1865 {
b29b2ed5 1866 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1867 rule_table[ruleno].lhs = p->sym->value;
1868 rule_table[ruleno].rhs = itemno;
b29b2ed5 1869 rule_table[ruleno].line = p->line;
68f1e3ed 1870 rule_table[ruleno].useful = TRUE;
3f96f4dc
AD
1871 rule_table[ruleno].action = p->action;
1872 rule_table[ruleno].action_line = p->action_line;
1ff442ca
NF
1873
1874 p = p->next;
1875 while (p && p->sym)
1876 {
1877 ritem[itemno++] = p->sym->value;
1878 /* A rule gets by default the precedence and associativity
1879 of the last token in it. */
d7020c20 1880 if (p->sym->class == token_sym)
1ff442ca 1881 {
652a871c
AD
1882 rule_table[ruleno].prec = p->sym->prec;
1883 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1884 }
a70083a3
AD
1885 if (p)
1886 p = p->next;
1ff442ca
NF
1887 }
1888
1889 /* If this rule has a %prec,
a70083a3 1890 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1891 if (ruleprec)
1892 {
652a871c
AD
1893 rule_table[ruleno].prec = ruleprec->prec;
1894 rule_table[ruleno].assoc = ruleprec->assoc;
1895 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1896 }
1897
1898 ritem[itemno++] = -ruleno;
1899 ruleno++;
1900
a70083a3
AD
1901 if (p)
1902 p = p->next;
1ff442ca
NF
1903 }
1904
1905 ritem[itemno] = 0;
3067fbef
AD
1906
1907 if (trace_flag)
1908 ritem_print (stderr);
1ff442ca 1909}
a70083a3
AD
1910\f
1911/*-------------------------------------------------------------------.
1912| Read in the grammar specification and record it in the format |
ea5607fd 1913| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1914| and all actions into ACTION_OBSTACK, in each case forming the body |
1915| of a C function (YYGUARD or YYACTION) which contains a switch |
1916| statement to decide which guard or action to execute. |
a70083a3
AD
1917`-------------------------------------------------------------------*/
1918
1919void
1920reader (void)
1921{
1922 start_flag = 0;
1923 startval = NULL; /* start symbol not specified yet. */
1924
a70083a3
AD
1925 nsyms = 1;
1926 nvars = 0;
1927 nrules = 0;
1928 nitems = 0;
a70083a3
AD
1929
1930 typed = 0;
1931 lastprec = 0;
1932
a70083a3
AD
1933 semantic_parser = 0;
1934 pure_parser = 0;
a70083a3
AD
1935
1936 grammar = NULL;
1937
342b8b6e 1938 lex_init ();
a70083a3
AD
1939 lineno = 1;
1940
11d82f03
MA
1941 /* Initialize the muscle obstack. */
1942 obstack_init (&muscle_obstack);
82e236e2 1943
a70083a3
AD
1944 /* Initialize the symbol table. */
1945 tabinit ();
b6610515 1946
a70083a3
AD
1947 /* Construct the error token */
1948 errtoken = getsym ("error");
d7020c20 1949 errtoken->class = token_sym;
a70083a3 1950 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1951
a70083a3
AD
1952 /* Construct a token that represents all undefined literal tokens.
1953 It is always token number 2. */
1954 undeftoken = getsym ("$undefined.");
d7020c20 1955 undeftoken->class = token_sym;
a70083a3
AD
1956 undeftoken->user_token_number = 2;
1957
896fe5c1
AD
1958 /* Read the declaration section. Copy %{ ... %} groups to
1959 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1960 etc. found there. */
a70083a3 1961 read_declarations ();
a70083a3
AD
1962 /* Read in the grammar, build grammar in list form. Write out
1963 guards and actions. */
1964 readgram ();
ff48177d
MA
1965 /* Some C code is given at the end of the grammar file. */
1966 read_additionnal_code ();
b0c4483e 1967
a70083a3 1968 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
1969 write its type into the .tab.h file.
1970 This is no longer need with header skeleton. */
1971
a70083a3
AD
1972 /* Assign the symbols their symbol numbers. Write #defines for the
1973 token symbols into FDEFINES if requested. */
1974 packsymbols ();
1975 /* Convert the grammar into the format described in gram.h. */
1976 packgram ();
edad7067
AD
1977 /* Output the headers. */
1978 symbols_output ();
a70083a3 1979}