]> git.saurik.com Git - bison.git/blame - src/reader.c
Adjust.
[bison.git] / src / reader.c
CommitLineData
/* Input parser for bison
   Copyright 1984, 1986, 1989, 1992, 1998, 2000
   Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 38
1ff442ca 39/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 40static int rline_allocated;
1ff442ca 41
a70083a3
AD
42typedef struct symbol_list
43{
44 struct symbol_list *next;
45 bucket *sym;
46 bucket *ruleprec;
47}
48symbol_list;
118fb205 49
1ff442ca 50int lineno;
1ff442ca 51char **tags;
d019d655 52short *user_toknums;
4a120d45
JT
53static symbol_list *grammar;
54static int start_flag;
55static bucket *startval;
1ff442ca
NF
56
57/* Nonzero if components of semantic values are used, implying
58 they must be unions. */
59static int value_components_used;
60
d7020c20
AD
61/* Nonzero if %union has been seen. */
62static int typed;
1ff442ca 63
d7020c20
AD
64/* Incremented for each %left, %right or %nonassoc seen */
65static int lastprec;
1ff442ca 66
1ff442ca 67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
0d533154 69\f
a70083a3 70
0d533154
AD
71/*===================\
72| Low level lexing. |
73\===================*/
943819bf
RS
74
75static void
118fb205 76skip_to_char (int target)
943819bf
RS
77{
78 int c;
79 if (target == '\n')
a0f6b076 80 complain (_(" Skipping to next \\n"));
943819bf 81 else
a0f6b076 82 complain (_(" Skipping to next %c"), target);
943819bf
RS
83
84 do
0d533154 85 c = skip_white_space ();
943819bf 86 while (c != target && c != EOF);
a083fbbf 87 if (c != EOF)
0d533154 88 ungetc (c, finput);
943819bf
RS
89}
90
91
0d533154
AD
/*---------------------------------------------------------------.
| Read a signed decimal integer from STREAM and return its value. |
|                                                                 |
| An optional leading `-' is accepted, followed by zero or more   |
| digits (no digits at all yields 0).  The first character that   |
| is not part of the number is pushed back onto STREAM.           |
|                                                                 |
| The magnitude saturates at INT_MAX instead of overflowing: the  |
| digits come straight from the user's grammar file, and signed   |
| overflow would be undefined behavior.                           |
`---------------------------------------------------------------*/

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* `10 * n + digit' would exceed INT_MAX exactly when N is
	 greater than (INT_MAX - digit) / 10; clamp in that case.  */
      if (n > (INT_MAX - digit) / 10)
	n = INT_MAX;
      else
	n = 10 * n + digit;

      c = getc (stream);
    }

  /* Give back the terminator (a no-op when it is EOF).  */
  ungetc (c, stream);

  return sign * n;
}
119\f
79282c5a
AD
120/*--------------------------------------------------------------.
121| Get the data type (alternative in the union) of the value for |
122| symbol N in rule RULE. |
123`--------------------------------------------------------------*/
124
125static char *
126get_type_name (int n, symbol_list * rule)
127{
128 int i;
129 symbol_list *rp;
130
131 if (n < 0)
132 {
133 complain (_("invalid $ value"));
134 return NULL;
135 }
136
137 rp = rule;
138 i = 0;
139
140 while (i < n)
141 {
142 rp = rp->next;
143 if (rp == NULL || rp->sym == NULL)
144 {
145 complain (_("invalid $ value"));
146 return NULL;
147 }
148 i++;
149 }
150
151 return rp->sym->type_name;
152}
153\f
337bab46
AD
154/*------------------------------------------------------------.
155| Dump the string from FIN to OOUT if non null. MATCH is the |
156| delimiter of the string (either ' or "). |
157`------------------------------------------------------------*/
ae3c3164
AD
158
159static inline void
b6610515 160copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
161{
162 int c;
163
b6610515
RA
164 if (store)
165 obstack_1grow (oout, match);
8c7ebe49 166
4a120d45 167 c = getc (fin);
ae3c3164
AD
168
169 while (c != match)
170 {
171 if (c == EOF)
172 fatal (_("unterminated string at end of file"));
173 if (c == '\n')
174 {
a0f6b076 175 complain (_("unterminated string"));
4a120d45 176 ungetc (c, fin);
ae3c3164
AD
177 c = match; /* invent terminator */
178 continue;
179 }
180
337bab46 181 obstack_1grow (oout, c);
ae3c3164
AD
182
183 if (c == '\\')
184 {
4a120d45 185 c = getc (fin);
ae3c3164
AD
186 if (c == EOF)
187 fatal (_("unterminated string at end of file"));
337bab46 188 obstack_1grow (oout, c);
8c7ebe49 189
ae3c3164
AD
190 if (c == '\n')
191 lineno++;
192 }
193
a70083a3 194 c = getc (fin);
ae3c3164
AD
195 }
196
b6610515
RA
197 if (store)
198 obstack_1grow (oout, c);
199}
200
/* Copy a string or character literal from FIN to OOUT, delimiters
   included.  The opening delimiter MATCH has already been read.
   FIXME.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
208
b6610515
RA
/* Copy the longest run of letters, digits and underscores from FIN to
   OOUT.  The character that ends the run is pushed back onto FIN.
   FIXME.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch;

  for (ch = getc (fin); ch == '_' || isalnum (ch); ch = getc (fin))
    obstack_1grow (oout, ch);

  ungetc (ch, fin);
}
ae3c3164 221
337bab46
AD
222/*-----------------------------------------------------------------.
223| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
224| NULL). In fact we just saw a `/', which might or might not be a |
225| comment. In any case, copy what we saw. |
226| |
227| OUT2 might be NULL. |
228`-----------------------------------------------------------------*/
ae3c3164
AD
229
230static inline void
337bab46 231copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
232{
233 int cplus_comment;
a70083a3 234 int ended;
550a72a3
AD
235 int c;
236
237 /* We read a `/', output it. */
337bab46 238 obstack_1grow (oout1, '/');
896fe5c1
AD
239 if (oout2)
240 obstack_1grow (oout2, '/');
550a72a3
AD
241
242 switch ((c = getc (fin)))
243 {
244 case '/':
245 cplus_comment = 1;
246 break;
247 case '*':
248 cplus_comment = 0;
249 break;
250 default:
251 ungetc (c, fin);
252 return;
253 }
ae3c3164 254
337bab46 255 obstack_1grow (oout1, c);
896fe5c1
AD
256 if (oout2)
257 obstack_1grow (oout2, c);
550a72a3 258 c = getc (fin);
ae3c3164
AD
259
260 ended = 0;
261 while (!ended)
262 {
263 if (!cplus_comment && c == '*')
264 {
265 while (c == '*')
266 {
337bab46 267 obstack_1grow (oout1, c);
896fe5c1
AD
268 if (oout2)
269 obstack_1grow (oout2, c);
550a72a3 270 c = getc (fin);
ae3c3164
AD
271 }
272
273 if (c == '/')
274 {
337bab46 275 obstack_1grow (oout1, c);
896fe5c1
AD
276 if (oout2)
277 obstack_1grow (oout2, c);
ae3c3164
AD
278 ended = 1;
279 }
280 }
281 else if (c == '\n')
282 {
283 lineno++;
337bab46 284 obstack_1grow (oout1, c);
896fe5c1
AD
285 if (oout2)
286 obstack_1grow (oout2, c);
ae3c3164
AD
287 if (cplus_comment)
288 ended = 1;
289 else
550a72a3 290 c = getc (fin);
ae3c3164
AD
291 }
292 else if (c == EOF)
293 fatal (_("unterminated comment"));
294 else
295 {
337bab46 296 obstack_1grow (oout1, c);
896fe5c1
AD
297 if (oout2)
298 obstack_1grow (oout2, c);
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 }
302}
303
304
550a72a3
AD
/*------------------------------------------------------------------.
| Single-destination form of copy_comment2: copy the comment (more  |
| precisely, whatever starts with the `/' just read) from FIN to    |
| OOUT only.                                                        |
`------------------------------------------------------------------*/

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *no_second_copy = NULL;

  copy_comment2 (fin, oout, no_second_copy);
}
315
316
a70083a3 317/*-----------------------------------------------------------------.
337bab46 318| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
319| reference to this location. STACK_OFFSET is the number of values |
320| in the current rule so far, which says where to find `$0' with |
321| respect to the top of the stack. |
322`-----------------------------------------------------------------*/
1ff442ca 323
a70083a3 324static inline void
337bab46 325copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 326{
a70083a3 327 int c;
1ff442ca 328
a70083a3
AD
329 c = getc (fin);
330 if (c == '$')
1ff442ca 331 {
ff4423cc 332 obstack_sgrow (oout, "yyloc");
89cab50d 333 locations_flag = 1;
a70083a3
AD
334 }
335 else if (isdigit (c) || c == '-')
336 {
337 int n;
1ff442ca 338
a70083a3
AD
339 ungetc (c, fin);
340 n = read_signed_integer (fin);
943819bf 341
337bab46 342 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 343 locations_flag = 1;
1ff442ca 344 }
a70083a3 345 else
ff4a34be
AD
346 {
347 char buf[] = "@c";
348 buf[1] = c;
349 complain (_("%s is invalid"), quote (buf));
350 }
1ff442ca 351}
79282c5a
AD
352
353
354/*-------------------------------------------------------------------.
355| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
356| |
357| Possible inputs: $[<TYPENAME>]($|integer) |
358| |
337bab46 359| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
360| the number of values in the current rule so far, which says where |
361| to find `$0' with respect to the top of the stack. |
362`-------------------------------------------------------------------*/
363
364static inline void
337bab46 365copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
366 symbol_list *rule, int stack_offset)
367{
368 int c = getc (fin);
b0ce6046 369 const char *type_name = NULL;
79282c5a 370
f282676b 371 /* Get the type name if explicit. */
79282c5a
AD
372 if (c == '<')
373 {
f282676b 374 read_type_name (fin);
79282c5a
AD
375 type_name = token_buffer;
376 value_components_used = 1;
79282c5a
AD
377 c = getc (fin);
378 }
379
380 if (c == '$')
381 {
ff4423cc 382 obstack_sgrow (oout, "yyval");
8c7ebe49 383
79282c5a
AD
384 if (!type_name)
385 type_name = get_type_name (0, rule);
386 if (type_name)
337bab46 387 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
388 if (!type_name && typed)
389 complain (_("$$ of `%s' has no declared type"),
390 rule->sym->tag);
391 }
392 else if (isdigit (c) || c == '-')
393 {
394 int n;
395 ungetc (c, fin);
396 n = read_signed_integer (fin);
397
398 if (!type_name && n > 0)
399 type_name = get_type_name (n, rule);
400
337bab46 401 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 402
79282c5a 403 if (type_name)
337bab46 404 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
405 if (!type_name && typed)
406 complain (_("$%d of `%s' has no declared type"),
407 n, rule->sym->tag);
408 }
409 else
410 {
411 char buf[] = "$c";
412 buf[1] = c;
413 complain (_("%s is invalid"), quote (buf));
414 }
415}
a70083a3
AD
416\f
417/*-------------------------------------------------------------------.
418| Copy the contents of a `%{ ... %}' into the definitions file. The |
419| `%{' has already been read. Return after reading the `%}'. |
420`-------------------------------------------------------------------*/
1ff442ca 421
4a120d45 422static void
118fb205 423copy_definition (void)
1ff442ca 424{
a70083a3 425 int c;
ae3c3164 426 /* -1 while reading a character if prev char was %. */
a70083a3 427 int after_percent;
1ff442ca 428
b6610515 429#if 0
89cab50d 430 if (!no_lines_flag)
25b222fa
MA
431 {
432 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 433 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
434 muscle_find("filename")));
435 }
b6610515 436#endif
1ff442ca
NF
437
438 after_percent = 0;
439
ae3c3164 440 c = getc (finput);
1ff442ca
NF
441
442 for (;;)
443 {
444 switch (c)
445 {
446 case '\n':
dd60faec 447 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
448 lineno++;
449 break;
450
451 case '%':
a70083a3 452 after_percent = -1;
1ff442ca 453 break;
a083fbbf 454
1ff442ca
NF
455 case '\'':
456 case '"':
337bab46 457 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
458 break;
459
460 case '/':
337bab46 461 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
462 break;
463
464 case EOF:
a70083a3 465 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
466
467 default:
dd60faec 468 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
469 }
470
a70083a3 471 c = getc (finput);
1ff442ca
NF
472
473 if (after_percent)
474 {
475 if (c == '}')
476 return;
dd60faec 477 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
478 }
479 after_percent = 0;
1ff442ca 480 }
1ff442ca
NF
481}
482
483
d7020c20
AD
484/*-------------------------------------------------------------------.
485| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
486| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
487| are reversed. |
488`-------------------------------------------------------------------*/
1ff442ca 489
4a120d45 490static void
d7020c20 491parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 492{
342b8b6e
AD
493 token_t token = tok_undef;
494 char *typename = NULL;
1ff442ca 495
1e9798d5
AD
496 /* The symbol being defined. */
497 struct bucket *symbol = NULL;
498
499 /* After `%token' and `%nterm', any number of symbols maybe be
500 defined. */
1ff442ca
NF
501 for (;;)
502 {
e6011337
JT
503 int tmp_char = ungetc (skip_white_space (), finput);
504
1e9798d5
AD
505 /* `%' (for instance from `%token', or from `%%' etc.) is the
506 only valid means to end this declaration. */
e6011337 507 if (tmp_char == '%')
1ff442ca 508 return;
e6011337 509 if (tmp_char == EOF)
a0f6b076 510 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 511
a70083a3 512 token = lex ();
511e79b3 513 if (token == tok_comma)
943819bf
RS
514 {
515 symbol = NULL;
516 continue;
517 }
511e79b3 518 if (token == tok_typename)
1ff442ca 519 {
95e36146 520 typename = xstrdup (token_buffer);
1ff442ca 521 value_components_used = 1;
943819bf
RS
522 symbol = NULL;
523 }
511e79b3 524 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 525 {
8e03724b
AD
526 if (symval->alias)
527 warn (_("symbol `%s' used more than once as a literal string"),
528 symval->tag);
529 else if (symbol->alias)
530 warn (_("symbol `%s' given more than one literal string"),
531 symbol->tag);
532 else
533 {
534 symval->class = token_sym;
535 symval->type_name = typename;
536 symval->user_token_number = symbol->user_token_number;
537 symbol->user_token_number = SALIAS;
538 symval->alias = symbol;
539 symbol->alias = symval;
540 /* symbol and symval combined are only one symbol */
541 nsyms--;
542 }
8e03724b 543 symbol = NULL;
1ff442ca 544 }
511e79b3 545 else if (token == tok_identifier)
1ff442ca
NF
546 {
547 int oldclass = symval->class;
943819bf 548 symbol = symval;
1ff442ca 549
943819bf 550 if (symbol->class == what_is_not)
a0f6b076 551 complain (_("symbol %s redefined"), symbol->tag);
943819bf 552 symbol->class = what_is;
d7020c20 553 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 554 symbol->value = nvars++;
1ff442ca
NF
555
556 if (typename)
557 {
943819bf
RS
558 if (symbol->type_name == NULL)
559 symbol->type_name = typename;
a70083a3 560 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 561 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
562 }
563 }
511e79b3 564 else if (symbol && token == tok_number)
a70083a3 565 {
943819bf 566 symbol->user_token_number = numval;
a70083a3 567 }
1ff442ca 568 else
943819bf 569 {
a0f6b076 570 complain (_("`%s' is invalid in %s"),
d7020c20 571 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 572 skip_to_char ('%');
943819bf 573 }
1ff442ca
NF
574 }
575
576}
577
1ff442ca 578
d7020c20
AD
579/*------------------------------.
580| Parse what comes after %start |
581`------------------------------*/
1ff442ca 582
4a120d45 583static void
118fb205 584parse_start_decl (void)
1ff442ca
NF
585{
586 if (start_flag)
27821bff 587 complain (_("multiple %s declarations"), "%start");
511e79b3 588 if (lex () != tok_identifier)
27821bff 589 complain (_("invalid %s declaration"), "%start");
943819bf
RS
590 else
591 {
592 start_flag = 1;
593 startval = symval;
594 }
1ff442ca
NF
595}
596
a70083a3
AD
597/*-----------------------------------------------------------.
598| read in a %type declaration and record its information for |
599| get_type_name to access |
600`-----------------------------------------------------------*/
601
602static void
603parse_type_decl (void)
604{
a70083a3
AD
605 char *name;
606
511e79b3 607 if (lex () != tok_typename)
a70083a3
AD
608 {
609 complain ("%s", _("%type declaration has no <typename>"));
610 skip_to_char ('%');
611 return;
612 }
613
95e36146 614 name = xstrdup (token_buffer);
a70083a3
AD
615
616 for (;;)
617 {
f17bcd1f 618 token_t t;
a70083a3
AD
619 int tmp_char = ungetc (skip_white_space (), finput);
620
621 if (tmp_char == '%')
622 return;
623 if (tmp_char == EOF)
624 fatal (_("Premature EOF after %s"), token_buffer);
625
626 t = lex ();
627
628 switch (t)
1ff442ca
NF
629 {
630
511e79b3
AD
631 case tok_comma:
632 case tok_semicolon:
1ff442ca
NF
633 break;
634
511e79b3 635 case tok_identifier:
1ff442ca
NF
636 if (symval->type_name == NULL)
637 symval->type_name = name;
a70083a3 638 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 639 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
640
641 break;
642
643 default:
a0f6b076
AD
644 complain (_("invalid %%type declaration due to item: %s"),
645 token_buffer);
a70083a3 646 skip_to_char ('%');
1ff442ca
NF
647 }
648 }
649}
650
651
652
d7020c20
AD
653/*----------------------------------------------------------------.
654| Read in a %left, %right or %nonassoc declaration and record its |
655| information. |
656`----------------------------------------------------------------*/
1ff442ca 657
4a120d45 658static void
d7020c20 659parse_assoc_decl (associativity assoc)
1ff442ca 660{
a70083a3
AD
661 char *name = NULL;
662 int prev = 0;
1ff442ca 663
a70083a3 664 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 665
1ff442ca
NF
666 for (;;)
667 {
f17bcd1f 668 token_t t;
e6011337 669 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 670
e6011337 671 if (tmp_char == '%')
1ff442ca 672 return;
e6011337 673 if (tmp_char == EOF)
a0f6b076 674 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 675
a70083a3 676 t = lex ();
1ff442ca
NF
677
678 switch (t)
679 {
511e79b3 680 case tok_typename:
95e36146 681 name = xstrdup (token_buffer);
1ff442ca
NF
682 break;
683
511e79b3 684 case tok_comma:
1ff442ca
NF
685 break;
686
511e79b3 687 case tok_identifier:
1ff442ca 688 if (symval->prec != 0)
a0f6b076 689 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
690 symval->prec = lastprec;
691 symval->assoc = assoc;
d7020c20 692 if (symval->class == nterm_sym)
a0f6b076 693 complain (_("symbol %s redefined"), symval->tag);
d7020c20 694 symval->class = token_sym;
1ff442ca 695 if (name)
a70083a3 696 { /* record the type, if one is specified */
1ff442ca
NF
697 if (symval->type_name == NULL)
698 symval->type_name = name;
a70083a3 699 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 700 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
701 }
702 break;
703
511e79b3
AD
704 case tok_number:
705 if (prev == tok_identifier)
a70083a3 706 {
1ff442ca 707 symval->user_token_number = numval;
a70083a3
AD
708 }
709 else
710 {
711 complain (_
712 ("invalid text (%s) - number should be after identifier"),
713token_buffer);
714 skip_to_char ('%');
715 }
1ff442ca
NF
716 break;
717
511e79b3 718 case tok_semicolon:
1ff442ca
NF
719 return;
720
721 default:
a0f6b076 722 complain (_("unexpected item: %s"), token_buffer);
a70083a3 723 skip_to_char ('%');
1ff442ca
NF
724 }
725
726 prev = t;
727
728 }
729}
730
731
732
dd60faec 733/*--------------------------------------------------------------.
180d45ba
PB
734| Copy the union declaration into the stype muscle |
735| (and fdefines), where it is made into the definition of |
736| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 737`--------------------------------------------------------------*/
1ff442ca 738
4a120d45 739static void
118fb205 740parse_union_decl (void)
1ff442ca 741{
a70083a3
AD
742 int c;
743 int count = 0;
180d45ba 744 struct obstack union_obstack;
1ff442ca
NF
745
746 if (typed)
27821bff 747 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
748
749 typed = 1;
750
f6ec6d13
AD
751 /* FIXME: I'm worried: are you sure attrs_obstack is properly
752 filled? */
180d45ba 753 if (no_lines_flag)
dd60faec 754 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 755
180d45ba
PB
756 obstack_init (&union_obstack);
757 obstack_sgrow (&union_obstack, "union");
896fe5c1 758 if (defines_flag)
ff4423cc 759 obstack_sgrow (&defines_obstack, "typedef union");
1ff442ca 760
27821bff 761 c = getc (finput);
1ff442ca
NF
762
763 while (c != EOF)
764 {
342b8b6e
AD
765 /* If C contains '/', it is output by copy_comment (). */
766 if (c != '/')
767 {
f6ec6d13 768 obstack_1grow (&union_obstack, c);
342b8b6e
AD
769 if (defines_flag)
770 obstack_1grow (&defines_obstack, c);
771 }
1ff442ca
NF
772
773 switch (c)
774 {
775 case '\n':
776 lineno++;
777 break;
778
779 case '/':
180d45ba 780 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
781 break;
782
1ff442ca
NF
783 case '{':
784 count++;
785 break;
786
787 case '}':
788 if (count == 0)
27821bff 789 complain (_("unmatched %s"), "`}'");
1ff442ca 790 count--;
943819bf 791 if (count <= 0)
1ff442ca 792 {
896fe5c1 793 if (defines_flag)
ff4423cc 794 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
1ff442ca 795 /* JF don't choke on trailing semi */
27821bff
AD
796 c = skip_white_space ();
797 if (c != ';')
a70083a3 798 ungetc (c, finput);
180d45ba
PB
799 obstack_1grow (&union_obstack, 0);
800 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
801 return;
802 }
803 }
804
27821bff 805 c = getc (finput);
1ff442ca 806 }
180d45ba 807
1ff442ca
NF
808}
809
d7020c20
AD
810
811/*-------------------------------------------------------.
812| Parse the declaration %expect N which says to expect N |
813| shift-reduce conflicts. |
814`-------------------------------------------------------*/
1ff442ca 815
4a120d45 816static void
118fb205 817parse_expect_decl (void)
1ff442ca 818{
131e2fef 819 int c = skip_white_space ();
1ff442ca
NF
820 ungetc (c, finput);
821
131e2fef 822 if (!isdigit (c))
79282c5a 823 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
824 else
825 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
826}
827
a70083a3
AD
828
829/*-------------------------------------------------------------------.
830| Parse what comes after %thong. the full syntax is |
831| |
832| %thong <type> token number literal |
833| |
834| the <type> or number may be omitted. The number specifies the |
835| user_token_number. |
836| |
837| Two symbols are entered in the table, one for the token symbol and |
838| one for the literal. Both are given the <type>, if any, from the |
839| declaration. The ->user_token_number of the first is SALIAS and |
840| the ->user_token_number of the second is set to the number, if |
841| any, from the declaration. The two symbols are linked via |
842| pointers in their ->alias fields. |
843| |
844| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
845| only the literal string is retained it is the literal string that |
846| is output to yytname |
847`-------------------------------------------------------------------*/
848
849static void
850parse_thong_decl (void)
7b306f52 851{
f17bcd1f 852 token_t token;
a70083a3
AD
853 struct bucket *symbol;
854 char *typename = 0;
95e36146 855 int usrtoknum;
7b306f52 856
a70083a3 857 token = lex (); /* fetch typename or first token */
511e79b3 858 if (token == tok_typename)
7b306f52 859 {
95e36146 860 typename = xstrdup (token_buffer);
a70083a3
AD
861 value_components_used = 1;
862 token = lex (); /* fetch first token */
7b306f52 863 }
7b306f52 864
a70083a3 865 /* process first token */
7b306f52 866
511e79b3 867 if (token != tok_identifier)
a70083a3
AD
868 {
869 complain (_("unrecognized item %s, expected an identifier"),
870 token_buffer);
871 skip_to_char ('%');
872 return;
7b306f52 873 }
d7020c20 874 symval->class = token_sym;
a70083a3
AD
875 symval->type_name = typename;
876 symval->user_token_number = SALIAS;
877 symbol = symval;
7b306f52 878
a70083a3 879 token = lex (); /* get number or literal string */
1ff442ca 880
511e79b3 881 if (token == tok_number)
943819bf 882 {
a70083a3
AD
883 usrtoknum = numval;
884 token = lex (); /* okay, did number, now get literal */
943819bf 885 }
a70083a3
AD
886 else
887 usrtoknum = 0;
1ff442ca 888
a70083a3 889 /* process literal string token */
1ff442ca 890
511e79b3 891 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 892 {
a70083a3
AD
893 complain (_("expected string constant instead of %s"), token_buffer);
894 skip_to_char ('%');
895 return;
1ff442ca 896 }
d7020c20 897 symval->class = token_sym;
a70083a3
AD
898 symval->type_name = typename;
899 symval->user_token_number = usrtoknum;
1ff442ca 900
a70083a3
AD
901 symval->alias = symbol;
902 symbol->alias = symval;
1ff442ca 903
79282c5a
AD
904 /* symbol and symval combined are only one symbol. */
905 nsyms--;
a70083a3 906}
3cef001a 907
b6610515 908static void
11d82f03 909parse_muscle_decl (void)
b6610515
RA
910{
911 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
912 char* muscle_key;
913 char* muscle_value;
b6610515
RA
914
915 /* Read key. */
916 if (!isalpha (ch) && ch != '_')
917 {
918 complain (_("invalid %s declaration"), "%define");
919 skip_to_char ('%');
920 return;
921 }
11d82f03
MA
922 copy_identifier (finput, &muscle_obstack);
923 obstack_1grow (&muscle_obstack, 0);
924 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 925
b6610515
RA
926 /* Read value. */
927 ch = skip_white_space ();
928 if (ch != '"')
929 {
930 ungetc (ch, finput);
931 if (ch != EOF)
932 {
933 complain (_("invalid %s declaration"), "%define");
934 skip_to_char ('%');
935 return;
936 }
937 else
938 fatal (_("Premature EOF after %s"), "\"");
939 }
11d82f03
MA
940 copy_string2 (finput, &muscle_obstack, '"', 0);
941 obstack_1grow (&muscle_obstack, 0);
942 muscle_value = obstack_finish (&muscle_obstack);
b6610515 943
b6610515 944 /* Store the (key, value) pair in the environment. */
11d82f03 945 muscle_insert (muscle_key, muscle_value);
b6610515
RA
946}
947
2ba3b73c
MA
948
/*-------------------------------------------------------------.
| Parse what comes after %skeleton.                            |
|                                                              |
| Currently a stub: nothing is read from the input, so the     |
| argument of %skeleton is left for the caller to deal with.   |
`-------------------------------------------------------------*/

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
}
958
a70083a3
AD
959/*----------------------------------------------------------------.
960| Read from finput until `%%' is seen. Discard the `%%'. Handle |
961| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 962| groups to ATTRS_OBSTACK. |
a70083a3 963`----------------------------------------------------------------*/
1ff442ca 964
4a120d45 965static void
a70083a3 966read_declarations (void)
1ff442ca 967{
a70083a3
AD
968 int c;
969 int tok;
1ff442ca 970
a70083a3 971 for (;;)
1ff442ca 972 {
a70083a3 973 c = skip_white_space ();
1ff442ca 974
a70083a3
AD
975 if (c == '%')
976 {
977 tok = parse_percent_token ();
1ff442ca 978
a70083a3 979 switch (tok)
943819bf 980 {
511e79b3 981 case tok_two_percents:
a70083a3 982 return;
1ff442ca 983
511e79b3 984 case tok_percent_left_curly:
a70083a3
AD
985 copy_definition ();
986 break;
1ff442ca 987
511e79b3 988 case tok_token:
d7020c20 989 parse_token_decl (token_sym, nterm_sym);
a70083a3 990 break;
1ff442ca 991
511e79b3 992 case tok_nterm:
d7020c20 993 parse_token_decl (nterm_sym, token_sym);
a70083a3 994 break;
1ff442ca 995
511e79b3 996 case tok_type:
a70083a3
AD
997 parse_type_decl ();
998 break;
1ff442ca 999
511e79b3 1000 case tok_start:
a70083a3
AD
1001 parse_start_decl ();
1002 break;
118fb205 1003
511e79b3 1004 case tok_union:
a70083a3
AD
1005 parse_union_decl ();
1006 break;
1ff442ca 1007
511e79b3 1008 case tok_expect:
a70083a3
AD
1009 parse_expect_decl ();
1010 break;
6deb4447 1011
511e79b3 1012 case tok_thong:
a70083a3
AD
1013 parse_thong_decl ();
1014 break;
d7020c20 1015
511e79b3 1016 case tok_left:
d7020c20 1017 parse_assoc_decl (left_assoc);
a70083a3 1018 break;
1ff442ca 1019
511e79b3 1020 case tok_right:
d7020c20 1021 parse_assoc_decl (right_assoc);
a70083a3 1022 break;
1ff442ca 1023
511e79b3 1024 case tok_nonassoc:
d7020c20 1025 parse_assoc_decl (non_assoc);
a70083a3 1026 break;
1ff442ca 1027
b6610515 1028 case tok_define:
11d82f03 1029 parse_muscle_decl ();
b6610515 1030 break;
342b8b6e 1031
2ba3b73c
MA
1032 case tok_skel:
1033 parse_skel_decl ();
1034 break;
b6610515 1035
511e79b3 1036 case tok_noop:
a70083a3 1037 break;
1ff442ca 1038
a70083a3
AD
1039 default:
1040 complain (_("unrecognized: %s"), token_buffer);
1041 skip_to_char ('%');
1042 }
1043 }
1044 else if (c == EOF)
1045 fatal (_("no input grammar"));
1046 else
1047 {
ff4a34be
AD
1048 char buf[] = "c";
1049 buf[0] = c;
1050 complain (_("unknown character: %s"), quote (buf));
a70083a3 1051 skip_to_char ('%');
1ff442ca 1052 }
1ff442ca 1053 }
1ff442ca 1054}
a70083a3
AD
1055\f
1056/*-------------------------------------------------------------------.
1057| Assuming that a `{' has just been seen, copy everything up to the |
1058| matching `}' into the actions file. STACK_OFFSET is the number of |
1059| values in the current rule so far, which says where to find `$0' |
1060| with respect to the top of the stack. |
1061`-------------------------------------------------------------------*/
1ff442ca 1062
4a120d45 1063static void
79282c5a 1064copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1065{
a70083a3 1066 int c;
a70083a3 1067 int count;
8c7ebe49 1068 char buf[4096];
1ff442ca
NF
1069
1070 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1071 if (semantic_parser)
1072 stack_offset = 0;
1ff442ca 1073
25b222fa 1074 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
8c7ebe49 1075
89cab50d 1076 if (!no_lines_flag)
8c7ebe49 1077 {
25b222fa 1078 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
342b8b6e 1079 lineno, quotearg_style (c_quoting_style,
25b222fa 1080 muscle_find ("filename")));
8c7ebe49
AD
1081 }
1082 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1083
1084 count = 1;
a70083a3 1085 c = getc (finput);
1ff442ca
NF
1086
1087 while (count > 0)
1088 {
1089 while (c != '}')
a70083a3
AD
1090 {
1091 switch (c)
1ff442ca
NF
1092 {
1093 case '\n':
8c7ebe49 1094 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1095 lineno++;
1096 break;
1097
1098 case '{':
8c7ebe49 1099 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1100 count++;
1101 break;
1102
1103 case '\'':
1104 case '"':
337bab46 1105 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1106 break;
1107
1108 case '/':
337bab46 1109 copy_comment (finput, &action_obstack);
1ff442ca
NF
1110 break;
1111
1112 case '$':
337bab46 1113 copy_dollar (finput, &action_obstack,
8c7ebe49 1114 rule, stack_offset);
1ff442ca
NF
1115 break;
1116
1117 case '@':
337bab46 1118 copy_at (finput, &action_obstack,
8c7ebe49 1119 stack_offset);
6666f98f 1120 break;
1ff442ca
NF
1121
1122 case EOF:
27821bff 1123 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1124
1125 default:
8c7ebe49 1126 obstack_1grow (&action_obstack, c);
a70083a3
AD
1127 }
1128
1129 c = getc (finput);
1130 }
1131
1132 /* above loop exits when c is '}' */
1133
1134 if (--count)
1135 {
8c7ebe49 1136 obstack_1grow (&action_obstack, c);
a70083a3
AD
1137 c = getc (finput);
1138 }
1139 }
1140
ff4423cc 1141 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1142}
1143\f
1144/*-------------------------------------------------------------------.
1145| After `%guard' is seen in the input file, copy the actual guard |
1146| into the guards file. If the guard is followed by an action, copy |
1147| that into the actions file. STACK_OFFSET is the number of values |
1148| in the current rule so far, which says where to find `$0' with |
1149| respect to the top of the stack, for the simple parser in which |
1150| the stack is not popped until after the guard is run. |
1151`-------------------------------------------------------------------*/
1152
1153static void
79282c5a 1154copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1155{
1156 int c;
a70083a3 1157 int count;
a70083a3
AD
1158 int brace_flag = 0;
1159
1160 /* offset is always 0 if parser has already popped the stack pointer */
1161 if (semantic_parser)
1162 stack_offset = 0;
1163
ea5607fd 1164 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1165 if (!no_lines_flag)
25b222fa 1166 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1167 lineno, quotearg_style (c_quoting_style,
11d82f03 1168 muscle_find ("filename")));
ea5607fd 1169 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1170
1171 count = 0;
1172 c = getc (finput);
1173
1174 while (brace_flag ? (count > 0) : (c != ';'))
1175 {
1176 switch (c)
1177 {
1178 case '\n':
ea5607fd 1179 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1180 lineno++;
1181 break;
1182
1183 case '{':
ea5607fd 1184 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1185 brace_flag = 1;
1186 count++;
1187 break;
1188
1189 case '}':
ea5607fd 1190 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1191 if (count > 0)
1192 count--;
1193 else
1194 {
1195 complain (_("unmatched %s"), "`}'");
1196 c = getc (finput); /* skip it */
1197 }
1198 break;
1199
1200 case '\'':
1201 case '"':
337bab46 1202 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1203 break;
1204
1205 case '/':
337bab46 1206 copy_comment (finput, &guard_obstack);
a70083a3
AD
1207 break;
1208
1209 case '$':
337bab46 1210 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1211 break;
1ff442ca 1212
a70083a3 1213 case '@':
337bab46 1214 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1215 break;
1ff442ca 1216
a70083a3
AD
1217 case EOF:
1218 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1219
a70083a3 1220 default:
ea5607fd 1221 obstack_1grow (&guard_obstack, c);
1ff442ca 1222 }
a70083a3
AD
1223
1224 if (c != '}' || count != 0)
1225 c = getc (finput);
1ff442ca
NF
1226 }
1227
a70083a3
AD
1228 c = skip_white_space ();
1229
ff4423cc 1230 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1231 if (c == '{')
1232 copy_action (rule, stack_offset);
1233 else if (c == '=')
1234 {
1235 c = getc (finput); /* why not skip_white_space -wjh */
1236 if (c == '{')
1237 copy_action (rule, stack_offset);
1238 }
1239 else
1240 ungetc (c, finput);
1ff442ca 1241}
a70083a3
AD
1242\f
1243
1244static void
1245record_rule_line (void)
1246{
1247 /* Record each rule's source line number in rline table. */
1ff442ca 1248
a70083a3
AD
1249 if (nrules >= rline_allocated)
1250 {
1251 rline_allocated = nrules * 2;
d7913476 1252 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1253 }
1254 rline[nrules] = lineno;
1255}
1ff442ca
NF
1256
1257
a70083a3
AD
1258/*-------------------------------------------------------------------.
1259| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1260| with the user's names. |
1261`-------------------------------------------------------------------*/
1ff442ca 1262
4a120d45 1263static bucket *
118fb205 1264gensym (void)
1ff442ca 1265{
274d42ce
AD
1266 /* Incremented for each generated symbol */
1267 static int gensym_count = 0;
1268 static char buf[256];
1269
a70083a3 1270 bucket *sym;
1ff442ca 1271
274d42ce
AD
1272 sprintf (buf, "@%d", ++gensym_count);
1273 token_buffer = buf;
a70083a3 1274 sym = getsym (token_buffer);
d7020c20 1275 sym->class = nterm_sym;
1ff442ca 1276 sym->value = nvars++;
36281465 1277 return sym;
1ff442ca
NF
1278}
1279
a70083a3
AD
1280#if 0
1281/*------------------------------------------------------------------.
1282| read in a %type declaration and record its information for |
1283| get_type_name to access. This is unused. It is only called from |
1284| the #if 0 part of readgram |
1285`------------------------------------------------------------------*/
1286
1287static int
1288get_type (void)
1289{
1290 int k;
f17bcd1f 1291 token_t token;
a70083a3
AD
1292 char *name;
1293
f17bcd1f 1294 token = lex ();
a70083a3 1295
f17bcd1f 1296 if (token != tok_typename)
a70083a3
AD
1297 {
1298 complain (_("invalid %s declaration"), "%type");
1299 return t;
1300 }
1301
95e36146 1302 name = xstrdup (token_buffer);
a70083a3
AD
1303
1304 for (;;)
1305 {
f17bcd1f 1306 token = lex ();
a70083a3 1307
f17bcd1f 1308 switch (token)
a70083a3 1309 {
511e79b3 1310 case tok_semicolon:
a70083a3
AD
1311 return lex ();
1312
511e79b3 1313 case tok_comma:
a70083a3
AD
1314 break;
1315
511e79b3 1316 case tok_identifier:
a70083a3
AD
1317 if (symval->type_name == NULL)
1318 symval->type_name = name;
1319 else if (strcmp (name, symval->type_name) != 0)
1320 complain (_("type redeclaration for %s"), symval->tag);
1321
1322 break;
1323
1324 default:
f17bcd1f 1325 return token;
a70083a3
AD
1326 }
1327 }
1328}
1ff442ca 1329
a70083a3
AD
1330#endif
1331\f
1332/*------------------------------------------------------------------.
1333| Parse the input grammar into a one symbol_list structure. Each |
1334| rule is represented by a sequence of symbols: the left hand side |
1335| followed by the contents of the right hand side, followed by a |
1336| null pointer instead of a symbol to terminate the rule. The next |
1337| symbol is the lhs of the following rule. |
1338| |
1339| All guards and actions are copied out to the appropriate files, |
1340| labelled by the rule number they apply to. |
1341`------------------------------------------------------------------*/
1ff442ca 1342
static void
readgram (void)
{
  token_t t;
  /* Left-hand side of the rule being read.  It is kept across
     iterations so that a rule introduced by `|' reuses the lhs of the
     previous rule.  */
  bucket *lhs = NULL;
  /* Scratch pointer for the node being created.  */
  symbol_list *p;
  /* Last node appended to the list so far.  */
  symbol_list *p1;
  bucket *bp;

  /* Points to first symbol_list of current rule. its symbol is the
     lhs of the rule.  */
  symbol_list *crule;
  /* Points to the symbol_list preceding crule.  */
  symbol_list *crule1;

  p1 = NULL;

  t = lex ();

  /* Each iteration consumes one rule, or one invalid token.  */
  while (t != tok_two_percents && t != tok_eof)
    {
      if (t == tok_identifier || t == tok_bar)
        {
          /* Nonzero when the last thing read in this rule was an
             action that has not yet been followed by anything.  */
          int action_flag = 0;
          /* Number of symbols in rhs of this rule so far */
          int rulelength = 0;
          int xactions = 0;     /* JF for error checking */
          /* First symbol of the rhs, used for the default-action type
             check below.  */
          bucket *first_rhs = 0;

          if (t == tok_identifier)
            {
              lhs = symval;

              /* The lhs of the first rule becomes the start symbol
                 unless START_FLAG is already set.  */
              if (!start_flag)
                {
                  startval = lhs;
                  start_flag = 1;
                }

              t = lex ();
              if (t != tok_colon)
                {
                  complain (_("ill-formed rule: initial symbol not followed by colon"));
                  unlex (t);
                }
            }

          if (nrules == 0 && t == tok_bar)
            {
              complain (_("grammar starts with vertical bar"));
              lhs = symval;     /* BOGUS: use a random symval */
            }
          /* start a new rule and record its lhs.  */

          nrules++;
          nitems++;

          record_rule_line ();

          p = XCALLOC (symbol_list, 1);
          p->sym = lhs;

          /* Remember the node before this rule so that a dummy rule
             can later be spliced in ahead of it.  */
          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == unknown_sym)
            {
              lhs->class = nterm_sym;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == token_sym)
            complain (_("rule given for %s, which is a token"), lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex ();
              /* A precedence override: the token after it names the
                 symbol recorded in the rule's RULEPREC.  */
              if (t == tok_prec)
                {
                  t = lex ();
                  crule->ruleprec = symval;
                  t = lex ();
                }

              if (!(t == tok_identifier || t == tok_left_curly))
                break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == tok_identifier)
                {
                  bucket *ssave;
                  token_t t1;

                  /* Peek one token ahead; SYMVAL is saved and restored
                     around the lookahead.  */
                  ssave = symval;
                  t1 = lex ();
                  unlex (t1);
                  symval = ssave;
                  if (t1 == tok_colon)
                    break;

                  if (!first_rhs)       /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (action_flag)
                {
                  bucket *sdummy;

                  /* Since the action was written out with this rule's
                     number, we must give the new rule this number by
                     inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  sdummy = gensym ();

                  /* Make a new rule, whose body is empty,
                     before the current one, so that the action
                     just read can belong to it.  */
                  nrules++;
                  nitems++;
                  record_rule_line ();
                  p = XCALLOC (symbol_list, 1);
                  if (crule1)
                    crule1->next = p;
                  else
                    grammar = p;
                  p->sym = sdummy;
                  /* The zeroed node allocated here has no symbol: it is
                     the end-of-rule marker of the dummy rule, and it
                     becomes the new predecessor of the current rule.  */
                  crule1 = XCALLOC (symbol_list, 1);
                  p->next = crule1;
                  crule1->next = crule;

                  /* Insert the dummy generated by that rule into this
                     rule.  */
                  nitems++;
                  p = XCALLOC (symbol_list, 1);
                  p->sym = sdummy;
                  p1->next = p;
                  p1 = p;

                  action_flag = 0;
                }

              if (t == tok_identifier)
                {
                  nitems++;
                  p = XCALLOC (symbol_list, 1);
                  p->sym = symval;
                  p1->next = p;
                  p1 = p;
                }
              else              /* handle an action.  */
                {
                  copy_action (crule, rulelength);
                  action_flag = 1;
                  xactions++;   /* JF */
                }
              /* Counted for symbols and actions alike.  */
              rulelength++;
            }                   /* end of read rhs of rule */

          /* Put an empty link in the list to mark the end of this rule  */
          p = XCALLOC (symbol_list, 1);
          p1->next = p;
          p1 = p;

          if (t == tok_prec)
            {
              complain (_("two @prec's in a row"));
              t = lex ();
              crule->ruleprec = symval;
              t = lex ();
            }
          if (t == tok_guard)
            {
              if (!semantic_parser)
                complain (_("%%guard present but %%semantic_parser not specified"));

              copy_guard (crule, rulelength);
              t = lex ();
            }
          else if (t == tok_left_curly)
            {
              /* This case never occurs -wjh */
              if (action_flag)
                complain (_("two actions at end of one rule"));
              copy_action (crule, rulelength);
              action_flag = 1;
              xactions++;       /* -wjh */
              t = lex ();
            }
          /* If $$ is being set in default way, report if any type
             mismatch.  */
          else if (!xactions
                   && first_rhs && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ; complain unless both names exist
                 and compare equal as strings.  */
              if (lhs->type_name == 0
                  || first_rhs->type_name == 0
                  || strcmp (lhs->type_name, first_rhs->type_name))
                complain (_("type clash (`%s' `%s') on default action"),
                          lhs->type_name ? lhs->type_name : "",
                          first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            complain (_("empty rule for typed nonterminal, and no action"));
          if (t == tok_semicolon)
            t = lex ();
        }
#if 0
      /* these things can appear as alternatives to rules.  */
/* NO, they cannot.
        a) none of the documentation allows them
        b) most of them scan forward until finding a next %
                thus they may swallow lots of intervening rules
*/
      else if (t == tok_token)
        {
          parse_token_decl (token_sym, nterm_sym);
          t = lex ();
        }
      else if (t == tok_nterm)
        {
          parse_token_decl (nterm_sym, token_sym);
          t = lex ();
        }
      else if (t == tok_type)
        {
          t = get_type ();
        }
      else if (t == tok_union)
        {
          parse_union_decl ();
          t = lex ();
        }
      else if (t == tok_expect)
        {
          parse_expect_decl ();
          t = lex ();
        }
      else if (t == tok_start)
        {
          parse_start_decl ();
          t = lex ();
        }
#endif

      else
        {
          /* Anything else cannot start a rule: report it and move on
             to the next token.  */
          complain (_("invalid input: %s"), quote (token_buffer));
          t = lex ();
        }
    }

  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           MAXSHORT);
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == unknown_sym)
      {
        complain (_
                  ("symbol %s is used, but is not defined as a token and has no rules"),
                  bp->tag);
        bp->class = nterm_sym;
        bp->value = nvars++;
      }

  /* Whatever is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
ff48177d
MA
1633
1634/* At the end of the grammar file, some C source code must
63c2d5de 1635 be stored. It is going to be associated to the epilogue
ff48177d
MA
1636 directive. */
1637static void
1638read_additionnal_code (void)
1639{
1640 char c;
63c2d5de 1641 struct obstack el_obstack;
342b8b6e 1642
63c2d5de 1643 obstack_init (&el_obstack);
ff48177d
MA
1644
1645 while ((c = getc (finput)) != EOF)
63c2d5de 1646 obstack_1grow (&el_obstack, c);
342b8b6e 1647
63c2d5de 1648 obstack_1grow (&el_obstack, 0);
11d82f03 1649 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1650}
1651
a70083a3
AD
1652\f
1653/*--------------------------------------------------------------.
1654| For named tokens, but not literal ones, define the name. The |
1655| value is the user token number. |
1656`--------------------------------------------------------------*/
1ff442ca 1657
4a120d45 1658static void
896fe5c1 1659output_token_defines (struct obstack *oout)
1ff442ca 1660{
a70083a3
AD
1661 bucket *bp;
1662 char *cp, *symbol;
1663 char c;
1ff442ca 1664
a70083a3 1665 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1666 {
a70083a3
AD
1667 symbol = bp->tag; /* get symbol */
1668
1669 if (bp->value >= ntokens)
1670 continue;
1671 if (bp->user_token_number == SALIAS)
1672 continue;
1673 if ('\'' == *symbol)
1674 continue; /* skip literal character */
1675 if (bp == errtoken)
1676 continue; /* skip error token */
1677 if ('\"' == *symbol)
1ff442ca 1678 {
a70083a3
AD
1679 /* use literal string only if given a symbol with an alias */
1680 if (bp->alias)
1681 symbol = bp->alias->tag;
1682 else
1683 continue;
1684 }
1ff442ca 1685
a70083a3
AD
1686 /* Don't #define nonliteral tokens whose names contain periods. */
1687 cp = symbol;
1688 while ((c = *cp++) && c != '.');
1689 if (c != '\0')
1690 continue;
1ff442ca 1691
0b8afb77 1692 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1693 symbol, bp->user_token_number);
a70083a3 1694 if (semantic_parser)
342b8b6e
AD
1695 /* FIXME: This is certainly dead wrong, and should be just as
1696 above. --akim. */
0b8afb77 1697 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1698 }
1699}
1ff442ca
NF
1700
1701
a70083a3
AD
1702/*------------------------------------------------------------------.
1703| Assign symbol numbers, and write definition of token names into |
b2ca4022 1704| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1705| of symbols. |
1706`------------------------------------------------------------------*/
1ff442ca 1707
static void
packsymbols (void)
{
  bucket *bp = NULL;
  /* Next internal token number to hand out.  */
  int tokno = 1;
  int i, j;
  /* Highest user token number assigned automatically so far.  */
  int last_user_token_number;
  static char DOLLAR[] = "$";

  /* Slot 0 of TAGS and USER_TOKNUMS is filled in here, not from the
     symbol list.  */
  tags = XCALLOC (char *, nsyms + 1);
  tags[0] = DOLLAR;
  user_toknums = XCALLOC (short, nsyms + 1);
  user_toknums[0] = 0;

  sprec = XCALLOC (short, nsyms);
  sassoc = XCALLOC (short, nsyms);

  max_user_token_number = 256;
  last_user_token_number = 256;

  /* First pass: give every symbol its final number and fill the
     per-symbol tables.  */
  for (bp = firstsymbol; bp; bp = bp->next)
    {
      if (bp->class == nterm_sym)
        {
          /* Nonterminals are numbered after all the tokens.  */
          bp->value += ntokens;
        }
      else if (bp->alias)
        {
          /* this symbol and its alias are a single token defn.
             allocate a tokno, and assign to both check agreement of
             ->prec and ->assoc fields and make both the same */
          if (bp->value == 0)
            bp->value = bp->alias->value = tokno++;

          if (bp->prec != bp->alias->prec)
            {
              if (bp->prec != 0 && bp->alias->prec != 0
                  && bp->user_token_number == SALIAS)
                complain (_("conflicting precedences for %s and %s"),
                          bp->tag, bp->alias->tag);
              /* A nonzero precedence wins; this symbol's own value is
                 preferred over its alias's.  */
              if (bp->prec != 0)
                bp->alias->prec = bp->prec;
              else
                bp->prec = bp->alias->prec;
            }

          if (bp->assoc != bp->alias->assoc)
            {
              if (bp->assoc != 0 && bp->alias->assoc != 0
                  && bp->user_token_number == SALIAS)
                complain (_("conflicting assoc values for %s and %s"),
                          bp->tag, bp->alias->tag);
              /* Same policy as for the precedence above.  */
              if (bp->assoc != 0)
                bp->alias->assoc = bp->assoc;
              else
                bp->assoc = bp->alias->assoc;
            }

          if (bp->user_token_number == SALIAS)
            continue;           /* do not do processing below for SALIASs */

        }
      else                      /* bp->class == token_sym */
        {
          bp->value = tokno++;
        }

      if (bp->class == token_sym)
        {
          /* Tokens without an explicit user number get the next one
             above 256.  */
          if (!bp->user_token_number)
            bp->user_token_number = ++last_user_token_number;
          if (bp->user_token_number > max_user_token_number)
            max_user_token_number = bp->user_token_number;
        }

      tags[bp->value] = bp->tag;
      user_toknums[bp->value] = bp->user_token_number;
      sprec[bp->value] = bp->prec;
      sassoc[bp->value] = bp->assoc;
    }

  token_translations = XCALLOC (short, max_user_token_number + 1);

  /* initialize all entries for literal tokens to 2, the internal
     token number for $undefined., which represents all invalid
     inputs.  */
  for (j = 0; j <= max_user_token_number; j++)
    token_translations[j] = 2;

  /* Second pass: map each user token number to its internal number,
     complaining when a slot has already been taken.  */
  for (bp = firstsymbol; bp; bp = bp->next)
    {
      if (bp->value >= ntokens)
        continue;               /* non-terminal */
      if (bp->user_token_number == SALIAS)
        continue;
      if (token_translations[bp->user_token_number] != 2)
        complain (_("tokens %s and %s both assigned number %d"),
                  tags[token_translations[bp->user_token_number]],
                  bp->tag, bp->user_token_number);
      token_translations[bp->user_token_number] = bp->value;
    }

  error_token_number = errtoken->value;

  /* Build the token #defines in a temporary obstack and register a
     copy of the text as the `tokendef' muscle; the copy is needed
     because the obstack is freed right afterwards.  */
  {
    struct obstack tokendefs;
    obstack_init (&tokendefs);
    output_token_defines (&tokendefs);
    obstack_1grow (&tokendefs, 0);
    muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
    obstack_free (&tokendefs, NULL);
  }

#if 0
  if (!no_parser_flag)
    output_token_defines (&table_obstack);
#endif

  if (startval->class == unknown_sym)
    fatal (_("the start symbol %s is undefined"), startval->tag);
  else if (startval->class == token_sym)
    fatal (_("the start symbol %s is a token"), startval->tag);

  start_symbol = startval->value;

  if (defines_flag)
    {
      output_token_defines (&defines_obstack);

      /* Declare the semantic value variable, under the user's prefix
         when there is one.  */
      if (!pure_parser)
        {
          if (spec_name_prefix)
            obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
                            spec_name_prefix);
          else
            obstack_sgrow (&defines_obstack,
                           "\nextern YYSTYPE yylval;\n");
        }

      if (semantic_parser)
        for (i = ntokens; i < nsyms; i++)
          {
            /* don't make these for dummy nonterminals made by gensym.  */
            if (*tags[i] != '@')
              obstack_fgrow2 (&defines_obstack,
                              "# define\tNT%s\t%d\n", tags[i], i);
          }
#if 0
      /* `fdefines' is now a temporary file, so we need to copy its
         contents in `done', so we can't close it here.  */
      fclose (fdefines);
      fdefines = NULL;
#endif
    }
}
a083fbbf 1863
1ff442ca 1864
a70083a3
AD
1865/*---------------------------------------------------------------.
1866| Convert the rules into the representation using RRHS, RLHS and |
1867| RITEMS. |
1868`---------------------------------------------------------------*/
1ff442ca 1869
4a120d45 1870static void
118fb205 1871packgram (void)
1ff442ca 1872{
a70083a3
AD
1873 int itemno;
1874 int ruleno;
1875 symbol_list *p;
1ff442ca
NF
1876
1877 bucket *ruleprec;
1878
d7913476
AD
1879 ritem = XCALLOC (short, nitems + 1);
1880 rlhs = XCALLOC (short, nrules) - 1;
1881 rrhs = XCALLOC (short, nrules) - 1;
1882 rprec = XCALLOC (short, nrules) - 1;
1883 rprecsym = XCALLOC (short, nrules) - 1;
1884 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1885
1886 itemno = 0;
1887 ruleno = 1;
1888
1889 p = grammar;
1890 while (p)
1891 {
1892 rlhs[ruleno] = p->sym->value;
1893 rrhs[ruleno] = itemno;
1894 ruleprec = p->ruleprec;
1895
1896 p = p->next;
1897 while (p && p->sym)
1898 {
1899 ritem[itemno++] = p->sym->value;
1900 /* A rule gets by default the precedence and associativity
1901 of the last token in it. */
d7020c20 1902 if (p->sym->class == token_sym)
1ff442ca
NF
1903 {
1904 rprec[ruleno] = p->sym->prec;
1905 rassoc[ruleno] = p->sym->assoc;
1906 }
a70083a3
AD
1907 if (p)
1908 p = p->next;
1ff442ca
NF
1909 }
1910
1911 /* If this rule has a %prec,
a70083a3 1912 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1913 if (ruleprec)
1914 {
a70083a3
AD
1915 rprec[ruleno] = ruleprec->prec;
1916 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1917 rprecsym[ruleno] = ruleprec->value;
1918 }
1919
1920 ritem[itemno++] = -ruleno;
1921 ruleno++;
1922
a70083a3
AD
1923 if (p)
1924 p = p->next;
1ff442ca
NF
1925 }
1926
1927 ritem[itemno] = 0;
1928}
a70083a3
AD
1929\f
1930/*-------------------------------------------------------------------.
1931| Read in the grammar specification and record it in the format |
ea5607fd 1932| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1933| and all actions into ACTION_OBSTACK, in each case forming the body |
1934| of a C function (YYGUARD or YYACTION) which contains a switch |
1935| statement to decide which guard or action to execute. |
a70083a3
AD
1936`-------------------------------------------------------------------*/
1937
1938void
1939reader (void)
1940{
1941 start_flag = 0;
1942 startval = NULL; /* start symbol not specified yet. */
1943
a70083a3
AD
1944 nsyms = 1;
1945 nvars = 0;
1946 nrules = 0;
1947 nitems = 0;
1948 rline_allocated = 10;
d7913476 1949 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1950
1951 typed = 0;
1952 lastprec = 0;
1953
a70083a3
AD
1954 semantic_parser = 0;
1955 pure_parser = 0;
a70083a3
AD
1956
1957 grammar = NULL;
1958
342b8b6e 1959 lex_init ();
a70083a3
AD
1960 lineno = 1;
1961
11d82f03
MA
1962 /* Initialize the muscle obstack. */
1963 obstack_init (&muscle_obstack);
82e236e2 1964
a70083a3
AD
1965 /* Initialize the symbol table. */
1966 tabinit ();
b6610515 1967
a70083a3
AD
1968 /* Construct the error token */
1969 errtoken = getsym ("error");
d7020c20 1970 errtoken->class = token_sym;
a70083a3 1971 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1972
a70083a3
AD
1973 /* Construct a token that represents all undefined literal tokens.
1974 It is always token number 2. */
1975 undeftoken = getsym ("$undefined.");
d7020c20 1976 undeftoken->class = token_sym;
a70083a3
AD
1977 undeftoken->user_token_number = 2;
1978
896fe5c1
AD
1979 /* Read the declaration section. Copy %{ ... %} groups to
1980 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1981 etc. found there. */
a70083a3 1982 read_declarations ();
a70083a3
AD
1983 /* Read in the grammar, build grammar in list form. Write out
1984 guards and actions. */
1985 readgram ();
ff48177d
MA
1986 /* Some C code is given at the end of the grammar file. */
1987 read_additionnal_code ();
b0c4483e 1988
a70083a3 1989 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
1990 write its type into the .tab.h file.
1991 This is no longer need with header skeleton. */
1992
a70083a3
AD
1993 /* Assign the symbols their symbol numbers. Write #defines for the
1994 token symbols into FDEFINES if requested. */
1995 packsymbols ();
1996 /* Convert the grammar into the format described in gram.h. */
1997 packgram ();
a70083a3 1998}