]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/reduce.c (print_results): Rename as...
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 38
1ff442ca 39/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 40static int rline_allocated;
1ff442ca 41
a70083a3
AD
42typedef struct symbol_list
43{
44 struct symbol_list *next;
45 bucket *sym;
46 bucket *ruleprec;
47}
48symbol_list;
118fb205 49
1ff442ca 50int lineno;
1ff442ca 51char **tags;
d019d655 52short *user_toknums;
4a120d45
JT
53static symbol_list *grammar;
54static int start_flag;
55static bucket *startval;
1ff442ca
NF
56
57/* Nonzero if components of semantic values are used, implying
58 they must be unions. */
59static int value_components_used;
60
d7020c20
AD
61/* Nonzero if %union has been seen. */
62static int typed;
1ff442ca 63
d7020c20
AD
64/* Incremented for each %left, %right or %nonassoc seen */
65static int lastprec;
1ff442ca 66
1ff442ca 67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
0d533154 69\f
a70083a3 70
0d533154
AD
71/*===================\
72| Low level lexing. |
73\===================*/
943819bf
RS
74
75static void
118fb205 76skip_to_char (int target)
943819bf
RS
77{
78 int c;
79 if (target == '\n')
a0f6b076 80 complain (_(" Skipping to next \\n"));
943819bf 81 else
a0f6b076 82 complain (_(" Skipping to next %c"), target);
943819bf
RS
83
84 do
0d533154 85 c = skip_white_space ();
943819bf 86 while (c != target && c != EOF);
a083fbbf 87 if (c != EOF)
0d533154 88 ungetc (c, finput);
943819bf
RS
89}
90
91
0d533154
AD
/*------------------------------------------------------------------.
| Read a signed decimal integer from STREAM and return its value.   |
|                                                                   |
| An optional leading `-' is accepted.  The first character that is |
| not part of the number is pushed back onto STREAM.  If no digit   |
| is present the result is 0.  Values whose magnitude would exceed  |
| INT_MAX saturate at +/-INT_MAX instead of overflowing (signed     |
| overflow is undefined behavior in C).                             |
`------------------------------------------------------------------*/

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  if (c == '-')
    {
      negative = 1;
      c = getc (stream);
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit fits in an int iff n <= (INT_MAX - digit) / 10.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a harmless no-op.  */
  ungetc (c, stream);

  return negative ? -n : n;
}
119\f
79282c5a
AD
120/*--------------------------------------------------------------.
121| Get the data type (alternative in the union) of the value for |
122| symbol N in rule RULE. |
123`--------------------------------------------------------------*/
124
125static char *
126get_type_name (int n, symbol_list * rule)
127{
128 int i;
129 symbol_list *rp;
130
131 if (n < 0)
132 {
133 complain (_("invalid $ value"));
134 return NULL;
135 }
136
137 rp = rule;
138 i = 0;
139
140 while (i < n)
141 {
142 rp = rp->next;
143 if (rp == NULL || rp->sym == NULL)
144 {
145 complain (_("invalid $ value"));
146 return NULL;
147 }
148 i++;
149 }
150
151 return rp->sym->type_name;
152}
153\f
337bab46
AD
154/*------------------------------------------------------------.
155| Dump the string from FIN to OOUT if non null. MATCH is the |
156| delimiter of the string (either ' or "). |
157`------------------------------------------------------------*/
ae3c3164
AD
158
159static inline void
b6610515 160copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
161{
162 int c;
163
b6610515
RA
164 if (store)
165 obstack_1grow (oout, match);
8c7ebe49 166
4a120d45 167 c = getc (fin);
ae3c3164
AD
168
169 while (c != match)
170 {
171 if (c == EOF)
172 fatal (_("unterminated string at end of file"));
173 if (c == '\n')
174 {
a0f6b076 175 complain (_("unterminated string"));
4a120d45 176 ungetc (c, fin);
ae3c3164
AD
177 c = match; /* invent terminator */
178 continue;
179 }
180
337bab46 181 obstack_1grow (oout, c);
ae3c3164
AD
182
183 if (c == '\\')
184 {
4a120d45 185 c = getc (fin);
ae3c3164
AD
186 if (c == EOF)
187 fatal (_("unterminated string at end of file"));
337bab46 188 obstack_1grow (oout, c);
8c7ebe49 189
ae3c3164
AD
190 if (c == '\n')
191 lineno++;
192 }
193
a70083a3 194 c = getc (fin);
ae3c3164
AD
195 }
196
b6610515
RA
197 if (store)
198 obstack_1grow (oout, c);
199}
200
/* Copy a quoted string from FIN to OOUT, keeping both MATCH
   delimiters in the output.  See copy_string2 for the details.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
208
b6610515
RA
209/* FIXME. */
210
211static inline void
212copy_identifier (FILE *fin, struct obstack *oout)
213{
214 int c;
215
216 while (isalnum (c = getc (fin)) || c == '_')
217 obstack_1grow (oout, c);
218
219 ungetc (c, fin);
220}
ae3c3164 221
337bab46
AD
222/*-----------------------------------------------------------------.
223| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
224| NULL). In fact we just saw a `/', which might or might not be a |
225| comment. In any case, copy what we saw. |
226| |
227| OUT2 might be NULL. |
228`-----------------------------------------------------------------*/
ae3c3164
AD
229
230static inline void
337bab46 231copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
232{
233 int cplus_comment;
a70083a3 234 int ended;
550a72a3
AD
235 int c;
236
237 /* We read a `/', output it. */
337bab46 238 obstack_1grow (oout1, '/');
896fe5c1
AD
239 if (oout2)
240 obstack_1grow (oout2, '/');
550a72a3
AD
241
242 switch ((c = getc (fin)))
243 {
244 case '/':
245 cplus_comment = 1;
246 break;
247 case '*':
248 cplus_comment = 0;
249 break;
250 default:
251 ungetc (c, fin);
252 return;
253 }
ae3c3164 254
337bab46 255 obstack_1grow (oout1, c);
896fe5c1
AD
256 if (oout2)
257 obstack_1grow (oout2, c);
550a72a3 258 c = getc (fin);
ae3c3164
AD
259
260 ended = 0;
261 while (!ended)
262 {
263 if (!cplus_comment && c == '*')
264 {
265 while (c == '*')
266 {
337bab46 267 obstack_1grow (oout1, c);
896fe5c1
AD
268 if (oout2)
269 obstack_1grow (oout2, c);
550a72a3 270 c = getc (fin);
ae3c3164
AD
271 }
272
273 if (c == '/')
274 {
337bab46 275 obstack_1grow (oout1, c);
896fe5c1
AD
276 if (oout2)
277 obstack_1grow (oout2, c);
ae3c3164
AD
278 ended = 1;
279 }
280 }
281 else if (c == '\n')
282 {
283 lineno++;
337bab46 284 obstack_1grow (oout1, c);
896fe5c1
AD
285 if (oout2)
286 obstack_1grow (oout2, c);
ae3c3164
AD
287 if (cplus_comment)
288 ended = 1;
289 else
550a72a3 290 c = getc (fin);
ae3c3164
AD
291 }
292 else if (c == EOF)
293 fatal (_("unterminated comment"));
294 else
295 {
337bab46 296 obstack_1grow (oout1, c);
896fe5c1
AD
297 if (oout2)
298 obstack_1grow (oout2, c);
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 }
302}
303
304
550a72a3
AD
/*------------------------------------------------------------------.
| Single-output variant of copy_comment2: a `/' has just been read  |
| from FIN; copy it, and the comment it may introduce, to OOUT.     |
`------------------------------------------------------------------*/

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *const no_second_output = NULL;

  copy_comment2 (fin, oout, no_second_output);
}
315
316
a70083a3 317/*-----------------------------------------------------------------.
337bab46 318| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
319| reference to this location. STACK_OFFSET is the number of values |
320| in the current rule so far, which says where to find `$0' with |
321| respect to the top of the stack. |
322`-----------------------------------------------------------------*/
1ff442ca 323
a70083a3 324static inline void
337bab46 325copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 326{
a70083a3 327 int c;
1ff442ca 328
a70083a3
AD
329 c = getc (fin);
330 if (c == '$')
1ff442ca 331 {
ff4423cc 332 obstack_sgrow (oout, "yyloc");
89cab50d 333 locations_flag = 1;
a70083a3
AD
334 }
335 else if (isdigit (c) || c == '-')
336 {
337 int n;
1ff442ca 338
a70083a3
AD
339 ungetc (c, fin);
340 n = read_signed_integer (fin);
943819bf 341
337bab46 342 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 343 locations_flag = 1;
1ff442ca 344 }
a70083a3 345 else
ff4a34be
AD
346 {
347 char buf[] = "@c";
348 buf[1] = c;
349 complain (_("%s is invalid"), quote (buf));
350 }
1ff442ca 351}
79282c5a
AD
352
353
354/*-------------------------------------------------------------------.
355| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
356| |
357| Possible inputs: $[<TYPENAME>]($|integer) |
358| |
337bab46 359| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
360| the number of values in the current rule so far, which says where |
361| to find `$0' with respect to the top of the stack. |
362`-------------------------------------------------------------------*/
363
364static inline void
337bab46 365copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
366 symbol_list *rule, int stack_offset)
367{
368 int c = getc (fin);
b0ce6046 369 const char *type_name = NULL;
79282c5a 370
f282676b 371 /* Get the type name if explicit. */
79282c5a
AD
372 if (c == '<')
373 {
f282676b 374 read_type_name (fin);
79282c5a
AD
375 type_name = token_buffer;
376 value_components_used = 1;
79282c5a
AD
377 c = getc (fin);
378 }
379
380 if (c == '$')
381 {
ff4423cc 382 obstack_sgrow (oout, "yyval");
8c7ebe49 383
79282c5a
AD
384 if (!type_name)
385 type_name = get_type_name (0, rule);
386 if (type_name)
337bab46 387 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
388 if (!type_name && typed)
389 complain (_("$$ of `%s' has no declared type"),
390 rule->sym->tag);
391 }
392 else if (isdigit (c) || c == '-')
393 {
394 int n;
395 ungetc (c, fin);
396 n = read_signed_integer (fin);
397
398 if (!type_name && n > 0)
399 type_name = get_type_name (n, rule);
400
337bab46 401 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 402
79282c5a 403 if (type_name)
337bab46 404 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
405 if (!type_name && typed)
406 complain (_("$%d of `%s' has no declared type"),
407 n, rule->sym->tag);
408 }
409 else
410 {
411 char buf[] = "$c";
412 buf[1] = c;
413 complain (_("%s is invalid"), quote (buf));
414 }
415}
a70083a3
AD
416\f
417/*-------------------------------------------------------------------.
418| Copy the contents of a `%{ ... %}' into the definitions file. The |
419| `%{' has already been read. Return after reading the `%}'. |
420`-------------------------------------------------------------------*/
1ff442ca 421
4a120d45 422static void
118fb205 423copy_definition (void)
1ff442ca 424{
a70083a3 425 int c;
ae3c3164 426 /* -1 while reading a character if prev char was %. */
a70083a3 427 int after_percent;
1ff442ca 428
b6610515 429#if 0
89cab50d 430 if (!no_lines_flag)
25b222fa
MA
431 {
432 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 433 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
434 muscle_find("filename")));
435 }
b6610515 436#endif
1ff442ca
NF
437
438 after_percent = 0;
439
ae3c3164 440 c = getc (finput);
1ff442ca
NF
441
442 for (;;)
443 {
444 switch (c)
445 {
446 case '\n':
dd60faec 447 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
448 lineno++;
449 break;
450
451 case '%':
a70083a3 452 after_percent = -1;
1ff442ca 453 break;
a083fbbf 454
1ff442ca
NF
455 case '\'':
456 case '"':
337bab46 457 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
458 break;
459
460 case '/':
337bab46 461 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
462 break;
463
464 case EOF:
a70083a3 465 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
466
467 default:
dd60faec 468 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
469 }
470
a70083a3 471 c = getc (finput);
1ff442ca
NF
472
473 if (after_percent)
474 {
475 if (c == '}')
476 return;
dd60faec 477 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
478 }
479 after_percent = 0;
1ff442ca 480 }
1ff442ca
NF
481}
482
483
d7020c20
AD
484/*-------------------------------------------------------------------.
485| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
486| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
487| are reversed. |
488`-------------------------------------------------------------------*/
1ff442ca 489
4a120d45 490static void
d7020c20 491parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 492{
342b8b6e
AD
493 token_t token = tok_undef;
494 char *typename = NULL;
1ff442ca 495
1e9798d5
AD
496 /* The symbol being defined. */
497 struct bucket *symbol = NULL;
498
499 /* After `%token' and `%nterm', any number of symbols maybe be
500 defined. */
1ff442ca
NF
501 for (;;)
502 {
e6011337
JT
503 int tmp_char = ungetc (skip_white_space (), finput);
504
1e9798d5
AD
505 /* `%' (for instance from `%token', or from `%%' etc.) is the
506 only valid means to end this declaration. */
e6011337 507 if (tmp_char == '%')
1ff442ca 508 return;
e6011337 509 if (tmp_char == EOF)
a0f6b076 510 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 511
a70083a3 512 token = lex ();
511e79b3 513 if (token == tok_comma)
943819bf
RS
514 {
515 symbol = NULL;
516 continue;
517 }
511e79b3 518 if (token == tok_typename)
1ff442ca 519 {
95e36146 520 typename = xstrdup (token_buffer);
1ff442ca 521 value_components_used = 1;
943819bf
RS
522 symbol = NULL;
523 }
511e79b3 524 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 525 {
8e03724b
AD
526 if (symval->alias)
527 warn (_("symbol `%s' used more than once as a literal string"),
528 symval->tag);
529 else if (symbol->alias)
530 warn (_("symbol `%s' given more than one literal string"),
531 symbol->tag);
532 else
533 {
534 symval->class = token_sym;
535 symval->type_name = typename;
536 symval->user_token_number = symbol->user_token_number;
537 symbol->user_token_number = SALIAS;
538 symval->alias = symbol;
539 symbol->alias = symval;
540 /* symbol and symval combined are only one symbol */
541 nsyms--;
542 }
8e03724b 543 symbol = NULL;
1ff442ca 544 }
511e79b3 545 else if (token == tok_identifier)
1ff442ca
NF
546 {
547 int oldclass = symval->class;
943819bf 548 symbol = symval;
1ff442ca 549
943819bf 550 if (symbol->class == what_is_not)
a0f6b076 551 complain (_("symbol %s redefined"), symbol->tag);
943819bf 552 symbol->class = what_is;
d7020c20 553 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 554 symbol->value = nvars++;
1ff442ca
NF
555
556 if (typename)
557 {
943819bf
RS
558 if (symbol->type_name == NULL)
559 symbol->type_name = typename;
a70083a3 560 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 561 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
562 }
563 }
511e79b3 564 else if (symbol && token == tok_number)
a70083a3 565 {
943819bf 566 symbol->user_token_number = numval;
a70083a3 567 }
1ff442ca 568 else
943819bf 569 {
a0f6b076 570 complain (_("`%s' is invalid in %s"),
d7020c20 571 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 572 skip_to_char ('%');
943819bf 573 }
1ff442ca
NF
574 }
575
576}
577
1ff442ca 578
d7020c20
AD
579/*------------------------------.
580| Parse what comes after %start |
581`------------------------------*/
1ff442ca 582
4a120d45 583static void
118fb205 584parse_start_decl (void)
1ff442ca
NF
585{
586 if (start_flag)
27821bff 587 complain (_("multiple %s declarations"), "%start");
511e79b3 588 if (lex () != tok_identifier)
27821bff 589 complain (_("invalid %s declaration"), "%start");
943819bf
RS
590 else
591 {
592 start_flag = 1;
593 startval = symval;
594 }
1ff442ca
NF
595}
596
a70083a3
AD
597/*-----------------------------------------------------------.
598| read in a %type declaration and record its information for |
599| get_type_name to access |
600`-----------------------------------------------------------*/
601
602static void
603parse_type_decl (void)
604{
a70083a3
AD
605 char *name;
606
511e79b3 607 if (lex () != tok_typename)
a70083a3
AD
608 {
609 complain ("%s", _("%type declaration has no <typename>"));
610 skip_to_char ('%');
611 return;
612 }
613
95e36146 614 name = xstrdup (token_buffer);
a70083a3
AD
615
616 for (;;)
617 {
f17bcd1f 618 token_t t;
a70083a3
AD
619 int tmp_char = ungetc (skip_white_space (), finput);
620
621 if (tmp_char == '%')
622 return;
623 if (tmp_char == EOF)
624 fatal (_("Premature EOF after %s"), token_buffer);
625
626 t = lex ();
627
628 switch (t)
1ff442ca
NF
629 {
630
511e79b3
AD
631 case tok_comma:
632 case tok_semicolon:
1ff442ca
NF
633 break;
634
511e79b3 635 case tok_identifier:
1ff442ca
NF
636 if (symval->type_name == NULL)
637 symval->type_name = name;
a70083a3 638 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 639 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
640
641 break;
642
643 default:
a0f6b076
AD
644 complain (_("invalid %%type declaration due to item: %s"),
645 token_buffer);
a70083a3 646 skip_to_char ('%');
1ff442ca
NF
647 }
648 }
649}
650
651
652
d7020c20
AD
653/*----------------------------------------------------------------.
654| Read in a %left, %right or %nonassoc declaration and record its |
655| information. |
656`----------------------------------------------------------------*/
1ff442ca 657
4a120d45 658static void
d7020c20 659parse_assoc_decl (associativity assoc)
1ff442ca 660{
a70083a3
AD
661 char *name = NULL;
662 int prev = 0;
1ff442ca 663
a70083a3 664 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 665
1ff442ca
NF
666 for (;;)
667 {
f17bcd1f 668 token_t t;
e6011337 669 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 670
e6011337 671 if (tmp_char == '%')
1ff442ca 672 return;
e6011337 673 if (tmp_char == EOF)
a0f6b076 674 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 675
a70083a3 676 t = lex ();
1ff442ca
NF
677
678 switch (t)
679 {
511e79b3 680 case tok_typename:
95e36146 681 name = xstrdup (token_buffer);
1ff442ca
NF
682 break;
683
511e79b3 684 case tok_comma:
1ff442ca
NF
685 break;
686
511e79b3 687 case tok_identifier:
1ff442ca 688 if (symval->prec != 0)
a0f6b076 689 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
690 symval->prec = lastprec;
691 symval->assoc = assoc;
d7020c20 692 if (symval->class == nterm_sym)
a0f6b076 693 complain (_("symbol %s redefined"), symval->tag);
d7020c20 694 symval->class = token_sym;
1ff442ca 695 if (name)
a70083a3 696 { /* record the type, if one is specified */
1ff442ca
NF
697 if (symval->type_name == NULL)
698 symval->type_name = name;
a70083a3 699 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 700 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
701 }
702 break;
703
511e79b3
AD
704 case tok_number:
705 if (prev == tok_identifier)
a70083a3 706 {
1ff442ca 707 symval->user_token_number = numval;
a70083a3
AD
708 }
709 else
710 {
711 complain (_
712 ("invalid text (%s) - number should be after identifier"),
713token_buffer);
714 skip_to_char ('%');
715 }
1ff442ca
NF
716 break;
717
511e79b3 718 case tok_semicolon:
1ff442ca
NF
719 return;
720
721 default:
a0f6b076 722 complain (_("unexpected item: %s"), token_buffer);
a70083a3 723 skip_to_char ('%');
1ff442ca
NF
724 }
725
726 prev = t;
1ff442ca
NF
727 }
728}
729
730
731
dd60faec 732/*--------------------------------------------------------------.
180d45ba
PB
733| Copy the union declaration into the stype muscle |
734| (and fdefines), where it is made into the definition of |
735| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 736`--------------------------------------------------------------*/
1ff442ca 737
4a120d45 738static void
118fb205 739parse_union_decl (void)
1ff442ca 740{
a70083a3
AD
741 int c;
742 int count = 0;
180d45ba 743 struct obstack union_obstack;
5f7e0832
AD
744 const char *prologue = "\
745#ifndef YYSTYPE\n\
746typedef union";
747 const char *epilogue = "\
748 yystype;\n\
749# define YYSTYPE yystype\n\
750#endif\n";
1ff442ca
NF
751
752 if (typed)
27821bff 753 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
754
755 typed = 1;
756
f6ec6d13
AD
757 /* FIXME: I'm worried: are you sure attrs_obstack is properly
758 filled? */
5f7e0832
AD
759 /* I don't see any reasons to keep this line, because we should
760 create a special skeleton for this option. */
180d45ba 761 if (no_lines_flag)
dd60faec 762 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 763
180d45ba
PB
764 obstack_init (&union_obstack);
765 obstack_sgrow (&union_obstack, "union");
896fe5c1 766 if (defines_flag)
5f7e0832 767 obstack_sgrow (&defines_obstack, prologue);
1ff442ca 768
27821bff 769 c = getc (finput);
1ff442ca
NF
770
771 while (c != EOF)
772 {
342b8b6e
AD
773 /* If C contains '/', it is output by copy_comment (). */
774 if (c != '/')
775 {
f6ec6d13 776 obstack_1grow (&union_obstack, c);
342b8b6e
AD
777 if (defines_flag)
778 obstack_1grow (&defines_obstack, c);
779 }
1ff442ca
NF
780
781 switch (c)
782 {
783 case '\n':
784 lineno++;
785 break;
786
787 case '/':
180d45ba 788 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
789 break;
790
1ff442ca
NF
791 case '{':
792 count++;
793 break;
794
795 case '}':
796 if (count == 0)
27821bff 797 complain (_("unmatched %s"), "`}'");
1ff442ca 798 count--;
943819bf 799 if (count <= 0)
1ff442ca 800 {
896fe5c1 801 if (defines_flag)
5f7e0832 802 obstack_sgrow (&defines_obstack, epilogue);
1ff442ca 803 /* JF don't choke on trailing semi */
27821bff
AD
804 c = skip_white_space ();
805 if (c != ';')
a70083a3 806 ungetc (c, finput);
180d45ba
PB
807 obstack_1grow (&union_obstack, 0);
808 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
809 return;
810 }
811 }
812
27821bff 813 c = getc (finput);
1ff442ca 814 }
180d45ba 815
1ff442ca
NF
816}
817
d7020c20
AD
818
819/*-------------------------------------------------------.
820| Parse the declaration %expect N which says to expect N |
821| shift-reduce conflicts. |
822`-------------------------------------------------------*/
1ff442ca 823
4a120d45 824static void
118fb205 825parse_expect_decl (void)
1ff442ca 826{
131e2fef 827 int c = skip_white_space ();
1ff442ca
NF
828 ungetc (c, finput);
829
131e2fef 830 if (!isdigit (c))
79282c5a 831 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
832 else
833 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
834}
835
a70083a3
AD
836
837/*-------------------------------------------------------------------.
838| Parse what comes after %thong. the full syntax is |
839| |
840| %thong <type> token number literal |
841| |
842| the <type> or number may be omitted. The number specifies the |
843| user_token_number. |
844| |
845| Two symbols are entered in the table, one for the token symbol and |
846| one for the literal. Both are given the <type>, if any, from the |
847| declaration. The ->user_token_number of the first is SALIAS and |
848| the ->user_token_number of the second is set to the number, if |
849| any, from the declaration. The two symbols are linked via |
850| pointers in their ->alias fields. |
851| |
852| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
853| only the literal string is retained it is the literal string that |
854| is output to yytname |
855`-------------------------------------------------------------------*/
856
857static void
858parse_thong_decl (void)
7b306f52 859{
f17bcd1f 860 token_t token;
a70083a3
AD
861 struct bucket *symbol;
862 char *typename = 0;
6b7e85b9 863 int usrtoknum = SUNDEF;
7b306f52 864
a70083a3 865 token = lex (); /* fetch typename or first token */
511e79b3 866 if (token == tok_typename)
7b306f52 867 {
95e36146 868 typename = xstrdup (token_buffer);
a70083a3
AD
869 value_components_used = 1;
870 token = lex (); /* fetch first token */
7b306f52 871 }
7b306f52 872
a70083a3 873 /* process first token */
7b306f52 874
511e79b3 875 if (token != tok_identifier)
a70083a3
AD
876 {
877 complain (_("unrecognized item %s, expected an identifier"),
878 token_buffer);
879 skip_to_char ('%');
880 return;
7b306f52 881 }
d7020c20 882 symval->class = token_sym;
a70083a3
AD
883 symval->type_name = typename;
884 symval->user_token_number = SALIAS;
885 symbol = symval;
7b306f52 886
a70083a3 887 token = lex (); /* get number or literal string */
1ff442ca 888
511e79b3 889 if (token == tok_number)
943819bf 890 {
a70083a3
AD
891 usrtoknum = numval;
892 token = lex (); /* okay, did number, now get literal */
943819bf 893 }
1ff442ca 894
a70083a3 895 /* process literal string token */
1ff442ca 896
511e79b3 897 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 898 {
a70083a3
AD
899 complain (_("expected string constant instead of %s"), token_buffer);
900 skip_to_char ('%');
901 return;
1ff442ca 902 }
d7020c20 903 symval->class = token_sym;
a70083a3
AD
904 symval->type_name = typename;
905 symval->user_token_number = usrtoknum;
1ff442ca 906
a70083a3
AD
907 symval->alias = symbol;
908 symbol->alias = symval;
1ff442ca 909
79282c5a
AD
910 /* symbol and symval combined are only one symbol. */
911 nsyms--;
a70083a3 912}
3cef001a 913
b6610515 914static void
11d82f03 915parse_muscle_decl (void)
b6610515
RA
916{
917 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
918 char* muscle_key;
919 char* muscle_value;
b6610515
RA
920
921 /* Read key. */
922 if (!isalpha (ch) && ch != '_')
923 {
924 complain (_("invalid %s declaration"), "%define");
925 skip_to_char ('%');
926 return;
927 }
11d82f03
MA
928 copy_identifier (finput, &muscle_obstack);
929 obstack_1grow (&muscle_obstack, 0);
930 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 931
b6610515
RA
932 /* Read value. */
933 ch = skip_white_space ();
934 if (ch != '"')
935 {
936 ungetc (ch, finput);
937 if (ch != EOF)
938 {
939 complain (_("invalid %s declaration"), "%define");
940 skip_to_char ('%');
941 return;
942 }
943 else
944 fatal (_("Premature EOF after %s"), "\"");
945 }
11d82f03
MA
946 copy_string2 (finput, &muscle_obstack, '"', 0);
947 obstack_1grow (&muscle_obstack, 0);
948 muscle_value = obstack_finish (&muscle_obstack);
b6610515 949
b6610515 950 /* Store the (key, value) pair in the environment. */
11d82f03 951 muscle_insert (muscle_key, muscle_value);
b6610515
RA
952}
953
2ba3b73c
MA
954
/*--------------------------------------------------------------.
| Parse what comes after %skeleton.  Not implemented yet: this  |
| placeholder reads nothing and records nothing.                |
`--------------------------------------------------------------*/

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
  return;
}
964
a70083a3
AD
965/*----------------------------------------------------------------.
966| Read from finput until `%%' is seen. Discard the `%%'. Handle |
967| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 968| groups to ATTRS_OBSTACK. |
a70083a3 969`----------------------------------------------------------------*/
1ff442ca 970
4a120d45 971static void
a70083a3 972read_declarations (void)
1ff442ca 973{
a70083a3 974 for (;;)
1ff442ca 975 {
951366c1 976 int c = skip_white_space ();
1ff442ca 977
a70083a3
AD
978 if (c == '%')
979 {
951366c1 980 token_t tok = parse_percent_token ();
1ff442ca 981
a70083a3 982 switch (tok)
943819bf 983 {
511e79b3 984 case tok_two_percents:
a70083a3 985 return;
1ff442ca 986
511e79b3 987 case tok_percent_left_curly:
a70083a3
AD
988 copy_definition ();
989 break;
1ff442ca 990
511e79b3 991 case tok_token:
d7020c20 992 parse_token_decl (token_sym, nterm_sym);
a70083a3 993 break;
1ff442ca 994
511e79b3 995 case tok_nterm:
d7020c20 996 parse_token_decl (nterm_sym, token_sym);
a70083a3 997 break;
1ff442ca 998
511e79b3 999 case tok_type:
a70083a3
AD
1000 parse_type_decl ();
1001 break;
1ff442ca 1002
511e79b3 1003 case tok_start:
a70083a3
AD
1004 parse_start_decl ();
1005 break;
118fb205 1006
511e79b3 1007 case tok_union:
a70083a3
AD
1008 parse_union_decl ();
1009 break;
1ff442ca 1010
511e79b3 1011 case tok_expect:
a70083a3
AD
1012 parse_expect_decl ();
1013 break;
6deb4447 1014
511e79b3 1015 case tok_thong:
a70083a3
AD
1016 parse_thong_decl ();
1017 break;
d7020c20 1018
511e79b3 1019 case tok_left:
d7020c20 1020 parse_assoc_decl (left_assoc);
a70083a3 1021 break;
1ff442ca 1022
511e79b3 1023 case tok_right:
d7020c20 1024 parse_assoc_decl (right_assoc);
a70083a3 1025 break;
1ff442ca 1026
511e79b3 1027 case tok_nonassoc:
d7020c20 1028 parse_assoc_decl (non_assoc);
a70083a3 1029 break;
1ff442ca 1030
b6610515 1031 case tok_define:
11d82f03 1032 parse_muscle_decl ();
b6610515 1033 break;
342b8b6e 1034
2ba3b73c
MA
1035 case tok_skel:
1036 parse_skel_decl ();
1037 break;
b6610515 1038
511e79b3 1039 case tok_noop:
a70083a3 1040 break;
1ff442ca 1041
951366c1
AD
1042 case tok_stropt:
1043 case tok_intopt:
1044 case tok_obsolete:
1045 case tok_illegal:
1046 abort ();
1047 break;
1048
a70083a3
AD
1049 default:
1050 complain (_("unrecognized: %s"), token_buffer);
1051 skip_to_char ('%');
1052 }
1053 }
1054 else if (c == EOF)
1055 fatal (_("no input grammar"));
1056 else
1057 {
ff4a34be
AD
1058 char buf[] = "c";
1059 buf[0] = c;
1060 complain (_("unknown character: %s"), quote (buf));
a70083a3 1061 skip_to_char ('%');
1ff442ca 1062 }
1ff442ca 1063 }
1ff442ca 1064}
a70083a3
AD
1065\f
1066/*-------------------------------------------------------------------.
1067| Assuming that a `{' has just been seen, copy everything up to the |
1068| matching `}' into the actions file. STACK_OFFSET is the number of |
1069| values in the current rule so far, which says where to find `$0' |
1070| with respect to the top of the stack. |
1071`-------------------------------------------------------------------*/
1ff442ca 1072
4a120d45 1073static void
79282c5a 1074copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1075{
a70083a3 1076 int c;
a70083a3 1077 int count;
8c7ebe49 1078 char buf[4096];
1ff442ca
NF
1079
1080 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1081 if (semantic_parser)
1082 stack_offset = 0;
1ff442ca 1083
25b222fa 1084 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
8c7ebe49 1085
89cab50d 1086 if (!no_lines_flag)
8c7ebe49 1087 {
25b222fa 1088 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
342b8b6e 1089 lineno, quotearg_style (c_quoting_style,
25b222fa 1090 muscle_find ("filename")));
8c7ebe49
AD
1091 }
1092 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1093
1094 count = 1;
a70083a3 1095 c = getc (finput);
1ff442ca
NF
1096
1097 while (count > 0)
1098 {
1099 while (c != '}')
a70083a3
AD
1100 {
1101 switch (c)
1ff442ca
NF
1102 {
1103 case '\n':
8c7ebe49 1104 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1105 lineno++;
1106 break;
1107
1108 case '{':
8c7ebe49 1109 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1110 count++;
1111 break;
1112
1113 case '\'':
1114 case '"':
337bab46 1115 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1116 break;
1117
1118 case '/':
337bab46 1119 copy_comment (finput, &action_obstack);
1ff442ca
NF
1120 break;
1121
1122 case '$':
337bab46 1123 copy_dollar (finput, &action_obstack,
8c7ebe49 1124 rule, stack_offset);
1ff442ca
NF
1125 break;
1126
1127 case '@':
337bab46 1128 copy_at (finput, &action_obstack,
8c7ebe49 1129 stack_offset);
6666f98f 1130 break;
1ff442ca
NF
1131
1132 case EOF:
27821bff 1133 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1134
1135 default:
8c7ebe49 1136 obstack_1grow (&action_obstack, c);
a70083a3
AD
1137 }
1138
1139 c = getc (finput);
1140 }
1141
1142 /* above loop exits when c is '}' */
1143
1144 if (--count)
1145 {
8c7ebe49 1146 obstack_1grow (&action_obstack, c);
a70083a3
AD
1147 c = getc (finput);
1148 }
1149 }
1150
ff4423cc 1151 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1152}
1153\f
1154/*-------------------------------------------------------------------.
1155| After `%guard' is seen in the input file, copy the actual guard |
1156| into the guards file. If the guard is followed by an action, copy |
1157| that into the actions file. STACK_OFFSET is the number of values |
1158| in the current rule so far, which says where to find `$0' with |
1159| respect to the top of the stack, for the simple parser in which |
1160| the stack is not popped until after the guard is run. |
1161`-------------------------------------------------------------------*/
1162
1163static void
79282c5a 1164copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1165{
1166 int c;
a70083a3 1167 int count;
a70083a3
AD
1168 int brace_flag = 0;
1169
1170 /* offset is always 0 if parser has already popped the stack pointer */
1171 if (semantic_parser)
1172 stack_offset = 0;
1173
ea5607fd 1174 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1175 if (!no_lines_flag)
25b222fa 1176 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1177 lineno, quotearg_style (c_quoting_style,
11d82f03 1178 muscle_find ("filename")));
ea5607fd 1179 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1180
1181 count = 0;
1182 c = getc (finput);
1183
1184 while (brace_flag ? (count > 0) : (c != ';'))
1185 {
1186 switch (c)
1187 {
1188 case '\n':
ea5607fd 1189 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1190 lineno++;
1191 break;
1192
1193 case '{':
ea5607fd 1194 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1195 brace_flag = 1;
1196 count++;
1197 break;
1198
1199 case '}':
ea5607fd 1200 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1201 if (count > 0)
1202 count--;
1203 else
1204 {
1205 complain (_("unmatched %s"), "`}'");
1206 c = getc (finput); /* skip it */
1207 }
1208 break;
1209
1210 case '\'':
1211 case '"':
337bab46 1212 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1213 break;
1214
1215 case '/':
337bab46 1216 copy_comment (finput, &guard_obstack);
a70083a3
AD
1217 break;
1218
1219 case '$':
337bab46 1220 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1221 break;
1ff442ca 1222
a70083a3 1223 case '@':
337bab46 1224 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1225 break;
1ff442ca 1226
a70083a3
AD
1227 case EOF:
1228 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1229
a70083a3 1230 default:
ea5607fd 1231 obstack_1grow (&guard_obstack, c);
1ff442ca 1232 }
a70083a3
AD
1233
1234 if (c != '}' || count != 0)
1235 c = getc (finput);
1ff442ca
NF
1236 }
1237
a70083a3
AD
1238 c = skip_white_space ();
1239
ff4423cc 1240 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1241 if (c == '{')
1242 copy_action (rule, stack_offset);
1243 else if (c == '=')
1244 {
1245 c = getc (finput); /* why not skip_white_space -wjh */
1246 if (c == '{')
1247 copy_action (rule, stack_offset);
1248 }
1249 else
1250 ungetc (c, finput);
1ff442ca 1251}
a70083a3
AD
1252\f
1253
1254static void
1255record_rule_line (void)
1256{
1257 /* Record each rule's source line number in rline table. */
1ff442ca 1258
a70083a3
AD
1259 if (nrules >= rline_allocated)
1260 {
1261 rline_allocated = nrules * 2;
d7913476 1262 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1263 }
1264 rline[nrules] = lineno;
1265}
1ff442ca
NF
1266
1267
a70083a3
AD
1268/*-------------------------------------------------------------------.
1269| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1270| with the user's names. |
1271`-------------------------------------------------------------------*/
1ff442ca 1272
4a120d45 1273static bucket *
118fb205 1274gensym (void)
1ff442ca 1275{
274d42ce
AD
1276 /* Incremented for each generated symbol */
1277 static int gensym_count = 0;
1278 static char buf[256];
1279
a70083a3 1280 bucket *sym;
1ff442ca 1281
274d42ce
AD
1282 sprintf (buf, "@%d", ++gensym_count);
1283 token_buffer = buf;
a70083a3 1284 sym = getsym (token_buffer);
d7020c20 1285 sym->class = nterm_sym;
1ff442ca 1286 sym->value = nvars++;
36281465 1287 return sym;
1ff442ca
NF
1288}
1289
a70083a3
AD
1290#if 0
1291/*------------------------------------------------------------------.
1292| read in a %type declaration and record its information for |
1293| get_type_name to access. This is unused. It is only called from |
1294| the #if 0 part of readgram |
1295`------------------------------------------------------------------*/
1296
1297static int
1298get_type (void)
1299{
1300 int k;
f17bcd1f 1301 token_t token;
a70083a3
AD
1302 char *name;
1303
f17bcd1f 1304 token = lex ();
a70083a3 1305
f17bcd1f 1306 if (token != tok_typename)
a70083a3
AD
1307 {
1308 complain (_("invalid %s declaration"), "%type");
1309 return t;
1310 }
1311
95e36146 1312 name = xstrdup (token_buffer);
a70083a3
AD
1313
1314 for (;;)
1315 {
f17bcd1f 1316 token = lex ();
a70083a3 1317
f17bcd1f 1318 switch (token)
a70083a3 1319 {
511e79b3 1320 case tok_semicolon:
a70083a3
AD
1321 return lex ();
1322
511e79b3 1323 case tok_comma:
a70083a3
AD
1324 break;
1325
511e79b3 1326 case tok_identifier:
a70083a3
AD
1327 if (symval->type_name == NULL)
1328 symval->type_name = name;
1329 else if (strcmp (name, symval->type_name) != 0)
1330 complain (_("type redeclaration for %s"), symval->tag);
1331
1332 break;
1333
1334 default:
f17bcd1f 1335 return token;
a70083a3
AD
1336 }
1337 }
1338}
1ff442ca 1339
a70083a3
AD
1340#endif
1341\f
1342/*------------------------------------------------------------------.
1343| Parse the input grammar into a single symbol_list structure. Each |
1344| rule is represented by a sequence of symbols: the left hand side |
1345| followed by the contents of the right hand side, followed by a |
1346| null pointer instead of a symbol to terminate the rule. The next |
1347| symbol is the lhs of the following rule. |
1348| |
1349| All guards and actions are copied out to the appropriate files, |
1350| labelled by the rule number they apply to. |
1351`------------------------------------------------------------------*/
1ff442ca 1352
4a120d45 1353static void
118fb205 1354readgram (void)
1ff442ca 1355{
f17bcd1f 1356 token_t t;
a70083a3
AD
1357 bucket *lhs = NULL;
1358 symbol_list *p;
1359 symbol_list *p1;
1360 bucket *bp;
1ff442ca 1361
ff4a34be
AD
1362 /* Points to first symbol_list of current rule. its symbol is the
1363 lhs of the rule. */
1364 symbol_list *crule;
1365 /* Points to the symbol_list preceding crule. */
1366 symbol_list *crule1;
1ff442ca
NF
1367
1368 p1 = NULL;
1369
a70083a3 1370 t = lex ();
1ff442ca 1371
511e79b3 1372 while (t != tok_two_percents && t != tok_eof)
1ff442ca 1373 {
511e79b3 1374 if (t == tok_identifier || t == tok_bar)
1ff442ca 1375 {
89cab50d 1376 int action_flag = 0;
ff4a34be
AD
1377 /* Number of symbols in rhs of this rule so far */
1378 int rulelength = 0;
1ff442ca
NF
1379 int xactions = 0; /* JF for error checking */
1380 bucket *first_rhs = 0;
1381
511e79b3 1382 if (t == tok_identifier)
1ff442ca
NF
1383 {
1384 lhs = symval;
943819bf
RS
1385
1386 if (!start_flag)
1387 {
1388 startval = lhs;
1389 start_flag = 1;
1390 }
a083fbbf 1391
a70083a3 1392 t = lex ();
511e79b3 1393 if (t != tok_colon)
943819bf 1394 {
a0f6b076 1395 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1396 unlex (t);
943819bf 1397 }
1ff442ca
NF
1398 }
1399
511e79b3 1400 if (nrules == 0 && t == tok_bar)
1ff442ca 1401 {
a0f6b076 1402 complain (_("grammar starts with vertical bar"));
943819bf 1403 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1404 }
1ff442ca
NF
1405 /* start a new rule and record its lhs. */
1406
1407 nrules++;
1408 nitems++;
1409
1410 record_rule_line ();
1411
d7913476 1412 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1413 p->sym = lhs;
1414
1415 crule1 = p1;
1416 if (p1)
1417 p1->next = p;
1418 else
1419 grammar = p;
1420
1421 p1 = p;
1422 crule = p;
1423
1424 /* mark the rule's lhs as a nonterminal if not already so. */
1425
d7020c20 1426 if (lhs->class == unknown_sym)
1ff442ca 1427 {
d7020c20 1428 lhs->class = nterm_sym;
1ff442ca
NF
1429 lhs->value = nvars;
1430 nvars++;
1431 }
d7020c20 1432 else if (lhs->class == token_sym)
a0f6b076 1433 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1434
1435 /* read the rhs of the rule. */
1436
1437 for (;;)
1438 {
a70083a3 1439 t = lex ();
511e79b3 1440 if (t == tok_prec)
943819bf 1441 {
a70083a3 1442 t = lex ();
943819bf 1443 crule->ruleprec = symval;
a70083a3 1444 t = lex ();
943819bf 1445 }
1ff442ca 1446
511e79b3 1447 if (!(t == tok_identifier || t == tok_left_curly))
a70083a3 1448 break;
1ff442ca
NF
1449
1450 /* If next token is an identifier, see if a colon follows it.
a70083a3 1451 If one does, exit this rule now. */
511e79b3 1452 if (t == tok_identifier)
1ff442ca 1453 {
a70083a3 1454 bucket *ssave;
f17bcd1f 1455 token_t t1;
1ff442ca
NF
1456
1457 ssave = symval;
a70083a3
AD
1458 t1 = lex ();
1459 unlex (t1);
1ff442ca 1460 symval = ssave;
511e79b3 1461 if (t1 == tok_colon)
a70083a3 1462 break;
1ff442ca 1463
a70083a3 1464 if (!first_rhs) /* JF */
1ff442ca
NF
1465 first_rhs = symval;
1466 /* Not followed by colon =>
1467 process as part of this rule's rhs. */
1468 }
1469
1470 /* If we just passed an action, that action was in the middle
a70083a3
AD
1471 of a rule, so make a dummy rule to reduce it to a
1472 non-terminal. */
89cab50d 1473 if (action_flag)
1ff442ca 1474 {
a70083a3 1475 bucket *sdummy;
1ff442ca 1476
f282676b
AD
1477 /* Since the action was written out with this rule's
1478 number, we must give the new rule this number by
1479 inserting the new rule before it. */
1ff442ca
NF
1480
1481 /* Make a dummy nonterminal, a gensym. */
a70083a3 1482 sdummy = gensym ();
1ff442ca
NF
1483
1484 /* Make a new rule, whose body is empty,
1485 before the current one, so that the action
1486 just read can belong to it. */
1487 nrules++;
1488 nitems++;
1489 record_rule_line ();
d7913476 1490 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1491 if (crule1)
1492 crule1->next = p;
a70083a3
AD
1493 else
1494 grammar = p;
1ff442ca 1495 p->sym = sdummy;
d7913476 1496 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1497 p->next = crule1;
1498 crule1->next = crule;
1499
f282676b
AD
1500 /* Insert the dummy generated by that rule into this
1501 rule. */
1ff442ca 1502 nitems++;
d7913476 1503 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1504 p->sym = sdummy;
1505 p1->next = p;
1506 p1 = p;
1507
89cab50d 1508 action_flag = 0;
1ff442ca
NF
1509 }
1510
511e79b3 1511 if (t == tok_identifier)
1ff442ca
NF
1512 {
1513 nitems++;
d7913476 1514 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1515 p->sym = symval;
1516 p1->next = p;
1517 p1 = p;
1518 }
a70083a3 1519 else /* handle an action. */
1ff442ca 1520 {
a70083a3 1521 copy_action (crule, rulelength);
89cab50d 1522 action_flag = 1;
1ff442ca
NF
1523 xactions++; /* JF */
1524 }
1525 rulelength++;
a70083a3 1526 } /* end of read rhs of rule */
1ff442ca
NF
1527
1528 /* Put an empty link in the list to mark the end of this rule */
d7913476 1529 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1530 p1->next = p;
1531 p1 = p;
1532
511e79b3 1533 if (t == tok_prec)
1ff442ca 1534 {
a0f6b076 1535 complain (_("two @prec's in a row"));
a70083a3 1536 t = lex ();
1ff442ca 1537 crule->ruleprec = symval;
a70083a3 1538 t = lex ();
1ff442ca 1539 }
511e79b3 1540 if (t == tok_guard)
1ff442ca 1541 {
a70083a3 1542 if (!semantic_parser)
ff4a34be 1543 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1544
a70083a3
AD
1545 copy_guard (crule, rulelength);
1546 t = lex ();
1ff442ca 1547 }
511e79b3 1548 else if (t == tok_left_curly)
1ff442ca 1549 {
a70083a3 1550 /* This case never occurs -wjh */
89cab50d 1551 if (action_flag)
a0f6b076 1552 complain (_("two actions at end of one rule"));
a70083a3 1553 copy_action (crule, rulelength);
89cab50d 1554 action_flag = 1;
943819bf 1555 xactions++; /* -wjh */
a70083a3 1556 t = lex ();
1ff442ca 1557 }
a0f6b076 1558 /* If $$ is being set in default way, report if any type
6666f98f
AD
1559 mismatch. */
1560 else if (!xactions
a70083a3 1561 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1562 {
6666f98f
AD
1563 if (lhs->type_name == 0
1564 || first_rhs->type_name == 0
a70083a3 1565 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1566 complain (_("type clash (`%s' `%s') on default action"),
1567 lhs->type_name ? lhs->type_name : "",
a70083a3 1568 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1569 }
1570 /* Warn if there is no default for $$ but we need one. */
1571 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1572 complain (_("empty rule for typed nonterminal, and no action"));
511e79b3 1573 if (t == tok_semicolon)
a70083a3 1574 t = lex ();
a083fbbf 1575 }
943819bf 1576#if 0
a70083a3 1577 /* these things can appear as alternatives to rules. */
943819bf
RS
1578/* NO, they cannot.
1579 a) none of the documentation allows them
1580 b) most of them scan forward until finding a next %
1581 thus they may swallow lots of intervening rules
1582*/
511e79b3 1583 else if (t == tok_token)
1ff442ca 1584 {
d7020c20 1585 parse_token_decl (token_sym, nterm_sym);
a70083a3 1586 t = lex ();
1ff442ca 1587 }
511e79b3 1588 else if (t == tok_nterm)
1ff442ca 1589 {
d7020c20 1590 parse_token_decl (nterm_sym, token_sym);
a70083a3 1591 t = lex ();
1ff442ca 1592 }
511e79b3 1593 else if (t == tok_type)
1ff442ca 1594 {
a70083a3 1595 t = get_type ();
1ff442ca 1596 }
511e79b3 1597 else if (t == tok_union)
1ff442ca 1598 {
a70083a3
AD
1599 parse_union_decl ();
1600 t = lex ();
1ff442ca 1601 }
511e79b3 1602 else if (t == tok_expect)
1ff442ca 1603 {
a70083a3
AD
1604 parse_expect_decl ();
1605 t = lex ();
1ff442ca 1606 }
511e79b3 1607 else if (t == tok_start)
1ff442ca 1608 {
a70083a3
AD
1609 parse_start_decl ();
1610 t = lex ();
1ff442ca 1611 }
943819bf
RS
1612#endif
1613
1ff442ca 1614 else
943819bf 1615 {
d01c415b 1616 complain (_("invalid input: %s"), quote (token_buffer));
a70083a3 1617 t = lex ();
943819bf 1618 }
1ff442ca
NF
1619 }
1620
943819bf
RS
1621 /* grammar has been read. Do some checking */
1622
1ff442ca 1623 if (nsyms > MAXSHORT)
a0f6b076
AD
1624 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1625 MAXSHORT);
1ff442ca 1626 if (nrules == 0)
a0f6b076 1627 fatal (_("no rules in the input grammar"));
1ff442ca 1628
1ff442ca
NF
1629 /* Report any undefined symbols and consider them nonterminals. */
1630
1631 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1632 if (bp->class == unknown_sym)
1ff442ca 1633 {
a70083a3
AD
1634 complain (_
1635 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1636 bp->tag);
d7020c20 1637 bp->class = nterm_sym;
1ff442ca
NF
1638 bp->value = nvars++;
1639 }
1640
1641 ntokens = nsyms - nvars;
1642}
ff48177d
MA
1643
1644/* At the end of the grammar file, some C source code must
63c2d5de 1645 be stored. It is going to be associated with the epilogue
ff48177d
MA
1646 directive. */
1647static void
1648read_additionnal_code (void)
1649{
1650 char c;
63c2d5de 1651 struct obstack el_obstack;
342b8b6e 1652
63c2d5de 1653 obstack_init (&el_obstack);
ff48177d
MA
1654
1655 while ((c = getc (finput)) != EOF)
63c2d5de 1656 obstack_1grow (&el_obstack, c);
342b8b6e 1657
63c2d5de 1658 obstack_1grow (&el_obstack, 0);
11d82f03 1659 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1660}
1661
a70083a3
AD
1662\f
1663/*--------------------------------------------------------------.
1664| For named tokens, but not literal ones, define the name. The |
1665| value is the user token number. |
1666`--------------------------------------------------------------*/
1ff442ca 1667
4a120d45 1668static void
896fe5c1 1669output_token_defines (struct obstack *oout)
1ff442ca 1670{
a70083a3
AD
1671 bucket *bp;
1672 char *cp, *symbol;
1673 char c;
1ff442ca 1674
a70083a3 1675 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1676 {
a70083a3
AD
1677 symbol = bp->tag; /* get symbol */
1678
1679 if (bp->value >= ntokens)
1680 continue;
1681 if (bp->user_token_number == SALIAS)
1682 continue;
1683 if ('\'' == *symbol)
1684 continue; /* skip literal character */
1685 if (bp == errtoken)
1686 continue; /* skip error token */
1687 if ('\"' == *symbol)
1ff442ca 1688 {
a70083a3
AD
1689 /* use literal string only if given a symbol with an alias */
1690 if (bp->alias)
1691 symbol = bp->alias->tag;
1692 else
1693 continue;
1694 }
1ff442ca 1695
a70083a3
AD
1696 /* Don't #define nonliteral tokens whose names contain periods. */
1697 cp = symbol;
1698 while ((c = *cp++) && c != '.');
1699 if (c != '\0')
1700 continue;
1ff442ca 1701
0b8afb77 1702 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1703 symbol, bp->user_token_number);
a70083a3 1704 if (semantic_parser)
342b8b6e
AD
1705 /* FIXME: This is certainly dead wrong, and should be just as
1706 above. --akim. */
0b8afb77 1707 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1708 }
1709}
1ff442ca
NF
1710
1711
037ca2f1
AD
1712/*------------------------------------------------------------------.
1713| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1714| number. |
1715`------------------------------------------------------------------*/
1716
1717static void
1718token_translations_init (void)
1719{
1720 bucket *bp = NULL;
1721 int i;
1722
1723 token_translations = XCALLOC (short, max_user_token_number + 1);
1724
1725 /* Initialize all entries for literal tokens to 2, the internal
1726 token number for $undefined., which represents all invalid
1727 inputs. */
1728 for (i = 0; i <= max_user_token_number; i++)
1729 token_translations[i] = 2;
1730
1731 for (bp = firstsymbol; bp; bp = bp->next)
1732 {
1733 /* Non-terminal? */
1734 if (bp->value >= ntokens)
1735 continue;
1736 /* A token string alias? */
1737 if (bp->user_token_number == SALIAS)
1738 continue;
6b7e85b9
AD
1739
1740 assert (bp->user_token_number != SUNDEF);
1741
037ca2f1
AD
1742 /* A token which translation has already been set? */
1743 if (token_translations[bp->user_token_number] != 2)
1744 complain (_("tokens %s and %s both assigned number %d"),
1745 tags[token_translations[bp->user_token_number]],
1746 bp->tag, bp->user_token_number);
1747 token_translations[bp->user_token_number] = bp->value;
1748 }
1749}
1750
1751
a70083a3
AD
1752/*------------------------------------------------------------------.
1753| Assign symbol numbers, and write definition of token names into |
b2ca4022 1754| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1755| of symbols. |
1756`------------------------------------------------------------------*/
1ff442ca 1757
4a120d45 1758static void
118fb205 1759packsymbols (void)
1ff442ca 1760{
342b8b6e 1761 bucket *bp = NULL;
a70083a3 1762 int tokno = 1;
a70083a3 1763 int last_user_token_number;
4a120d45 1764 static char DOLLAR[] = "$";
1ff442ca 1765
d7913476 1766 tags = XCALLOC (char *, nsyms + 1);
d7913476 1767 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1768
d7913476
AD
1769 sprec = XCALLOC (short, nsyms);
1770 sassoc = XCALLOC (short, nsyms);
1ff442ca 1771
037ca2f1
AD
1772 /* The EOF token. */
1773 tags[0] = DOLLAR;
1774 user_toknums[0] = 0;
1775
1ff442ca
NF
1776 max_user_token_number = 256;
1777 last_user_token_number = 256;
1778
1779 for (bp = firstsymbol; bp; bp = bp->next)
1780 {
d7020c20 1781 if (bp->class == nterm_sym)
1ff442ca
NF
1782 {
1783 bp->value += ntokens;
1784 }
943819bf
RS
1785 else if (bp->alias)
1786 {
0a6384c4
AD
1787 /* this symbol and its alias are a single token defn.
1788 allocate a tokno, and assign to both check agreement of
1789 ->prec and ->assoc fields and make both the same */
1790 if (bp->value == 0)
1791 bp->value = bp->alias->value = tokno++;
943819bf 1792
0a6384c4
AD
1793 if (bp->prec != bp->alias->prec)
1794 {
1795 if (bp->prec != 0 && bp->alias->prec != 0
1796 && bp->user_token_number == SALIAS)
a0f6b076
AD
1797 complain (_("conflicting precedences for %s and %s"),
1798 bp->tag, bp->alias->tag);
0a6384c4
AD
1799 if (bp->prec != 0)
1800 bp->alias->prec = bp->prec;
1801 else
1802 bp->prec = bp->alias->prec;
1803 }
943819bf 1804
0a6384c4
AD
1805 if (bp->assoc != bp->alias->assoc)
1806 {
a0f6b076
AD
1807 if (bp->assoc != 0 && bp->alias->assoc != 0
1808 && bp->user_token_number == SALIAS)
1809 complain (_("conflicting assoc values for %s and %s"),
1810 bp->tag, bp->alias->tag);
1811 if (bp->assoc != 0)
1812 bp->alias->assoc = bp->assoc;
1813 else
1814 bp->assoc = bp->alias->assoc;
1815 }
0a6384c4
AD
1816
1817 if (bp->user_token_number == SALIAS)
a70083a3 1818 continue; /* do not do processing below for SALIASs */
943819bf 1819
a70083a3 1820 }
d7020c20 1821 else /* bp->class == token_sym */
943819bf
RS
1822 {
1823 bp->value = tokno++;
1824 }
1825
d7020c20 1826 if (bp->class == token_sym)
1ff442ca 1827 {
6b7e85b9 1828 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1829 bp->user_token_number = ++last_user_token_number;
1830 if (bp->user_token_number > max_user_token_number)
1831 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1832 }
1833
1834 tags[bp->value] = bp->tag;
943819bf 1835 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1836 sprec[bp->value] = bp->prec;
1837 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1838 }
1839
037ca2f1 1840 token_translations_init ();
1ff442ca
NF
1841
1842 error_token_number = errtoken->value;
1843
e3f1699f
AD
1844 if (startval->class == unknown_sym)
1845 fatal (_("the start symbol %s is undefined"), startval->tag);
1846 else if (startval->class == token_sym)
1847 fatal (_("the start symbol %s is a token"), startval->tag);
1848
1849 start_symbol = startval->value;
1850}
1851
1852
1853/*-----------------------------------.
1854| Output definition of token names. |
1855`-----------------------------------*/
1856
1857static void
1858symbols_output (void)
1859{
342b8b6e
AD
1860 {
1861 struct obstack tokendefs;
1862 obstack_init (&tokendefs);
1863 output_token_defines (&tokendefs);
1864 obstack_1grow (&tokendefs, 0);
1865 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1866 obstack_free (&tokendefs, NULL);
1867 }
b6610515 1868
d8cb5183
MA
1869#if 0
1870 if (!no_parser_flag)
1871 output_token_defines (&table_obstack);
1872#endif
1ff442ca 1873
89cab50d 1874 if (defines_flag)
1ff442ca 1875 {
896fe5c1 1876 output_token_defines (&defines_obstack);
1ff442ca
NF
1877
1878 if (!pure_parser)
1879 {
1880 if (spec_name_prefix)
896fe5c1
AD
1881 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1882 spec_name_prefix);
1ff442ca 1883 else
ff4423cc 1884 obstack_sgrow (&defines_obstack,
573c1d9f 1885 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1886 }
1887
1888 if (semantic_parser)
037ca2f1
AD
1889 {
1890 int i;
1891
1892 for (i = ntokens; i < nsyms; i++)
1893 {
1894 /* don't make these for dummy nonterminals made by gensym. */
1895 if (*tags[i] != '@')
1896 obstack_fgrow2 (&defines_obstack,
1897 "# define\tNT%s\t%d\n", tags[i], i);
1898 }
1ff442ca 1899#if 0
037ca2f1
AD
1900 /* `fdefines' is now a temporary file, so we need to copy its
1901 contents in `done', so we can't close it here. */
1902 fclose (fdefines);
1903 fdefines = NULL;
1ff442ca 1904#endif
037ca2f1 1905 }
1ff442ca
NF
1906 }
1907}
a083fbbf 1908
1ff442ca 1909
a70083a3
AD
1910/*---------------------------------------------------------------.
1911| Convert the rules into the representation using RRHS, RLHS and |
1912| RITEMS. |
1913`---------------------------------------------------------------*/
1ff442ca 1914
4a120d45 1915static void
118fb205 1916packgram (void)
1ff442ca 1917{
a70083a3
AD
1918 int itemno;
1919 int ruleno;
1920 symbol_list *p;
1ff442ca
NF
1921
1922 bucket *ruleprec;
1923
d7913476
AD
1924 ritem = XCALLOC (short, nitems + 1);
1925 rlhs = XCALLOC (short, nrules) - 1;
1926 rrhs = XCALLOC (short, nrules) - 1;
1927 rprec = XCALLOC (short, nrules) - 1;
1928 rprecsym = XCALLOC (short, nrules) - 1;
1929 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1930
1931 itemno = 0;
1932 ruleno = 1;
1933
1934 p = grammar;
1935 while (p)
1936 {
1937 rlhs[ruleno] = p->sym->value;
1938 rrhs[ruleno] = itemno;
1939 ruleprec = p->ruleprec;
1940
1941 p = p->next;
1942 while (p && p->sym)
1943 {
1944 ritem[itemno++] = p->sym->value;
1945 /* A rule gets by default the precedence and associativity
1946 of the last token in it. */
d7020c20 1947 if (p->sym->class == token_sym)
1ff442ca
NF
1948 {
1949 rprec[ruleno] = p->sym->prec;
1950 rassoc[ruleno] = p->sym->assoc;
1951 }
a70083a3
AD
1952 if (p)
1953 p = p->next;
1ff442ca
NF
1954 }
1955
1956 /* If this rule has a %prec,
a70083a3 1957 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1958 if (ruleprec)
1959 {
a70083a3
AD
1960 rprec[ruleno] = ruleprec->prec;
1961 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1962 rprecsym[ruleno] = ruleprec->value;
1963 }
1964
1965 ritem[itemno++] = -ruleno;
1966 ruleno++;
1967
a70083a3
AD
1968 if (p)
1969 p = p->next;
1ff442ca
NF
1970 }
1971
1972 ritem[itemno] = 0;
1973}
a70083a3
AD
1974\f
1975/*-------------------------------------------------------------------.
1976| Read in the grammar specification and record it in the format |
ea5607fd 1977| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1978| and all actions into ACTION_OBSTACK, in each case forming the body |
1979| of a C function (YYGUARD or YYACTION) which contains a switch |
1980| statement to decide which guard or action to execute. |
a70083a3
AD
1981`-------------------------------------------------------------------*/
1982
1983void
1984reader (void)
1985{
1986 start_flag = 0;
1987 startval = NULL; /* start symbol not specified yet. */
1988
a70083a3
AD
1989 nsyms = 1;
1990 nvars = 0;
1991 nrules = 0;
1992 nitems = 0;
1993 rline_allocated = 10;
d7913476 1994 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1995
1996 typed = 0;
1997 lastprec = 0;
1998
a70083a3
AD
1999 semantic_parser = 0;
2000 pure_parser = 0;
a70083a3
AD
2001
2002 grammar = NULL;
2003
342b8b6e 2004 lex_init ();
a70083a3
AD
2005 lineno = 1;
2006
11d82f03
MA
2007 /* Initialize the muscle obstack. */
2008 obstack_init (&muscle_obstack);
82e236e2 2009
a70083a3
AD
2010 /* Initialize the symbol table. */
2011 tabinit ();
b6610515 2012
a70083a3
AD
2013 /* Construct the error token */
2014 errtoken = getsym ("error");
d7020c20 2015 errtoken->class = token_sym;
a70083a3 2016 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 2017
a70083a3
AD
2018 /* Construct a token that represents all undefined literal tokens.
2019 It is always token number 2. */
2020 undeftoken = getsym ("$undefined.");
d7020c20 2021 undeftoken->class = token_sym;
a70083a3
AD
2022 undeftoken->user_token_number = 2;
2023
896fe5c1
AD
2024 /* Read the declaration section. Copy %{ ... %} groups to
2025 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2026 etc. found there. */
a70083a3 2027 read_declarations ();
a70083a3
AD
2028 /* Read in the grammar, build grammar in list form. Write out
2029 guards and actions. */
2030 readgram ();
ff48177d
MA
2031 /* Some C code is given at the end of the grammar file. */
2032 read_additionnal_code ();
b0c4483e 2033
a70083a3 2034 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
2035 write its type into the .tab.h file.
2036 This is no longer need with header skeleton. */
2037
a70083a3
AD
2038 /* Assign the symbols their symbol numbers. Write #defines for the
2039 token symbols into FDEFINES if requested. */
2040 packsymbols ();
e3f1699f 2041 symbols_output ();
a70083a3
AD
2042 /* Convert the grammar into the format described in gram.h. */
2043 packgram ();
a70083a3 2044}