]> git.saurik.com Git - bison.git/blame - src/reader.c
Merge in branch-1_29.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
8c7ebe49 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
8c7ebe49 24#include "obstack.h"
2a91a95e
AD
25#include "quotearg.h"
26#include "quote.h"
ceed8467 27#include "getargs.h"
1ff442ca 28#include "files.h"
1ff442ca 29#include "symtab.h"
82b6d266 30#include "options.h"
1ff442ca
NF
31#include "lex.h"
32#include "gram.h"
a0f6b076 33#include "complain.h"
6c89f1c1 34#include "output.h"
b2ca4022 35#include "reader.h"
340ef489 36#include "conflicts.h"
11d82f03 37#include "muscle_tab.h"
1ff442ca 38
1ff442ca 39/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 40static int rline_allocated;
1ff442ca 41
a70083a3
AD
42typedef struct symbol_list
43{
44 struct symbol_list *next;
45 bucket *sym;
46 bucket *ruleprec;
47}
48symbol_list;
118fb205 49
1ff442ca 50int lineno;
1ff442ca 51char **tags;
d019d655 52short *user_toknums;
4a120d45
JT
53static symbol_list *grammar;
54static int start_flag;
55static bucket *startval;
1ff442ca
NF
56
57/* Nonzero if components of semantic values are used, implying
58 they must be unions. */
59static int value_components_used;
60
d7020c20
AD
61/* Nonzero if %union has been seen. */
62static int typed;
1ff442ca 63
d7020c20
AD
64/* Incremented for each %left, %right or %nonassoc seen */
65static int lastprec;
1ff442ca 66
1ff442ca 67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
0d533154 69\f
a70083a3 70
0d533154
AD
71/*===================\
72| Low level lexing. |
73\===================*/
943819bf
RS
74
75static void
118fb205 76skip_to_char (int target)
943819bf
RS
77{
78 int c;
79 if (target == '\n')
a0f6b076 80 complain (_(" Skipping to next \\n"));
943819bf 81 else
a0f6b076 82 complain (_(" Skipping to next %c"), target);
943819bf
RS
83
84 do
0d533154 85 c = skip_white_space ();
943819bf 86 while (c != target && c != EOF);
a083fbbf 87 if (c != EOF)
0d533154 88 ungetc (c, finput);
943819bf
RS
89}
90
91
0d533154
AD
/* Read a decimal integer, optionally preceded by a single `-', from
   STREAM and return its value.  The first character that is not part
   of the number is pushed back onto STREAM.  If no digit is present
   the result is 0.  */

static inline int
read_signed_integer (FILE *stream)
{
  int negative = 0;
  int magnitude = 0;
  int ch = getc (stream);

  if (ch == '-')
    {
      negative = 1;
      ch = getc (stream);
    }

  for (; isdigit (ch); ch = getc (stream))
    magnitude = magnitude * 10 + (ch - '0');

  /* Leave the terminating character for the caller.  */
  ungetc (ch, stream);

  return negative ? -magnitude : magnitude;
}
119\f
79282c5a
AD
120/*--------------------------------------------------------------.
121| Get the data type (alternative in the union) of the value for |
122| symbol N in rule RULE. |
123`--------------------------------------------------------------*/
124
125static char *
126get_type_name (int n, symbol_list * rule)
127{
128 int i;
129 symbol_list *rp;
130
131 if (n < 0)
132 {
133 complain (_("invalid $ value"));
134 return NULL;
135 }
136
137 rp = rule;
138 i = 0;
139
140 while (i < n)
141 {
142 rp = rp->next;
143 if (rp == NULL || rp->sym == NULL)
144 {
145 complain (_("invalid $ value"));
146 return NULL;
147 }
148 i++;
149 }
150
151 return rp->sym->type_name;
152}
153\f
337bab46
AD
154/*------------------------------------------------------------.
155| Dump the string from FIN to OOUT if non null. MATCH is the |
156| delimiter of the string (either ' or "). |
157`------------------------------------------------------------*/
ae3c3164
AD
158
159static inline void
b6610515 160copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
161{
162 int c;
163
b6610515
RA
164 if (store)
165 obstack_1grow (oout, match);
8c7ebe49 166
4a120d45 167 c = getc (fin);
ae3c3164
AD
168
169 while (c != match)
170 {
171 if (c == EOF)
172 fatal (_("unterminated string at end of file"));
173 if (c == '\n')
174 {
a0f6b076 175 complain (_("unterminated string"));
4a120d45 176 ungetc (c, fin);
ae3c3164
AD
177 c = match; /* invent terminator */
178 continue;
179 }
180
337bab46 181 obstack_1grow (oout, c);
ae3c3164
AD
182
183 if (c == '\\')
184 {
4a120d45 185 c = getc (fin);
ae3c3164
AD
186 if (c == EOF)
187 fatal (_("unterminated string at end of file"));
337bab46 188 obstack_1grow (oout, c);
8c7ebe49 189
ae3c3164
AD
190 if (c == '\n')
191 lineno++;
192 }
193
a70083a3 194 c = getc (fin);
ae3c3164
AD
195 }
196
b6610515
RA
197 if (store)
198 obstack_1grow (oout, c);
199}
200
/* FIXME.  (Marker kept from the original source; it does not say what
   needs fixing.)

   Copy a quoted string from FIN to OOUT including its MATCH
   delimiters: copy_string2 () with STORE set.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  copy_string2 (fin, oout, match, 1);
}
208
b6610515
RA
/* FIXME.  (Marker kept from the original source; it does not say what
   needs fixing.)

   Append to OOUT the longest run of alphanumeric characters and
   underscores available on FIN, and push back the character that
   ended the run.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch;

  for (;;)
    {
      ch = getc (fin);
      if (!isalnum (ch) && ch != '_')
        break;
      obstack_1grow (oout, ch);
    }

  ungetc (ch, fin);
}
ae3c3164 221
337bab46
AD
222/*-----------------------------------------------------------------.
223| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
224| NULL). In fact we just saw a `/', which might or might not be a |
225| comment. In any case, copy what we saw. |
226| |
227| OUT2 might be NULL. |
228`-----------------------------------------------------------------*/
ae3c3164
AD
229
230static inline void
337bab46 231copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
232{
233 int cplus_comment;
a70083a3 234 int ended;
550a72a3
AD
235 int c;
236
237 /* We read a `/', output it. */
337bab46 238 obstack_1grow (oout1, '/');
896fe5c1
AD
239 if (oout2)
240 obstack_1grow (oout2, '/');
550a72a3
AD
241
242 switch ((c = getc (fin)))
243 {
244 case '/':
245 cplus_comment = 1;
246 break;
247 case '*':
248 cplus_comment = 0;
249 break;
250 default:
251 ungetc (c, fin);
252 return;
253 }
ae3c3164 254
337bab46 255 obstack_1grow (oout1, c);
896fe5c1
AD
256 if (oout2)
257 obstack_1grow (oout2, c);
550a72a3 258 c = getc (fin);
ae3c3164
AD
259
260 ended = 0;
261 while (!ended)
262 {
263 if (!cplus_comment && c == '*')
264 {
265 while (c == '*')
266 {
337bab46 267 obstack_1grow (oout1, c);
896fe5c1
AD
268 if (oout2)
269 obstack_1grow (oout2, c);
550a72a3 270 c = getc (fin);
ae3c3164
AD
271 }
272
273 if (c == '/')
274 {
337bab46 275 obstack_1grow (oout1, c);
896fe5c1
AD
276 if (oout2)
277 obstack_1grow (oout2, c);
ae3c3164
AD
278 ended = 1;
279 }
280 }
281 else if (c == '\n')
282 {
283 lineno++;
337bab46 284 obstack_1grow (oout1, c);
896fe5c1
AD
285 if (oout2)
286 obstack_1grow (oout2, c);
ae3c3164
AD
287 if (cplus_comment)
288 ended = 1;
289 else
550a72a3 290 c = getc (fin);
ae3c3164
AD
291 }
292 else if (c == EOF)
293 fatal (_("unterminated comment"));
294 else
295 {
337bab46 296 obstack_1grow (oout1, c);
896fe5c1
AD
297 if (oout2)
298 obstack_1grow (oout2, c);
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 }
302}
303
304
550a72a3
AD
/* Copy the comment (actually, whatever text begins with the `/' just
   read) from FIN to OOUT: copy_comment2 () with no second output.  */

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  copy_comment2 (fin, oout, NULL);
}
315
316
a70083a3 317/*-----------------------------------------------------------------.
337bab46 318| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
319| reference to this location. STACK_OFFSET is the number of values |
320| in the current rule so far, which says where to find `$0' with |
321| respect to the top of the stack. |
322`-----------------------------------------------------------------*/
1ff442ca 323
a70083a3 324static inline void
337bab46 325copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 326{
a70083a3 327 int c;
1ff442ca 328
a70083a3
AD
329 c = getc (fin);
330 if (c == '$')
1ff442ca 331 {
ff4423cc 332 obstack_sgrow (oout, "yyloc");
89cab50d 333 locations_flag = 1;
a70083a3
AD
334 }
335 else if (isdigit (c) || c == '-')
336 {
337 int n;
1ff442ca 338
a70083a3
AD
339 ungetc (c, fin);
340 n = read_signed_integer (fin);
943819bf 341
337bab46 342 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 343 locations_flag = 1;
1ff442ca 344 }
a70083a3 345 else
ff4a34be
AD
346 {
347 char buf[] = "@c";
348 buf[1] = c;
349 complain (_("%s is invalid"), quote (buf));
350 }
1ff442ca 351}
79282c5a
AD
352
353
354/*-------------------------------------------------------------------.
355| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
356| |
357| Possible inputs: $[<TYPENAME>]($|integer) |
358| |
337bab46 359| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
360| the number of values in the current rule so far, which says where |
361| to find `$0' with respect to the top of the stack. |
362`-------------------------------------------------------------------*/
363
364static inline void
337bab46 365copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
366 symbol_list *rule, int stack_offset)
367{
368 int c = getc (fin);
b0ce6046 369 const char *type_name = NULL;
79282c5a 370
f282676b 371 /* Get the type name if explicit. */
79282c5a
AD
372 if (c == '<')
373 {
f282676b 374 read_type_name (fin);
79282c5a
AD
375 type_name = token_buffer;
376 value_components_used = 1;
79282c5a
AD
377 c = getc (fin);
378 }
379
380 if (c == '$')
381 {
ff4423cc 382 obstack_sgrow (oout, "yyval");
8c7ebe49 383
79282c5a
AD
384 if (!type_name)
385 type_name = get_type_name (0, rule);
386 if (type_name)
337bab46 387 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
388 if (!type_name && typed)
389 complain (_("$$ of `%s' has no declared type"),
390 rule->sym->tag);
391 }
392 else if (isdigit (c) || c == '-')
393 {
394 int n;
395 ungetc (c, fin);
396 n = read_signed_integer (fin);
397
398 if (!type_name && n > 0)
399 type_name = get_type_name (n, rule);
400
337bab46 401 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 402
79282c5a 403 if (type_name)
337bab46 404 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
405 if (!type_name && typed)
406 complain (_("$%d of `%s' has no declared type"),
407 n, rule->sym->tag);
408 }
409 else
410 {
411 char buf[] = "$c";
412 buf[1] = c;
413 complain (_("%s is invalid"), quote (buf));
414 }
415}
a70083a3
AD
416\f
417/*-------------------------------------------------------------------.
418| Copy the contents of a `%{ ... %}' into the definitions file. The |
419| `%{' has already been read. Return after reading the `%}'. |
420`-------------------------------------------------------------------*/
1ff442ca 421
4a120d45 422static void
118fb205 423copy_definition (void)
1ff442ca 424{
a70083a3 425 int c;
ae3c3164 426 /* -1 while reading a character if prev char was %. */
a70083a3 427 int after_percent;
1ff442ca 428
b6610515 429#if 0
89cab50d 430 if (!no_lines_flag)
25b222fa
MA
431 {
432 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 433 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
434 muscle_find("filename")));
435 }
b6610515 436#endif
1ff442ca
NF
437
438 after_percent = 0;
439
ae3c3164 440 c = getc (finput);
1ff442ca
NF
441
442 for (;;)
443 {
444 switch (c)
445 {
446 case '\n':
dd60faec 447 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
448 lineno++;
449 break;
450
451 case '%':
a70083a3 452 after_percent = -1;
1ff442ca 453 break;
a083fbbf 454
1ff442ca
NF
455 case '\'':
456 case '"':
337bab46 457 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
458 break;
459
460 case '/':
337bab46 461 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
462 break;
463
464 case EOF:
a70083a3 465 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
466
467 default:
dd60faec 468 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
469 }
470
a70083a3 471 c = getc (finput);
1ff442ca
NF
472
473 if (after_percent)
474 {
475 if (c == '}')
476 return;
dd60faec 477 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
478 }
479 after_percent = 0;
1ff442ca 480 }
1ff442ca
NF
481}
482
483
d7020c20
AD
484/*-------------------------------------------------------------------.
485| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
486| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
487| are reversed. |
488`-------------------------------------------------------------------*/
1ff442ca 489
4a120d45 490static void
d7020c20 491parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 492{
342b8b6e
AD
493 token_t token = tok_undef;
494 char *typename = NULL;
1ff442ca 495
1e9798d5
AD
496 /* The symbol being defined. */
497 struct bucket *symbol = NULL;
498
499 /* After `%token' and `%nterm', any number of symbols maybe be
500 defined. */
1ff442ca
NF
501 for (;;)
502 {
e6011337
JT
503 int tmp_char = ungetc (skip_white_space (), finput);
504
1e9798d5
AD
505 /* `%' (for instance from `%token', or from `%%' etc.) is the
506 only valid means to end this declaration. */
e6011337 507 if (tmp_char == '%')
1ff442ca 508 return;
e6011337 509 if (tmp_char == EOF)
a0f6b076 510 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 511
a70083a3 512 token = lex ();
511e79b3 513 if (token == tok_comma)
943819bf
RS
514 {
515 symbol = NULL;
516 continue;
517 }
511e79b3 518 if (token == tok_typename)
1ff442ca 519 {
95e36146 520 typename = xstrdup (token_buffer);
1ff442ca 521 value_components_used = 1;
943819bf
RS
522 symbol = NULL;
523 }
511e79b3 524 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 525 {
8e03724b
AD
526 if (symval->alias)
527 warn (_("symbol `%s' used more than once as a literal string"),
528 symval->tag);
529 else if (symbol->alias)
530 warn (_("symbol `%s' given more than one literal string"),
531 symbol->tag);
532 else
533 {
534 symval->class = token_sym;
535 symval->type_name = typename;
536 symval->user_token_number = symbol->user_token_number;
537 symbol->user_token_number = SALIAS;
538 symval->alias = symbol;
539 symbol->alias = symval;
540 /* symbol and symval combined are only one symbol */
541 nsyms--;
542 }
8e03724b 543 symbol = NULL;
1ff442ca 544 }
511e79b3 545 else if (token == tok_identifier)
1ff442ca
NF
546 {
547 int oldclass = symval->class;
943819bf 548 symbol = symval;
1ff442ca 549
943819bf 550 if (symbol->class == what_is_not)
a0f6b076 551 complain (_("symbol %s redefined"), symbol->tag);
943819bf 552 symbol->class = what_is;
d7020c20 553 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 554 symbol->value = nvars++;
1ff442ca
NF
555
556 if (typename)
557 {
943819bf
RS
558 if (symbol->type_name == NULL)
559 symbol->type_name = typename;
a70083a3 560 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 561 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
562 }
563 }
511e79b3 564 else if (symbol && token == tok_number)
a70083a3 565 {
943819bf 566 symbol->user_token_number = numval;
a70083a3 567 }
1ff442ca 568 else
943819bf 569 {
a0f6b076 570 complain (_("`%s' is invalid in %s"),
d7020c20 571 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 572 skip_to_char ('%');
943819bf 573 }
1ff442ca
NF
574 }
575
576}
577
1ff442ca 578
d7020c20
AD
579/*------------------------------.
580| Parse what comes after %start |
581`------------------------------*/
1ff442ca 582
4a120d45 583static void
118fb205 584parse_start_decl (void)
1ff442ca
NF
585{
586 if (start_flag)
27821bff 587 complain (_("multiple %s declarations"), "%start");
511e79b3 588 if (lex () != tok_identifier)
27821bff 589 complain (_("invalid %s declaration"), "%start");
943819bf
RS
590 else
591 {
592 start_flag = 1;
593 startval = symval;
594 }
1ff442ca
NF
595}
596
a70083a3
AD
597/*-----------------------------------------------------------.
598| read in a %type declaration and record its information for |
599| get_type_name to access |
600`-----------------------------------------------------------*/
601
602static void
603parse_type_decl (void)
604{
a70083a3
AD
605 char *name;
606
511e79b3 607 if (lex () != tok_typename)
a70083a3
AD
608 {
609 complain ("%s", _("%type declaration has no <typename>"));
610 skip_to_char ('%');
611 return;
612 }
613
95e36146 614 name = xstrdup (token_buffer);
a70083a3
AD
615
616 for (;;)
617 {
f17bcd1f 618 token_t t;
a70083a3
AD
619 int tmp_char = ungetc (skip_white_space (), finput);
620
621 if (tmp_char == '%')
622 return;
623 if (tmp_char == EOF)
624 fatal (_("Premature EOF after %s"), token_buffer);
625
626 t = lex ();
627
628 switch (t)
1ff442ca
NF
629 {
630
511e79b3
AD
631 case tok_comma:
632 case tok_semicolon:
1ff442ca
NF
633 break;
634
511e79b3 635 case tok_identifier:
1ff442ca
NF
636 if (symval->type_name == NULL)
637 symval->type_name = name;
a70083a3 638 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 639 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
640
641 break;
642
643 default:
a0f6b076
AD
644 complain (_("invalid %%type declaration due to item: %s"),
645 token_buffer);
a70083a3 646 skip_to_char ('%');
1ff442ca
NF
647 }
648 }
649}
650
651
652
d7020c20
AD
653/*----------------------------------------------------------------.
654| Read in a %left, %right or %nonassoc declaration and record its |
655| information. |
656`----------------------------------------------------------------*/
1ff442ca 657
4a120d45 658static void
d7020c20 659parse_assoc_decl (associativity assoc)
1ff442ca 660{
a70083a3
AD
661 char *name = NULL;
662 int prev = 0;
1ff442ca 663
a70083a3 664 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 665
1ff442ca
NF
666 for (;;)
667 {
f17bcd1f 668 token_t t;
e6011337 669 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 670
e6011337 671 if (tmp_char == '%')
1ff442ca 672 return;
e6011337 673 if (tmp_char == EOF)
a0f6b076 674 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 675
a70083a3 676 t = lex ();
1ff442ca
NF
677
678 switch (t)
679 {
511e79b3 680 case tok_typename:
95e36146 681 name = xstrdup (token_buffer);
1ff442ca
NF
682 break;
683
511e79b3 684 case tok_comma:
1ff442ca
NF
685 break;
686
511e79b3 687 case tok_identifier:
1ff442ca 688 if (symval->prec != 0)
a0f6b076 689 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
690 symval->prec = lastprec;
691 symval->assoc = assoc;
d7020c20 692 if (symval->class == nterm_sym)
a0f6b076 693 complain (_("symbol %s redefined"), symval->tag);
d7020c20 694 symval->class = token_sym;
1ff442ca 695 if (name)
a70083a3 696 { /* record the type, if one is specified */
1ff442ca
NF
697 if (symval->type_name == NULL)
698 symval->type_name = name;
a70083a3 699 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 700 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
701 }
702 break;
703
511e79b3
AD
704 case tok_number:
705 if (prev == tok_identifier)
a70083a3 706 {
1ff442ca 707 symval->user_token_number = numval;
a70083a3
AD
708 }
709 else
710 {
711 complain (_
712 ("invalid text (%s) - number should be after identifier"),
713token_buffer);
714 skip_to_char ('%');
715 }
1ff442ca
NF
716 break;
717
511e79b3 718 case tok_semicolon:
1ff442ca
NF
719 return;
720
721 default:
a0f6b076 722 complain (_("unexpected item: %s"), token_buffer);
a70083a3 723 skip_to_char ('%');
1ff442ca
NF
724 }
725
726 prev = t;
727
728 }
729}
730
731
732
dd60faec 733/*--------------------------------------------------------------.
180d45ba
PB
734| Copy the union declaration into the stype muscle |
735| (and fdefines), where it is made into the definition of |
736| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 737`--------------------------------------------------------------*/
1ff442ca 738
4a120d45 739static void
118fb205 740parse_union_decl (void)
1ff442ca 741{
a70083a3
AD
742 int c;
743 int count = 0;
180d45ba 744 struct obstack union_obstack;
1ff442ca
NF
745
746 if (typed)
27821bff 747 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
748
749 typed = 1;
750
180d45ba 751 if (no_lines_flag)
dd60faec 752 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 753
180d45ba
PB
754 obstack_init (&union_obstack);
755 obstack_sgrow (&union_obstack, "union");
896fe5c1 756 if (defines_flag)
ff4423cc 757 obstack_sgrow (&defines_obstack, "typedef union");
1ff442ca 758
27821bff 759 c = getc (finput);
1ff442ca
NF
760
761 while (c != EOF)
762 {
342b8b6e
AD
763 /* If C contains '/', it is output by copy_comment (). */
764 if (c != '/')
765 {
766 obstack_1grow (&attrs_obstack, c);
767 if (defines_flag)
768 obstack_1grow (&defines_obstack, c);
769 }
1ff442ca
NF
770
771 switch (c)
772 {
773 case '\n':
774 lineno++;
775 break;
776
777 case '/':
180d45ba 778 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
779 break;
780
1ff442ca
NF
781 case '{':
782 count++;
783 break;
784
785 case '}':
786 if (count == 0)
27821bff 787 complain (_("unmatched %s"), "`}'");
1ff442ca 788 count--;
943819bf 789 if (count <= 0)
1ff442ca 790 {
896fe5c1 791 if (defines_flag)
ff4423cc 792 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
1ff442ca 793 /* JF don't choke on trailing semi */
27821bff
AD
794 c = skip_white_space ();
795 if (c != ';')
a70083a3 796 ungetc (c, finput);
180d45ba
PB
797 obstack_1grow (&union_obstack, 0);
798 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
799 return;
800 }
801 }
802
27821bff 803 c = getc (finput);
1ff442ca 804 }
180d45ba 805
1ff442ca
NF
806}
807
d7020c20
AD
808
809/*-------------------------------------------------------.
810| Parse the declaration %expect N which says to expect N |
811| shift-reduce conflicts. |
812`-------------------------------------------------------*/
1ff442ca 813
4a120d45 814static void
118fb205 815parse_expect_decl (void)
1ff442ca 816{
131e2fef 817 int c = skip_white_space ();
1ff442ca
NF
818 ungetc (c, finput);
819
131e2fef 820 if (!isdigit (c))
79282c5a 821 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
822 else
823 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
824}
825
a70083a3
AD
826
827/*-------------------------------------------------------------------.
828| Parse what comes after %thong. the full syntax is |
829| |
830| %thong <type> token number literal |
831| |
832| the <type> or number may be omitted. The number specifies the |
833| user_token_number. |
834| |
835| Two symbols are entered in the table, one for the token symbol and |
836| one for the literal. Both are given the <type>, if any, from the |
837| declaration. The ->user_token_number of the first is SALIAS and |
838| the ->user_token_number of the second is set to the number, if |
839| any, from the declaration. The two symbols are linked via |
840| pointers in their ->alias fields. |
841| |
842| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
843| only the literal string is retained it is the literal string that |
844| is output to yytname |
845`-------------------------------------------------------------------*/
846
847static void
848parse_thong_decl (void)
7b306f52 849{
f17bcd1f 850 token_t token;
a70083a3
AD
851 struct bucket *symbol;
852 char *typename = 0;
95e36146 853 int usrtoknum;
7b306f52 854
a70083a3 855 token = lex (); /* fetch typename or first token */
511e79b3 856 if (token == tok_typename)
7b306f52 857 {
95e36146 858 typename = xstrdup (token_buffer);
a70083a3
AD
859 value_components_used = 1;
860 token = lex (); /* fetch first token */
7b306f52 861 }
7b306f52 862
a70083a3 863 /* process first token */
7b306f52 864
511e79b3 865 if (token != tok_identifier)
a70083a3
AD
866 {
867 complain (_("unrecognized item %s, expected an identifier"),
868 token_buffer);
869 skip_to_char ('%');
870 return;
7b306f52 871 }
d7020c20 872 symval->class = token_sym;
a70083a3
AD
873 symval->type_name = typename;
874 symval->user_token_number = SALIAS;
875 symbol = symval;
7b306f52 876
a70083a3 877 token = lex (); /* get number or literal string */
1ff442ca 878
511e79b3 879 if (token == tok_number)
943819bf 880 {
a70083a3
AD
881 usrtoknum = numval;
882 token = lex (); /* okay, did number, now get literal */
943819bf 883 }
a70083a3
AD
884 else
885 usrtoknum = 0;
1ff442ca 886
a70083a3 887 /* process literal string token */
1ff442ca 888
511e79b3 889 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 890 {
a70083a3
AD
891 complain (_("expected string constant instead of %s"), token_buffer);
892 skip_to_char ('%');
893 return;
1ff442ca 894 }
d7020c20 895 symval->class = token_sym;
a70083a3
AD
896 symval->type_name = typename;
897 symval->user_token_number = usrtoknum;
1ff442ca 898
a70083a3
AD
899 symval->alias = symbol;
900 symbol->alias = symval;
1ff442ca 901
79282c5a
AD
902 /* symbol and symval combined are only one symbol. */
903 nsyms--;
a70083a3 904}
3cef001a 905
b6610515 906static void
11d82f03 907parse_muscle_decl (void)
b6610515
RA
908{
909 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
910 char* muscle_key;
911 char* muscle_value;
b6610515
RA
912
913 /* Read key. */
914 if (!isalpha (ch) && ch != '_')
915 {
916 complain (_("invalid %s declaration"), "%define");
917 skip_to_char ('%');
918 return;
919 }
11d82f03
MA
920 copy_identifier (finput, &muscle_obstack);
921 obstack_1grow (&muscle_obstack, 0);
922 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 923
b6610515
RA
924 /* Read value. */
925 ch = skip_white_space ();
926 if (ch != '"')
927 {
928 ungetc (ch, finput);
929 if (ch != EOF)
930 {
931 complain (_("invalid %s declaration"), "%define");
932 skip_to_char ('%');
933 return;
934 }
935 else
936 fatal (_("Premature EOF after %s"), "\"");
937 }
11d82f03
MA
938 copy_string2 (finput, &muscle_obstack, '"', 0);
939 obstack_1grow (&muscle_obstack, 0);
940 muscle_value = obstack_finish (&muscle_obstack);
b6610515 941
b6610515 942 /* Store the (key, value) pair in the environment. */
11d82f03 943 muscle_insert (muscle_key, muscle_value);
b6610515
RA
944}
945
2ba3b73c
MA
946
/* Parse what comes after %skeleton.  Currently a stub that reads
   nothing and does nothing.  */

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
}
956
a70083a3
AD
957/*----------------------------------------------------------------.
958| Read from finput until `%%' is seen. Discard the `%%'. Handle |
959| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 960| groups to ATTRS_OBSTACK. |
a70083a3 961`----------------------------------------------------------------*/
1ff442ca 962
4a120d45 963static void
a70083a3 964read_declarations (void)
1ff442ca 965{
a70083a3
AD
966 int c;
967 int tok;
1ff442ca 968
a70083a3 969 for (;;)
1ff442ca 970 {
a70083a3 971 c = skip_white_space ();
1ff442ca 972
a70083a3
AD
973 if (c == '%')
974 {
975 tok = parse_percent_token ();
1ff442ca 976
a70083a3 977 switch (tok)
943819bf 978 {
511e79b3 979 case tok_two_percents:
a70083a3 980 return;
1ff442ca 981
511e79b3 982 case tok_percent_left_curly:
a70083a3
AD
983 copy_definition ();
984 break;
1ff442ca 985
511e79b3 986 case tok_token:
d7020c20 987 parse_token_decl (token_sym, nterm_sym);
a70083a3 988 break;
1ff442ca 989
511e79b3 990 case tok_nterm:
d7020c20 991 parse_token_decl (nterm_sym, token_sym);
a70083a3 992 break;
1ff442ca 993
511e79b3 994 case tok_type:
a70083a3
AD
995 parse_type_decl ();
996 break;
1ff442ca 997
511e79b3 998 case tok_start:
a70083a3
AD
999 parse_start_decl ();
1000 break;
118fb205 1001
511e79b3 1002 case tok_union:
a70083a3
AD
1003 parse_union_decl ();
1004 break;
1ff442ca 1005
511e79b3 1006 case tok_expect:
a70083a3
AD
1007 parse_expect_decl ();
1008 break;
6deb4447 1009
511e79b3 1010 case tok_thong:
a70083a3
AD
1011 parse_thong_decl ();
1012 break;
d7020c20 1013
511e79b3 1014 case tok_left:
d7020c20 1015 parse_assoc_decl (left_assoc);
a70083a3 1016 break;
1ff442ca 1017
511e79b3 1018 case tok_right:
d7020c20 1019 parse_assoc_decl (right_assoc);
a70083a3 1020 break;
1ff442ca 1021
511e79b3 1022 case tok_nonassoc:
d7020c20 1023 parse_assoc_decl (non_assoc);
a70083a3 1024 break;
1ff442ca 1025
b6610515 1026 case tok_define:
11d82f03 1027 parse_muscle_decl ();
b6610515 1028 break;
342b8b6e 1029
2ba3b73c
MA
1030 case tok_skel:
1031 parse_skel_decl ();
1032 break;
b6610515 1033
511e79b3 1034 case tok_noop:
a70083a3 1035 break;
1ff442ca 1036
a70083a3
AD
1037 default:
1038 complain (_("unrecognized: %s"), token_buffer);
1039 skip_to_char ('%');
1040 }
1041 }
1042 else if (c == EOF)
1043 fatal (_("no input grammar"));
1044 else
1045 {
ff4a34be
AD
1046 char buf[] = "c";
1047 buf[0] = c;
1048 complain (_("unknown character: %s"), quote (buf));
a70083a3 1049 skip_to_char ('%');
1ff442ca 1050 }
1ff442ca 1051 }
1ff442ca 1052}
a70083a3
AD
1053\f
1054/*-------------------------------------------------------------------.
1055| Assuming that a `{' has just been seen, copy everything up to the |
1056| matching `}' into the actions file. STACK_OFFSET is the number of |
1057| values in the current rule so far, which says where to find `$0' |
1058| with respect to the top of the stack. |
1059`-------------------------------------------------------------------*/
1ff442ca 1060
4a120d45 1061static void
79282c5a 1062copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1063{
a70083a3 1064 int c;
a70083a3 1065 int count;
8c7ebe49 1066 char buf[4096];
1ff442ca
NF
1067
1068 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1069 if (semantic_parser)
1070 stack_offset = 0;
1ff442ca 1071
25b222fa 1072 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
8c7ebe49 1073
89cab50d 1074 if (!no_lines_flag)
8c7ebe49 1075 {
25b222fa 1076 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
342b8b6e 1077 lineno, quotearg_style (c_quoting_style,
25b222fa 1078 muscle_find ("filename")));
8c7ebe49
AD
1079 }
1080 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1081
1082 count = 1;
a70083a3 1083 c = getc (finput);
1ff442ca
NF
1084
1085 while (count > 0)
1086 {
1087 while (c != '}')
a70083a3
AD
1088 {
1089 switch (c)
1ff442ca
NF
1090 {
1091 case '\n':
8c7ebe49 1092 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1093 lineno++;
1094 break;
1095
1096 case '{':
8c7ebe49 1097 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1098 count++;
1099 break;
1100
1101 case '\'':
1102 case '"':
337bab46 1103 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1104 break;
1105
1106 case '/':
337bab46 1107 copy_comment (finput, &action_obstack);
1ff442ca
NF
1108 break;
1109
1110 case '$':
337bab46 1111 copy_dollar (finput, &action_obstack,
8c7ebe49 1112 rule, stack_offset);
1ff442ca
NF
1113 break;
1114
1115 case '@':
337bab46 1116 copy_at (finput, &action_obstack,
8c7ebe49 1117 stack_offset);
6666f98f 1118 break;
1ff442ca
NF
1119
1120 case EOF:
27821bff 1121 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1122
1123 default:
8c7ebe49 1124 obstack_1grow (&action_obstack, c);
a70083a3
AD
1125 }
1126
1127 c = getc (finput);
1128 }
1129
1130 /* above loop exits when c is '}' */
1131
1132 if (--count)
1133 {
8c7ebe49 1134 obstack_1grow (&action_obstack, c);
a70083a3
AD
1135 c = getc (finput);
1136 }
1137 }
1138
ff4423cc 1139 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1140}
1141\f
1142/*-------------------------------------------------------------------.
1143| After `%guard' is seen in the input file, copy the actual guard |
1144| into the guards file. If the guard is followed by an action, copy |
1145| that into the actions file. STACK_OFFSET is the number of values |
1146| in the current rule so far, which says where to find `$0' with |
1147| respect to the top of the stack, for the simple parser in which |
1148| the stack is not popped until after the guard is run. |
1149`-------------------------------------------------------------------*/
1150
1151static void
79282c5a 1152copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1153{
1154 int c;
a70083a3 1155 int count;
a70083a3
AD
1156 int brace_flag = 0;
1157
1158 /* offset is always 0 if parser has already popped the stack pointer */
1159 if (semantic_parser)
1160 stack_offset = 0;
1161
ea5607fd 1162 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1163 if (!no_lines_flag)
25b222fa 1164 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1165 lineno, quotearg_style (c_quoting_style,
11d82f03 1166 muscle_find ("filename")));
ea5607fd 1167 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1168
1169 count = 0;
1170 c = getc (finput);
1171
1172 while (brace_flag ? (count > 0) : (c != ';'))
1173 {
1174 switch (c)
1175 {
1176 case '\n':
ea5607fd 1177 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1178 lineno++;
1179 break;
1180
1181 case '{':
ea5607fd 1182 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1183 brace_flag = 1;
1184 count++;
1185 break;
1186
1187 case '}':
ea5607fd 1188 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1189 if (count > 0)
1190 count--;
1191 else
1192 {
1193 complain (_("unmatched %s"), "`}'");
1194 c = getc (finput); /* skip it */
1195 }
1196 break;
1197
1198 case '\'':
1199 case '"':
337bab46 1200 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1201 break;
1202
1203 case '/':
337bab46 1204 copy_comment (finput, &guard_obstack);
a70083a3
AD
1205 break;
1206
1207 case '$':
337bab46 1208 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1209 break;
1ff442ca 1210
a70083a3 1211 case '@':
337bab46 1212 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1213 break;
1ff442ca 1214
a70083a3
AD
1215 case EOF:
1216 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1217
a70083a3 1218 default:
ea5607fd 1219 obstack_1grow (&guard_obstack, c);
1ff442ca 1220 }
a70083a3
AD
1221
1222 if (c != '}' || count != 0)
1223 c = getc (finput);
1ff442ca
NF
1224 }
1225
a70083a3
AD
1226 c = skip_white_space ();
1227
ff4423cc 1228 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1229 if (c == '{')
1230 copy_action (rule, stack_offset);
1231 else if (c == '=')
1232 {
1233 c = getc (finput); /* why not skip_white_space -wjh */
1234 if (c == '{')
1235 copy_action (rule, stack_offset);
1236 }
1237 else
1238 ungetc (c, finput);
1ff442ca 1239}
a70083a3
AD
1240\f
1241
1242static void
1243record_rule_line (void)
1244{
1245 /* Record each rule's source line number in rline table. */
1ff442ca 1246
a70083a3
AD
1247 if (nrules >= rline_allocated)
1248 {
1249 rline_allocated = nrules * 2;
d7913476 1250 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1251 }
1252 rline[nrules] = lineno;
1253}
1ff442ca
NF
1254
1255
a70083a3
AD
1256/*-------------------------------------------------------------------.
1257| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1258| with the user's names. |
1259`-------------------------------------------------------------------*/
1ff442ca 1260
4a120d45 1261static bucket *
118fb205 1262gensym (void)
1ff442ca 1263{
274d42ce
AD
1264 /* Incremented for each generated symbol */
1265 static int gensym_count = 0;
1266 static char buf[256];
1267
a70083a3 1268 bucket *sym;
1ff442ca 1269
274d42ce
AD
1270 sprintf (buf, "@%d", ++gensym_count);
1271 token_buffer = buf;
a70083a3 1272 sym = getsym (token_buffer);
d7020c20 1273 sym->class = nterm_sym;
1ff442ca 1274 sym->value = nvars++;
36281465 1275 return sym;
1ff442ca
NF
1276}
1277
a70083a3
AD
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  This is unused.  It is only called from
   the #if 0 part of readgram.

   Returns the first token that is not part of the declaration: the
   token after a terminating `;', or the offending token itself when
   something other than `;', `,' or an identifier is read.  */

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the unexpected token back to the caller.  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          /* Give the symbol this type unless it already has one; a
             different earlier type is reported.  */
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1329\f
1330/*------------------------------------------------------------------.
1331| Parse the input grammar into a one symbol_list structure. Each |
1332| rule is represented by a sequence of symbols: the left hand side |
1333| followed by the contents of the right hand side, followed by a |
1334| null pointer instead of a symbol to terminate the rule. The next |
1335| symbol is the lhs of the following rule. |
1336| |
1337| All guards and actions are copied out to the appropriate files, |
1338| labelled by the rule number they apply to. |
1339`------------------------------------------------------------------*/
1ff442ca 1340
4a120d45 1341static void
118fb205 1342readgram (void)
1ff442ca 1343{
f17bcd1f 1344 token_t t;
a70083a3
AD
1345 bucket *lhs = NULL;
1346 symbol_list *p;
1347 symbol_list *p1;
1348 bucket *bp;
1ff442ca 1349
ff4a34be
AD
1350 /* Points to first symbol_list of current rule. its symbol is the
1351 lhs of the rule. */
1352 symbol_list *crule;
1353 /* Points to the symbol_list preceding crule. */
1354 symbol_list *crule1;
1ff442ca
NF
1355
1356 p1 = NULL;
1357
a70083a3 1358 t = lex ();
1ff442ca 1359
511e79b3 1360 while (t != tok_two_percents && t != tok_eof)
1ff442ca 1361 {
511e79b3 1362 if (t == tok_identifier || t == tok_bar)
1ff442ca 1363 {
89cab50d 1364 int action_flag = 0;
ff4a34be
AD
1365 /* Number of symbols in rhs of this rule so far */
1366 int rulelength = 0;
1ff442ca
NF
1367 int xactions = 0; /* JF for error checking */
1368 bucket *first_rhs = 0;
1369
511e79b3 1370 if (t == tok_identifier)
1ff442ca
NF
1371 {
1372 lhs = symval;
943819bf
RS
1373
1374 if (!start_flag)
1375 {
1376 startval = lhs;
1377 start_flag = 1;
1378 }
a083fbbf 1379
a70083a3 1380 t = lex ();
511e79b3 1381 if (t != tok_colon)
943819bf 1382 {
a0f6b076 1383 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1384 unlex (t);
943819bf 1385 }
1ff442ca
NF
1386 }
1387
511e79b3 1388 if (nrules == 0 && t == tok_bar)
1ff442ca 1389 {
a0f6b076 1390 complain (_("grammar starts with vertical bar"));
943819bf 1391 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1392 }
1ff442ca
NF
1393 /* start a new rule and record its lhs. */
1394
1395 nrules++;
1396 nitems++;
1397
1398 record_rule_line ();
1399
d7913476 1400 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1401 p->sym = lhs;
1402
1403 crule1 = p1;
1404 if (p1)
1405 p1->next = p;
1406 else
1407 grammar = p;
1408
1409 p1 = p;
1410 crule = p;
1411
1412 /* mark the rule's lhs as a nonterminal if not already so. */
1413
d7020c20 1414 if (lhs->class == unknown_sym)
1ff442ca 1415 {
d7020c20 1416 lhs->class = nterm_sym;
1ff442ca
NF
1417 lhs->value = nvars;
1418 nvars++;
1419 }
d7020c20 1420 else if (lhs->class == token_sym)
a0f6b076 1421 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1422
1423 /* read the rhs of the rule. */
1424
1425 for (;;)
1426 {
a70083a3 1427 t = lex ();
511e79b3 1428 if (t == tok_prec)
943819bf 1429 {
a70083a3 1430 t = lex ();
943819bf 1431 crule->ruleprec = symval;
a70083a3 1432 t = lex ();
943819bf 1433 }
1ff442ca 1434
511e79b3 1435 if (!(t == tok_identifier || t == tok_left_curly))
a70083a3 1436 break;
1ff442ca
NF
1437
1438 /* If next token is an identifier, see if a colon follows it.
a70083a3 1439 If one does, exit this rule now. */
511e79b3 1440 if (t == tok_identifier)
1ff442ca 1441 {
a70083a3 1442 bucket *ssave;
f17bcd1f 1443 token_t t1;
1ff442ca
NF
1444
1445 ssave = symval;
a70083a3
AD
1446 t1 = lex ();
1447 unlex (t1);
1ff442ca 1448 symval = ssave;
511e79b3 1449 if (t1 == tok_colon)
a70083a3 1450 break;
1ff442ca 1451
a70083a3 1452 if (!first_rhs) /* JF */
1ff442ca
NF
1453 first_rhs = symval;
1454 /* Not followed by colon =>
1455 process as part of this rule's rhs. */
1456 }
1457
1458 /* If we just passed an action, that action was in the middle
a70083a3
AD
1459 of a rule, so make a dummy rule to reduce it to a
1460 non-terminal. */
89cab50d 1461 if (action_flag)
1ff442ca 1462 {
a70083a3 1463 bucket *sdummy;
1ff442ca 1464
f282676b
AD
1465 /* Since the action was written out with this rule's
1466 number, we must give the new rule this number by
1467 inserting the new rule before it. */
1ff442ca
NF
1468
1469 /* Make a dummy nonterminal, a gensym. */
a70083a3 1470 sdummy = gensym ();
1ff442ca
NF
1471
1472 /* Make a new rule, whose body is empty,
1473 before the current one, so that the action
1474 just read can belong to it. */
1475 nrules++;
1476 nitems++;
1477 record_rule_line ();
d7913476 1478 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1479 if (crule1)
1480 crule1->next = p;
a70083a3
AD
1481 else
1482 grammar = p;
1ff442ca 1483 p->sym = sdummy;
d7913476 1484 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1485 p->next = crule1;
1486 crule1->next = crule;
1487
f282676b
AD
1488 /* Insert the dummy generated by that rule into this
1489 rule. */
1ff442ca 1490 nitems++;
d7913476 1491 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1492 p->sym = sdummy;
1493 p1->next = p;
1494 p1 = p;
1495
89cab50d 1496 action_flag = 0;
1ff442ca
NF
1497 }
1498
511e79b3 1499 if (t == tok_identifier)
1ff442ca
NF
1500 {
1501 nitems++;
d7913476 1502 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1503 p->sym = symval;
1504 p1->next = p;
1505 p1 = p;
1506 }
a70083a3 1507 else /* handle an action. */
1ff442ca 1508 {
a70083a3 1509 copy_action (crule, rulelength);
89cab50d 1510 action_flag = 1;
1ff442ca
NF
1511 xactions++; /* JF */
1512 }
1513 rulelength++;
a70083a3 1514 } /* end of read rhs of rule */
1ff442ca
NF
1515
1516 /* Put an empty link in the list to mark the end of this rule */
d7913476 1517 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1518 p1->next = p;
1519 p1 = p;
1520
511e79b3 1521 if (t == tok_prec)
1ff442ca 1522 {
a0f6b076 1523 complain (_("two @prec's in a row"));
a70083a3 1524 t = lex ();
1ff442ca 1525 crule->ruleprec = symval;
a70083a3 1526 t = lex ();
1ff442ca 1527 }
511e79b3 1528 if (t == tok_guard)
1ff442ca 1529 {
a70083a3 1530 if (!semantic_parser)
ff4a34be 1531 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1532
a70083a3
AD
1533 copy_guard (crule, rulelength);
1534 t = lex ();
1ff442ca 1535 }
511e79b3 1536 else if (t == tok_left_curly)
1ff442ca 1537 {
a70083a3 1538 /* This case never occurs -wjh */
89cab50d 1539 if (action_flag)
a0f6b076 1540 complain (_("two actions at end of one rule"));
a70083a3 1541 copy_action (crule, rulelength);
89cab50d 1542 action_flag = 1;
943819bf 1543 xactions++; /* -wjh */
a70083a3 1544 t = lex ();
1ff442ca 1545 }
a0f6b076 1546 /* If $$ is being set in default way, report if any type
6666f98f
AD
1547 mismatch. */
1548 else if (!xactions
a70083a3 1549 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1550 {
6666f98f
AD
1551 if (lhs->type_name == 0
1552 || first_rhs->type_name == 0
a70083a3 1553 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1554 complain (_("type clash (`%s' `%s') on default action"),
1555 lhs->type_name ? lhs->type_name : "",
a70083a3 1556 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1557 }
1558 /* Warn if there is no default for $$ but we need one. */
1559 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1560 complain (_("empty rule for typed nonterminal, and no action"));
511e79b3 1561 if (t == tok_semicolon)
a70083a3 1562 t = lex ();
a083fbbf 1563 }
943819bf 1564#if 0
a70083a3 1565 /* these things can appear as alternatives to rules. */
943819bf
RS
1566/* NO, they cannot.
1567 a) none of the documentation allows them
1568 b) most of them scan forward until finding a next %
1569 thus they may swallow lots of intervening rules
1570*/
511e79b3 1571 else if (t == tok_token)
1ff442ca 1572 {
d7020c20 1573 parse_token_decl (token_sym, nterm_sym);
a70083a3 1574 t = lex ();
1ff442ca 1575 }
511e79b3 1576 else if (t == tok_nterm)
1ff442ca 1577 {
d7020c20 1578 parse_token_decl (nterm_sym, token_sym);
a70083a3 1579 t = lex ();
1ff442ca 1580 }
511e79b3 1581 else if (t == tok_type)
1ff442ca 1582 {
a70083a3 1583 t = get_type ();
1ff442ca 1584 }
511e79b3 1585 else if (t == tok_union)
1ff442ca 1586 {
a70083a3
AD
1587 parse_union_decl ();
1588 t = lex ();
1ff442ca 1589 }
511e79b3 1590 else if (t == tok_expect)
1ff442ca 1591 {
a70083a3
AD
1592 parse_expect_decl ();
1593 t = lex ();
1ff442ca 1594 }
511e79b3 1595 else if (t == tok_start)
1ff442ca 1596 {
a70083a3
AD
1597 parse_start_decl ();
1598 t = lex ();
1ff442ca 1599 }
943819bf
RS
1600#endif
1601
1ff442ca 1602 else
943819bf 1603 {
d01c415b 1604 complain (_("invalid input: %s"), quote (token_buffer));
a70083a3 1605 t = lex ();
943819bf 1606 }
1ff442ca
NF
1607 }
1608
943819bf
RS
1609 /* grammar has been read. Do some checking */
1610
1ff442ca 1611 if (nsyms > MAXSHORT)
a0f6b076
AD
1612 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1613 MAXSHORT);
1ff442ca 1614 if (nrules == 0)
a0f6b076 1615 fatal (_("no rules in the input grammar"));
1ff442ca 1616
1ff442ca
NF
1617 /* Report any undefined symbols and consider them nonterminals. */
1618
1619 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1620 if (bp->class == unknown_sym)
1ff442ca 1621 {
a70083a3
AD
1622 complain (_
1623 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1624 bp->tag);
d7020c20 1625 bp->class = nterm_sym;
1ff442ca
NF
1626 bp->value = nvars++;
1627 }
1628
1629 ntokens = nsyms - nvars;
1630}
ff48177d
MA
1631
1632/* At the end of the grammar file, some C source code must
63c2d5de 1633 be stored. It is going to be associated to the epilogue
ff48177d
MA
1634 directive. */
1635static void
1636read_additionnal_code (void)
1637{
1638 char c;
63c2d5de 1639 struct obstack el_obstack;
342b8b6e 1640
63c2d5de 1641 obstack_init (&el_obstack);
ff48177d
MA
1642
1643 while ((c = getc (finput)) != EOF)
63c2d5de 1644 obstack_1grow (&el_obstack, c);
342b8b6e 1645
63c2d5de 1646 obstack_1grow (&el_obstack, 0);
11d82f03 1647 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1648}
1649
a70083a3
AD
1650\f
1651/*--------------------------------------------------------------.
1652| For named tokens, but not literal ones, define the name. The |
1653| value is the user token number. |
1654`--------------------------------------------------------------*/
1ff442ca 1655
4a120d45 1656static void
896fe5c1 1657output_token_defines (struct obstack *oout)
1ff442ca 1658{
a70083a3
AD
1659 bucket *bp;
1660 char *cp, *symbol;
1661 char c;
1ff442ca 1662
a70083a3 1663 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1664 {
a70083a3
AD
1665 symbol = bp->tag; /* get symbol */
1666
1667 if (bp->value >= ntokens)
1668 continue;
1669 if (bp->user_token_number == SALIAS)
1670 continue;
1671 if ('\'' == *symbol)
1672 continue; /* skip literal character */
1673 if (bp == errtoken)
1674 continue; /* skip error token */
1675 if ('\"' == *symbol)
1ff442ca 1676 {
a70083a3
AD
1677 /* use literal string only if given a symbol with an alias */
1678 if (bp->alias)
1679 symbol = bp->alias->tag;
1680 else
1681 continue;
1682 }
1ff442ca 1683
a70083a3
AD
1684 /* Don't #define nonliteral tokens whose names contain periods. */
1685 cp = symbol;
1686 while ((c = *cp++) && c != '.');
1687 if (c != '\0')
1688 continue;
1ff442ca 1689
0b8afb77 1690 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1691 symbol, bp->user_token_number);
a70083a3 1692 if (semantic_parser)
342b8b6e
AD
1693 /* FIXME: This is certainly dead wrong, and should be just as
1694 above. --akim. */
0b8afb77 1695 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1696 }
1697}
1ff442ca
NF
1698
1699
a70083a3
AD
1700/*------------------------------------------------------------------.
1701| Assign symbol numbers, and write definition of token names into |
b2ca4022 1702| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1703| of symbols. |
1704`------------------------------------------------------------------*/
1ff442ca 1705
4a120d45 1706static void
118fb205 1707packsymbols (void)
1ff442ca 1708{
342b8b6e 1709 bucket *bp = NULL;
a70083a3 1710 int tokno = 1;
342b8b6e 1711 int i, j;
a70083a3 1712 int last_user_token_number;
4a120d45 1713 static char DOLLAR[] = "$";
1ff442ca 1714
d7913476 1715 tags = XCALLOC (char *, nsyms + 1);
4a120d45 1716 tags[0] = DOLLAR;
d7913476 1717 user_toknums = XCALLOC (short, nsyms + 1);
943819bf 1718 user_toknums[0] = 0;
1ff442ca 1719
d7913476
AD
1720 sprec = XCALLOC (short, nsyms);
1721 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1722
1723 max_user_token_number = 256;
1724 last_user_token_number = 256;
1725
1726 for (bp = firstsymbol; bp; bp = bp->next)
1727 {
d7020c20 1728 if (bp->class == nterm_sym)
1ff442ca
NF
1729 {
1730 bp->value += ntokens;
1731 }
943819bf
RS
1732 else if (bp->alias)
1733 {
0a6384c4
AD
1734 /* this symbol and its alias are a single token defn.
1735 allocate a tokno, and assign to both check agreement of
1736 ->prec and ->assoc fields and make both the same */
1737 if (bp->value == 0)
1738 bp->value = bp->alias->value = tokno++;
943819bf 1739
0a6384c4
AD
1740 if (bp->prec != bp->alias->prec)
1741 {
1742 if (bp->prec != 0 && bp->alias->prec != 0
1743 && bp->user_token_number == SALIAS)
a0f6b076
AD
1744 complain (_("conflicting precedences for %s and %s"),
1745 bp->tag, bp->alias->tag);
0a6384c4
AD
1746 if (bp->prec != 0)
1747 bp->alias->prec = bp->prec;
1748 else
1749 bp->prec = bp->alias->prec;
1750 }
943819bf 1751
0a6384c4
AD
1752 if (bp->assoc != bp->alias->assoc)
1753 {
a0f6b076
AD
1754 if (bp->assoc != 0 && bp->alias->assoc != 0
1755 && bp->user_token_number == SALIAS)
1756 complain (_("conflicting assoc values for %s and %s"),
1757 bp->tag, bp->alias->tag);
1758 if (bp->assoc != 0)
1759 bp->alias->assoc = bp->assoc;
1760 else
1761 bp->assoc = bp->alias->assoc;
1762 }
0a6384c4
AD
1763
1764 if (bp->user_token_number == SALIAS)
a70083a3 1765 continue; /* do not do processing below for SALIASs */
943819bf 1766
a70083a3 1767 }
d7020c20 1768 else /* bp->class == token_sym */
943819bf
RS
1769 {
1770 bp->value = tokno++;
1771 }
1772
d7020c20 1773 if (bp->class == token_sym)
1ff442ca 1774 {
342b8b6e 1775 if (!bp->user_token_number)
1ff442ca
NF
1776 bp->user_token_number = ++last_user_token_number;
1777 if (bp->user_token_number > max_user_token_number)
1778 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1779 }
1780
1781 tags[bp->value] = bp->tag;
943819bf 1782 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1783 sprec[bp->value] = bp->prec;
1784 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1785 }
1786
342b8b6e 1787 token_translations = XCALLOC (short, max_user_token_number + 1);
1ff442ca 1788
342b8b6e
AD
1789 /* initialize all entries for literal tokens to 2, the internal
1790 token number for $undefined., which represents all invalid
1791 inputs. */
1792 for (j = 0; j <= max_user_token_number; j++)
1793 token_translations[j] = 2;
1ff442ca 1794
342b8b6e
AD
1795 for (bp = firstsymbol; bp; bp = bp->next)
1796 {
1797 if (bp->value >= ntokens)
1798 continue; /* non-terminal */
1799 if (bp->user_token_number == SALIAS)
1800 continue;
1801 if (token_translations[bp->user_token_number] != 2)
1802 complain (_("tokens %s and %s both assigned number %d"),
1803 tags[token_translations[bp->user_token_number]],
1804 bp->tag, bp->user_token_number);
1805 token_translations[bp->user_token_number] = bp->value;
1ff442ca
NF
1806 }
1807
1808 error_token_number = errtoken->value;
1809
342b8b6e
AD
1810 {
1811 struct obstack tokendefs;
1812 obstack_init (&tokendefs);
1813 output_token_defines (&tokendefs);
1814 obstack_1grow (&tokendefs, 0);
1815 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1816 obstack_free (&tokendefs, NULL);
1817 }
b6610515 1818
d8cb5183
MA
1819#if 0
1820 if (!no_parser_flag)
1821 output_token_defines (&table_obstack);
1822#endif
1ff442ca 1823
d7020c20 1824 if (startval->class == unknown_sym)
a0f6b076 1825 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1826 else if (startval->class == token_sym)
a0f6b076 1827 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1828
1829 start_symbol = startval->value;
1830
89cab50d 1831 if (defines_flag)
1ff442ca 1832 {
896fe5c1 1833 output_token_defines (&defines_obstack);
1ff442ca
NF
1834
1835 if (!pure_parser)
1836 {
1837 if (spec_name_prefix)
896fe5c1
AD
1838 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1839 spec_name_prefix);
1ff442ca 1840 else
ff4423cc 1841 obstack_sgrow (&defines_obstack,
573c1d9f 1842 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1843 }
1844
1845 if (semantic_parser)
1846 for (i = ntokens; i < nsyms; i++)
1847 {
1848 /* don't make these for dummy nonterminals made by gensym. */
1849 if (*tags[i] != '@')
896fe5c1 1850 obstack_fgrow2 (&defines_obstack,
0b8afb77 1851 "# define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1852 }
1853#if 0
1854 /* `fdefines' is now a temporary file, so we need to copy its
1855 contents in `done', so we can't close it here. */
a70083a3 1856 fclose (fdefines);
1ff442ca
NF
1857 fdefines = NULL;
1858#endif
1859 }
1860}
a083fbbf 1861
1ff442ca 1862
a70083a3
AD
1863/*---------------------------------------------------------------.
1864| Convert the rules into the representation using RRHS, RLHS and |
1865| RITEMS. |
1866`---------------------------------------------------------------*/
1ff442ca 1867
4a120d45 1868static void
118fb205 1869packgram (void)
1ff442ca 1870{
a70083a3
AD
1871 int itemno;
1872 int ruleno;
1873 symbol_list *p;
1ff442ca
NF
1874
1875 bucket *ruleprec;
1876
d7913476
AD
1877 ritem = XCALLOC (short, nitems + 1);
1878 rlhs = XCALLOC (short, nrules) - 1;
1879 rrhs = XCALLOC (short, nrules) - 1;
1880 rprec = XCALLOC (short, nrules) - 1;
1881 rprecsym = XCALLOC (short, nrules) - 1;
1882 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1883
1884 itemno = 0;
1885 ruleno = 1;
1886
1887 p = grammar;
1888 while (p)
1889 {
1890 rlhs[ruleno] = p->sym->value;
1891 rrhs[ruleno] = itemno;
1892 ruleprec = p->ruleprec;
1893
1894 p = p->next;
1895 while (p && p->sym)
1896 {
1897 ritem[itemno++] = p->sym->value;
1898 /* A rule gets by default the precedence and associativity
1899 of the last token in it. */
d7020c20 1900 if (p->sym->class == token_sym)
1ff442ca
NF
1901 {
1902 rprec[ruleno] = p->sym->prec;
1903 rassoc[ruleno] = p->sym->assoc;
1904 }
a70083a3
AD
1905 if (p)
1906 p = p->next;
1ff442ca
NF
1907 }
1908
1909 /* If this rule has a %prec,
a70083a3 1910 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1911 if (ruleprec)
1912 {
a70083a3
AD
1913 rprec[ruleno] = ruleprec->prec;
1914 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1915 rprecsym[ruleno] = ruleprec->value;
1916 }
1917
1918 ritem[itemno++] = -ruleno;
1919 ruleno++;
1920
a70083a3
AD
1921 if (p)
1922 p = p->next;
1ff442ca
NF
1923 }
1924
1925 ritem[itemno] = 0;
1926}
a70083a3
AD
1927\f
1928/*-------------------------------------------------------------------.
1929| Read in the grammar specification and record it in the format |
ea5607fd 1930| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1931| and all actions into ACTION_OBSTACK, in each case forming the body |
1932| of a C function (YYGUARD or YYACTION) which contains a switch |
1933| statement to decide which guard or action to execute. |
a70083a3
AD
1934`-------------------------------------------------------------------*/
1935
1936void
1937reader (void)
1938{
1939 start_flag = 0;
1940 startval = NULL; /* start symbol not specified yet. */
1941
a70083a3
AD
1942 nsyms = 1;
1943 nvars = 0;
1944 nrules = 0;
1945 nitems = 0;
1946 rline_allocated = 10;
d7913476 1947 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1948
1949 typed = 0;
1950 lastprec = 0;
1951
a70083a3
AD
1952 semantic_parser = 0;
1953 pure_parser = 0;
a70083a3
AD
1954
1955 grammar = NULL;
1956
342b8b6e 1957 lex_init ();
a70083a3
AD
1958 lineno = 1;
1959
11d82f03
MA
1960 /* Initialize the muscle obstack. */
1961 obstack_init (&muscle_obstack);
82e236e2 1962
a70083a3
AD
1963 /* Initialize the symbol table. */
1964 tabinit ();
b6610515 1965
a70083a3
AD
1966 /* Construct the error token */
1967 errtoken = getsym ("error");
d7020c20 1968 errtoken->class = token_sym;
a70083a3 1969 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1970
a70083a3
AD
1971 /* Construct a token that represents all undefined literal tokens.
1972 It is always token number 2. */
1973 undeftoken = getsym ("$undefined.");
d7020c20 1974 undeftoken->class = token_sym;
a70083a3
AD
1975 undeftoken->user_token_number = 2;
1976
896fe5c1
AD
1977 /* Read the declaration section. Copy %{ ... %} groups to
1978 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1979 etc. found there. */
a70083a3 1980 read_declarations ();
a70083a3
AD
1981 /* Read in the grammar, build grammar in list form. Write out
1982 guards and actions. */
1983 readgram ();
ff48177d
MA
1984 /* Some C code is given at the end of the grammar file. */
1985 read_additionnal_code ();
b0c4483e 1986
a70083a3 1987 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
1988 write its type into the .tab.h file.
1989 This is no longer need with header skeleton. */
1990
a70083a3
AD
1991 /* Assign the symbols their symbol numbers. Write #defines for the
1992 token symbols into FDEFINES if requested. */
1993 packsymbols ();
1994 /* Convert the grammar into the format described in gram.h. */
1995 packgram ();
a70083a3 1996}