]> git.saurik.com Git - bison.git/blame - src/reader.c
Adjust.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 38
1ff442ca 39/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 40static int rline_allocated;
1ff442ca 41
a70083a3
AD
42typedef struct symbol_list
43{
44 struct symbol_list *next;
45 bucket *sym;
46 bucket *ruleprec;
47}
48symbol_list;
118fb205 49
1ff442ca 50int lineno;
1ff442ca 51char **tags;
d019d655 52short *user_toknums;
4a120d45
JT
53static symbol_list *grammar;
54static int start_flag;
55static bucket *startval;
1ff442ca
NF
56
57/* Nonzero if components of semantic values are used, implying
58 they must be unions. */
59static int value_components_used;
60
d7020c20
AD
61/* Nonzero if %union has been seen. */
62static int typed;
1ff442ca 63
d7020c20
AD
64/* Incremented for each %left, %right or %nonassoc seen */
65static int lastprec;
1ff442ca 66
1ff442ca 67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
0d533154 69\f
a70083a3 70
0d533154
AD
71/*===================\
72| Low level lexing. |
73\===================*/
943819bf
RS
74
75static void
118fb205 76skip_to_char (int target)
943819bf
RS
77{
78 int c;
79 if (target == '\n')
a0f6b076 80 complain (_(" Skipping to next \\n"));
943819bf 81 else
a0f6b076 82 complain (_(" Skipping to next %c"), target);
943819bf
RS
83
84 do
0d533154 85 c = skip_white_space ();
943819bf 86 while (c != target && c != EOF);
a083fbbf 87 if (c != EOF)
0d533154 88 ungetc (c, finput);
943819bf
RS
89}
90
91
0d533154
AD
/* Read an optionally negated ('-') run of decimal digits from STREAM
   and return its value.  The first character that is not part of
   the number is pushed back onto STREAM.  When no digit is present
   the result is 0.

   The magnitude saturates at INT_MAX instead of overflowing: signed
   overflow is undefined behaviour, and the digits come straight from
   the user's grammar file.  All the digits are consumed even when
   the value saturates.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit would exceed INT_MAX: clamp.  Once clamped,
         the test stays true for every further digit.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;
      c = getc (stream);
    }

  ungetc (c, stream);

  /* -INT_MAX is always representable, so this cannot overflow.  */
  return sign * n;
}
119\f
79282c5a
AD
120/*--------------------------------------------------------------.
121| Get the data type (alternative in the union) of the value for |
122| symbol N in rule RULE. |
123`--------------------------------------------------------------*/
124
125static char *
126get_type_name (int n, symbol_list * rule)
127{
128 int i;
129 symbol_list *rp;
130
131 if (n < 0)
132 {
133 complain (_("invalid $ value"));
134 return NULL;
135 }
136
137 rp = rule;
138 i = 0;
139
140 while (i < n)
141 {
142 rp = rp->next;
143 if (rp == NULL || rp->sym == NULL)
144 {
145 complain (_("invalid $ value"));
146 return NULL;
147 }
148 i++;
149 }
150
151 return rp->sym->type_name;
152}
153\f
337bab46
AD
154/*------------------------------------------------------------.
155| Dump the string from FIN to OOUT if non null. MATCH is the |
156| delimiter of the string (either ' or "). |
157`------------------------------------------------------------*/
ae3c3164
AD
158
159static inline void
b6610515 160copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
161{
162 int c;
163
b6610515
RA
164 if (store)
165 obstack_1grow (oout, match);
8c7ebe49 166
4a120d45 167 c = getc (fin);
ae3c3164
AD
168
169 while (c != match)
170 {
171 if (c == EOF)
172 fatal (_("unterminated string at end of file"));
173 if (c == '\n')
174 {
a0f6b076 175 complain (_("unterminated string"));
4a120d45 176 ungetc (c, fin);
ae3c3164
AD
177 c = match; /* invent terminator */
178 continue;
179 }
180
337bab46 181 obstack_1grow (oout, c);
ae3c3164
AD
182
183 if (c == '\\')
184 {
4a120d45 185 c = getc (fin);
ae3c3164
AD
186 if (c == EOF)
187 fatal (_("unterminated string at end of file"));
337bab46 188 obstack_1grow (oout, c);
8c7ebe49 189
ae3c3164
AD
190 if (c == '\n')
191 lineno++;
192 }
193
a70083a3 194 c = getc (fin);
ae3c3164
AD
195 }
196
b6610515
RA
197 if (store)
198 obstack_1grow (oout, c);
199}
200
/* Copy a string or character literal, delimiters included, from FIN
   to OOUT.  MATCH is the opening delimiter, which the caller has
   already consumed.  Shorthand for copy_string2 () with STORE set.
   FIXME (carried over from the original): still to be revisited.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
208
b6610515
RA
/* Copy the longest run of alphanumeric characters and underscores
   from FIN to OOUT.  The character that ends the run is pushed back
   onto FIN.  FIXME (carried over from the original): still to be
   revisited.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (ch == '_' || isalnum (ch))
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
ae3c3164 221
337bab46
AD
222/*-----------------------------------------------------------------.
223| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
224| NULL). In fact we just saw a `/', which might or might not be a |
225| comment. In any case, copy what we saw. |
226| |
227| OUT2 might be NULL. |
228`-----------------------------------------------------------------*/
ae3c3164
AD
229
230static inline void
337bab46 231copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
232{
233 int cplus_comment;
a70083a3 234 int ended;
550a72a3
AD
235 int c;
236
237 /* We read a `/', output it. */
337bab46 238 obstack_1grow (oout1, '/');
896fe5c1
AD
239 if (oout2)
240 obstack_1grow (oout2, '/');
550a72a3
AD
241
242 switch ((c = getc (fin)))
243 {
244 case '/':
245 cplus_comment = 1;
246 break;
247 case '*':
248 cplus_comment = 0;
249 break;
250 default:
251 ungetc (c, fin);
252 return;
253 }
ae3c3164 254
337bab46 255 obstack_1grow (oout1, c);
896fe5c1
AD
256 if (oout2)
257 obstack_1grow (oout2, c);
550a72a3 258 c = getc (fin);
ae3c3164
AD
259
260 ended = 0;
261 while (!ended)
262 {
263 if (!cplus_comment && c == '*')
264 {
265 while (c == '*')
266 {
337bab46 267 obstack_1grow (oout1, c);
896fe5c1
AD
268 if (oout2)
269 obstack_1grow (oout2, c);
550a72a3 270 c = getc (fin);
ae3c3164
AD
271 }
272
273 if (c == '/')
274 {
337bab46 275 obstack_1grow (oout1, c);
896fe5c1
AD
276 if (oout2)
277 obstack_1grow (oout2, c);
ae3c3164
AD
278 ended = 1;
279 }
280 }
281 else if (c == '\n')
282 {
283 lineno++;
337bab46 284 obstack_1grow (oout1, c);
896fe5c1
AD
285 if (oout2)
286 obstack_1grow (oout2, c);
ae3c3164
AD
287 if (cplus_comment)
288 ended = 1;
289 else
550a72a3 290 c = getc (fin);
ae3c3164
AD
291 }
292 else if (c == EOF)
293 fatal (_("unterminated comment"));
294 else
295 {
337bab46 296 obstack_1grow (oout1, c);
896fe5c1
AD
297 if (oout2)
298 obstack_1grow (oout2, c);
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 }
302}
303
304
550a72a3
AD
/* Single-output flavour of copy_comment2 (): a `/' was just read
   from FIN; copy it, and the comment it may introduce, to OOUT.  */

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *no_second_output = NULL;

  copy_comment2 (fin, oout, no_second_output);
}
315
316
a70083a3 317/*-----------------------------------------------------------------.
337bab46 318| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
319| reference to this location. STACK_OFFSET is the number of values |
320| in the current rule so far, which says where to find `$0' with |
321| respect to the top of the stack. |
322`-----------------------------------------------------------------*/
1ff442ca 323
a70083a3 324static inline void
337bab46 325copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 326{
a70083a3 327 int c;
1ff442ca 328
a70083a3
AD
329 c = getc (fin);
330 if (c == '$')
1ff442ca 331 {
ff4423cc 332 obstack_sgrow (oout, "yyloc");
89cab50d 333 locations_flag = 1;
a70083a3
AD
334 }
335 else if (isdigit (c) || c == '-')
336 {
337 int n;
1ff442ca 338
a70083a3
AD
339 ungetc (c, fin);
340 n = read_signed_integer (fin);
943819bf 341
337bab46 342 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 343 locations_flag = 1;
1ff442ca 344 }
a70083a3 345 else
ff4a34be
AD
346 {
347 char buf[] = "@c";
348 buf[1] = c;
349 complain (_("%s is invalid"), quote (buf));
350 }
1ff442ca 351}
79282c5a
AD
352
353
354/*-------------------------------------------------------------------.
355| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
356| |
357| Possible inputs: $[<TYPENAME>]($|integer) |
358| |
337bab46 359| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
360| the number of values in the current rule so far, which says where |
361| to find `$0' with respect to the top of the stack. |
362`-------------------------------------------------------------------*/
363
364static inline void
337bab46 365copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
366 symbol_list *rule, int stack_offset)
367{
368 int c = getc (fin);
b0ce6046 369 const char *type_name = NULL;
79282c5a 370
f282676b 371 /* Get the type name if explicit. */
79282c5a
AD
372 if (c == '<')
373 {
f282676b 374 read_type_name (fin);
79282c5a
AD
375 type_name = token_buffer;
376 value_components_used = 1;
79282c5a
AD
377 c = getc (fin);
378 }
379
380 if (c == '$')
381 {
ff4423cc 382 obstack_sgrow (oout, "yyval");
8c7ebe49 383
79282c5a
AD
384 if (!type_name)
385 type_name = get_type_name (0, rule);
386 if (type_name)
337bab46 387 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
388 if (!type_name && typed)
389 complain (_("$$ of `%s' has no declared type"),
390 rule->sym->tag);
391 }
392 else if (isdigit (c) || c == '-')
393 {
394 int n;
395 ungetc (c, fin);
396 n = read_signed_integer (fin);
397
398 if (!type_name && n > 0)
399 type_name = get_type_name (n, rule);
400
337bab46 401 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 402
79282c5a 403 if (type_name)
337bab46 404 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
405 if (!type_name && typed)
406 complain (_("$%d of `%s' has no declared type"),
407 n, rule->sym->tag);
408 }
409 else
410 {
411 char buf[] = "$c";
412 buf[1] = c;
413 complain (_("%s is invalid"), quote (buf));
414 }
415}
a70083a3
AD
416\f
417/*-------------------------------------------------------------------.
418| Copy the contents of a `%{ ... %}' into the definitions file. The |
419| `%{' has already been read. Return after reading the `%}'. |
420`-------------------------------------------------------------------*/
1ff442ca 421
4a120d45 422static void
118fb205 423copy_definition (void)
1ff442ca 424{
a70083a3 425 int c;
ae3c3164 426 /* -1 while reading a character if prev char was %. */
a70083a3 427 int after_percent;
1ff442ca 428
b6610515 429#if 0
89cab50d 430 if (!no_lines_flag)
25b222fa
MA
431 {
432 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 433 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
434 muscle_find("filename")));
435 }
b6610515 436#endif
1ff442ca
NF
437
438 after_percent = 0;
439
ae3c3164 440 c = getc (finput);
1ff442ca
NF
441
442 for (;;)
443 {
444 switch (c)
445 {
446 case '\n':
dd60faec 447 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
448 lineno++;
449 break;
450
451 case '%':
a70083a3 452 after_percent = -1;
1ff442ca 453 break;
a083fbbf 454
1ff442ca
NF
455 case '\'':
456 case '"':
337bab46 457 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
458 break;
459
460 case '/':
337bab46 461 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
462 break;
463
464 case EOF:
a70083a3 465 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
466
467 default:
dd60faec 468 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
469 }
470
a70083a3 471 c = getc (finput);
1ff442ca
NF
472
473 if (after_percent)
474 {
475 if (c == '}')
476 return;
dd60faec 477 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
478 }
479 after_percent = 0;
1ff442ca 480 }
1ff442ca
NF
481}
482
483
d7020c20
AD
484/*-------------------------------------------------------------------.
485| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
486| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
487| are reversed. |
488`-------------------------------------------------------------------*/
1ff442ca 489
4a120d45 490static void
d7020c20 491parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 492{
342b8b6e
AD
493 token_t token = tok_undef;
494 char *typename = NULL;
1ff442ca 495
1e9798d5
AD
496 /* The symbol being defined. */
497 struct bucket *symbol = NULL;
498
499 /* After `%token' and `%nterm', any number of symbols maybe be
500 defined. */
1ff442ca
NF
501 for (;;)
502 {
e6011337
JT
503 int tmp_char = ungetc (skip_white_space (), finput);
504
1e9798d5
AD
505 /* `%' (for instance from `%token', or from `%%' etc.) is the
506 only valid means to end this declaration. */
e6011337 507 if (tmp_char == '%')
1ff442ca 508 return;
e6011337 509 if (tmp_char == EOF)
a0f6b076 510 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 511
a70083a3 512 token = lex ();
511e79b3 513 if (token == tok_comma)
943819bf
RS
514 {
515 symbol = NULL;
516 continue;
517 }
511e79b3 518 if (token == tok_typename)
1ff442ca 519 {
95e36146 520 typename = xstrdup (token_buffer);
1ff442ca 521 value_components_used = 1;
943819bf
RS
522 symbol = NULL;
523 }
511e79b3 524 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 525 {
8e03724b
AD
526 if (symval->alias)
527 warn (_("symbol `%s' used more than once as a literal string"),
528 symval->tag);
529 else if (symbol->alias)
530 warn (_("symbol `%s' given more than one literal string"),
531 symbol->tag);
532 else
533 {
534 symval->class = token_sym;
535 symval->type_name = typename;
536 symval->user_token_number = symbol->user_token_number;
537 symbol->user_token_number = SALIAS;
538 symval->alias = symbol;
539 symbol->alias = symval;
540 /* symbol and symval combined are only one symbol */
541 nsyms--;
542 }
8e03724b 543 symbol = NULL;
1ff442ca 544 }
511e79b3 545 else if (token == tok_identifier)
1ff442ca
NF
546 {
547 int oldclass = symval->class;
943819bf 548 symbol = symval;
1ff442ca 549
943819bf 550 if (symbol->class == what_is_not)
a0f6b076 551 complain (_("symbol %s redefined"), symbol->tag);
943819bf 552 symbol->class = what_is;
d7020c20 553 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 554 symbol->value = nvars++;
1ff442ca
NF
555
556 if (typename)
557 {
943819bf
RS
558 if (symbol->type_name == NULL)
559 symbol->type_name = typename;
a70083a3 560 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 561 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
562 }
563 }
511e79b3 564 else if (symbol && token == tok_number)
a70083a3 565 {
943819bf 566 symbol->user_token_number = numval;
a70083a3 567 }
1ff442ca 568 else
943819bf 569 {
a0f6b076 570 complain (_("`%s' is invalid in %s"),
d7020c20 571 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 572 skip_to_char ('%');
943819bf 573 }
1ff442ca
NF
574 }
575
576}
577
1ff442ca 578
d7020c20
AD
579/*------------------------------.
580| Parse what comes after %start |
581`------------------------------*/
1ff442ca 582
4a120d45 583static void
118fb205 584parse_start_decl (void)
1ff442ca
NF
585{
586 if (start_flag)
27821bff 587 complain (_("multiple %s declarations"), "%start");
511e79b3 588 if (lex () != tok_identifier)
27821bff 589 complain (_("invalid %s declaration"), "%start");
943819bf
RS
590 else
591 {
592 start_flag = 1;
593 startval = symval;
594 }
1ff442ca
NF
595}
596
a70083a3
AD
597/*-----------------------------------------------------------.
598| read in a %type declaration and record its information for |
599| get_type_name to access |
600`-----------------------------------------------------------*/
601
602static void
603parse_type_decl (void)
604{
a70083a3
AD
605 char *name;
606
511e79b3 607 if (lex () != tok_typename)
a70083a3
AD
608 {
609 complain ("%s", _("%type declaration has no <typename>"));
610 skip_to_char ('%');
611 return;
612 }
613
95e36146 614 name = xstrdup (token_buffer);
a70083a3
AD
615
616 for (;;)
617 {
f17bcd1f 618 token_t t;
a70083a3
AD
619 int tmp_char = ungetc (skip_white_space (), finput);
620
621 if (tmp_char == '%')
622 return;
623 if (tmp_char == EOF)
624 fatal (_("Premature EOF after %s"), token_buffer);
625
626 t = lex ();
627
628 switch (t)
1ff442ca
NF
629 {
630
511e79b3
AD
631 case tok_comma:
632 case tok_semicolon:
1ff442ca
NF
633 break;
634
511e79b3 635 case tok_identifier:
1ff442ca
NF
636 if (symval->type_name == NULL)
637 symval->type_name = name;
a70083a3 638 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 639 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
640
641 break;
642
643 default:
a0f6b076
AD
644 complain (_("invalid %%type declaration due to item: %s"),
645 token_buffer);
a70083a3 646 skip_to_char ('%');
1ff442ca
NF
647 }
648 }
649}
650
651
652
d7020c20
AD
653/*----------------------------------------------------------------.
654| Read in a %left, %right or %nonassoc declaration and record its |
655| information. |
656`----------------------------------------------------------------*/
1ff442ca 657
4a120d45 658static void
d7020c20 659parse_assoc_decl (associativity assoc)
1ff442ca 660{
a70083a3
AD
661 char *name = NULL;
662 int prev = 0;
1ff442ca 663
a70083a3 664 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 665
1ff442ca
NF
666 for (;;)
667 {
f17bcd1f 668 token_t t;
e6011337 669 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 670
e6011337 671 if (tmp_char == '%')
1ff442ca 672 return;
e6011337 673 if (tmp_char == EOF)
a0f6b076 674 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 675
a70083a3 676 t = lex ();
1ff442ca
NF
677
678 switch (t)
679 {
511e79b3 680 case tok_typename:
95e36146 681 name = xstrdup (token_buffer);
1ff442ca
NF
682 break;
683
511e79b3 684 case tok_comma:
1ff442ca
NF
685 break;
686
511e79b3 687 case tok_identifier:
1ff442ca 688 if (symval->prec != 0)
a0f6b076 689 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
690 symval->prec = lastprec;
691 symval->assoc = assoc;
d7020c20 692 if (symval->class == nterm_sym)
a0f6b076 693 complain (_("symbol %s redefined"), symval->tag);
d7020c20 694 symval->class = token_sym;
1ff442ca 695 if (name)
a70083a3 696 { /* record the type, if one is specified */
1ff442ca
NF
697 if (symval->type_name == NULL)
698 symval->type_name = name;
a70083a3 699 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 700 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
701 }
702 break;
703
511e79b3
AD
704 case tok_number:
705 if (prev == tok_identifier)
a70083a3 706 {
1ff442ca 707 symval->user_token_number = numval;
a70083a3
AD
708 }
709 else
710 {
711 complain (_
712 ("invalid text (%s) - number should be after identifier"),
713token_buffer);
714 skip_to_char ('%');
715 }
1ff442ca
NF
716 break;
717
511e79b3 718 case tok_semicolon:
1ff442ca
NF
719 return;
720
721 default:
a0f6b076 722 complain (_("unexpected item: %s"), token_buffer);
a70083a3 723 skip_to_char ('%');
1ff442ca
NF
724 }
725
726 prev = t;
1ff442ca
NF
727 }
728}
729
730
731
dd60faec 732/*--------------------------------------------------------------.
180d45ba
PB
733| Copy the union declaration into the stype muscle |
734| (and fdefines), where it is made into the definition of |
735| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 736`--------------------------------------------------------------*/
1ff442ca 737
4a120d45 738static void
118fb205 739parse_union_decl (void)
1ff442ca 740{
a70083a3
AD
741 int c;
742 int count = 0;
180d45ba 743 struct obstack union_obstack;
1ff442ca
NF
744
745 if (typed)
27821bff 746 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
747
748 typed = 1;
749
f6ec6d13
AD
750 /* FIXME: I'm worried: are you sure attrs_obstack is properly
751 filled? */
180d45ba 752 if (no_lines_flag)
dd60faec 753 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 754
180d45ba
PB
755 obstack_init (&union_obstack);
756 obstack_sgrow (&union_obstack, "union");
896fe5c1 757 if (defines_flag)
ff4423cc 758 obstack_sgrow (&defines_obstack, "typedef union");
1ff442ca 759
27821bff 760 c = getc (finput);
1ff442ca
NF
761
762 while (c != EOF)
763 {
342b8b6e
AD
764 /* If C contains '/', it is output by copy_comment (). */
765 if (c != '/')
766 {
f6ec6d13 767 obstack_1grow (&union_obstack, c);
342b8b6e
AD
768 if (defines_flag)
769 obstack_1grow (&defines_obstack, c);
770 }
1ff442ca
NF
771
772 switch (c)
773 {
774 case '\n':
775 lineno++;
776 break;
777
778 case '/':
180d45ba 779 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
780 break;
781
1ff442ca
NF
782 case '{':
783 count++;
784 break;
785
786 case '}':
787 if (count == 0)
27821bff 788 complain (_("unmatched %s"), "`}'");
1ff442ca 789 count--;
943819bf 790 if (count <= 0)
1ff442ca 791 {
896fe5c1 792 if (defines_flag)
ff4423cc 793 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
1ff442ca 794 /* JF don't choke on trailing semi */
27821bff
AD
795 c = skip_white_space ();
796 if (c != ';')
a70083a3 797 ungetc (c, finput);
180d45ba
PB
798 obstack_1grow (&union_obstack, 0);
799 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
800 return;
801 }
802 }
803
27821bff 804 c = getc (finput);
1ff442ca 805 }
180d45ba 806
1ff442ca
NF
807}
808
d7020c20
AD
809
810/*-------------------------------------------------------.
811| Parse the declaration %expect N which says to expect N |
812| shift-reduce conflicts. |
813`-------------------------------------------------------*/
1ff442ca 814
4a120d45 815static void
118fb205 816parse_expect_decl (void)
1ff442ca 817{
131e2fef 818 int c = skip_white_space ();
1ff442ca
NF
819 ungetc (c, finput);
820
131e2fef 821 if (!isdigit (c))
79282c5a 822 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
823 else
824 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
825}
826
a70083a3
AD
827
828/*-------------------------------------------------------------------.
829| Parse what comes after %thong. the full syntax is |
830| |
831| %thong <type> token number literal |
832| |
833| the <type> or number may be omitted. The number specifies the |
834| user_token_number. |
835| |
836| Two symbols are entered in the table, one for the token symbol and |
837| one for the literal. Both are given the <type>, if any, from the |
838| declaration. The ->user_token_number of the first is SALIAS and |
839| the ->user_token_number of the second is set to the number, if |
840| any, from the declaration. The two symbols are linked via |
841| pointers in their ->alias fields. |
842| |
843| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
844| only the literal string is retained it is the literal string that |
845| is output to yytname |
846`-------------------------------------------------------------------*/
847
848static void
849parse_thong_decl (void)
7b306f52 850{
f17bcd1f 851 token_t token;
a70083a3
AD
852 struct bucket *symbol;
853 char *typename = 0;
6b7e85b9 854 int usrtoknum = SUNDEF;
7b306f52 855
a70083a3 856 token = lex (); /* fetch typename or first token */
511e79b3 857 if (token == tok_typename)
7b306f52 858 {
95e36146 859 typename = xstrdup (token_buffer);
a70083a3
AD
860 value_components_used = 1;
861 token = lex (); /* fetch first token */
7b306f52 862 }
7b306f52 863
a70083a3 864 /* process first token */
7b306f52 865
511e79b3 866 if (token != tok_identifier)
a70083a3
AD
867 {
868 complain (_("unrecognized item %s, expected an identifier"),
869 token_buffer);
870 skip_to_char ('%');
871 return;
7b306f52 872 }
d7020c20 873 symval->class = token_sym;
a70083a3
AD
874 symval->type_name = typename;
875 symval->user_token_number = SALIAS;
876 symbol = symval;
7b306f52 877
a70083a3 878 token = lex (); /* get number or literal string */
1ff442ca 879
511e79b3 880 if (token == tok_number)
943819bf 881 {
a70083a3
AD
882 usrtoknum = numval;
883 token = lex (); /* okay, did number, now get literal */
943819bf 884 }
1ff442ca 885
a70083a3 886 /* process literal string token */
1ff442ca 887
511e79b3 888 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 889 {
a70083a3
AD
890 complain (_("expected string constant instead of %s"), token_buffer);
891 skip_to_char ('%');
892 return;
1ff442ca 893 }
d7020c20 894 symval->class = token_sym;
a70083a3
AD
895 symval->type_name = typename;
896 symval->user_token_number = usrtoknum;
1ff442ca 897
a70083a3
AD
898 symval->alias = symbol;
899 symbol->alias = symval;
1ff442ca 900
79282c5a
AD
901 /* symbol and symval combined are only one symbol. */
902 nsyms--;
a70083a3 903}
3cef001a 904
b6610515 905static void
11d82f03 906parse_muscle_decl (void)
b6610515
RA
907{
908 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
909 char* muscle_key;
910 char* muscle_value;
b6610515
RA
911
912 /* Read key. */
913 if (!isalpha (ch) && ch != '_')
914 {
915 complain (_("invalid %s declaration"), "%define");
916 skip_to_char ('%');
917 return;
918 }
11d82f03
MA
919 copy_identifier (finput, &muscle_obstack);
920 obstack_1grow (&muscle_obstack, 0);
921 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 922
b6610515
RA
923 /* Read value. */
924 ch = skip_white_space ();
925 if (ch != '"')
926 {
927 ungetc (ch, finput);
928 if (ch != EOF)
929 {
930 complain (_("invalid %s declaration"), "%define");
931 skip_to_char ('%');
932 return;
933 }
934 else
935 fatal (_("Premature EOF after %s"), "\"");
936 }
11d82f03
MA
937 copy_string2 (finput, &muscle_obstack, '"', 0);
938 obstack_1grow (&muscle_obstack, 0);
939 muscle_value = obstack_finish (&muscle_obstack);
b6610515 940
b6610515 941 /* Store the (key, value) pair in the environment. */
11d82f03 942 muscle_insert (muscle_key, muscle_value);
b6610515
RA
943}
944
2ba3b73c
MA
945
/* Parse what comes after %skeleton.  Currently a placeholder that
   reads nothing and does nothing.  */

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
  return;
}
955
a70083a3
AD
956/*----------------------------------------------------------------.
957| Read from finput until `%%' is seen. Discard the `%%'. Handle |
958| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 959| groups to ATTRS_OBSTACK. |
a70083a3 960`----------------------------------------------------------------*/
1ff442ca 961
4a120d45 962static void
a70083a3 963read_declarations (void)
1ff442ca 964{
a70083a3
AD
965 int c;
966 int tok;
1ff442ca 967
a70083a3 968 for (;;)
1ff442ca 969 {
a70083a3 970 c = skip_white_space ();
1ff442ca 971
a70083a3
AD
972 if (c == '%')
973 {
974 tok = parse_percent_token ();
1ff442ca 975
a70083a3 976 switch (tok)
943819bf 977 {
511e79b3 978 case tok_two_percents:
a70083a3 979 return;
1ff442ca 980
511e79b3 981 case tok_percent_left_curly:
a70083a3
AD
982 copy_definition ();
983 break;
1ff442ca 984
511e79b3 985 case tok_token:
d7020c20 986 parse_token_decl (token_sym, nterm_sym);
a70083a3 987 break;
1ff442ca 988
511e79b3 989 case tok_nterm:
d7020c20 990 parse_token_decl (nterm_sym, token_sym);
a70083a3 991 break;
1ff442ca 992
511e79b3 993 case tok_type:
a70083a3
AD
994 parse_type_decl ();
995 break;
1ff442ca 996
511e79b3 997 case tok_start:
a70083a3
AD
998 parse_start_decl ();
999 break;
118fb205 1000
511e79b3 1001 case tok_union:
a70083a3
AD
1002 parse_union_decl ();
1003 break;
1ff442ca 1004
511e79b3 1005 case tok_expect:
a70083a3
AD
1006 parse_expect_decl ();
1007 break;
6deb4447 1008
511e79b3 1009 case tok_thong:
a70083a3
AD
1010 parse_thong_decl ();
1011 break;
d7020c20 1012
511e79b3 1013 case tok_left:
d7020c20 1014 parse_assoc_decl (left_assoc);
a70083a3 1015 break;
1ff442ca 1016
511e79b3 1017 case tok_right:
d7020c20 1018 parse_assoc_decl (right_assoc);
a70083a3 1019 break;
1ff442ca 1020
511e79b3 1021 case tok_nonassoc:
d7020c20 1022 parse_assoc_decl (non_assoc);
a70083a3 1023 break;
1ff442ca 1024
b6610515 1025 case tok_define:
11d82f03 1026 parse_muscle_decl ();
b6610515 1027 break;
342b8b6e 1028
2ba3b73c
MA
1029 case tok_skel:
1030 parse_skel_decl ();
1031 break;
b6610515 1032
511e79b3 1033 case tok_noop:
a70083a3 1034 break;
1ff442ca 1035
a70083a3
AD
1036 default:
1037 complain (_("unrecognized: %s"), token_buffer);
1038 skip_to_char ('%');
1039 }
1040 }
1041 else if (c == EOF)
1042 fatal (_("no input grammar"));
1043 else
1044 {
ff4a34be
AD
1045 char buf[] = "c";
1046 buf[0] = c;
1047 complain (_("unknown character: %s"), quote (buf));
a70083a3 1048 skip_to_char ('%');
1ff442ca 1049 }
1ff442ca 1050 }
1ff442ca 1051}
a70083a3
AD
1052\f
1053/*-------------------------------------------------------------------.
1054| Assuming that a `{' has just been seen, copy everything up to the |
1055| matching `}' into the actions file. STACK_OFFSET is the number of |
1056| values in the current rule so far, which says where to find `$0' |
1057| with respect to the top of the stack. |
1058`-------------------------------------------------------------------*/
1ff442ca 1059
4a120d45 1060static void
79282c5a 1061copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1062{
a70083a3 1063 int c;
a70083a3 1064 int count;
8c7ebe49 1065 char buf[4096];
1ff442ca
NF
1066
1067 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1068 if (semantic_parser)
1069 stack_offset = 0;
1ff442ca 1070
25b222fa 1071 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
8c7ebe49 1072
89cab50d 1073 if (!no_lines_flag)
8c7ebe49 1074 {
25b222fa 1075 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
342b8b6e 1076 lineno, quotearg_style (c_quoting_style,
25b222fa 1077 muscle_find ("filename")));
8c7ebe49
AD
1078 }
1079 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1080
1081 count = 1;
a70083a3 1082 c = getc (finput);
1ff442ca
NF
1083
1084 while (count > 0)
1085 {
1086 while (c != '}')
a70083a3
AD
1087 {
1088 switch (c)
1ff442ca
NF
1089 {
1090 case '\n':
8c7ebe49 1091 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1092 lineno++;
1093 break;
1094
1095 case '{':
8c7ebe49 1096 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1097 count++;
1098 break;
1099
1100 case '\'':
1101 case '"':
337bab46 1102 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1103 break;
1104
1105 case '/':
337bab46 1106 copy_comment (finput, &action_obstack);
1ff442ca
NF
1107 break;
1108
1109 case '$':
337bab46 1110 copy_dollar (finput, &action_obstack,
8c7ebe49 1111 rule, stack_offset);
1ff442ca
NF
1112 break;
1113
1114 case '@':
337bab46 1115 copy_at (finput, &action_obstack,
8c7ebe49 1116 stack_offset);
6666f98f 1117 break;
1ff442ca
NF
1118
1119 case EOF:
27821bff 1120 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1121
1122 default:
8c7ebe49 1123 obstack_1grow (&action_obstack, c);
a70083a3
AD
1124 }
1125
1126 c = getc (finput);
1127 }
1128
1129 /* above loop exits when c is '}' */
1130
1131 if (--count)
1132 {
8c7ebe49 1133 obstack_1grow (&action_obstack, c);
a70083a3
AD
1134 c = getc (finput);
1135 }
1136 }
1137
ff4423cc 1138 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1139}
1140\f
1141/*-------------------------------------------------------------------.
1142| After `%guard' is seen in the input file, copy the actual guard |
1143| into the guards file. If the guard is followed by an action, copy |
1144| that into the actions file. STACK_OFFSET is the number of values |
1145| in the current rule so far, which says where to find `$0' with |
1146| respect to the top of the stack, for the simple parser in which |
1147| the stack is not popped until after the guard is run. |
1148`-------------------------------------------------------------------*/
1149
1150static void
79282c5a 1151copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1152{
1153 int c;
a70083a3 1154 int count;
a70083a3
AD
1155 int brace_flag = 0;
1156
1157 /* offset is always 0 if parser has already popped the stack pointer */
1158 if (semantic_parser)
1159 stack_offset = 0;
1160
ea5607fd 1161 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1162 if (!no_lines_flag)
25b222fa 1163 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1164 lineno, quotearg_style (c_quoting_style,
11d82f03 1165 muscle_find ("filename")));
ea5607fd 1166 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1167
1168 count = 0;
1169 c = getc (finput);
1170
1171 while (brace_flag ? (count > 0) : (c != ';'))
1172 {
1173 switch (c)
1174 {
1175 case '\n':
ea5607fd 1176 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1177 lineno++;
1178 break;
1179
1180 case '{':
ea5607fd 1181 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1182 brace_flag = 1;
1183 count++;
1184 break;
1185
1186 case '}':
ea5607fd 1187 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1188 if (count > 0)
1189 count--;
1190 else
1191 {
1192 complain (_("unmatched %s"), "`}'");
1193 c = getc (finput); /* skip it */
1194 }
1195 break;
1196
1197 case '\'':
1198 case '"':
337bab46 1199 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1200 break;
1201
1202 case '/':
337bab46 1203 copy_comment (finput, &guard_obstack);
a70083a3
AD
1204 break;
1205
1206 case '$':
337bab46 1207 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1208 break;
1ff442ca 1209
a70083a3 1210 case '@':
337bab46 1211 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1212 break;
1ff442ca 1213
a70083a3
AD
1214 case EOF:
1215 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1216
a70083a3 1217 default:
ea5607fd 1218 obstack_1grow (&guard_obstack, c);
1ff442ca 1219 }
a70083a3
AD
1220
1221 if (c != '}' || count != 0)
1222 c = getc (finput);
1ff442ca
NF
1223 }
1224
a70083a3
AD
1225 c = skip_white_space ();
1226
ff4423cc 1227 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1228 if (c == '{')
1229 copy_action (rule, stack_offset);
1230 else if (c == '=')
1231 {
1232 c = getc (finput); /* why not skip_white_space -wjh */
1233 if (c == '{')
1234 copy_action (rule, stack_offset);
1235 }
1236 else
1237 ungetc (c, finput);
1ff442ca 1238}
a70083a3
AD
1239\f
1240
1241static void
1242record_rule_line (void)
1243{
1244 /* Record each rule's source line number in rline table. */
1ff442ca 1245
a70083a3
AD
1246 if (nrules >= rline_allocated)
1247 {
1248 rline_allocated = nrules * 2;
d7913476 1249 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1250 }
1251 rline[nrules] = lineno;
1252}
1ff442ca
NF
1253
1254
a70083a3
AD
1255/*-------------------------------------------------------------------.
1256| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1257| with the user's names. |
1258`-------------------------------------------------------------------*/
1ff442ca 1259
4a120d45 1260static bucket *
118fb205 1261gensym (void)
1ff442ca 1262{
274d42ce
AD
1263 /* Incremented for each generated symbol */
1264 static int gensym_count = 0;
1265 static char buf[256];
1266
a70083a3 1267 bucket *sym;
1ff442ca 1268
274d42ce
AD
1269 sprintf (buf, "@%d", ++gensym_count);
1270 token_buffer = buf;
a70083a3 1271 sym = getsym (token_buffer);
d7020c20 1272 sym->class = nterm_sym;
1ff442ca 1273 sym->value = nvars++;
36281465 1274 return sym;
1ff442ca
NF
1275}
1276
a70083a3
AD
1277#if 0
1278/*------------------------------------------------------------------.
1279| read in a %type declaration and record its information for |
1280| get_type_name to access. This is unused. It is only called from |
1281| the #if 0 part of readgram |
1282`------------------------------------------------------------------*/
1283
1284static int
1285get_type (void)
1286{
1287 int k;
f17bcd1f 1288 token_t token;
a70083a3
AD
1289 char *name;
1290
f17bcd1f 1291 token = lex ();
a70083a3 1292
f17bcd1f 1293 if (token != tok_typename)
a70083a3
AD
1294 {
1295 complain (_("invalid %s declaration"), "%type");
1296 return t;
1297 }
1298
95e36146 1299 name = xstrdup (token_buffer);
a70083a3
AD
1300
1301 for (;;)
1302 {
f17bcd1f 1303 token = lex ();
a70083a3 1304
f17bcd1f 1305 switch (token)
a70083a3 1306 {
511e79b3 1307 case tok_semicolon:
a70083a3
AD
1308 return lex ();
1309
511e79b3 1310 case tok_comma:
a70083a3
AD
1311 break;
1312
511e79b3 1313 case tok_identifier:
a70083a3
AD
1314 if (symval->type_name == NULL)
1315 symval->type_name = name;
1316 else if (strcmp (name, symval->type_name) != 0)
1317 complain (_("type redeclaration for %s"), symval->tag);
1318
1319 break;
1320
1321 default:
f17bcd1f 1322 return token;
a70083a3
AD
1323 }
1324 }
1325}
1ff442ca 1326
a70083a3
AD
1327#endif
1328\f
1329/*------------------------------------------------------------------.
1330| Parse the input grammar into a one symbol_list structure. Each |
1331| rule is represented by a sequence of symbols: the left hand side |
1332| followed by the contents of the right hand side, followed by a |
1333| null pointer instead of a symbol to terminate the rule. The next |
1334| symbol is the lhs of the following rule. |
1335| |
1336| All guards and actions are copied out to the appropriate files, |
1337| labelled by the rule number they apply to. |
1338`------------------------------------------------------------------*/
1ff442ca 1339
4a120d45 1340static void
118fb205 1341readgram (void)
1ff442ca 1342{
f17bcd1f 1343 token_t t;
a70083a3
AD
1344 bucket *lhs = NULL;
1345 symbol_list *p;
1346 symbol_list *p1;
1347 bucket *bp;
1ff442ca 1348
ff4a34be
AD
1349 /* Points to first symbol_list of current rule. its symbol is the
1350 lhs of the rule. */
1351 symbol_list *crule;
1352 /* Points to the symbol_list preceding crule. */
1353 symbol_list *crule1;
1ff442ca
NF
1354
1355 p1 = NULL;
1356
a70083a3 1357 t = lex ();
1ff442ca 1358
511e79b3 1359 while (t != tok_two_percents && t != tok_eof)
1ff442ca 1360 {
511e79b3 1361 if (t == tok_identifier || t == tok_bar)
1ff442ca 1362 {
89cab50d 1363 int action_flag = 0;
ff4a34be
AD
1364 /* Number of symbols in rhs of this rule so far */
1365 int rulelength = 0;
1ff442ca
NF
1366 int xactions = 0; /* JF for error checking */
1367 bucket *first_rhs = 0;
1368
511e79b3 1369 if (t == tok_identifier)
1ff442ca
NF
1370 {
1371 lhs = symval;
943819bf
RS
1372
1373 if (!start_flag)
1374 {
1375 startval = lhs;
1376 start_flag = 1;
1377 }
a083fbbf 1378
a70083a3 1379 t = lex ();
511e79b3 1380 if (t != tok_colon)
943819bf 1381 {
a0f6b076 1382 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1383 unlex (t);
943819bf 1384 }
1ff442ca
NF
1385 }
1386
511e79b3 1387 if (nrules == 0 && t == tok_bar)
1ff442ca 1388 {
a0f6b076 1389 complain (_("grammar starts with vertical bar"));
943819bf 1390 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1391 }
1ff442ca
NF
1392 /* start a new rule and record its lhs. */
1393
1394 nrules++;
1395 nitems++;
1396
1397 record_rule_line ();
1398
d7913476 1399 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1400 p->sym = lhs;
1401
1402 crule1 = p1;
1403 if (p1)
1404 p1->next = p;
1405 else
1406 grammar = p;
1407
1408 p1 = p;
1409 crule = p;
1410
1411 /* mark the rule's lhs as a nonterminal if not already so. */
1412
d7020c20 1413 if (lhs->class == unknown_sym)
1ff442ca 1414 {
d7020c20 1415 lhs->class = nterm_sym;
1ff442ca
NF
1416 lhs->value = nvars;
1417 nvars++;
1418 }
d7020c20 1419 else if (lhs->class == token_sym)
a0f6b076 1420 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1421
1422 /* read the rhs of the rule. */
1423
1424 for (;;)
1425 {
a70083a3 1426 t = lex ();
511e79b3 1427 if (t == tok_prec)
943819bf 1428 {
a70083a3 1429 t = lex ();
943819bf 1430 crule->ruleprec = symval;
a70083a3 1431 t = lex ();
943819bf 1432 }
1ff442ca 1433
511e79b3 1434 if (!(t == tok_identifier || t == tok_left_curly))
a70083a3 1435 break;
1ff442ca
NF
1436
1437 /* If next token is an identifier, see if a colon follows it.
a70083a3 1438 If one does, exit this rule now. */
511e79b3 1439 if (t == tok_identifier)
1ff442ca 1440 {
a70083a3 1441 bucket *ssave;
f17bcd1f 1442 token_t t1;
1ff442ca
NF
1443
1444 ssave = symval;
a70083a3
AD
1445 t1 = lex ();
1446 unlex (t1);
1ff442ca 1447 symval = ssave;
511e79b3 1448 if (t1 == tok_colon)
a70083a3 1449 break;
1ff442ca 1450
a70083a3 1451 if (!first_rhs) /* JF */
1ff442ca
NF
1452 first_rhs = symval;
1453 /* Not followed by colon =>
1454 process as part of this rule's rhs. */
1455 }
1456
1457 /* If we just passed an action, that action was in the middle
a70083a3
AD
1458 of a rule, so make a dummy rule to reduce it to a
1459 non-terminal. */
89cab50d 1460 if (action_flag)
1ff442ca 1461 {
a70083a3 1462 bucket *sdummy;
1ff442ca 1463
f282676b
AD
1464 /* Since the action was written out with this rule's
1465 number, we must give the new rule this number by
1466 inserting the new rule before it. */
1ff442ca
NF
1467
1468 /* Make a dummy nonterminal, a gensym. */
a70083a3 1469 sdummy = gensym ();
1ff442ca
NF
1470
1471 /* Make a new rule, whose body is empty,
1472 before the current one, so that the action
1473 just read can belong to it. */
1474 nrules++;
1475 nitems++;
1476 record_rule_line ();
d7913476 1477 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1478 if (crule1)
1479 crule1->next = p;
a70083a3
AD
1480 else
1481 grammar = p;
1ff442ca 1482 p->sym = sdummy;
d7913476 1483 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1484 p->next = crule1;
1485 crule1->next = crule;
1486
f282676b
AD
1487 /* Insert the dummy generated by that rule into this
1488 rule. */
1ff442ca 1489 nitems++;
d7913476 1490 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1491 p->sym = sdummy;
1492 p1->next = p;
1493 p1 = p;
1494
89cab50d 1495 action_flag = 0;
1ff442ca
NF
1496 }
1497
511e79b3 1498 if (t == tok_identifier)
1ff442ca
NF
1499 {
1500 nitems++;
d7913476 1501 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1502 p->sym = symval;
1503 p1->next = p;
1504 p1 = p;
1505 }
a70083a3 1506 else /* handle an action. */
1ff442ca 1507 {
a70083a3 1508 copy_action (crule, rulelength);
89cab50d 1509 action_flag = 1;
1ff442ca
NF
1510 xactions++; /* JF */
1511 }
1512 rulelength++;
a70083a3 1513 } /* end of read rhs of rule */
1ff442ca
NF
1514
1515 /* Put an empty link in the list to mark the end of this rule */
d7913476 1516 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1517 p1->next = p;
1518 p1 = p;
1519
511e79b3 1520 if (t == tok_prec)
1ff442ca 1521 {
a0f6b076 1522 complain (_("two @prec's in a row"));
a70083a3 1523 t = lex ();
1ff442ca 1524 crule->ruleprec = symval;
a70083a3 1525 t = lex ();
1ff442ca 1526 }
511e79b3 1527 if (t == tok_guard)
1ff442ca 1528 {
a70083a3 1529 if (!semantic_parser)
ff4a34be 1530 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1531
a70083a3
AD
1532 copy_guard (crule, rulelength);
1533 t = lex ();
1ff442ca 1534 }
511e79b3 1535 else if (t == tok_left_curly)
1ff442ca 1536 {
a70083a3 1537 /* This case never occurs -wjh */
89cab50d 1538 if (action_flag)
a0f6b076 1539 complain (_("two actions at end of one rule"));
a70083a3 1540 copy_action (crule, rulelength);
89cab50d 1541 action_flag = 1;
943819bf 1542 xactions++; /* -wjh */
a70083a3 1543 t = lex ();
1ff442ca 1544 }
a0f6b076 1545 /* If $$ is being set in default way, report if any type
6666f98f
AD
1546 mismatch. */
1547 else if (!xactions
a70083a3 1548 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1549 {
6666f98f
AD
1550 if (lhs->type_name == 0
1551 || first_rhs->type_name == 0
a70083a3 1552 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1553 complain (_("type clash (`%s' `%s') on default action"),
1554 lhs->type_name ? lhs->type_name : "",
a70083a3 1555 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1556 }
1557 /* Warn if there is no default for $$ but we need one. */
1558 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1559 complain (_("empty rule for typed nonterminal, and no action"));
511e79b3 1560 if (t == tok_semicolon)
a70083a3 1561 t = lex ();
a083fbbf 1562 }
943819bf 1563#if 0
a70083a3 1564 /* these things can appear as alternatives to rules. */
943819bf
RS
1565/* NO, they cannot.
1566 a) none of the documentation allows them
1567 b) most of them scan forward until finding a next %
1568 thus they may swallow lots of intervening rules
1569*/
511e79b3 1570 else if (t == tok_token)
1ff442ca 1571 {
d7020c20 1572 parse_token_decl (token_sym, nterm_sym);
a70083a3 1573 t = lex ();
1ff442ca 1574 }
511e79b3 1575 else if (t == tok_nterm)
1ff442ca 1576 {
d7020c20 1577 parse_token_decl (nterm_sym, token_sym);
a70083a3 1578 t = lex ();
1ff442ca 1579 }
511e79b3 1580 else if (t == tok_type)
1ff442ca 1581 {
a70083a3 1582 t = get_type ();
1ff442ca 1583 }
511e79b3 1584 else if (t == tok_union)
1ff442ca 1585 {
a70083a3
AD
1586 parse_union_decl ();
1587 t = lex ();
1ff442ca 1588 }
511e79b3 1589 else if (t == tok_expect)
1ff442ca 1590 {
a70083a3
AD
1591 parse_expect_decl ();
1592 t = lex ();
1ff442ca 1593 }
511e79b3 1594 else if (t == tok_start)
1ff442ca 1595 {
a70083a3
AD
1596 parse_start_decl ();
1597 t = lex ();
1ff442ca 1598 }
943819bf
RS
1599#endif
1600
1ff442ca 1601 else
943819bf 1602 {
d01c415b 1603 complain (_("invalid input: %s"), quote (token_buffer));
a70083a3 1604 t = lex ();
943819bf 1605 }
1ff442ca
NF
1606 }
1607
943819bf
RS
1608 /* grammar has been read. Do some checking */
1609
1ff442ca 1610 if (nsyms > MAXSHORT)
a0f6b076
AD
1611 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1612 MAXSHORT);
1ff442ca 1613 if (nrules == 0)
a0f6b076 1614 fatal (_("no rules in the input grammar"));
1ff442ca 1615
1ff442ca
NF
1616 /* Report any undefined symbols and consider them nonterminals. */
1617
1618 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1619 if (bp->class == unknown_sym)
1ff442ca 1620 {
a70083a3
AD
1621 complain (_
1622 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1623 bp->tag);
d7020c20 1624 bp->class = nterm_sym;
1ff442ca
NF
1625 bp->value = nvars++;
1626 }
1627
1628 ntokens = nsyms - nvars;
1629}
ff48177d
MA
1630
1631/* At the end of the grammar file, some C source code must
63c2d5de 1632 be stored. It is going to be associated to the epilogue
ff48177d
MA
1633 directive. */
1634static void
1635read_additionnal_code (void)
1636{
1637 char c;
63c2d5de 1638 struct obstack el_obstack;
342b8b6e 1639
63c2d5de 1640 obstack_init (&el_obstack);
ff48177d
MA
1641
1642 while ((c = getc (finput)) != EOF)
63c2d5de 1643 obstack_1grow (&el_obstack, c);
342b8b6e 1644
63c2d5de 1645 obstack_1grow (&el_obstack, 0);
11d82f03 1646 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1647}
1648
a70083a3
AD
1649\f
1650/*--------------------------------------------------------------.
1651| For named tokens, but not literal ones, define the name. The |
1652| value is the user token number. |
1653`--------------------------------------------------------------*/
1ff442ca 1654
4a120d45 1655static void
896fe5c1 1656output_token_defines (struct obstack *oout)
1ff442ca 1657{
a70083a3
AD
1658 bucket *bp;
1659 char *cp, *symbol;
1660 char c;
1ff442ca 1661
a70083a3 1662 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1663 {
a70083a3
AD
1664 symbol = bp->tag; /* get symbol */
1665
1666 if (bp->value >= ntokens)
1667 continue;
1668 if (bp->user_token_number == SALIAS)
1669 continue;
1670 if ('\'' == *symbol)
1671 continue; /* skip literal character */
1672 if (bp == errtoken)
1673 continue; /* skip error token */
1674 if ('\"' == *symbol)
1ff442ca 1675 {
a70083a3
AD
1676 /* use literal string only if given a symbol with an alias */
1677 if (bp->alias)
1678 symbol = bp->alias->tag;
1679 else
1680 continue;
1681 }
1ff442ca 1682
a70083a3
AD
1683 /* Don't #define nonliteral tokens whose names contain periods. */
1684 cp = symbol;
1685 while ((c = *cp++) && c != '.');
1686 if (c != '\0')
1687 continue;
1ff442ca 1688
0b8afb77 1689 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1690 symbol, bp->user_token_number);
a70083a3 1691 if (semantic_parser)
342b8b6e
AD
1692 /* FIXME: This is certainly dead wrong, and should be just as
1693 above. --akim. */
0b8afb77 1694 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1695 }
1696}
1ff442ca
NF
1697
1698
037ca2f1
AD
1699/*------------------------------------------------------------------.
1700| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1701| number. |
1702`------------------------------------------------------------------*/
1703
1704static void
1705token_translations_init (void)
1706{
1707 bucket *bp = NULL;
1708 int i;
1709
1710 token_translations = XCALLOC (short, max_user_token_number + 1);
1711
1712 /* Initialize all entries for literal tokens to 2, the internal
1713 token number for $undefined., which represents all invalid
1714 inputs. */
1715 for (i = 0; i <= max_user_token_number; i++)
1716 token_translations[i] = 2;
1717
1718 for (bp = firstsymbol; bp; bp = bp->next)
1719 {
1720 /* Non-terminal? */
1721 if (bp->value >= ntokens)
1722 continue;
1723 /* A token string alias? */
1724 if (bp->user_token_number == SALIAS)
1725 continue;
6b7e85b9
AD
1726
1727 assert (bp->user_token_number != SUNDEF);
1728
037ca2f1
AD
1729 /* A token which translation has already been set? */
1730 if (token_translations[bp->user_token_number] != 2)
1731 complain (_("tokens %s and %s both assigned number %d"),
1732 tags[token_translations[bp->user_token_number]],
1733 bp->tag, bp->user_token_number);
1734 token_translations[bp->user_token_number] = bp->value;
1735 }
1736}
1737
1738
a70083a3
AD
1739/*------------------------------------------------------------------.
1740| Assign symbol numbers, and write definition of token names into |
b2ca4022 1741| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1742| of symbols. |
1743`------------------------------------------------------------------*/
1ff442ca 1744
4a120d45 1745static void
118fb205 1746packsymbols (void)
1ff442ca 1747{
342b8b6e 1748 bucket *bp = NULL;
a70083a3 1749 int tokno = 1;
a70083a3 1750 int last_user_token_number;
4a120d45 1751 static char DOLLAR[] = "$";
1ff442ca 1752
d7913476 1753 tags = XCALLOC (char *, nsyms + 1);
d7913476 1754 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1755
d7913476
AD
1756 sprec = XCALLOC (short, nsyms);
1757 sassoc = XCALLOC (short, nsyms);
1ff442ca 1758
037ca2f1
AD
1759 /* The EOF token. */
1760 tags[0] = DOLLAR;
1761 user_toknums[0] = 0;
1762
1ff442ca
NF
1763 max_user_token_number = 256;
1764 last_user_token_number = 256;
1765
1766 for (bp = firstsymbol; bp; bp = bp->next)
1767 {
d7020c20 1768 if (bp->class == nterm_sym)
1ff442ca
NF
1769 {
1770 bp->value += ntokens;
1771 }
943819bf
RS
1772 else if (bp->alias)
1773 {
0a6384c4
AD
1774 /* this symbol and its alias are a single token defn.
1775 allocate a tokno, and assign to both check agreement of
1776 ->prec and ->assoc fields and make both the same */
1777 if (bp->value == 0)
1778 bp->value = bp->alias->value = tokno++;
943819bf 1779
0a6384c4
AD
1780 if (bp->prec != bp->alias->prec)
1781 {
1782 if (bp->prec != 0 && bp->alias->prec != 0
1783 && bp->user_token_number == SALIAS)
a0f6b076
AD
1784 complain (_("conflicting precedences for %s and %s"),
1785 bp->tag, bp->alias->tag);
0a6384c4
AD
1786 if (bp->prec != 0)
1787 bp->alias->prec = bp->prec;
1788 else
1789 bp->prec = bp->alias->prec;
1790 }
943819bf 1791
0a6384c4
AD
1792 if (bp->assoc != bp->alias->assoc)
1793 {
a0f6b076
AD
1794 if (bp->assoc != 0 && bp->alias->assoc != 0
1795 && bp->user_token_number == SALIAS)
1796 complain (_("conflicting assoc values for %s and %s"),
1797 bp->tag, bp->alias->tag);
1798 if (bp->assoc != 0)
1799 bp->alias->assoc = bp->assoc;
1800 else
1801 bp->assoc = bp->alias->assoc;
1802 }
0a6384c4
AD
1803
1804 if (bp->user_token_number == SALIAS)
a70083a3 1805 continue; /* do not do processing below for SALIASs */
943819bf 1806
a70083a3 1807 }
d7020c20 1808 else /* bp->class == token_sym */
943819bf
RS
1809 {
1810 bp->value = tokno++;
1811 }
1812
d7020c20 1813 if (bp->class == token_sym)
1ff442ca 1814 {
6b7e85b9 1815 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1816 bp->user_token_number = ++last_user_token_number;
1817 if (bp->user_token_number > max_user_token_number)
1818 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1819 }
1820
1821 tags[bp->value] = bp->tag;
943819bf 1822 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1823 sprec[bp->value] = bp->prec;
1824 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1825 }
1826
037ca2f1 1827 token_translations_init ();
1ff442ca
NF
1828
1829 error_token_number = errtoken->value;
1830
342b8b6e
AD
1831 {
1832 struct obstack tokendefs;
1833 obstack_init (&tokendefs);
1834 output_token_defines (&tokendefs);
1835 obstack_1grow (&tokendefs, 0);
1836 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1837 obstack_free (&tokendefs, NULL);
1838 }
b6610515 1839
d8cb5183
MA
1840#if 0
1841 if (!no_parser_flag)
1842 output_token_defines (&table_obstack);
1843#endif
1ff442ca 1844
d7020c20 1845 if (startval->class == unknown_sym)
a0f6b076 1846 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1847 else if (startval->class == token_sym)
a0f6b076 1848 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1849
1850 start_symbol = startval->value;
1851
89cab50d 1852 if (defines_flag)
1ff442ca 1853 {
896fe5c1 1854 output_token_defines (&defines_obstack);
1ff442ca
NF
1855
1856 if (!pure_parser)
1857 {
1858 if (spec_name_prefix)
896fe5c1
AD
1859 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1860 spec_name_prefix);
1ff442ca 1861 else
ff4423cc 1862 obstack_sgrow (&defines_obstack,
573c1d9f 1863 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1864 }
1865
1866 if (semantic_parser)
037ca2f1
AD
1867 {
1868 int i;
1869
1870 for (i = ntokens; i < nsyms; i++)
1871 {
1872 /* don't make these for dummy nonterminals made by gensym. */
1873 if (*tags[i] != '@')
1874 obstack_fgrow2 (&defines_obstack,
1875 "# define\tNT%s\t%d\n", tags[i], i);
1876 }
1ff442ca 1877#if 0
037ca2f1
AD
1878 /* `fdefines' is now a temporary file, so we need to copy its
1879 contents in `done', so we can't close it here. */
1880 fclose (fdefines);
1881 fdefines = NULL;
1ff442ca 1882#endif
037ca2f1 1883 }
1ff442ca
NF
1884 }
1885}
a083fbbf 1886
1ff442ca 1887
a70083a3
AD
1888/*---------------------------------------------------------------.
1889| Convert the rules into the representation using RRHS, RLHS and |
1890| RITEMS. |
1891`---------------------------------------------------------------*/
1ff442ca 1892
4a120d45 1893static void
118fb205 1894packgram (void)
1ff442ca 1895{
a70083a3
AD
1896 int itemno;
1897 int ruleno;
1898 symbol_list *p;
1ff442ca
NF
1899
1900 bucket *ruleprec;
1901
d7913476
AD
1902 ritem = XCALLOC (short, nitems + 1);
1903 rlhs = XCALLOC (short, nrules) - 1;
1904 rrhs = XCALLOC (short, nrules) - 1;
1905 rprec = XCALLOC (short, nrules) - 1;
1906 rprecsym = XCALLOC (short, nrules) - 1;
1907 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1908
1909 itemno = 0;
1910 ruleno = 1;
1911
1912 p = grammar;
1913 while (p)
1914 {
1915 rlhs[ruleno] = p->sym->value;
1916 rrhs[ruleno] = itemno;
1917 ruleprec = p->ruleprec;
1918
1919 p = p->next;
1920 while (p && p->sym)
1921 {
1922 ritem[itemno++] = p->sym->value;
1923 /* A rule gets by default the precedence and associativity
1924 of the last token in it. */
d7020c20 1925 if (p->sym->class == token_sym)
1ff442ca
NF
1926 {
1927 rprec[ruleno] = p->sym->prec;
1928 rassoc[ruleno] = p->sym->assoc;
1929 }
a70083a3
AD
1930 if (p)
1931 p = p->next;
1ff442ca
NF
1932 }
1933
1934 /* If this rule has a %prec,
a70083a3 1935 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1936 if (ruleprec)
1937 {
a70083a3
AD
1938 rprec[ruleno] = ruleprec->prec;
1939 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1940 rprecsym[ruleno] = ruleprec->value;
1941 }
1942
1943 ritem[itemno++] = -ruleno;
1944 ruleno++;
1945
a70083a3
AD
1946 if (p)
1947 p = p->next;
1ff442ca
NF
1948 }
1949
1950 ritem[itemno] = 0;
1951}
a70083a3
AD
1952\f
1953/*-------------------------------------------------------------------.
1954| Read in the grammar specification and record it in the format |
ea5607fd 1955| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1956| and all actions into ACTION_OBSTACK, in each case forming the body |
1957| of a C function (YYGUARD or YYACTION) which contains a switch |
1958| statement to decide which guard or action to execute. |
a70083a3
AD
1959`-------------------------------------------------------------------*/
1960
1961void
1962reader (void)
1963{
1964 start_flag = 0;
1965 startval = NULL; /* start symbol not specified yet. */
1966
a70083a3
AD
1967 nsyms = 1;
1968 nvars = 0;
1969 nrules = 0;
1970 nitems = 0;
1971 rline_allocated = 10;
d7913476 1972 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1973
1974 typed = 0;
1975 lastprec = 0;
1976
a70083a3
AD
1977 semantic_parser = 0;
1978 pure_parser = 0;
a70083a3
AD
1979
1980 grammar = NULL;
1981
342b8b6e 1982 lex_init ();
a70083a3
AD
1983 lineno = 1;
1984
11d82f03
MA
1985 /* Initialize the muscle obstack. */
1986 obstack_init (&muscle_obstack);
82e236e2 1987
a70083a3
AD
1988 /* Initialize the symbol table. */
1989 tabinit ();
b6610515 1990
a70083a3
AD
1991 /* Construct the error token */
1992 errtoken = getsym ("error");
d7020c20 1993 errtoken->class = token_sym;
a70083a3 1994 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1995
a70083a3
AD
1996 /* Construct a token that represents all undefined literal tokens.
1997 It is always token number 2. */
1998 undeftoken = getsym ("$undefined.");
d7020c20 1999 undeftoken->class = token_sym;
a70083a3
AD
2000 undeftoken->user_token_number = 2;
2001
896fe5c1
AD
2002 /* Read the declaration section. Copy %{ ... %} groups to
2003 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2004 etc. found there. */
a70083a3 2005 read_declarations ();
a70083a3
AD
2006 /* Read in the grammar, build grammar in list form. Write out
2007 guards and actions. */
2008 readgram ();
ff48177d
MA
2009 /* Some C code is given at the end of the grammar file. */
2010 read_additionnal_code ();
b0c4483e 2011
a70083a3 2012 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
2013 write its type into the .tab.h file.
2014 This is no longer need with header skeleton. */
2015
a70083a3
AD
2016 /* Assign the symbols their symbol numbers. Write #defines for the
2017 token symbols into FDEFINES if requested. */
2018 packsymbols ();
2019 /* Convert the grammar into the format described in gram.h. */
2020 packgram ();
a70083a3 2021}