]> git.saurik.com Git - bison.git/blame - src/reader.c
Update
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
8c7ebe49 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "macrotab.h"
1ff442ca 38
1ff442ca 39/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 40static int rline_allocated;
1ff442ca 41
a70083a3
AD
42typedef struct symbol_list
43{
44 struct symbol_list *next;
45 bucket *sym;
46 bucket *ruleprec;
47}
48symbol_list;
118fb205 49
1ff442ca 50int lineno;
1ff442ca 51char **tags;
d019d655 52short *user_toknums;
4a120d45
JT
53static symbol_list *grammar;
54static int start_flag;
55static bucket *startval;
1ff442ca
NF
56
57/* Nonzero if components of semantic values are used, implying
58 they must be unions. */
59static int value_components_used;
60
d7020c20
AD
61/* Nonzero if %union has been seen. */
62static int typed;
1ff442ca 63
d7020c20
AD
64/* Incremented for each %left, %right or %nonassoc seen */
65static int lastprec;
1ff442ca 66
1ff442ca 67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
0d533154 69\f
a70083a3 70
0d533154
AD
71/*===================\
72| Low level lexing. |
73\===================*/
943819bf
RS
74
75static void
118fb205 76skip_to_char (int target)
943819bf
RS
77{
78 int c;
79 if (target == '\n')
a0f6b076 80 complain (_(" Skipping to next \\n"));
943819bf 81 else
a0f6b076 82 complain (_(" Skipping to next %c"), target);
943819bf
RS
83
84 do
0d533154 85 c = skip_white_space ();
943819bf 86 while (c != target && c != EOF);
a083fbbf 87 if (c != EOF)
0d533154 88 ungetc (c, finput);
943819bf
RS
89}
90
91
0d533154
AD
92/*---------------------------------------------------------.
93| Read a signed integer from STREAM and return its value. |
94`---------------------------------------------------------*/
95
/* Read an optionally `-'-prefixed run of decimal digits from STREAM
   and return its value.  The first character that is not part of the
   number is pushed back on STREAM.  If no digit is present the result
   is 0.  A magnitude too large for an int saturates at INT_MAX instead
   of overflowing (signed overflow is undefined behaviour).  */
static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit fits in an int iff n <= (INT_MAX - digit) / 10.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* ungetc (EOF, ...) is a no-op, so no special case is needed.  */
  ungetc (c, stream);

  return sign * n;
}
119\f
79282c5a
AD
120/*--------------------------------------------------------------.
121| Get the data type (alternative in the union) of the value for |
122| symbol N in rule RULE. |
123`--------------------------------------------------------------*/
124
125static char *
126get_type_name (int n, symbol_list * rule)
127{
128 int i;
129 symbol_list *rp;
130
131 if (n < 0)
132 {
133 complain (_("invalid $ value"));
134 return NULL;
135 }
136
137 rp = rule;
138 i = 0;
139
140 while (i < n)
141 {
142 rp = rp->next;
143 if (rp == NULL || rp->sym == NULL)
144 {
145 complain (_("invalid $ value"));
146 return NULL;
147 }
148 i++;
149 }
150
151 return rp->sym->type_name;
152}
153\f
337bab46
AD
154/*------------------------------------------------------------.
155| Copy the string from FIN to OOUT. MATCH is the delimiter of the |
156| string (either ' or "); it is copied too only if STORE is nonzero. |
157`------------------------------------------------------------*/
ae3c3164
AD
158
159static inline void
b6610515 160copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
161{
162 int c;
163
b6610515
RA
164 if (store)
165 obstack_1grow (oout, match);
8c7ebe49 166
4a120d45 167 c = getc (fin);
ae3c3164
AD
168
169 while (c != match)
170 {
171 if (c == EOF)
172 fatal (_("unterminated string at end of file"));
173 if (c == '\n')
174 {
a0f6b076 175 complain (_("unterminated string"));
4a120d45 176 ungetc (c, fin);
ae3c3164
AD
177 c = match; /* invent terminator */
178 continue;
179 }
180
337bab46 181 obstack_1grow (oout, c);
ae3c3164
AD
182
183 if (c == '\\')
184 {
4a120d45 185 c = getc (fin);
ae3c3164
AD
186 if (c == EOF)
187 fatal (_("unterminated string at end of file"));
337bab46 188 obstack_1grow (oout, c);
8c7ebe49 189
ae3c3164
AD
190 if (c == '\n')
191 lineno++;
192 }
193
a70083a3 194 c = getc (fin);
ae3c3164
AD
195 }
196
b6610515
RA
197 if (store)
198 obstack_1grow (oout, c);
199}
200
201/* FIXME. */
202
/* Copy a string from FIN to OOUT, delimiters included: this is
   copy_string2 with its STORE argument set to 1.  */
static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
208
b6610515
RA
209/* FIXME. */
210
/* Append to OOUT the longest run of alphanumerics and underscores
   readable from FIN.  The character that ends the run is pushed back
   on FIN.  */
static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch;

  for (ch = getc (fin); isalnum (ch) || ch == '_'; ch = getc (fin))
    obstack_1grow (oout, ch);

  ungetc (ch, fin);
}
ae3c3164 221
337bab46
AD
222/*-----------------------------------------------------------------.
223| Dump the would-be comment from FIN to OOUT1 and OOUT2 (which can be |
224| NULL). In fact we just saw a `/', which might or might not be a |
225| comment. In any case, copy what we saw. |
226| |
227| OUT2 might be NULL. |
228`-----------------------------------------------------------------*/
ae3c3164
AD
229
230static inline void
337bab46 231copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
232{
233 int cplus_comment;
a70083a3 234 int ended;
550a72a3
AD
235 int c;
236
237 /* We read a `/', output it. */
337bab46 238 obstack_1grow (oout1, '/');
896fe5c1
AD
239 if (oout2)
240 obstack_1grow (oout2, '/');
550a72a3
AD
241
242 switch ((c = getc (fin)))
243 {
244 case '/':
245 cplus_comment = 1;
246 break;
247 case '*':
248 cplus_comment = 0;
249 break;
250 default:
251 ungetc (c, fin);
252 return;
253 }
ae3c3164 254
337bab46 255 obstack_1grow (oout1, c);
896fe5c1
AD
256 if (oout2)
257 obstack_1grow (oout2, c);
550a72a3 258 c = getc (fin);
ae3c3164
AD
259
260 ended = 0;
261 while (!ended)
262 {
263 if (!cplus_comment && c == '*')
264 {
265 while (c == '*')
266 {
337bab46 267 obstack_1grow (oout1, c);
896fe5c1
AD
268 if (oout2)
269 obstack_1grow (oout2, c);
550a72a3 270 c = getc (fin);
ae3c3164
AD
271 }
272
273 if (c == '/')
274 {
337bab46 275 obstack_1grow (oout1, c);
896fe5c1
AD
276 if (oout2)
277 obstack_1grow (oout2, c);
ae3c3164
AD
278 ended = 1;
279 }
280 }
281 else if (c == '\n')
282 {
283 lineno++;
337bab46 284 obstack_1grow (oout1, c);
896fe5c1
AD
285 if (oout2)
286 obstack_1grow (oout2, c);
ae3c3164
AD
287 if (cplus_comment)
288 ended = 1;
289 else
550a72a3 290 c = getc (fin);
ae3c3164
AD
291 }
292 else if (c == EOF)
293 fatal (_("unterminated comment"));
294 else
295 {
337bab46 296 obstack_1grow (oout1, c);
896fe5c1
AD
297 if (oout2)
298 obstack_1grow (oout2, c);
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 }
302}
303
304
550a72a3
AD
305/*-------------------------------------------------------------------.
306| Dump the comment (actually the current string starting with a `/') |
337bab46 307| from FIN to OOUT. |
550a72a3 308`-------------------------------------------------------------------*/
27821bff
AD
309
/* Single-destination form of copy_comment2: the text starting at the
   `/' just read from FIN goes to OOUT only.  */
static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *no_second_output = NULL;

  copy_comment2 (fin, oout, no_second_output);
}
315
316
a70083a3 317/*-----------------------------------------------------------------.
337bab46 318| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
319| reference to this location. STACK_OFFSET is the number of values |
320| in the current rule so far, which says where to find `$0' with |
321| respect to the top of the stack. |
322`-----------------------------------------------------------------*/
1ff442ca 323
a70083a3 324static inline void
337bab46 325copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 326{
a70083a3 327 int c;
1ff442ca 328
a70083a3
AD
329 c = getc (fin);
330 if (c == '$')
1ff442ca 331 {
ff4423cc 332 obstack_sgrow (oout, "yyloc");
89cab50d 333 locations_flag = 1;
a70083a3
AD
334 }
335 else if (isdigit (c) || c == '-')
336 {
337 int n;
1ff442ca 338
a70083a3
AD
339 ungetc (c, fin);
340 n = read_signed_integer (fin);
943819bf 341
337bab46 342 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 343 locations_flag = 1;
1ff442ca 344 }
a70083a3 345 else
ff4a34be
AD
346 {
347 char buf[] = "@c";
348 buf[1] = c;
349 complain (_("%s is invalid"), quote (buf));
350 }
1ff442ca 351}
79282c5a
AD
352
353
354/*-------------------------------------------------------------------.
355| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
356| |
357| Possible inputs: $[<TYPENAME>]($|integer) |
358| |
337bab46 359| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
360| the number of values in the current rule so far, which says where |
361| to find `$0' with respect to the top of the stack. |
362`-------------------------------------------------------------------*/
363
364static inline void
337bab46 365copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
366 symbol_list *rule, int stack_offset)
367{
368 int c = getc (fin);
b0ce6046 369 const char *type_name = NULL;
79282c5a 370
f282676b 371 /* Get the type name if explicit. */
79282c5a
AD
372 if (c == '<')
373 {
f282676b 374 read_type_name (fin);
79282c5a
AD
375 type_name = token_buffer;
376 value_components_used = 1;
79282c5a
AD
377 c = getc (fin);
378 }
379
380 if (c == '$')
381 {
ff4423cc 382 obstack_sgrow (oout, "yyval");
8c7ebe49 383
79282c5a
AD
384 if (!type_name)
385 type_name = get_type_name (0, rule);
386 if (type_name)
337bab46 387 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
388 if (!type_name && typed)
389 complain (_("$$ of `%s' has no declared type"),
390 rule->sym->tag);
391 }
392 else if (isdigit (c) || c == '-')
393 {
394 int n;
395 ungetc (c, fin);
396 n = read_signed_integer (fin);
397
398 if (!type_name && n > 0)
399 type_name = get_type_name (n, rule);
400
337bab46 401 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 402
79282c5a 403 if (type_name)
337bab46 404 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
405 if (!type_name && typed)
406 complain (_("$%d of `%s' has no declared type"),
407 n, rule->sym->tag);
408 }
409 else
410 {
411 char buf[] = "$c";
412 buf[1] = c;
413 complain (_("%s is invalid"), quote (buf));
414 }
415}
a70083a3
AD
416\f
417/*-------------------------------------------------------------------.
418| Copy the contents of a `%{ ... %}' into the definitions file. The |
419| `%{' has already been read. Return after reading the `%}'. |
420`-------------------------------------------------------------------*/
1ff442ca 421
4a120d45 422static void
118fb205 423copy_definition (void)
1ff442ca 424{
a70083a3 425 int c;
ae3c3164 426 /* -1 while reading a character if prev char was %. */
a70083a3 427 int after_percent;
1ff442ca 428
b6610515 429#if 0
89cab50d 430 if (!no_lines_flag)
2a91a95e
AD
431 obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
432 lineno, quotearg_style (c_quoting_style, infile));
b6610515 433#endif
1ff442ca
NF
434
435 after_percent = 0;
436
ae3c3164 437 c = getc (finput);
1ff442ca
NF
438
439 for (;;)
440 {
441 switch (c)
442 {
443 case '\n':
dd60faec 444 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
445 lineno++;
446 break;
447
448 case '%':
a70083a3 449 after_percent = -1;
1ff442ca 450 break;
a083fbbf 451
1ff442ca
NF
452 case '\'':
453 case '"':
337bab46 454 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
455 break;
456
457 case '/':
337bab46 458 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
459 break;
460
461 case EOF:
a70083a3 462 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
463
464 default:
dd60faec 465 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
466 }
467
a70083a3 468 c = getc (finput);
1ff442ca
NF
469
470 if (after_percent)
471 {
472 if (c == '}')
473 return;
dd60faec 474 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
475 }
476 after_percent = 0;
1ff442ca 477 }
1ff442ca
NF
478}
479
480
d7020c20
AD
481/*-------------------------------------------------------------------.
482| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
483| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
484| are reversed. |
485`-------------------------------------------------------------------*/
1ff442ca 486
4a120d45 487static void
d7020c20 488parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 489{
f17bcd1f 490 token_t token = 0;
a70083a3 491 char *typename = 0;
1ff442ca 492
1e9798d5
AD
493 /* The symbol being defined. */
494 struct bucket *symbol = NULL;
495
496 /* After `%token' and `%nterm', any number of symbols maybe be
497 defined. */
1ff442ca
NF
498 for (;;)
499 {
e6011337
JT
500 int tmp_char = ungetc (skip_white_space (), finput);
501
1e9798d5
AD
502 /* `%' (for instance from `%token', or from `%%' etc.) is the
503 only valid means to end this declaration. */
e6011337 504 if (tmp_char == '%')
1ff442ca 505 return;
e6011337 506 if (tmp_char == EOF)
a0f6b076 507 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 508
a70083a3 509 token = lex ();
511e79b3 510 if (token == tok_comma)
943819bf
RS
511 {
512 symbol = NULL;
513 continue;
514 }
511e79b3 515 if (token == tok_typename)
1ff442ca 516 {
95e36146 517 typename = xstrdup (token_buffer);
1ff442ca 518 value_components_used = 1;
943819bf
RS
519 symbol = NULL;
520 }
511e79b3 521 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 522 {
8e03724b
AD
523 if (symval->alias)
524 warn (_("symbol `%s' used more than once as a literal string"),
525 symval->tag);
526 else if (symbol->alias)
527 warn (_("symbol `%s' given more than one literal string"),
528 symbol->tag);
529 else
530 {
531 symval->class = token_sym;
532 symval->type_name = typename;
533 symval->user_token_number = symbol->user_token_number;
534 symbol->user_token_number = SALIAS;
535 symval->alias = symbol;
536 symbol->alias = symval;
537 /* symbol and symval combined are only one symbol */
538 nsyms--;
539 }
943819bf 540 translations = 1;
8e03724b 541 symbol = NULL;
1ff442ca 542 }
511e79b3 543 else if (token == tok_identifier)
1ff442ca
NF
544 {
545 int oldclass = symval->class;
943819bf 546 symbol = symval;
1ff442ca 547
943819bf 548 if (symbol->class == what_is_not)
a0f6b076 549 complain (_("symbol %s redefined"), symbol->tag);
943819bf 550 symbol->class = what_is;
d7020c20 551 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 552 symbol->value = nvars++;
1ff442ca
NF
553
554 if (typename)
555 {
943819bf
RS
556 if (symbol->type_name == NULL)
557 symbol->type_name = typename;
a70083a3 558 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 559 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
560 }
561 }
511e79b3 562 else if (symbol && token == tok_number)
a70083a3 563 {
943819bf 564 symbol->user_token_number = numval;
1ff442ca 565 translations = 1;
a70083a3 566 }
1ff442ca 567 else
943819bf 568 {
a0f6b076 569 complain (_("`%s' is invalid in %s"),
d7020c20 570 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 571 skip_to_char ('%');
943819bf 572 }
1ff442ca
NF
573 }
574
575}
576
1ff442ca 577
d7020c20
AD
578/*------------------------------.
579| Parse what comes after %start |
580`------------------------------*/
1ff442ca 581
4a120d45 582static void
118fb205 583parse_start_decl (void)
1ff442ca
NF
584{
585 if (start_flag)
27821bff 586 complain (_("multiple %s declarations"), "%start");
511e79b3 587 if (lex () != tok_identifier)
27821bff 588 complain (_("invalid %s declaration"), "%start");
943819bf
RS
589 else
590 {
591 start_flag = 1;
592 startval = symval;
593 }
1ff442ca
NF
594}
595
a70083a3
AD
596/*-----------------------------------------------------------.
597| read in a %type declaration and record its information for |
598| get_type_name to access |
599`-----------------------------------------------------------*/
600
601static void
602parse_type_decl (void)
603{
a70083a3
AD
604 char *name;
605
511e79b3 606 if (lex () != tok_typename)
a70083a3
AD
607 {
608 complain ("%s", _("%type declaration has no <typename>"));
609 skip_to_char ('%');
610 return;
611 }
612
95e36146 613 name = xstrdup (token_buffer);
a70083a3
AD
614
615 for (;;)
616 {
f17bcd1f 617 token_t t;
a70083a3
AD
618 int tmp_char = ungetc (skip_white_space (), finput);
619
620 if (tmp_char == '%')
621 return;
622 if (tmp_char == EOF)
623 fatal (_("Premature EOF after %s"), token_buffer);
624
625 t = lex ();
626
627 switch (t)
1ff442ca
NF
628 {
629
511e79b3
AD
630 case tok_comma:
631 case tok_semicolon:
1ff442ca
NF
632 break;
633
511e79b3 634 case tok_identifier:
1ff442ca
NF
635 if (symval->type_name == NULL)
636 symval->type_name = name;
a70083a3 637 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 638 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
639
640 break;
641
642 default:
a0f6b076
AD
643 complain (_("invalid %%type declaration due to item: %s"),
644 token_buffer);
a70083a3 645 skip_to_char ('%');
1ff442ca
NF
646 }
647 }
648}
649
650
651
d7020c20
AD
652/*----------------------------------------------------------------.
653| Read in a %left, %right or %nonassoc declaration and record its |
654| information. |
655`----------------------------------------------------------------*/
1ff442ca 656
4a120d45 657static void
d7020c20 658parse_assoc_decl (associativity assoc)
1ff442ca 659{
a70083a3
AD
660 char *name = NULL;
661 int prev = 0;
1ff442ca 662
a70083a3 663 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 664
1ff442ca
NF
665 for (;;)
666 {
f17bcd1f 667 token_t t;
e6011337 668 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 669
e6011337 670 if (tmp_char == '%')
1ff442ca 671 return;
e6011337 672 if (tmp_char == EOF)
a0f6b076 673 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 674
a70083a3 675 t = lex ();
1ff442ca
NF
676
677 switch (t)
678 {
511e79b3 679 case tok_typename:
95e36146 680 name = xstrdup (token_buffer);
1ff442ca
NF
681 break;
682
511e79b3 683 case tok_comma:
1ff442ca
NF
684 break;
685
511e79b3 686 case tok_identifier:
1ff442ca 687 if (symval->prec != 0)
a0f6b076 688 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
689 symval->prec = lastprec;
690 symval->assoc = assoc;
d7020c20 691 if (symval->class == nterm_sym)
a0f6b076 692 complain (_("symbol %s redefined"), symval->tag);
d7020c20 693 symval->class = token_sym;
1ff442ca 694 if (name)
a70083a3 695 { /* record the type, if one is specified */
1ff442ca
NF
696 if (symval->type_name == NULL)
697 symval->type_name = name;
a70083a3 698 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 699 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
700 }
701 break;
702
511e79b3
AD
703 case tok_number:
704 if (prev == tok_identifier)
a70083a3 705 {
1ff442ca
NF
706 symval->user_token_number = numval;
707 translations = 1;
a70083a3
AD
708 }
709 else
710 {
711 complain (_
712 ("invalid text (%s) - number should be after identifier"),
713token_buffer);
714 skip_to_char ('%');
715 }
1ff442ca
NF
716 break;
717
511e79b3 718 case tok_semicolon:
1ff442ca
NF
719 return;
720
721 default:
a0f6b076 722 complain (_("unexpected item: %s"), token_buffer);
a70083a3 723 skip_to_char ('%');
1ff442ca
NF
724 }
725
726 prev = t;
727
728 }
729}
730
731
732
dd60faec
AD
733/*--------------------------------------------------------------.
734| Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
735| where it is made into the definition of YYSTYPE, the type of |
736| elements of the parser value stack. |
737`--------------------------------------------------------------*/
1ff442ca 738
4a120d45 739static void
118fb205 740parse_union_decl (void)
1ff442ca 741{
a70083a3
AD
742 int c;
743 int count = 0;
1ff442ca
NF
744
745 if (typed)
27821bff 746 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
747
748 typed = 1;
749
89cab50d 750 if (!no_lines_flag)
2a91a95e 751 obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
dda680cb
PB
752 lineno, quotearg_style (c_quoting_style,
753 macro_find("filename")));
1ff442ca 754 else
dd60faec 755 obstack_1grow (&attrs_obstack, '\n');
1ff442ca 756
ff4423cc 757 obstack_sgrow (&attrs_obstack, "typedef union");
896fe5c1 758 if (defines_flag)
ff4423cc 759 obstack_sgrow (&defines_obstack, "typedef union");
1ff442ca 760
27821bff 761 c = getc (finput);
1ff442ca
NF
762
763 while (c != EOF)
764 {
dd60faec 765 obstack_1grow (&attrs_obstack, c);
896fe5c1 766 if (defines_flag)
d7045ec6 767 obstack_1grow (&defines_obstack, c);
1ff442ca
NF
768
769 switch (c)
770 {
771 case '\n':
772 lineno++;
773 break;
774
775 case '/':
337bab46 776 copy_comment2 (finput, &defines_obstack, &attrs_obstack);
1ff442ca
NF
777 break;
778
1ff442ca
NF
779 case '{':
780 count++;
781 break;
782
783 case '}':
784 if (count == 0)
27821bff 785 complain (_("unmatched %s"), "`}'");
1ff442ca 786 count--;
943819bf 787 if (count <= 0)
1ff442ca 788 {
ff4423cc 789 obstack_sgrow (&attrs_obstack, " YYSTYPE;\n");
896fe5c1 790 if (defines_flag)
ff4423cc 791 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
1ff442ca 792 /* JF don't choke on trailing semi */
27821bff
AD
793 c = skip_white_space ();
794 if (c != ';')
a70083a3 795 ungetc (c, finput);
1ff442ca
NF
796 return;
797 }
798 }
799
27821bff 800 c = getc (finput);
1ff442ca
NF
801 }
802}
803
d7020c20
AD
804
805/*-------------------------------------------------------.
806| Parse the declaration %expect N which says to expect N |
807| shift-reduce conflicts. |
808`-------------------------------------------------------*/
1ff442ca 809
4a120d45 810static void
118fb205 811parse_expect_decl (void)
1ff442ca 812{
131e2fef 813 int c = skip_white_space ();
1ff442ca
NF
814 ungetc (c, finput);
815
131e2fef 816 if (!isdigit (c))
79282c5a 817 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
818 else
819 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
820}
821
a70083a3
AD
822
823/*-------------------------------------------------------------------.
824| Parse what comes after %thong. the full syntax is |
825| |
826| %thong <type> token number literal |
827| |
828| the <type> or number may be omitted. The number specifies the |
829| user_token_number. |
830| |
831| Two symbols are entered in the table, one for the token symbol and |
832| one for the literal. Both are given the <type>, if any, from the |
833| declaration. The ->user_token_number of the first is SALIAS and |
834| the ->user_token_number of the second is set to the number, if |
835| any, from the declaration. The two symbols are linked via |
836| pointers in their ->alias fields. |
837| |
838| During OUTPUT_DEFINES_TABLE, the symbol is reported; thereafter, |
839| only the literal string is retained: it is the literal string that |
840| is output to yytname. |
841`-------------------------------------------------------------------*/
842
843static void
844parse_thong_decl (void)
7b306f52 845{
f17bcd1f 846 token_t token;
a70083a3
AD
847 struct bucket *symbol;
848 char *typename = 0;
95e36146 849 int usrtoknum;
7b306f52 850
a70083a3
AD
851 translations = 1;
852 token = lex (); /* fetch typename or first token */
511e79b3 853 if (token == tok_typename)
7b306f52 854 {
95e36146 855 typename = xstrdup (token_buffer);
a70083a3
AD
856 value_components_used = 1;
857 token = lex (); /* fetch first token */
7b306f52 858 }
7b306f52 859
a70083a3 860 /* process first token */
7b306f52 861
511e79b3 862 if (token != tok_identifier)
a70083a3
AD
863 {
864 complain (_("unrecognized item %s, expected an identifier"),
865 token_buffer);
866 skip_to_char ('%');
867 return;
7b306f52 868 }
d7020c20 869 symval->class = token_sym;
a70083a3
AD
870 symval->type_name = typename;
871 symval->user_token_number = SALIAS;
872 symbol = symval;
7b306f52 873
a70083a3 874 token = lex (); /* get number or literal string */
1ff442ca 875
511e79b3 876 if (token == tok_number)
943819bf 877 {
a70083a3
AD
878 usrtoknum = numval;
879 token = lex (); /* okay, did number, now get literal */
943819bf 880 }
a70083a3
AD
881 else
882 usrtoknum = 0;
1ff442ca 883
a70083a3 884 /* process literal string token */
1ff442ca 885
511e79b3 886 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 887 {
a70083a3
AD
888 complain (_("expected string constant instead of %s"), token_buffer);
889 skip_to_char ('%');
890 return;
1ff442ca 891 }
d7020c20 892 symval->class = token_sym;
a70083a3
AD
893 symval->type_name = typename;
894 symval->user_token_number = usrtoknum;
1ff442ca 895
a70083a3
AD
896 symval->alias = symbol;
897 symbol->alias = symval;
1ff442ca 898
79282c5a
AD
899 /* symbol and symval combined are only one symbol. */
900 nsyms--;
a70083a3 901}
3cef001a 902
b6610515
RA
903/* FIXME. */
904
905static void
906parse_macro_decl (void)
907{
908 int ch = ungetc (skip_white_space (), finput);
909 char* macro_key;
910 char* macro_value;
b6610515
RA
911
912 /* Read key. */
913 if (!isalpha (ch) && ch != '_')
914 {
915 complain (_("invalid %s declaration"), "%define");
916 skip_to_char ('%');
917 return;
918 }
919 copy_identifier (finput, &macro_obstack);
82e236e2 920 obstack_1grow (&macro_obstack, 0);
b6610515
RA
921 macro_key = obstack_finish (&macro_obstack);
922
923 /* Read value. */
924 ch = skip_white_space ();
925 if (ch != '"')
926 {
927 ungetc (ch, finput);
928 if (ch != EOF)
929 {
930 complain (_("invalid %s declaration"), "%define");
931 skip_to_char ('%');
932 return;
933 }
934 else
935 fatal (_("Premature EOF after %s"), "\"");
936 }
82e236e2
RA
937 copy_string2 (finput, &macro_obstack, '"', 0);
938 obstack_1grow (&macro_obstack, 0);
b6610515
RA
939 macro_value = obstack_finish (&macro_obstack);
940
b6610515
RA
941 /* Store the (key, value) pair in the environment. */
942 macro_insert (macro_key, macro_value);
943}
944
2ba3b73c
MA
945
946/*----------------------------------.
947| Parse what comes after %skeleton. |
948`----------------------------------*/
949
/* Placeholder for %skeleton: nothing is read and nothing is recorded.
   To be completed with parse_dquoted_param () on the CVS branch
   1.29.  */
void
parse_skel_decl (void)
{
  return;
}
955
7333d403
AD
956/*------------------------------------------.
957| Parse what comes after %header_extension. |
958`------------------------------------------*/
959
960static void
961parse_header_extension_decl (void)
962{
963 char buff[32];
3e3da797 964
7333d403
AD
965 if (header_extension)
966 complain (_("multiple %%header_extension declarations"));
967 fscanf (finput, "%s", buff);
3e3da797 968 header_extension = xstrdup (buff);
7333d403
AD
969}
970
971/*------------------------------------------.
972| Parse what comes after %source_extension. |
973`------------------------------------------*/
974
975static void
976parse_source_extension_decl (void)
977{
978 char buff[32];
3e3da797 979
7333d403
AD
980 if (src_extension)
981 complain (_("multiple %%source_extension declarations"));
982 fscanf (finput, "%s", buff);
3e3da797 983 src_extension = xstrdup (buff);
7333d403 984}
d7020c20 985
a70083a3
AD
986/*----------------------------------------------------------------.
987| Read from finput until `%%' is seen. Discard the `%%'. Handle |
988| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 989| groups to ATTRS_OBSTACK. |
a70083a3 990`----------------------------------------------------------------*/
1ff442ca 991
4a120d45 992static void
a70083a3 993read_declarations (void)
1ff442ca 994{
a70083a3
AD
995 int c;
996 int tok;
1ff442ca 997
a70083a3 998 for (;;)
1ff442ca 999 {
a70083a3 1000 c = skip_white_space ();
1ff442ca 1001
a70083a3
AD
1002 if (c == '%')
1003 {
1004 tok = parse_percent_token ();
1ff442ca 1005
a70083a3 1006 switch (tok)
943819bf 1007 {
511e79b3 1008 case tok_two_percents:
a70083a3 1009 return;
1ff442ca 1010
511e79b3 1011 case tok_percent_left_curly:
a70083a3
AD
1012 copy_definition ();
1013 break;
1ff442ca 1014
511e79b3 1015 case tok_token:
d7020c20 1016 parse_token_decl (token_sym, nterm_sym);
a70083a3 1017 break;
1ff442ca 1018
511e79b3 1019 case tok_nterm:
d7020c20 1020 parse_token_decl (nterm_sym, token_sym);
a70083a3 1021 break;
1ff442ca 1022
511e79b3 1023 case tok_type:
a70083a3
AD
1024 parse_type_decl ();
1025 break;
1ff442ca 1026
511e79b3 1027 case tok_start:
a70083a3
AD
1028 parse_start_decl ();
1029 break;
118fb205 1030
511e79b3 1031 case tok_union:
a70083a3
AD
1032 parse_union_decl ();
1033 break;
1ff442ca 1034
511e79b3 1035 case tok_expect:
a70083a3
AD
1036 parse_expect_decl ();
1037 break;
6deb4447 1038
511e79b3 1039 case tok_thong:
a70083a3
AD
1040 parse_thong_decl ();
1041 break;
d7020c20 1042
511e79b3 1043 case tok_left:
d7020c20 1044 parse_assoc_decl (left_assoc);
a70083a3 1045 break;
1ff442ca 1046
511e79b3 1047 case tok_right:
d7020c20 1048 parse_assoc_decl (right_assoc);
a70083a3 1049 break;
1ff442ca 1050
511e79b3 1051 case tok_nonassoc:
d7020c20 1052 parse_assoc_decl (non_assoc);
a70083a3 1053 break;
1ff442ca 1054
7333d403 1055 case tok_hdrext:
09a6de7e 1056 parse_header_extension_decl ();
7333d403
AD
1057 break;
1058
1059 case tok_srcext:
09a6de7e 1060 parse_source_extension_decl ();
7333d403
AD
1061 break;
1062
b6610515
RA
1063 case tok_define:
1064 parse_macro_decl ();
1065 break;
2ba3b73c
MA
1066
1067 case tok_skel:
1068 parse_skel_decl ();
1069 break;
b6610515 1070
511e79b3 1071 case tok_noop:
a70083a3 1072 break;
1ff442ca 1073
a70083a3
AD
1074 default:
1075 complain (_("unrecognized: %s"), token_buffer);
1076 skip_to_char ('%');
1077 }
1078 }
1079 else if (c == EOF)
1080 fatal (_("no input grammar"));
1081 else
1082 {
ff4a34be
AD
1083 char buf[] = "c";
1084 buf[0] = c;
1085 complain (_("unknown character: %s"), quote (buf));
a70083a3 1086 skip_to_char ('%');
1ff442ca 1087 }
1ff442ca 1088 }
1ff442ca 1089}
a70083a3
AD
1090\f
1091/*-------------------------------------------------------------------.
1092| Assuming that a `{' has just been seen, copy everything up to the |
1093| matching `}' into the actions file. STACK_OFFSET is the number of |
1094| values in the current rule so far, which says where to find `$0' |
1095| with respect to the top of the stack. |
1096`-------------------------------------------------------------------*/
1ff442ca 1097
4a120d45 1098static void
79282c5a 1099copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1100{
a70083a3 1101 int c;
a70083a3 1102 int count;
8c7ebe49 1103 char buf[4096];
1ff442ca
NF
1104
1105 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1106 if (semantic_parser)
1107 stack_offset = 0;
1ff442ca 1108
8c7ebe49
AD
1109 sprintf (buf, "\ncase %d:\n", nrules);
1110 obstack_grow (&action_obstack, buf, strlen (buf));
1111
89cab50d 1112 if (!no_lines_flag)
8c7ebe49 1113 {
2a91a95e 1114 sprintf (buf, "#line %d %s\n",
dda680cb
PB
1115 lineno, quotearg_style (c_quoting_style,
1116 macro_find ("filename")));
8c7ebe49
AD
1117 obstack_grow (&action_obstack, buf, strlen (buf));
1118 }
1119 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1120
1121 count = 1;
a70083a3 1122 c = getc (finput);
1ff442ca
NF
1123
1124 while (count > 0)
1125 {
1126 while (c != '}')
a70083a3
AD
1127 {
1128 switch (c)
1ff442ca
NF
1129 {
1130 case '\n':
8c7ebe49 1131 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1132 lineno++;
1133 break;
1134
1135 case '{':
8c7ebe49 1136 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1137 count++;
1138 break;
1139
1140 case '\'':
1141 case '"':
337bab46 1142 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1143 break;
1144
1145 case '/':
337bab46 1146 copy_comment (finput, &action_obstack);
1ff442ca
NF
1147 break;
1148
1149 case '$':
337bab46 1150 copy_dollar (finput, &action_obstack,
8c7ebe49 1151 rule, stack_offset);
1ff442ca
NF
1152 break;
1153
1154 case '@':
337bab46 1155 copy_at (finput, &action_obstack,
8c7ebe49 1156 stack_offset);
6666f98f 1157 break;
1ff442ca
NF
1158
1159 case EOF:
27821bff 1160 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1161
1162 default:
8c7ebe49 1163 obstack_1grow (&action_obstack, c);
a70083a3
AD
1164 }
1165
1166 c = getc (finput);
1167 }
1168
1169 /* above loop exits when c is '}' */
1170
1171 if (--count)
1172 {
8c7ebe49 1173 obstack_1grow (&action_obstack, c);
a70083a3
AD
1174 c = getc (finput);
1175 }
1176 }
1177
ff4423cc 1178 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1179}
1180\f
1181/*-------------------------------------------------------------------.
1182| After `%guard' is seen in the input file, copy the actual guard |
1183| into the guards file. If the guard is followed by an action, copy |
1184| that into the actions file. STACK_OFFSET is the number of values |
1185| in the current rule so far, which says where to find `$0' with |
1186| respect to the top of the stack, for the simple parser in which |
1187| the stack is not popped until after the guard is run. |
1188`-------------------------------------------------------------------*/
1189
1190static void
79282c5a 1191copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1192{
1193 int c;
a70083a3 1194 int count;
a70083a3
AD
1195 int brace_flag = 0;
1196
1197 /* offset is always 0 if parser has already popped the stack pointer */
1198 if (semantic_parser)
1199 stack_offset = 0;
1200
ea5607fd 1201 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1202 if (!no_lines_flag)
ea5607fd 1203 obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
682d48cd
PB
1204 lineno, quotearg_style (c_quoting_style,
1205 macro_find ("filename")));
ea5607fd 1206 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1207
1208 count = 0;
1209 c = getc (finput);
1210
1211 while (brace_flag ? (count > 0) : (c != ';'))
1212 {
1213 switch (c)
1214 {
1215 case '\n':
ea5607fd 1216 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1217 lineno++;
1218 break;
1219
1220 case '{':
ea5607fd 1221 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1222 brace_flag = 1;
1223 count++;
1224 break;
1225
1226 case '}':
ea5607fd 1227 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1228 if (count > 0)
1229 count--;
1230 else
1231 {
1232 complain (_("unmatched %s"), "`}'");
1233 c = getc (finput); /* skip it */
1234 }
1235 break;
1236
1237 case '\'':
1238 case '"':
337bab46 1239 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1240 break;
1241
1242 case '/':
337bab46 1243 copy_comment (finput, &guard_obstack);
a70083a3
AD
1244 break;
1245
1246 case '$':
337bab46 1247 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1248 break;
1ff442ca 1249
a70083a3 1250 case '@':
337bab46 1251 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1252 break;
1ff442ca 1253
a70083a3
AD
1254 case EOF:
1255 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1256
a70083a3 1257 default:
ea5607fd 1258 obstack_1grow (&guard_obstack, c);
1ff442ca 1259 }
a70083a3
AD
1260
1261 if (c != '}' || count != 0)
1262 c = getc (finput);
1ff442ca
NF
1263 }
1264
a70083a3
AD
1265 c = skip_white_space ();
1266
ff4423cc 1267 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1268 if (c == '{')
1269 copy_action (rule, stack_offset);
1270 else if (c == '=')
1271 {
1272 c = getc (finput); /* why not skip_white_space -wjh */
1273 if (c == '{')
1274 copy_action (rule, stack_offset);
1275 }
1276 else
1277 ungetc (c, finput);
1ff442ca 1278}
a70083a3
AD
1279\f
1280
1281static void
1282record_rule_line (void)
1283{
1284 /* Record each rule's source line number in rline table. */
1ff442ca 1285
a70083a3
AD
1286 if (nrules >= rline_allocated)
1287 {
1288 rline_allocated = nrules * 2;
d7913476 1289 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1290 }
1291 rline[nrules] = lineno;
1292}
1ff442ca
NF
1293
1294
a70083a3
AD
1295/*-------------------------------------------------------------------.
1296| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1297| with the user's names. |
1298`-------------------------------------------------------------------*/
1ff442ca 1299
4a120d45 1300static bucket *
118fb205 1301gensym (void)
1ff442ca 1302{
274d42ce
AD
1303 /* Incremented for each generated symbol */
1304 static int gensym_count = 0;
1305 static char buf[256];
1306
a70083a3 1307 bucket *sym;
1ff442ca 1308
274d42ce
AD
1309 sprintf (buf, "@%d", ++gensym_count);
1310 token_buffer = buf;
a70083a3 1311 sym = getsym (token_buffer);
d7020c20 1312 sym->class = nterm_sym;
1ff442ca 1313 sym->value = nvars++;
36281465 1314 return sym;
1ff442ca
NF
1315}
1316
a70083a3
AD
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  This is unused.  It is only called from
   the #if 0 part of readgram.

   Return the first token that does not belong to the declaration:
   the token following the terminating `;', or the offending token
   itself when the declaration stops on anything else.  */

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the unexpected token back to the caller.  (This used to
         return an undeclared variable.)  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          /* Attach the type to the symbol, or check that it agrees
             with the type the symbol already has.  */
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1368\f
1369/*------------------------------------------------------------------.
1370| Parse the input grammar into a one symbol_list structure. Each |
1371| rule is represented by a sequence of symbols: the left hand side |
1372| followed by the contents of the right hand side, followed by a |
1373| null pointer instead of a symbol to terminate the rule. The next |
1374| symbol is the lhs of the following rule. |
1375| |
1376| All guards and actions are copied out to the appropriate files, |
1377| labelled by the rule number they apply to. |
1378`------------------------------------------------------------------*/
1ff442ca 1379
4a120d45 1380static void
118fb205 1381readgram (void)
1ff442ca 1382{
f17bcd1f 1383 token_t t;
a70083a3
AD
1384 bucket *lhs = NULL;
1385 symbol_list *p;
1386 symbol_list *p1;
1387 bucket *bp;
1ff442ca 1388
ff4a34be
AD
1389 /* Points to first symbol_list of current rule. its symbol is the
1390 lhs of the rule. */
1391 symbol_list *crule;
1392 /* Points to the symbol_list preceding crule. */
1393 symbol_list *crule1;
1ff442ca
NF
1394
1395 p1 = NULL;
1396
a70083a3 1397 t = lex ();
1ff442ca 1398
511e79b3 1399 while (t != tok_two_percents && t != tok_eof)
1ff442ca 1400 {
511e79b3 1401 if (t == tok_identifier || t == tok_bar)
1ff442ca 1402 {
89cab50d 1403 int action_flag = 0;
ff4a34be
AD
1404 /* Number of symbols in rhs of this rule so far */
1405 int rulelength = 0;
1ff442ca
NF
1406 int xactions = 0; /* JF for error checking */
1407 bucket *first_rhs = 0;
1408
511e79b3 1409 if (t == tok_identifier)
1ff442ca
NF
1410 {
1411 lhs = symval;
943819bf
RS
1412
1413 if (!start_flag)
1414 {
1415 startval = lhs;
1416 start_flag = 1;
1417 }
a083fbbf 1418
a70083a3 1419 t = lex ();
511e79b3 1420 if (t != tok_colon)
943819bf 1421 {
a0f6b076 1422 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1423 unlex (t);
943819bf 1424 }
1ff442ca
NF
1425 }
1426
511e79b3 1427 if (nrules == 0 && t == tok_bar)
1ff442ca 1428 {
a0f6b076 1429 complain (_("grammar starts with vertical bar"));
943819bf 1430 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1431 }
1ff442ca
NF
1432 /* start a new rule and record its lhs. */
1433
1434 nrules++;
1435 nitems++;
1436
1437 record_rule_line ();
1438
d7913476 1439 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1440 p->sym = lhs;
1441
1442 crule1 = p1;
1443 if (p1)
1444 p1->next = p;
1445 else
1446 grammar = p;
1447
1448 p1 = p;
1449 crule = p;
1450
1451 /* mark the rule's lhs as a nonterminal if not already so. */
1452
d7020c20 1453 if (lhs->class == unknown_sym)
1ff442ca 1454 {
d7020c20 1455 lhs->class = nterm_sym;
1ff442ca
NF
1456 lhs->value = nvars;
1457 nvars++;
1458 }
d7020c20 1459 else if (lhs->class == token_sym)
a0f6b076 1460 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1461
1462 /* read the rhs of the rule. */
1463
1464 for (;;)
1465 {
a70083a3 1466 t = lex ();
511e79b3 1467 if (t == tok_prec)
943819bf 1468 {
a70083a3 1469 t = lex ();
943819bf 1470 crule->ruleprec = symval;
a70083a3 1471 t = lex ();
943819bf 1472 }
1ff442ca 1473
511e79b3 1474 if (!(t == tok_identifier || t == tok_left_curly))
a70083a3 1475 break;
1ff442ca
NF
1476
1477 /* If next token is an identifier, see if a colon follows it.
a70083a3 1478 If one does, exit this rule now. */
511e79b3 1479 if (t == tok_identifier)
1ff442ca 1480 {
a70083a3 1481 bucket *ssave;
f17bcd1f 1482 token_t t1;
1ff442ca
NF
1483
1484 ssave = symval;
a70083a3
AD
1485 t1 = lex ();
1486 unlex (t1);
1ff442ca 1487 symval = ssave;
511e79b3 1488 if (t1 == tok_colon)
a70083a3 1489 break;
1ff442ca 1490
a70083a3 1491 if (!first_rhs) /* JF */
1ff442ca
NF
1492 first_rhs = symval;
1493 /* Not followed by colon =>
1494 process as part of this rule's rhs. */
1495 }
1496
1497 /* If we just passed an action, that action was in the middle
a70083a3
AD
1498 of a rule, so make a dummy rule to reduce it to a
1499 non-terminal. */
89cab50d 1500 if (action_flag)
1ff442ca 1501 {
a70083a3 1502 bucket *sdummy;
1ff442ca 1503
f282676b
AD
1504 /* Since the action was written out with this rule's
1505 number, we must give the new rule this number by
1506 inserting the new rule before it. */
1ff442ca
NF
1507
1508 /* Make a dummy nonterminal, a gensym. */
a70083a3 1509 sdummy = gensym ();
1ff442ca
NF
1510
1511 /* Make a new rule, whose body is empty,
1512 before the current one, so that the action
1513 just read can belong to it. */
1514 nrules++;
1515 nitems++;
1516 record_rule_line ();
d7913476 1517 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1518 if (crule1)
1519 crule1->next = p;
a70083a3
AD
1520 else
1521 grammar = p;
1ff442ca 1522 p->sym = sdummy;
d7913476 1523 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1524 p->next = crule1;
1525 crule1->next = crule;
1526
f282676b
AD
1527 /* Insert the dummy generated by that rule into this
1528 rule. */
1ff442ca 1529 nitems++;
d7913476 1530 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1531 p->sym = sdummy;
1532 p1->next = p;
1533 p1 = p;
1534
89cab50d 1535 action_flag = 0;
1ff442ca
NF
1536 }
1537
511e79b3 1538 if (t == tok_identifier)
1ff442ca
NF
1539 {
1540 nitems++;
d7913476 1541 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1542 p->sym = symval;
1543 p1->next = p;
1544 p1 = p;
1545 }
a70083a3 1546 else /* handle an action. */
1ff442ca 1547 {
a70083a3 1548 copy_action (crule, rulelength);
89cab50d 1549 action_flag = 1;
1ff442ca
NF
1550 xactions++; /* JF */
1551 }
1552 rulelength++;
a70083a3 1553 } /* end of read rhs of rule */
1ff442ca
NF
1554
1555 /* Put an empty link in the list to mark the end of this rule */
d7913476 1556 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1557 p1->next = p;
1558 p1 = p;
1559
511e79b3 1560 if (t == tok_prec)
1ff442ca 1561 {
a0f6b076 1562 complain (_("two @prec's in a row"));
a70083a3 1563 t = lex ();
1ff442ca 1564 crule->ruleprec = symval;
a70083a3 1565 t = lex ();
1ff442ca 1566 }
511e79b3 1567 if (t == tok_guard)
1ff442ca 1568 {
a70083a3 1569 if (!semantic_parser)
ff4a34be 1570 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1571
a70083a3
AD
1572 copy_guard (crule, rulelength);
1573 t = lex ();
1ff442ca 1574 }
511e79b3 1575 else if (t == tok_left_curly)
1ff442ca 1576 {
a70083a3 1577 /* This case never occurs -wjh */
89cab50d 1578 if (action_flag)
a0f6b076 1579 complain (_("two actions at end of one rule"));
a70083a3 1580 copy_action (crule, rulelength);
89cab50d 1581 action_flag = 1;
943819bf 1582 xactions++; /* -wjh */
a70083a3 1583 t = lex ();
1ff442ca 1584 }
a0f6b076 1585 /* If $$ is being set in default way, report if any type
6666f98f
AD
1586 mismatch. */
1587 else if (!xactions
a70083a3 1588 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1589 {
6666f98f
AD
1590 if (lhs->type_name == 0
1591 || first_rhs->type_name == 0
a70083a3 1592 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1593 complain (_("type clash (`%s' `%s') on default action"),
1594 lhs->type_name ? lhs->type_name : "",
a70083a3 1595 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1596 }
1597 /* Warn if there is no default for $$ but we need one. */
1598 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1599 complain (_("empty rule for typed nonterminal, and no action"));
511e79b3 1600 if (t == tok_semicolon)
a70083a3 1601 t = lex ();
a083fbbf 1602 }
943819bf 1603#if 0
a70083a3 1604 /* these things can appear as alternatives to rules. */
943819bf
RS
1605/* NO, they cannot.
1606 a) none of the documentation allows them
1607 b) most of them scan forward until finding a next %
1608 thus they may swallow lots of intervening rules
1609*/
511e79b3 1610 else if (t == tok_token)
1ff442ca 1611 {
d7020c20 1612 parse_token_decl (token_sym, nterm_sym);
a70083a3 1613 t = lex ();
1ff442ca 1614 }
511e79b3 1615 else if (t == tok_nterm)
1ff442ca 1616 {
d7020c20 1617 parse_token_decl (nterm_sym, token_sym);
a70083a3 1618 t = lex ();
1ff442ca 1619 }
511e79b3 1620 else if (t == tok_type)
1ff442ca 1621 {
a70083a3 1622 t = get_type ();
1ff442ca 1623 }
511e79b3 1624 else if (t == tok_union)
1ff442ca 1625 {
a70083a3
AD
1626 parse_union_decl ();
1627 t = lex ();
1ff442ca 1628 }
511e79b3 1629 else if (t == tok_expect)
1ff442ca 1630 {
a70083a3
AD
1631 parse_expect_decl ();
1632 t = lex ();
1ff442ca 1633 }
511e79b3 1634 else if (t == tok_start)
1ff442ca 1635 {
a70083a3
AD
1636 parse_start_decl ();
1637 t = lex ();
1ff442ca 1638 }
943819bf
RS
1639#endif
1640
1ff442ca 1641 else
943819bf 1642 {
d01c415b 1643 complain (_("invalid input: %s"), quote (token_buffer));
a70083a3 1644 t = lex ();
943819bf 1645 }
1ff442ca
NF
1646 }
1647
943819bf
RS
1648 /* grammar has been read. Do some checking */
1649
1ff442ca 1650 if (nsyms > MAXSHORT)
a0f6b076
AD
1651 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1652 MAXSHORT);
1ff442ca 1653 if (nrules == 0)
a0f6b076 1654 fatal (_("no rules in the input grammar"));
1ff442ca 1655
1ff442ca
NF
1656 /* Report any undefined symbols and consider them nonterminals. */
1657
1658 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1659 if (bp->class == unknown_sym)
1ff442ca 1660 {
a70083a3
AD
1661 complain (_
1662 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1663 bp->tag);
d7020c20 1664 bp->class = nterm_sym;
1ff442ca
NF
1665 bp->value = nvars++;
1666 }
1667
1668 ntokens = nsyms - nvars;
1669}
ff48177d
MA
1670
1671/* At the end of the grammar file, some C source code must
63c2d5de 1672 be stored. It is going to be associated to the epilogue
ff48177d
MA
1673 directive. */
1674static void
1675read_additionnal_code (void)
1676{
1677 char c;
63c2d5de 1678 struct obstack el_obstack;
ff48177d 1679
63c2d5de 1680 obstack_init (&el_obstack);
ff48177d
MA
1681
1682 while ((c = getc (finput)) != EOF)
63c2d5de 1683 obstack_1grow (&el_obstack, c);
ff48177d 1684
63c2d5de
MA
1685 obstack_1grow (&el_obstack, 0);
1686 macro_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1687}
1688
a70083a3
AD
1689\f
1690/*--------------------------------------------------------------.
1691| For named tokens, but not literal ones, define the name. The |
1692| value is the user token number. |
1693`--------------------------------------------------------------*/
1ff442ca 1694
4a120d45 1695static void
896fe5c1 1696output_token_defines (struct obstack *oout)
1ff442ca 1697{
a70083a3
AD
1698 bucket *bp;
1699 char *cp, *symbol;
1700 char c;
1ff442ca 1701
a70083a3 1702 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1703 {
a70083a3
AD
1704 symbol = bp->tag; /* get symbol */
1705
1706 if (bp->value >= ntokens)
1707 continue;
1708 if (bp->user_token_number == SALIAS)
1709 continue;
1710 if ('\'' == *symbol)
1711 continue; /* skip literal character */
1712 if (bp == errtoken)
1713 continue; /* skip error token */
1714 if ('\"' == *symbol)
1ff442ca 1715 {
a70083a3
AD
1716 /* use literal string only if given a symbol with an alias */
1717 if (bp->alias)
1718 symbol = bp->alias->tag;
1719 else
1720 continue;
1721 }
1ff442ca 1722
a70083a3
AD
1723 /* Don't #define nonliteral tokens whose names contain periods. */
1724 cp = symbol;
1725 while ((c = *cp++) && c != '.');
1726 if (c != '\0')
1727 continue;
1ff442ca 1728
0b8afb77 1729 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
896fe5c1 1730 symbol,
62ab6972 1731 (translations ? bp->user_token_number : bp->value));
a70083a3 1732 if (semantic_parser)
0b8afb77 1733 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1734 }
1735}
1ff442ca
NF
1736
1737
a70083a3
AD
1738/*------------------------------------------------------------------.
1739| Assign symbol numbers, and write definition of token names into |
b2ca4022 1740| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1741| of symbols. |
1742`------------------------------------------------------------------*/
1ff442ca 1743
4a120d45 1744static void
118fb205 1745packsymbols (void)
1ff442ca 1746{
a70083a3
AD
1747 bucket *bp;
1748 int tokno = 1;
1749 int i;
1750 int last_user_token_number;
4a120d45 1751 static char DOLLAR[] = "$";
1ff442ca 1752
d7913476 1753 tags = XCALLOC (char *, nsyms + 1);
4a120d45 1754 tags[0] = DOLLAR;
d7913476 1755 user_toknums = XCALLOC (short, nsyms + 1);
943819bf 1756 user_toknums[0] = 0;
1ff442ca 1757
d7913476
AD
1758 sprec = XCALLOC (short, nsyms);
1759 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1760
1761 max_user_token_number = 256;
1762 last_user_token_number = 256;
1763
1764 for (bp = firstsymbol; bp; bp = bp->next)
1765 {
d7020c20 1766 if (bp->class == nterm_sym)
1ff442ca
NF
1767 {
1768 bp->value += ntokens;
1769 }
943819bf
RS
1770 else if (bp->alias)
1771 {
0a6384c4
AD
1772 /* this symbol and its alias are a single token defn.
1773 allocate a tokno, and assign to both check agreement of
1774 ->prec and ->assoc fields and make both the same */
1775 if (bp->value == 0)
1776 bp->value = bp->alias->value = tokno++;
943819bf 1777
0a6384c4
AD
1778 if (bp->prec != bp->alias->prec)
1779 {
1780 if (bp->prec != 0 && bp->alias->prec != 0
1781 && bp->user_token_number == SALIAS)
a0f6b076
AD
1782 complain (_("conflicting precedences for %s and %s"),
1783 bp->tag, bp->alias->tag);
0a6384c4
AD
1784 if (bp->prec != 0)
1785 bp->alias->prec = bp->prec;
1786 else
1787 bp->prec = bp->alias->prec;
1788 }
943819bf 1789
0a6384c4
AD
1790 if (bp->assoc != bp->alias->assoc)
1791 {
a0f6b076
AD
1792 if (bp->assoc != 0 && bp->alias->assoc != 0
1793 && bp->user_token_number == SALIAS)
1794 complain (_("conflicting assoc values for %s and %s"),
1795 bp->tag, bp->alias->tag);
1796 if (bp->assoc != 0)
1797 bp->alias->assoc = bp->assoc;
1798 else
1799 bp->assoc = bp->alias->assoc;
1800 }
0a6384c4
AD
1801
1802 if (bp->user_token_number == SALIAS)
a70083a3 1803 continue; /* do not do processing below for SALIASs */
943819bf 1804
a70083a3 1805 }
d7020c20 1806 else /* bp->class == token_sym */
943819bf
RS
1807 {
1808 bp->value = tokno++;
1809 }
1810
d7020c20 1811 if (bp->class == token_sym)
1ff442ca
NF
1812 {
1813 if (translations && !(bp->user_token_number))
1814 bp->user_token_number = ++last_user_token_number;
1815 if (bp->user_token_number > max_user_token_number)
1816 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1817 }
1818
1819 tags[bp->value] = bp->tag;
943819bf 1820 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1821 sprec[bp->value] = bp->prec;
1822 sassoc[bp->value] = bp->assoc;
1823
1824 }
1825
1826 if (translations)
1827 {
a70083a3 1828 int j;
1ff442ca 1829
d7913476 1830 token_translations = XCALLOC (short, max_user_token_number + 1);
1ff442ca 1831
0a6384c4 1832 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1833 token number for $undefined., which represents all invalid
1834 inputs. */
4a120d45 1835 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1836 token_translations[j] = 2;
1ff442ca 1837
943819bf 1838 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1839 {
1840 if (bp->value >= ntokens)
1841 continue; /* non-terminal */
1842 if (bp->user_token_number == SALIAS)
0a6384c4 1843 continue;
a70083a3 1844 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1845 complain (_("tokens %s and %s both assigned number %d"),
1846 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1847 bp->tag, bp->user_token_number);
1848 token_translations[bp->user_token_number] = bp->value;
1849 }
1ff442ca
NF
1850 }
1851
1852 error_token_number = errtoken->value;
1853
b6610515
RA
1854 output_token_defines (&output_obstack);
1855 obstack_1grow (&output_obstack, 0);
1856 macro_insert ("tokendef", obstack_finish (&output_obstack));
1857
d8cb5183
MA
1858#if 0
1859 if (!no_parser_flag)
1860 output_token_defines (&table_obstack);
1861#endif
1ff442ca 1862
d7020c20 1863 if (startval->class == unknown_sym)
a0f6b076 1864 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1865 else if (startval->class == token_sym)
a0f6b076 1866 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1867
1868 start_symbol = startval->value;
1869
89cab50d 1870 if (defines_flag)
1ff442ca 1871 {
896fe5c1 1872 output_token_defines (&defines_obstack);
1ff442ca
NF
1873
1874 if (!pure_parser)
1875 {
1876 if (spec_name_prefix)
896fe5c1
AD
1877 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1878 spec_name_prefix);
1ff442ca 1879 else
ff4423cc 1880 obstack_sgrow (&defines_obstack,
573c1d9f 1881 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1882 }
1883
1884 if (semantic_parser)
1885 for (i = ntokens; i < nsyms; i++)
1886 {
1887 /* don't make these for dummy nonterminals made by gensym. */
1888 if (*tags[i] != '@')
896fe5c1 1889 obstack_fgrow2 (&defines_obstack,
0b8afb77 1890 "# define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1891 }
1892#if 0
1893 /* `fdefines' is now a temporary file, so we need to copy its
1894 contents in `done', so we can't close it here. */
a70083a3 1895 fclose (fdefines);
1ff442ca
NF
1896 fdefines = NULL;
1897#endif
1898 }
1899}
a083fbbf 1900
1ff442ca 1901
a70083a3
AD
1902/*---------------------------------------------------------------.
1903| Convert the rules into the representation using RRHS, RLHS and |
1904| RITEMS. |
1905`---------------------------------------------------------------*/
1ff442ca 1906
4a120d45 1907static void
118fb205 1908packgram (void)
1ff442ca 1909{
a70083a3
AD
1910 int itemno;
1911 int ruleno;
1912 symbol_list *p;
1ff442ca
NF
1913
1914 bucket *ruleprec;
1915
d7913476
AD
1916 ritem = XCALLOC (short, nitems + 1);
1917 rlhs = XCALLOC (short, nrules) - 1;
1918 rrhs = XCALLOC (short, nrules) - 1;
1919 rprec = XCALLOC (short, nrules) - 1;
1920 rprecsym = XCALLOC (short, nrules) - 1;
1921 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1922
1923 itemno = 0;
1924 ruleno = 1;
1925
1926 p = grammar;
1927 while (p)
1928 {
1929 rlhs[ruleno] = p->sym->value;
1930 rrhs[ruleno] = itemno;
1931 ruleprec = p->ruleprec;
1932
1933 p = p->next;
1934 while (p && p->sym)
1935 {
1936 ritem[itemno++] = p->sym->value;
1937 /* A rule gets by default the precedence and associativity
1938 of the last token in it. */
d7020c20 1939 if (p->sym->class == token_sym)
1ff442ca
NF
1940 {
1941 rprec[ruleno] = p->sym->prec;
1942 rassoc[ruleno] = p->sym->assoc;
1943 }
a70083a3
AD
1944 if (p)
1945 p = p->next;
1ff442ca
NF
1946 }
1947
1948 /* If this rule has a %prec,
a70083a3 1949 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1950 if (ruleprec)
1951 {
a70083a3
AD
1952 rprec[ruleno] = ruleprec->prec;
1953 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1954 rprecsym[ruleno] = ruleprec->value;
1955 }
1956
1957 ritem[itemno++] = -ruleno;
1958 ruleno++;
1959
a70083a3
AD
1960 if (p)
1961 p = p->next;
1ff442ca
NF
1962 }
1963
1964 ritem[itemno] = 0;
1965}
a70083a3
AD
1966\f
1967/*-------------------------------------------------------------------.
1968| Read in the grammar specification and record it in the format |
ea5607fd 1969| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1970| and all actions into ACTION_OBSTACK, in each case forming the body |
1971| of a C function (YYGUARD or YYACTION) which contains a switch |
1972| statement to decide which guard or action to execute. |
a70083a3
AD
1973`-------------------------------------------------------------------*/
1974
1975void
1976reader (void)
1977{
1978 start_flag = 0;
1979 startval = NULL; /* start symbol not specified yet. */
1980
1981#if 0
1982 /* initially assume token number translation not needed. */
1983 translations = 0;
1984#endif
1985 /* Nowadays translations is always set to 1, since we give `error' a
1986 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1987 */
1988 translations = 1;
1989
1990 nsyms = 1;
1991 nvars = 0;
1992 nrules = 0;
1993 nitems = 0;
1994 rline_allocated = 10;
d7913476 1995 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1996
1997 typed = 0;
1998 lastprec = 0;
1999
a70083a3
AD
2000 semantic_parser = 0;
2001 pure_parser = 0;
a70083a3
AD
2002
2003 grammar = NULL;
2004
2005 init_lex ();
2006 lineno = 1;
2007
82e236e2
RA
2008 /* Initialize the macro obstack. */
2009 obstack_init (&macro_obstack);
2010
a70083a3
AD
2011 /* Initialize the symbol table. */
2012 tabinit ();
b6610515 2013
a70083a3
AD
2014 /* Construct the error token */
2015 errtoken = getsym ("error");
d7020c20 2016 errtoken->class = token_sym;
a70083a3 2017 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 2018
a70083a3
AD
2019 /* Construct a token that represents all undefined literal tokens.
2020 It is always token number 2. */
2021 undeftoken = getsym ("$undefined.");
d7020c20 2022 undeftoken->class = token_sym;
a70083a3
AD
2023 undeftoken->user_token_number = 2;
2024
896fe5c1
AD
2025 /* Read the declaration section. Copy %{ ... %} groups to
2026 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2027 etc. found there. */
a70083a3 2028 read_declarations ();
a70083a3
AD
2029 /* Read in the grammar, build grammar in list form. Write out
2030 guards and actions. */
2031 readgram ();
ff48177d
MA
2032 /* Some C code is given at the end of the grammar file. */
2033 read_additionnal_code ();
a70083a3
AD
2034 /* Now we know whether we need the line-number stack. If we do,
2035 write its type into the .tab.h file. */
b33160bf
MA
2036#if 0
2037 if (defines_flag)
2038 reader_output_yylsp (&defines_obstack);
b33160bf 2039#endif
a70083a3
AD
2040 /* Assign the symbols their symbol numbers. Write #defines for the
2041 token symbols into FDEFINES if requested. */
2042 packsymbols ();
2043 /* Convert the grammar into the format described in gram.h. */
2044 packgram ();
2045 /* Free the symbol table data structure since symbols are now all
2046 referred to by symbol number. */
2047 free_symtab ();
2048}