]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/reader.c (read_additionnal_code): Rename %%user_code to
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
8c7ebe49 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
8c7ebe49 24#include "obstack.h"
2a91a95e
AD
25#include "quotearg.h"
26#include "quote.h"
ceed8467 27#include "getargs.h"
1ff442ca 28#include "files.h"
d7913476 29#include "xalloc.h"
1ff442ca
NF
30#include "symtab.h"
31#include "lex.h"
32#include "gram.h"
a0f6b076 33#include "complain.h"
6c89f1c1 34#include "output.h"
b2ca4022 35#include "reader.h"
340ef489 36#include "conflicts.h"
b6610515 37#include "macrotab.h"
1ff442ca 38
1ff442ca 39/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 40static int rline_allocated;
1ff442ca 41
a70083a3
AD
42typedef struct symbol_list
43{
44 struct symbol_list *next;
45 bucket *sym;
46 bucket *ruleprec;
47}
48symbol_list;
118fb205 49
1ff442ca 50int lineno;
1ff442ca 51char **tags;
d019d655 52short *user_toknums;
4a120d45
JT
53static symbol_list *grammar;
54static int start_flag;
55static bucket *startval;
1ff442ca
NF
56
57/* Nonzero if components of semantic values are used, implying
58 they must be unions. */
59static int value_components_used;
60
d7020c20
AD
61/* Nonzero if %union has been seen. */
62static int typed;
1ff442ca 63
d7020c20
AD
64/* Incremented for each %left, %right or %nonassoc seen */
65static int lastprec;
1ff442ca 66
1ff442ca 67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
0d533154 69\f
a70083a3 70
0d533154
AD
71/*===================\
72| Low level lexing. |
73\===================*/
943819bf
RS
74
75static void
118fb205 76skip_to_char (int target)
943819bf
RS
77{
78 int c;
79 if (target == '\n')
a0f6b076 80 complain (_(" Skipping to next \\n"));
943819bf 81 else
a0f6b076 82 complain (_(" Skipping to next %c"), target);
943819bf
RS
83
84 do
0d533154 85 c = skip_white_space ();
943819bf 86 while (c != target && c != EOF);
a083fbbf 87 if (c != EOF)
0d533154 88 ungetc (c, finput);
943819bf
RS
89}
90
91
0d533154
AD
/* Read a signed decimal integer from STREAM and return its value.

   An optional leading `-' is consumed, followed by as many decimal
   digits as are available.  The first character that is not part of
   the number is pushed back onto STREAM.  If no digit is present the
   result is 0.

   Values whose magnitude does not fit in an `int' saturate at the
   largest representable magnitude instead of overflowing, which would
   be undefined behavior on signed arithmetic.  */

static inline int
read_signed_integer (FILE *stream)
{
  /* Largest `int' value, computed locally so that no additional
     header is required by this function.  */
  const int max = (int) (~0U >> 1);
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit would exceed MAX: clamp.  Once clamped, the
         value stays clamped for any further digits.  */
      if (n > (max - digit) / 10)
        n = max;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Give back the character that ended the number (a no-op at EOF).  */
  ungetc (c, stream);

  return sign * n;
}
119\f
79282c5a
AD
120/*--------------------------------------------------------------.
121| Get the data type (alternative in the union) of the value for |
122| symbol N in rule RULE. |
123`--------------------------------------------------------------*/
124
125static char *
126get_type_name (int n, symbol_list * rule)
127{
128 int i;
129 symbol_list *rp;
130
131 if (n < 0)
132 {
133 complain (_("invalid $ value"));
134 return NULL;
135 }
136
137 rp = rule;
138 i = 0;
139
140 while (i < n)
141 {
142 rp = rp->next;
143 if (rp == NULL || rp->sym == NULL)
144 {
145 complain (_("invalid $ value"));
146 return NULL;
147 }
148 i++;
149 }
150
151 return rp->sym->type_name;
152}
153\f
337bab46
AD
154/*------------------------------------------------------------.
155| Dump the string from FIN to OOUT if non null. MATCH is the |
156| delimiter of the string (either ' or "). |
157`------------------------------------------------------------*/
ae3c3164
AD
158
159static inline void
b6610515 160copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
161{
162 int c;
163
b6610515
RA
164 if (store)
165 obstack_1grow (oout, match);
8c7ebe49 166
4a120d45 167 c = getc (fin);
ae3c3164
AD
168
169 while (c != match)
170 {
171 if (c == EOF)
172 fatal (_("unterminated string at end of file"));
173 if (c == '\n')
174 {
a0f6b076 175 complain (_("unterminated string"));
4a120d45 176 ungetc (c, fin);
ae3c3164
AD
177 c = match; /* invent terminator */
178 continue;
179 }
180
337bab46 181 obstack_1grow (oout, c);
ae3c3164
AD
182
183 if (c == '\\')
184 {
4a120d45 185 c = getc (fin);
ae3c3164
AD
186 if (c == EOF)
187 fatal (_("unterminated string at end of file"));
337bab46 188 obstack_1grow (oout, c);
8c7ebe49 189
ae3c3164
AD
190 if (c == '\n')
191 lineno++;
192 }
193
a70083a3 194 c = getc (fin);
ae3c3164
AD
195 }
196
b6610515
RA
197 if (store)
198 obstack_1grow (oout, c);
199}
200
/* Copy a string literal from FIN to OOUT, delimiters included.  The
   opening delimiter MATCH has already been read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
208
b6610515
RA
/* Append to OOUT the longest run of alphanumeric characters and
   underscores found at the current position of FIN.  The character
   that ends the run is pushed back onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (ch == '_' || isalnum (ch))
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
ae3c3164 221
337bab46
AD
222/*-----------------------------------------------------------------.
223| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
224| NULL). In fact we just saw a `/', which might or might not be a |
225| comment. In any case, copy what we saw. |
226| |
227| OUT2 might be NULL. |
228`-----------------------------------------------------------------*/
ae3c3164
AD
229
230static inline void
337bab46 231copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
232{
233 int cplus_comment;
a70083a3 234 int ended;
550a72a3
AD
235 int c;
236
237 /* We read a `/', output it. */
337bab46 238 obstack_1grow (oout1, '/');
896fe5c1
AD
239 if (oout2)
240 obstack_1grow (oout2, '/');
550a72a3
AD
241
242 switch ((c = getc (fin)))
243 {
244 case '/':
245 cplus_comment = 1;
246 break;
247 case '*':
248 cplus_comment = 0;
249 break;
250 default:
251 ungetc (c, fin);
252 return;
253 }
ae3c3164 254
337bab46 255 obstack_1grow (oout1, c);
896fe5c1
AD
256 if (oout2)
257 obstack_1grow (oout2, c);
550a72a3 258 c = getc (fin);
ae3c3164
AD
259
260 ended = 0;
261 while (!ended)
262 {
263 if (!cplus_comment && c == '*')
264 {
265 while (c == '*')
266 {
337bab46 267 obstack_1grow (oout1, c);
896fe5c1
AD
268 if (oout2)
269 obstack_1grow (oout2, c);
550a72a3 270 c = getc (fin);
ae3c3164
AD
271 }
272
273 if (c == '/')
274 {
337bab46 275 obstack_1grow (oout1, c);
896fe5c1
AD
276 if (oout2)
277 obstack_1grow (oout2, c);
ae3c3164
AD
278 ended = 1;
279 }
280 }
281 else if (c == '\n')
282 {
283 lineno++;
337bab46 284 obstack_1grow (oout1, c);
896fe5c1
AD
285 if (oout2)
286 obstack_1grow (oout2, c);
ae3c3164
AD
287 if (cplus_comment)
288 ended = 1;
289 else
550a72a3 290 c = getc (fin);
ae3c3164
AD
291 }
292 else if (c == EOF)
293 fatal (_("unterminated comment"));
294 else
295 {
337bab46 296 obstack_1grow (oout1, c);
896fe5c1
AD
297 if (oout2)
298 obstack_1grow (oout2, c);
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 }
302}
303
304
550a72a3
AD
/* Copy the comment (actually, whatever starts with the `/' that was
   just read) from FIN to the single obstack OOUT.  */

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *no_second_output = NULL;

  copy_comment2 (fin, oout, no_second_output);
}
315
316
a70083a3 317/*-----------------------------------------------------------------.
337bab46 318| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
319| reference to this location. STACK_OFFSET is the number of values |
320| in the current rule so far, which says where to find `$0' with |
321| respect to the top of the stack. |
322`-----------------------------------------------------------------*/
1ff442ca 323
a70083a3 324static inline void
337bab46 325copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 326{
a70083a3 327 int c;
1ff442ca 328
a70083a3
AD
329 c = getc (fin);
330 if (c == '$')
1ff442ca 331 {
ff4423cc 332 obstack_sgrow (oout, "yyloc");
89cab50d 333 locations_flag = 1;
a70083a3
AD
334 }
335 else if (isdigit (c) || c == '-')
336 {
337 int n;
1ff442ca 338
a70083a3
AD
339 ungetc (c, fin);
340 n = read_signed_integer (fin);
943819bf 341
337bab46 342 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 343 locations_flag = 1;
1ff442ca 344 }
a70083a3 345 else
ff4a34be
AD
346 {
347 char buf[] = "@c";
348 buf[1] = c;
349 complain (_("%s is invalid"), quote (buf));
350 }
1ff442ca 351}
79282c5a
AD
352
353
354/*-------------------------------------------------------------------.
355| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
356| |
357| Possible inputs: $[<TYPENAME>]($|integer) |
358| |
337bab46 359| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
360| the number of values in the current rule so far, which says where |
361| to find `$0' with respect to the top of the stack. |
362`-------------------------------------------------------------------*/
363
364static inline void
337bab46 365copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
366 symbol_list *rule, int stack_offset)
367{
368 int c = getc (fin);
b0ce6046 369 const char *type_name = NULL;
79282c5a 370
f282676b 371 /* Get the type name if explicit. */
79282c5a
AD
372 if (c == '<')
373 {
f282676b 374 read_type_name (fin);
79282c5a
AD
375 type_name = token_buffer;
376 value_components_used = 1;
79282c5a
AD
377 c = getc (fin);
378 }
379
380 if (c == '$')
381 {
ff4423cc 382 obstack_sgrow (oout, "yyval");
8c7ebe49 383
79282c5a
AD
384 if (!type_name)
385 type_name = get_type_name (0, rule);
386 if (type_name)
337bab46 387 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
388 if (!type_name && typed)
389 complain (_("$$ of `%s' has no declared type"),
390 rule->sym->tag);
391 }
392 else if (isdigit (c) || c == '-')
393 {
394 int n;
395 ungetc (c, fin);
396 n = read_signed_integer (fin);
397
398 if (!type_name && n > 0)
399 type_name = get_type_name (n, rule);
400
337bab46 401 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 402
79282c5a 403 if (type_name)
337bab46 404 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
405 if (!type_name && typed)
406 complain (_("$%d of `%s' has no declared type"),
407 n, rule->sym->tag);
408 }
409 else
410 {
411 char buf[] = "$c";
412 buf[1] = c;
413 complain (_("%s is invalid"), quote (buf));
414 }
415}
a70083a3
AD
416\f
417/*-------------------------------------------------------------------.
418| Copy the contents of a `%{ ... %}' into the definitions file. The |
419| `%{' has already been read. Return after reading the `%}'. |
420`-------------------------------------------------------------------*/
1ff442ca 421
4a120d45 422static void
118fb205 423copy_definition (void)
1ff442ca 424{
a70083a3 425 int c;
ae3c3164 426 /* -1 while reading a character if prev char was %. */
a70083a3 427 int after_percent;
1ff442ca 428
b6610515 429#if 0
89cab50d 430 if (!no_lines_flag)
2a91a95e
AD
431 obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
432 lineno, quotearg_style (c_quoting_style, infile));
b6610515 433#endif
1ff442ca
NF
434
435 after_percent = 0;
436
ae3c3164 437 c = getc (finput);
1ff442ca
NF
438
439 for (;;)
440 {
441 switch (c)
442 {
443 case '\n':
dd60faec 444 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
445 lineno++;
446 break;
447
448 case '%':
a70083a3 449 after_percent = -1;
1ff442ca 450 break;
a083fbbf 451
1ff442ca
NF
452 case '\'':
453 case '"':
337bab46 454 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
455 break;
456
457 case '/':
337bab46 458 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
459 break;
460
461 case EOF:
a70083a3 462 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
463
464 default:
dd60faec 465 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
466 }
467
a70083a3 468 c = getc (finput);
1ff442ca
NF
469
470 if (after_percent)
471 {
472 if (c == '}')
473 return;
dd60faec 474 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
475 }
476 after_percent = 0;
1ff442ca 477 }
1ff442ca
NF
478}
479
480
d7020c20
AD
481/*-------------------------------------------------------------------.
482| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
483| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
484| are reversed. |
485`-------------------------------------------------------------------*/
1ff442ca 486
4a120d45 487static void
d7020c20 488parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 489{
f17bcd1f 490 token_t token = 0;
a70083a3 491 char *typename = 0;
1ff442ca 492
1e9798d5
AD
493 /* The symbol being defined. */
494 struct bucket *symbol = NULL;
495
496 /* After `%token' and `%nterm', any number of symbols maybe be
497 defined. */
1ff442ca
NF
498 for (;;)
499 {
e6011337
JT
500 int tmp_char = ungetc (skip_white_space (), finput);
501
1e9798d5
AD
502 /* `%' (for instance from `%token', or from `%%' etc.) is the
503 only valid means to end this declaration. */
e6011337 504 if (tmp_char == '%')
1ff442ca 505 return;
e6011337 506 if (tmp_char == EOF)
a0f6b076 507 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 508
a70083a3 509 token = lex ();
511e79b3 510 if (token == tok_comma)
943819bf
RS
511 {
512 symbol = NULL;
513 continue;
514 }
511e79b3 515 if (token == tok_typename)
1ff442ca 516 {
95e36146 517 typename = xstrdup (token_buffer);
1ff442ca 518 value_components_used = 1;
943819bf
RS
519 symbol = NULL;
520 }
511e79b3 521 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 522 {
8e03724b
AD
523 if (symval->alias)
524 warn (_("symbol `%s' used more than once as a literal string"),
525 symval->tag);
526 else if (symbol->alias)
527 warn (_("symbol `%s' given more than one literal string"),
528 symbol->tag);
529 else
530 {
531 symval->class = token_sym;
532 symval->type_name = typename;
533 symval->user_token_number = symbol->user_token_number;
534 symbol->user_token_number = SALIAS;
535 symval->alias = symbol;
536 symbol->alias = symval;
537 /* symbol and symval combined are only one symbol */
538 nsyms--;
539 }
943819bf 540 translations = 1;
8e03724b 541 symbol = NULL;
1ff442ca 542 }
511e79b3 543 else if (token == tok_identifier)
1ff442ca
NF
544 {
545 int oldclass = symval->class;
943819bf 546 symbol = symval;
1ff442ca 547
943819bf 548 if (symbol->class == what_is_not)
a0f6b076 549 complain (_("symbol %s redefined"), symbol->tag);
943819bf 550 symbol->class = what_is;
d7020c20 551 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 552 symbol->value = nvars++;
1ff442ca
NF
553
554 if (typename)
555 {
943819bf
RS
556 if (symbol->type_name == NULL)
557 symbol->type_name = typename;
a70083a3 558 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 559 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
560 }
561 }
511e79b3 562 else if (symbol && token == tok_number)
a70083a3 563 {
943819bf 564 symbol->user_token_number = numval;
1ff442ca 565 translations = 1;
a70083a3 566 }
1ff442ca 567 else
943819bf 568 {
a0f6b076 569 complain (_("`%s' is invalid in %s"),
d7020c20 570 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 571 skip_to_char ('%');
943819bf 572 }
1ff442ca
NF
573 }
574
575}
576
1ff442ca 577
d7020c20
AD
578/*------------------------------.
579| Parse what comes after %start |
580`------------------------------*/
1ff442ca 581
4a120d45 582static void
118fb205 583parse_start_decl (void)
1ff442ca
NF
584{
585 if (start_flag)
27821bff 586 complain (_("multiple %s declarations"), "%start");
511e79b3 587 if (lex () != tok_identifier)
27821bff 588 complain (_("invalid %s declaration"), "%start");
943819bf
RS
589 else
590 {
591 start_flag = 1;
592 startval = symval;
593 }
1ff442ca
NF
594}
595
a70083a3
AD
596/*-----------------------------------------------------------.
597| read in a %type declaration and record its information for |
598| get_type_name to access |
599`-----------------------------------------------------------*/
600
601static void
602parse_type_decl (void)
603{
a70083a3
AD
604 char *name;
605
511e79b3 606 if (lex () != tok_typename)
a70083a3
AD
607 {
608 complain ("%s", _("%type declaration has no <typename>"));
609 skip_to_char ('%');
610 return;
611 }
612
95e36146 613 name = xstrdup (token_buffer);
a70083a3
AD
614
615 for (;;)
616 {
f17bcd1f 617 token_t t;
a70083a3
AD
618 int tmp_char = ungetc (skip_white_space (), finput);
619
620 if (tmp_char == '%')
621 return;
622 if (tmp_char == EOF)
623 fatal (_("Premature EOF after %s"), token_buffer);
624
625 t = lex ();
626
627 switch (t)
1ff442ca
NF
628 {
629
511e79b3
AD
630 case tok_comma:
631 case tok_semicolon:
1ff442ca
NF
632 break;
633
511e79b3 634 case tok_identifier:
1ff442ca
NF
635 if (symval->type_name == NULL)
636 symval->type_name = name;
a70083a3 637 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 638 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
639
640 break;
641
642 default:
a0f6b076
AD
643 complain (_("invalid %%type declaration due to item: %s"),
644 token_buffer);
a70083a3 645 skip_to_char ('%');
1ff442ca
NF
646 }
647 }
648}
649
650
651
d7020c20
AD
652/*----------------------------------------------------------------.
653| Read in a %left, %right or %nonassoc declaration and record its |
654| information. |
655`----------------------------------------------------------------*/
1ff442ca 656
4a120d45 657static void
d7020c20 658parse_assoc_decl (associativity assoc)
1ff442ca 659{
a70083a3
AD
660 char *name = NULL;
661 int prev = 0;
1ff442ca 662
a70083a3 663 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 664
1ff442ca
NF
665 for (;;)
666 {
f17bcd1f 667 token_t t;
e6011337 668 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 669
e6011337 670 if (tmp_char == '%')
1ff442ca 671 return;
e6011337 672 if (tmp_char == EOF)
a0f6b076 673 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 674
a70083a3 675 t = lex ();
1ff442ca
NF
676
677 switch (t)
678 {
511e79b3 679 case tok_typename:
95e36146 680 name = xstrdup (token_buffer);
1ff442ca
NF
681 break;
682
511e79b3 683 case tok_comma:
1ff442ca
NF
684 break;
685
511e79b3 686 case tok_identifier:
1ff442ca 687 if (symval->prec != 0)
a0f6b076 688 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
689 symval->prec = lastprec;
690 symval->assoc = assoc;
d7020c20 691 if (symval->class == nterm_sym)
a0f6b076 692 complain (_("symbol %s redefined"), symval->tag);
d7020c20 693 symval->class = token_sym;
1ff442ca 694 if (name)
a70083a3 695 { /* record the type, if one is specified */
1ff442ca
NF
696 if (symval->type_name == NULL)
697 symval->type_name = name;
a70083a3 698 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 699 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
700 }
701 break;
702
511e79b3
AD
703 case tok_number:
704 if (prev == tok_identifier)
a70083a3 705 {
1ff442ca
NF
706 symval->user_token_number = numval;
707 translations = 1;
a70083a3
AD
708 }
709 else
710 {
711 complain (_
712 ("invalid text (%s) - number should be after identifier"),
713token_buffer);
714 skip_to_char ('%');
715 }
1ff442ca
NF
716 break;
717
511e79b3 718 case tok_semicolon:
1ff442ca
NF
719 return;
720
721 default:
a0f6b076 722 complain (_("unexpected item: %s"), token_buffer);
a70083a3 723 skip_to_char ('%');
1ff442ca
NF
724 }
725
726 prev = t;
727
728 }
729}
730
731
732
dd60faec
AD
733/*--------------------------------------------------------------.
734| Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
735| where it is made into the definition of YYSTYPE, the type of |
736| elements of the parser value stack. |
737`--------------------------------------------------------------*/
1ff442ca 738
4a120d45 739static void
118fb205 740parse_union_decl (void)
1ff442ca 741{
a70083a3
AD
742 int c;
743 int count = 0;
1ff442ca
NF
744
745 if (typed)
27821bff 746 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
747
748 typed = 1;
749
89cab50d 750 if (!no_lines_flag)
2a91a95e
AD
751 obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
752 lineno, quotearg_style (c_quoting_style, infile));
1ff442ca 753 else
dd60faec 754 obstack_1grow (&attrs_obstack, '\n');
1ff442ca 755
ff4423cc 756 obstack_sgrow (&attrs_obstack, "typedef union");
896fe5c1 757 if (defines_flag)
ff4423cc 758 obstack_sgrow (&defines_obstack, "typedef union");
1ff442ca 759
27821bff 760 c = getc (finput);
1ff442ca
NF
761
762 while (c != EOF)
763 {
dd60faec 764 obstack_1grow (&attrs_obstack, c);
896fe5c1 765 if (defines_flag)
d7045ec6 766 obstack_1grow (&defines_obstack, c);
1ff442ca
NF
767
768 switch (c)
769 {
770 case '\n':
771 lineno++;
772 break;
773
774 case '/':
337bab46 775 copy_comment2 (finput, &defines_obstack, &attrs_obstack);
1ff442ca
NF
776 break;
777
1ff442ca
NF
778 case '{':
779 count++;
780 break;
781
782 case '}':
783 if (count == 0)
27821bff 784 complain (_("unmatched %s"), "`}'");
1ff442ca 785 count--;
943819bf 786 if (count <= 0)
1ff442ca 787 {
ff4423cc 788 obstack_sgrow (&attrs_obstack, " YYSTYPE;\n");
896fe5c1 789 if (defines_flag)
ff4423cc 790 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
1ff442ca 791 /* JF don't choke on trailing semi */
27821bff
AD
792 c = skip_white_space ();
793 if (c != ';')
a70083a3 794 ungetc (c, finput);
1ff442ca
NF
795 return;
796 }
797 }
798
27821bff 799 c = getc (finput);
1ff442ca
NF
800 }
801}
802
d7020c20
AD
803
804/*-------------------------------------------------------.
805| Parse the declaration %expect N which says to expect N |
806| shift-reduce conflicts. |
807`-------------------------------------------------------*/
1ff442ca 808
4a120d45 809static void
118fb205 810parse_expect_decl (void)
1ff442ca 811{
131e2fef 812 int c = skip_white_space ();
1ff442ca
NF
813 ungetc (c, finput);
814
131e2fef 815 if (!isdigit (c))
79282c5a 816 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
817 else
818 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
819}
820
a70083a3
AD
821
822/*-------------------------------------------------------------------.
823| Parse what comes after %thong. the full syntax is |
824| |
825| %thong <type> token number literal |
826| |
827| the <type> or number may be omitted. The number specifies the |
828| user_token_number. |
829| |
830| Two symbols are entered in the table, one for the token symbol and |
831| one for the literal. Both are given the <type>, if any, from the |
832| declaration. The ->user_token_number of the first is SALIAS and |
833| the ->user_token_number of the second is set to the number, if |
834| any, from the declaration. The two symbols are linked via |
835| pointers in their ->alias fields. |
836| |
837| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
838| only the literal string is retained it is the literal string that |
839| is output to yytname |
840`-------------------------------------------------------------------*/
841
842static void
843parse_thong_decl (void)
7b306f52 844{
f17bcd1f 845 token_t token;
a70083a3
AD
846 struct bucket *symbol;
847 char *typename = 0;
95e36146 848 int usrtoknum;
7b306f52 849
a70083a3
AD
850 translations = 1;
851 token = lex (); /* fetch typename or first token */
511e79b3 852 if (token == tok_typename)
7b306f52 853 {
95e36146 854 typename = xstrdup (token_buffer);
a70083a3
AD
855 value_components_used = 1;
856 token = lex (); /* fetch first token */
7b306f52 857 }
7b306f52 858
a70083a3 859 /* process first token */
7b306f52 860
511e79b3 861 if (token != tok_identifier)
a70083a3
AD
862 {
863 complain (_("unrecognized item %s, expected an identifier"),
864 token_buffer);
865 skip_to_char ('%');
866 return;
7b306f52 867 }
d7020c20 868 symval->class = token_sym;
a70083a3
AD
869 symval->type_name = typename;
870 symval->user_token_number = SALIAS;
871 symbol = symval;
7b306f52 872
a70083a3 873 token = lex (); /* get number or literal string */
1ff442ca 874
511e79b3 875 if (token == tok_number)
943819bf 876 {
a70083a3
AD
877 usrtoknum = numval;
878 token = lex (); /* okay, did number, now get literal */
943819bf 879 }
a70083a3
AD
880 else
881 usrtoknum = 0;
1ff442ca 882
a70083a3 883 /* process literal string token */
1ff442ca 884
511e79b3 885 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 886 {
a70083a3
AD
887 complain (_("expected string constant instead of %s"), token_buffer);
888 skip_to_char ('%');
889 return;
1ff442ca 890 }
d7020c20 891 symval->class = token_sym;
a70083a3
AD
892 symval->type_name = typename;
893 symval->user_token_number = usrtoknum;
1ff442ca 894
a70083a3
AD
895 symval->alias = symbol;
896 symbol->alias = symval;
1ff442ca 897
79282c5a
AD
898 /* symbol and symval combined are only one symbol. */
899 nsyms--;
a70083a3 900}
3cef001a 901
b6610515
RA
902/* FIXME. */
903
904static void
905parse_macro_decl (void)
906{
907 int ch = ungetc (skip_white_space (), finput);
908 char* macro_key;
909 char* macro_value;
b6610515
RA
910
911 /* Read key. */
912 if (!isalpha (ch) && ch != '_')
913 {
914 complain (_("invalid %s declaration"), "%define");
915 skip_to_char ('%');
916 return;
917 }
918 copy_identifier (finput, &macro_obstack);
82e236e2 919 obstack_1grow (&macro_obstack, 0);
b6610515
RA
920 macro_key = obstack_finish (&macro_obstack);
921
922 /* Read value. */
923 ch = skip_white_space ();
924 if (ch != '"')
925 {
926 ungetc (ch, finput);
927 if (ch != EOF)
928 {
929 complain (_("invalid %s declaration"), "%define");
930 skip_to_char ('%');
931 return;
932 }
933 else
934 fatal (_("Premature EOF after %s"), "\"");
935 }
82e236e2
RA
936 copy_string2 (finput, &macro_obstack, '"', 0);
937 obstack_1grow (&macro_obstack, 0);
b6610515
RA
938 macro_value = obstack_finish (&macro_obstack);
939
b6610515
RA
940 /* Store the (key, value) pair in the environment. */
941 macro_insert (macro_key, macro_value);
942}
943
2ba3b73c
MA
944
/* Parse what comes after %skeleton.  Currently a stub that reads
   nothing and does nothing.  */

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
  return;
}
954
7333d403
AD
955/*------------------------------------------.
956| Parse what comes after %header_extension. |
957`------------------------------------------*/
958
959static void
960parse_header_extension_decl (void)
961{
962 char buff[32];
3e3da797 963
7333d403
AD
964 if (header_extension)
965 complain (_("multiple %%header_extension declarations"));
966 fscanf (finput, "%s", buff);
3e3da797 967 header_extension = xstrdup (buff);
7333d403
AD
968}
969
970/*------------------------------------------.
971| Parse what comes after %source_extension. |
972`------------------------------------------*/
973
974static void
975parse_source_extension_decl (void)
976{
977 char buff[32];
3e3da797 978
7333d403
AD
979 if (src_extension)
980 complain (_("multiple %%source_extension declarations"));
981 fscanf (finput, "%s", buff);
3e3da797 982 src_extension = xstrdup (buff);
7333d403 983}
d7020c20 984
a70083a3
AD
985/*----------------------------------------------------------------.
986| Read from finput until `%%' is seen. Discard the `%%'. Handle |
987| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 988| groups to ATTRS_OBSTACK. |
a70083a3 989`----------------------------------------------------------------*/
1ff442ca 990
4a120d45 991static void
a70083a3 992read_declarations (void)
1ff442ca 993{
a70083a3
AD
994 int c;
995 int tok;
1ff442ca 996
a70083a3 997 for (;;)
1ff442ca 998 {
a70083a3 999 c = skip_white_space ();
1ff442ca 1000
a70083a3
AD
1001 if (c == '%')
1002 {
1003 tok = parse_percent_token ();
1ff442ca 1004
a70083a3 1005 switch (tok)
943819bf 1006 {
511e79b3 1007 case tok_two_percents:
a70083a3 1008 return;
1ff442ca 1009
511e79b3 1010 case tok_percent_left_curly:
a70083a3
AD
1011 copy_definition ();
1012 break;
1ff442ca 1013
511e79b3 1014 case tok_token:
d7020c20 1015 parse_token_decl (token_sym, nterm_sym);
a70083a3 1016 break;
1ff442ca 1017
511e79b3 1018 case tok_nterm:
d7020c20 1019 parse_token_decl (nterm_sym, token_sym);
a70083a3 1020 break;
1ff442ca 1021
511e79b3 1022 case tok_type:
a70083a3
AD
1023 parse_type_decl ();
1024 break;
1ff442ca 1025
511e79b3 1026 case tok_start:
a70083a3
AD
1027 parse_start_decl ();
1028 break;
118fb205 1029
511e79b3 1030 case tok_union:
a70083a3
AD
1031 parse_union_decl ();
1032 break;
1ff442ca 1033
511e79b3 1034 case tok_expect:
a70083a3
AD
1035 parse_expect_decl ();
1036 break;
6deb4447 1037
511e79b3 1038 case tok_thong:
a70083a3
AD
1039 parse_thong_decl ();
1040 break;
d7020c20 1041
511e79b3 1042 case tok_left:
d7020c20 1043 parse_assoc_decl (left_assoc);
a70083a3 1044 break;
1ff442ca 1045
511e79b3 1046 case tok_right:
d7020c20 1047 parse_assoc_decl (right_assoc);
a70083a3 1048 break;
1ff442ca 1049
511e79b3 1050 case tok_nonassoc:
d7020c20 1051 parse_assoc_decl (non_assoc);
a70083a3 1052 break;
1ff442ca 1053
7333d403 1054 case tok_hdrext:
09a6de7e 1055 parse_header_extension_decl ();
7333d403
AD
1056 break;
1057
1058 case tok_srcext:
09a6de7e 1059 parse_source_extension_decl ();
7333d403
AD
1060 break;
1061
b6610515
RA
1062 case tok_define:
1063 parse_macro_decl ();
1064 break;
2ba3b73c
MA
1065
1066 case tok_skel:
1067 parse_skel_decl ();
1068 break;
b6610515 1069
511e79b3 1070 case tok_noop:
a70083a3 1071 break;
1ff442ca 1072
a70083a3
AD
1073 default:
1074 complain (_("unrecognized: %s"), token_buffer);
1075 skip_to_char ('%');
1076 }
1077 }
1078 else if (c == EOF)
1079 fatal (_("no input grammar"));
1080 else
1081 {
ff4a34be
AD
1082 char buf[] = "c";
1083 buf[0] = c;
1084 complain (_("unknown character: %s"), quote (buf));
a70083a3 1085 skip_to_char ('%');
1ff442ca 1086 }
1ff442ca 1087 }
1ff442ca 1088}
a70083a3
AD
1089\f
1090/*-------------------------------------------------------------------.
1091| Assuming that a `{' has just been seen, copy everything up to the |
1092| matching `}' into the actions file. STACK_OFFSET is the number of |
1093| values in the current rule so far, which says where to find `$0' |
1094| with respect to the top of the stack. |
1095`-------------------------------------------------------------------*/
1ff442ca 1096
4a120d45 1097static void
79282c5a 1098copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1099{
a70083a3 1100 int c;
a70083a3 1101 int count;
8c7ebe49 1102 char buf[4096];
1ff442ca
NF
1103
1104 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1105 if (semantic_parser)
1106 stack_offset = 0;
1ff442ca 1107
8c7ebe49
AD
1108 sprintf (buf, "\ncase %d:\n", nrules);
1109 obstack_grow (&action_obstack, buf, strlen (buf));
1110
89cab50d 1111 if (!no_lines_flag)
8c7ebe49 1112 {
2a91a95e
AD
1113 sprintf (buf, "#line %d %s\n",
1114 lineno, quotearg_style (c_quoting_style, infile));
8c7ebe49
AD
1115 obstack_grow (&action_obstack, buf, strlen (buf));
1116 }
1117 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1118
1119 count = 1;
a70083a3 1120 c = getc (finput);
1ff442ca
NF
1121
1122 while (count > 0)
1123 {
1124 while (c != '}')
a70083a3
AD
1125 {
1126 switch (c)
1ff442ca
NF
1127 {
1128 case '\n':
8c7ebe49 1129 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1130 lineno++;
1131 break;
1132
1133 case '{':
8c7ebe49 1134 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1135 count++;
1136 break;
1137
1138 case '\'':
1139 case '"':
337bab46 1140 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1141 break;
1142
1143 case '/':
337bab46 1144 copy_comment (finput, &action_obstack);
1ff442ca
NF
1145 break;
1146
1147 case '$':
337bab46 1148 copy_dollar (finput, &action_obstack,
8c7ebe49 1149 rule, stack_offset);
1ff442ca
NF
1150 break;
1151
1152 case '@':
337bab46 1153 copy_at (finput, &action_obstack,
8c7ebe49 1154 stack_offset);
6666f98f 1155 break;
1ff442ca
NF
1156
1157 case EOF:
27821bff 1158 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1159
1160 default:
8c7ebe49 1161 obstack_1grow (&action_obstack, c);
a70083a3
AD
1162 }
1163
1164 c = getc (finput);
1165 }
1166
1167 /* above loop exits when c is '}' */
1168
1169 if (--count)
1170 {
8c7ebe49 1171 obstack_1grow (&action_obstack, c);
a70083a3
AD
1172 c = getc (finput);
1173 }
1174 }
1175
ff4423cc 1176 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1177}
1178\f
1179/*-------------------------------------------------------------------.
1180| After `%guard' is seen in the input file, copy the actual guard |
1181| into the guards file. If the guard is followed by an action, copy |
1182| that into the actions file. STACK_OFFSET is the number of values |
1183| in the current rule so far, which says where to find `$0' with |
1184| respect to the top of the stack, for the simple parser in which |
1185| the stack is not popped until after the guard is run. |
1186`-------------------------------------------------------------------*/
1187
1188static void
79282c5a 1189copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1190{
1191 int c;
a70083a3 1192 int count;
a70083a3
AD
1193 int brace_flag = 0;
1194
1195 /* offset is always 0 if parser has already popped the stack pointer */
1196 if (semantic_parser)
1197 stack_offset = 0;
1198
ea5607fd 1199 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1200 if (!no_lines_flag)
ea5607fd
AD
1201 obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
1202 lineno, quotearg_style (c_quoting_style, infile));
1203 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1204
1205 count = 0;
1206 c = getc (finput);
1207
1208 while (brace_flag ? (count > 0) : (c != ';'))
1209 {
1210 switch (c)
1211 {
1212 case '\n':
ea5607fd 1213 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1214 lineno++;
1215 break;
1216
1217 case '{':
ea5607fd 1218 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1219 brace_flag = 1;
1220 count++;
1221 break;
1222
1223 case '}':
ea5607fd 1224 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1225 if (count > 0)
1226 count--;
1227 else
1228 {
1229 complain (_("unmatched %s"), "`}'");
1230 c = getc (finput); /* skip it */
1231 }
1232 break;
1233
1234 case '\'':
1235 case '"':
337bab46 1236 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1237 break;
1238
1239 case '/':
337bab46 1240 copy_comment (finput, &guard_obstack);
a70083a3
AD
1241 break;
1242
1243 case '$':
337bab46 1244 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1245 break;
1ff442ca 1246
a70083a3 1247 case '@':
337bab46 1248 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1249 break;
1ff442ca 1250
a70083a3
AD
1251 case EOF:
1252 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1253
a70083a3 1254 default:
ea5607fd 1255 obstack_1grow (&guard_obstack, c);
1ff442ca 1256 }
a70083a3
AD
1257
1258 if (c != '}' || count != 0)
1259 c = getc (finput);
1ff442ca
NF
1260 }
1261
a70083a3
AD
1262 c = skip_white_space ();
1263
ff4423cc 1264 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1265 if (c == '{')
1266 copy_action (rule, stack_offset);
1267 else if (c == '=')
1268 {
1269 c = getc (finput); /* why not skip_white_space -wjh */
1270 if (c == '{')
1271 copy_action (rule, stack_offset);
1272 }
1273 else
1274 ungetc (c, finput);
1ff442ca 1275}
a70083a3
AD
1276\f
1277
1278static void
1279record_rule_line (void)
1280{
1281 /* Record each rule's source line number in rline table. */
1ff442ca 1282
a70083a3
AD
1283 if (nrules >= rline_allocated)
1284 {
1285 rline_allocated = nrules * 2;
d7913476 1286 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1287 }
1288 rline[nrules] = lineno;
1289}
1ff442ca
NF
1290
1291
a70083a3
AD
1292/*-------------------------------------------------------------------.
1293| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1294| with the user's names. |
1295`-------------------------------------------------------------------*/
1ff442ca 1296
4a120d45 1297static bucket *
118fb205 1298gensym (void)
1ff442ca 1299{
274d42ce
AD
1300 /* Incremented for each generated symbol */
1301 static int gensym_count = 0;
1302 static char buf[256];
1303
a70083a3 1304 bucket *sym;
1ff442ca 1305
274d42ce
AD
1306 sprintf (buf, "@%d", ++gensym_count);
1307 token_buffer = buf;
a70083a3 1308 sym = getsym (token_buffer);
d7020c20 1309 sym->class = nterm_sym;
1ff442ca 1310 sym->value = nvars++;
36281465 1311 return sym;
1ff442ca
NF
1312}
1313
a70083a3
AD
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  This is unused.  It is only called from
   the #if 0 part of readgram.

   The first token must be a type name.  Every identifier that follows
   (commas are skipped) gets that type name, with a complaint if it
   already has a different one.

   Returns the token at which reading stopped: the token after a `;',
   or the first token that is not a comma, identifier or semicolon.
   If the declaration does not start with a type name, the offending
   token itself is returned.  */

static int
get_type (void)
{
  int k;
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the unexpected token back to the caller.  (This used to
         return `t', which is not declared in this function.)  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1365\f
1366/*------------------------------------------------------------------.
1367| Parse the input grammar into a one symbol_list structure. Each |
1368| rule is represented by a sequence of symbols: the left hand side |
1369| followed by the contents of the right hand side, followed by a |
1370| null pointer instead of a symbol to terminate the rule. The next |
1371| symbol is the lhs of the following rule. |
1372| |
1373| All guards and actions are copied out to the appropriate files, |
1374| labelled by the rule number they apply to. |
1375`------------------------------------------------------------------*/
1ff442ca 1376
4a120d45 1377static void
118fb205 1378readgram (void)
1ff442ca 1379{
f17bcd1f 1380 token_t t;
a70083a3
AD
1381 bucket *lhs = NULL;
1382 symbol_list *p;
1383 symbol_list *p1;
1384 bucket *bp;
1ff442ca 1385
ff4a34be
AD
1386 /* Points to first symbol_list of current rule. its symbol is the
1387 lhs of the rule. */
1388 symbol_list *crule;
1389 /* Points to the symbol_list preceding crule. */
1390 symbol_list *crule1;
1ff442ca
NF
1391
1392 p1 = NULL;
1393
a70083a3 1394 t = lex ();
1ff442ca 1395
511e79b3 1396 while (t != tok_two_percents && t != tok_eof)
1ff442ca 1397 {
511e79b3 1398 if (t == tok_identifier || t == tok_bar)
1ff442ca 1399 {
89cab50d 1400 int action_flag = 0;
ff4a34be
AD
1401 /* Number of symbols in rhs of this rule so far */
1402 int rulelength = 0;
1ff442ca
NF
1403 int xactions = 0; /* JF for error checking */
1404 bucket *first_rhs = 0;
1405
511e79b3 1406 if (t == tok_identifier)
1ff442ca
NF
1407 {
1408 lhs = symval;
943819bf
RS
1409
1410 if (!start_flag)
1411 {
1412 startval = lhs;
1413 start_flag = 1;
1414 }
a083fbbf 1415
a70083a3 1416 t = lex ();
511e79b3 1417 if (t != tok_colon)
943819bf 1418 {
a0f6b076 1419 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1420 unlex (t);
943819bf 1421 }
1ff442ca
NF
1422 }
1423
511e79b3 1424 if (nrules == 0 && t == tok_bar)
1ff442ca 1425 {
a0f6b076 1426 complain (_("grammar starts with vertical bar"));
943819bf 1427 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1428 }
1ff442ca
NF
1429 /* start a new rule and record its lhs. */
1430
1431 nrules++;
1432 nitems++;
1433
1434 record_rule_line ();
1435
d7913476 1436 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1437 p->sym = lhs;
1438
1439 crule1 = p1;
1440 if (p1)
1441 p1->next = p;
1442 else
1443 grammar = p;
1444
1445 p1 = p;
1446 crule = p;
1447
1448 /* mark the rule's lhs as a nonterminal if not already so. */
1449
d7020c20 1450 if (lhs->class == unknown_sym)
1ff442ca 1451 {
d7020c20 1452 lhs->class = nterm_sym;
1ff442ca
NF
1453 lhs->value = nvars;
1454 nvars++;
1455 }
d7020c20 1456 else if (lhs->class == token_sym)
a0f6b076 1457 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1458
1459 /* read the rhs of the rule. */
1460
1461 for (;;)
1462 {
a70083a3 1463 t = lex ();
511e79b3 1464 if (t == tok_prec)
943819bf 1465 {
a70083a3 1466 t = lex ();
943819bf 1467 crule->ruleprec = symval;
a70083a3 1468 t = lex ();
943819bf 1469 }
1ff442ca 1470
511e79b3 1471 if (!(t == tok_identifier || t == tok_left_curly))
a70083a3 1472 break;
1ff442ca
NF
1473
1474 /* If next token is an identifier, see if a colon follows it.
a70083a3 1475 If one does, exit this rule now. */
511e79b3 1476 if (t == tok_identifier)
1ff442ca 1477 {
a70083a3 1478 bucket *ssave;
f17bcd1f 1479 token_t t1;
1ff442ca
NF
1480
1481 ssave = symval;
a70083a3
AD
1482 t1 = lex ();
1483 unlex (t1);
1ff442ca 1484 symval = ssave;
511e79b3 1485 if (t1 == tok_colon)
a70083a3 1486 break;
1ff442ca 1487
a70083a3 1488 if (!first_rhs) /* JF */
1ff442ca
NF
1489 first_rhs = symval;
1490 /* Not followed by colon =>
1491 process as part of this rule's rhs. */
1492 }
1493
1494 /* If we just passed an action, that action was in the middle
a70083a3
AD
1495 of a rule, so make a dummy rule to reduce it to a
1496 non-terminal. */
89cab50d 1497 if (action_flag)
1ff442ca 1498 {
a70083a3 1499 bucket *sdummy;
1ff442ca 1500
f282676b
AD
1501 /* Since the action was written out with this rule's
1502 number, we must give the new rule this number by
1503 inserting the new rule before it. */
1ff442ca
NF
1504
1505 /* Make a dummy nonterminal, a gensym. */
a70083a3 1506 sdummy = gensym ();
1ff442ca
NF
1507
1508 /* Make a new rule, whose body is empty,
1509 before the current one, so that the action
1510 just read can belong to it. */
1511 nrules++;
1512 nitems++;
1513 record_rule_line ();
d7913476 1514 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1515 if (crule1)
1516 crule1->next = p;
a70083a3
AD
1517 else
1518 grammar = p;
1ff442ca 1519 p->sym = sdummy;
d7913476 1520 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1521 p->next = crule1;
1522 crule1->next = crule;
1523
f282676b
AD
1524 /* Insert the dummy generated by that rule into this
1525 rule. */
1ff442ca 1526 nitems++;
d7913476 1527 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1528 p->sym = sdummy;
1529 p1->next = p;
1530 p1 = p;
1531
89cab50d 1532 action_flag = 0;
1ff442ca
NF
1533 }
1534
511e79b3 1535 if (t == tok_identifier)
1ff442ca
NF
1536 {
1537 nitems++;
d7913476 1538 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1539 p->sym = symval;
1540 p1->next = p;
1541 p1 = p;
1542 }
a70083a3 1543 else /* handle an action. */
1ff442ca 1544 {
a70083a3 1545 copy_action (crule, rulelength);
89cab50d 1546 action_flag = 1;
1ff442ca
NF
1547 xactions++; /* JF */
1548 }
1549 rulelength++;
a70083a3 1550 } /* end of read rhs of rule */
1ff442ca
NF
1551
1552 /* Put an empty link in the list to mark the end of this rule */
d7913476 1553 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1554 p1->next = p;
1555 p1 = p;
1556
511e79b3 1557 if (t == tok_prec)
1ff442ca 1558 {
a0f6b076 1559 complain (_("two @prec's in a row"));
a70083a3 1560 t = lex ();
1ff442ca 1561 crule->ruleprec = symval;
a70083a3 1562 t = lex ();
1ff442ca 1563 }
511e79b3 1564 if (t == tok_guard)
1ff442ca 1565 {
a70083a3 1566 if (!semantic_parser)
ff4a34be 1567 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1568
a70083a3
AD
1569 copy_guard (crule, rulelength);
1570 t = lex ();
1ff442ca 1571 }
511e79b3 1572 else if (t == tok_left_curly)
1ff442ca 1573 {
a70083a3 1574 /* This case never occurs -wjh */
89cab50d 1575 if (action_flag)
a0f6b076 1576 complain (_("two actions at end of one rule"));
a70083a3 1577 copy_action (crule, rulelength);
89cab50d 1578 action_flag = 1;
943819bf 1579 xactions++; /* -wjh */
a70083a3 1580 t = lex ();
1ff442ca 1581 }
a0f6b076 1582 /* If $$ is being set in default way, report if any type
6666f98f
AD
1583 mismatch. */
1584 else if (!xactions
a70083a3 1585 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1586 {
6666f98f
AD
1587 if (lhs->type_name == 0
1588 || first_rhs->type_name == 0
a70083a3 1589 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1590 complain (_("type clash (`%s' `%s') on default action"),
1591 lhs->type_name ? lhs->type_name : "",
a70083a3 1592 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1593 }
1594 /* Warn if there is no default for $$ but we need one. */
1595 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1596 complain (_("empty rule for typed nonterminal, and no action"));
511e79b3 1597 if (t == tok_semicolon)
a70083a3 1598 t = lex ();
a083fbbf 1599 }
943819bf 1600#if 0
a70083a3 1601 /* these things can appear as alternatives to rules. */
943819bf
RS
1602/* NO, they cannot.
1603 a) none of the documentation allows them
1604 b) most of them scan forward until finding a next %
1605 thus they may swallow lots of intervening rules
1606*/
511e79b3 1607 else if (t == tok_token)
1ff442ca 1608 {
d7020c20 1609 parse_token_decl (token_sym, nterm_sym);
a70083a3 1610 t = lex ();
1ff442ca 1611 }
511e79b3 1612 else if (t == tok_nterm)
1ff442ca 1613 {
d7020c20 1614 parse_token_decl (nterm_sym, token_sym);
a70083a3 1615 t = lex ();
1ff442ca 1616 }
511e79b3 1617 else if (t == tok_type)
1ff442ca 1618 {
a70083a3 1619 t = get_type ();
1ff442ca 1620 }
511e79b3 1621 else if (t == tok_union)
1ff442ca 1622 {
a70083a3
AD
1623 parse_union_decl ();
1624 t = lex ();
1ff442ca 1625 }
511e79b3 1626 else if (t == tok_expect)
1ff442ca 1627 {
a70083a3
AD
1628 parse_expect_decl ();
1629 t = lex ();
1ff442ca 1630 }
511e79b3 1631 else if (t == tok_start)
1ff442ca 1632 {
a70083a3
AD
1633 parse_start_decl ();
1634 t = lex ();
1ff442ca 1635 }
943819bf
RS
1636#endif
1637
1ff442ca 1638 else
943819bf 1639 {
d01c415b 1640 complain (_("invalid input: %s"), quote (token_buffer));
a70083a3 1641 t = lex ();
943819bf 1642 }
1ff442ca
NF
1643 }
1644
943819bf
RS
1645 /* grammar has been read. Do some checking */
1646
1ff442ca 1647 if (nsyms > MAXSHORT)
a0f6b076
AD
1648 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1649 MAXSHORT);
1ff442ca 1650 if (nrules == 0)
a0f6b076 1651 fatal (_("no rules in the input grammar"));
1ff442ca 1652
1ff442ca
NF
1653 /* Report any undefined symbols and consider them nonterminals. */
1654
1655 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1656 if (bp->class == unknown_sym)
1ff442ca 1657 {
a70083a3
AD
1658 complain (_
1659 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1660 bp->tag);
d7020c20 1661 bp->class = nterm_sym;
1ff442ca
NF
1662 bp->value = nvars++;
1663 }
1664
1665 ntokens = nsyms - nvars;
1666}
ff48177d
MA
1667
1668/* At the end of the grammar file, some C source code must
63c2d5de 1669 be stored. It is going to be associated to the epilogue
ff48177d
MA
1670 directive. */
1671static void
1672read_additionnal_code (void)
1673{
1674 char c;
63c2d5de 1675 struct obstack el_obstack;
ff48177d 1676
63c2d5de 1677 obstack_init (&el_obstack);
ff48177d
MA
1678
1679 while ((c = getc (finput)) != EOF)
63c2d5de 1680 obstack_1grow (&el_obstack, c);
ff48177d 1681
63c2d5de
MA
1682 obstack_1grow (&el_obstack, 0);
1683 macro_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1684}
1685
a70083a3
AD
1686\f
1687/*--------------------------------------------------------------.
1688| For named tokens, but not literal ones, define the name. The |
1689| value is the user token number. |
1690`--------------------------------------------------------------*/
1ff442ca 1691
4a120d45 1692static void
896fe5c1 1693output_token_defines (struct obstack *oout)
1ff442ca 1694{
a70083a3
AD
1695 bucket *bp;
1696 char *cp, *symbol;
1697 char c;
1ff442ca 1698
a70083a3 1699 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1700 {
a70083a3
AD
1701 symbol = bp->tag; /* get symbol */
1702
1703 if (bp->value >= ntokens)
1704 continue;
1705 if (bp->user_token_number == SALIAS)
1706 continue;
1707 if ('\'' == *symbol)
1708 continue; /* skip literal character */
1709 if (bp == errtoken)
1710 continue; /* skip error token */
1711 if ('\"' == *symbol)
1ff442ca 1712 {
a70083a3
AD
1713 /* use literal string only if given a symbol with an alias */
1714 if (bp->alias)
1715 symbol = bp->alias->tag;
1716 else
1717 continue;
1718 }
1ff442ca 1719
a70083a3
AD
1720 /* Don't #define nonliteral tokens whose names contain periods. */
1721 cp = symbol;
1722 while ((c = *cp++) && c != '.');
1723 if (c != '\0')
1724 continue;
1ff442ca 1725
0b8afb77 1726 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
896fe5c1 1727 symbol,
62ab6972 1728 (translations ? bp->user_token_number : bp->value));
a70083a3 1729 if (semantic_parser)
0b8afb77 1730 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1731 }
1732}
1ff442ca
NF
1733
1734
a70083a3
AD
1735/*------------------------------------------------------------------.
1736| Assign symbol numbers, and write definition of token names into |
b2ca4022 1737| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1738| of symbols. |
1739`------------------------------------------------------------------*/
1ff442ca 1740
4a120d45 1741static void
118fb205 1742packsymbols (void)
1ff442ca 1743{
a70083a3
AD
1744 bucket *bp;
1745 int tokno = 1;
1746 int i;
1747 int last_user_token_number;
4a120d45 1748 static char DOLLAR[] = "$";
1ff442ca 1749
d7913476 1750 tags = XCALLOC (char *, nsyms + 1);
4a120d45 1751 tags[0] = DOLLAR;
d7913476 1752 user_toknums = XCALLOC (short, nsyms + 1);
943819bf 1753 user_toknums[0] = 0;
1ff442ca 1754
d7913476
AD
1755 sprec = XCALLOC (short, nsyms);
1756 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1757
1758 max_user_token_number = 256;
1759 last_user_token_number = 256;
1760
1761 for (bp = firstsymbol; bp; bp = bp->next)
1762 {
d7020c20 1763 if (bp->class == nterm_sym)
1ff442ca
NF
1764 {
1765 bp->value += ntokens;
1766 }
943819bf
RS
1767 else if (bp->alias)
1768 {
0a6384c4
AD
1769 /* this symbol and its alias are a single token defn.
1770 allocate a tokno, and assign to both check agreement of
1771 ->prec and ->assoc fields and make both the same */
1772 if (bp->value == 0)
1773 bp->value = bp->alias->value = tokno++;
943819bf 1774
0a6384c4
AD
1775 if (bp->prec != bp->alias->prec)
1776 {
1777 if (bp->prec != 0 && bp->alias->prec != 0
1778 && bp->user_token_number == SALIAS)
a0f6b076
AD
1779 complain (_("conflicting precedences for %s and %s"),
1780 bp->tag, bp->alias->tag);
0a6384c4
AD
1781 if (bp->prec != 0)
1782 bp->alias->prec = bp->prec;
1783 else
1784 bp->prec = bp->alias->prec;
1785 }
943819bf 1786
0a6384c4
AD
1787 if (bp->assoc != bp->alias->assoc)
1788 {
a0f6b076
AD
1789 if (bp->assoc != 0 && bp->alias->assoc != 0
1790 && bp->user_token_number == SALIAS)
1791 complain (_("conflicting assoc values for %s and %s"),
1792 bp->tag, bp->alias->tag);
1793 if (bp->assoc != 0)
1794 bp->alias->assoc = bp->assoc;
1795 else
1796 bp->assoc = bp->alias->assoc;
1797 }
0a6384c4
AD
1798
1799 if (bp->user_token_number == SALIAS)
a70083a3 1800 continue; /* do not do processing below for SALIASs */
943819bf 1801
a70083a3 1802 }
d7020c20 1803 else /* bp->class == token_sym */
943819bf
RS
1804 {
1805 bp->value = tokno++;
1806 }
1807
d7020c20 1808 if (bp->class == token_sym)
1ff442ca
NF
1809 {
1810 if (translations && !(bp->user_token_number))
1811 bp->user_token_number = ++last_user_token_number;
1812 if (bp->user_token_number > max_user_token_number)
1813 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1814 }
1815
1816 tags[bp->value] = bp->tag;
943819bf 1817 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1818 sprec[bp->value] = bp->prec;
1819 sassoc[bp->value] = bp->assoc;
1820
1821 }
1822
1823 if (translations)
1824 {
a70083a3 1825 int j;
1ff442ca 1826
d7913476 1827 token_translations = XCALLOC (short, max_user_token_number + 1);
1ff442ca 1828
0a6384c4 1829 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1830 token number for $undefined., which represents all invalid
1831 inputs. */
4a120d45 1832 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1833 token_translations[j] = 2;
1ff442ca 1834
943819bf 1835 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1836 {
1837 if (bp->value >= ntokens)
1838 continue; /* non-terminal */
1839 if (bp->user_token_number == SALIAS)
0a6384c4 1840 continue;
a70083a3 1841 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1842 complain (_("tokens %s and %s both assigned number %d"),
1843 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1844 bp->tag, bp->user_token_number);
1845 token_translations[bp->user_token_number] = bp->value;
1846 }
1ff442ca
NF
1847 }
1848
1849 error_token_number = errtoken->value;
1850
b6610515
RA
1851 output_token_defines (&output_obstack);
1852 obstack_1grow (&output_obstack, 0);
1853 macro_insert ("tokendef", obstack_finish (&output_obstack));
1854
d8cb5183
MA
1855#if 0
1856 if (!no_parser_flag)
1857 output_token_defines (&table_obstack);
1858#endif
1ff442ca 1859
d7020c20 1860 if (startval->class == unknown_sym)
a0f6b076 1861 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1862 else if (startval->class == token_sym)
a0f6b076 1863 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1864
1865 start_symbol = startval->value;
1866
89cab50d 1867 if (defines_flag)
1ff442ca 1868 {
896fe5c1 1869 output_token_defines (&defines_obstack);
1ff442ca
NF
1870
1871 if (!pure_parser)
1872 {
1873 if (spec_name_prefix)
896fe5c1
AD
1874 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1875 spec_name_prefix);
1ff442ca 1876 else
ff4423cc 1877 obstack_sgrow (&defines_obstack,
573c1d9f 1878 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1879 }
1880
1881 if (semantic_parser)
1882 for (i = ntokens; i < nsyms; i++)
1883 {
1884 /* don't make these for dummy nonterminals made by gensym. */
1885 if (*tags[i] != '@')
896fe5c1 1886 obstack_fgrow2 (&defines_obstack,
0b8afb77 1887 "# define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1888 }
1889#if 0
1890 /* `fdefines' is now a temporary file, so we need to copy its
1891 contents in `done', so we can't close it here. */
a70083a3 1892 fclose (fdefines);
1ff442ca
NF
1893 fdefines = NULL;
1894#endif
1895 }
1896}
a083fbbf 1897
1ff442ca 1898
a70083a3
AD
1899/*---------------------------------------------------------------.
1900| Convert the rules into the representation using RRHS, RLHS and |
1901| RITEMS. |
1902`---------------------------------------------------------------*/
1ff442ca 1903
4a120d45 1904static void
118fb205 1905packgram (void)
1ff442ca 1906{
a70083a3
AD
1907 int itemno;
1908 int ruleno;
1909 symbol_list *p;
1ff442ca
NF
1910
1911 bucket *ruleprec;
1912
d7913476
AD
1913 ritem = XCALLOC (short, nitems + 1);
1914 rlhs = XCALLOC (short, nrules) - 1;
1915 rrhs = XCALLOC (short, nrules) - 1;
1916 rprec = XCALLOC (short, nrules) - 1;
1917 rprecsym = XCALLOC (short, nrules) - 1;
1918 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1919
1920 itemno = 0;
1921 ruleno = 1;
1922
1923 p = grammar;
1924 while (p)
1925 {
1926 rlhs[ruleno] = p->sym->value;
1927 rrhs[ruleno] = itemno;
1928 ruleprec = p->ruleprec;
1929
1930 p = p->next;
1931 while (p && p->sym)
1932 {
1933 ritem[itemno++] = p->sym->value;
1934 /* A rule gets by default the precedence and associativity
1935 of the last token in it. */
d7020c20 1936 if (p->sym->class == token_sym)
1ff442ca
NF
1937 {
1938 rprec[ruleno] = p->sym->prec;
1939 rassoc[ruleno] = p->sym->assoc;
1940 }
a70083a3
AD
1941 if (p)
1942 p = p->next;
1ff442ca
NF
1943 }
1944
1945 /* If this rule has a %prec,
a70083a3 1946 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1947 if (ruleprec)
1948 {
a70083a3
AD
1949 rprec[ruleno] = ruleprec->prec;
1950 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1951 rprecsym[ruleno] = ruleprec->value;
1952 }
1953
1954 ritem[itemno++] = -ruleno;
1955 ruleno++;
1956
a70083a3
AD
1957 if (p)
1958 p = p->next;
1ff442ca
NF
1959 }
1960
1961 ritem[itemno] = 0;
1962}
a70083a3
AD
1963\f
1964/*-------------------------------------------------------------------.
1965| Read in the grammar specification and record it in the format |
ea5607fd 1966| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1967| and all actions into ACTION_OBSTACK, in each case forming the body |
1968| of a C function (YYGUARD or YYACTION) which contains a switch |
1969| statement to decide which guard or action to execute. |
a70083a3
AD
1970`-------------------------------------------------------------------*/
1971
1972void
1973reader (void)
1974{
1975 start_flag = 0;
1976 startval = NULL; /* start symbol not specified yet. */
1977
1978#if 0
1979 /* initially assume token number translation not needed. */
1980 translations = 0;
1981#endif
1982 /* Nowadays translations is always set to 1, since we give `error' a
1983 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1984 */
1985 translations = 1;
1986
1987 nsyms = 1;
1988 nvars = 0;
1989 nrules = 0;
1990 nitems = 0;
1991 rline_allocated = 10;
d7913476 1992 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1993
1994 typed = 0;
1995 lastprec = 0;
1996
a70083a3
AD
1997 semantic_parser = 0;
1998 pure_parser = 0;
a70083a3
AD
1999
2000 grammar = NULL;
2001
2002 init_lex ();
2003 lineno = 1;
2004
82e236e2
RA
2005 /* Initialize the macro obstack. */
2006 obstack_init (&macro_obstack);
2007
a70083a3
AD
2008 /* Initialize the symbol table. */
2009 tabinit ();
b6610515 2010
a70083a3
AD
2011 /* Construct the error token */
2012 errtoken = getsym ("error");
d7020c20 2013 errtoken->class = token_sym;
a70083a3 2014 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 2015
a70083a3
AD
2016 /* Construct a token that represents all undefined literal tokens.
2017 It is always token number 2. */
2018 undeftoken = getsym ("$undefined.");
d7020c20 2019 undeftoken->class = token_sym;
a70083a3
AD
2020 undeftoken->user_token_number = 2;
2021
896fe5c1
AD
2022 /* Read the declaration section. Copy %{ ... %} groups to
2023 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2024 etc. found there. */
a70083a3 2025 read_declarations ();
a70083a3
AD
2026 /* Read in the grammar, build grammar in list form. Write out
2027 guards and actions. */
2028 readgram ();
ff48177d
MA
2029 /* Some C code is given at the end of the grammar file. */
2030 read_additionnal_code ();
a70083a3
AD
2031 /* Now we know whether we need the line-number stack. If we do,
2032 write its type into the .tab.h file. */
b33160bf
MA
2033#if 0
2034 if (defines_flag)
2035 reader_output_yylsp (&defines_obstack);
b33160bf 2036#endif
a70083a3
AD
2037 /* Assign the symbols their symbol numbers. Write #defines for the
2038 token symbols into FDEFINES if requested. */
2039 packsymbols ();
2040 /* Convert the grammar into the format described in gram.h. */
2041 packgram ();
2042 /* Free the symbol table data structure since symbols are now all
2043 referred to by symbol number. */
2044 free_symtab ();
2045}