]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/options.c (option_table): Adjust.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
8c7ebe49 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
8c7ebe49 24#include "obstack.h"
2a91a95e
AD
25#include "quotearg.h"
26#include "quote.h"
ceed8467 27#include "getargs.h"
1ff442ca 28#include "files.h"
d7913476 29#include "xalloc.h"
1ff442ca 30#include "symtab.h"
82b6d266 31#include "options.h"
1ff442ca
NF
32#include "lex.h"
33#include "gram.h"
a0f6b076 34#include "complain.h"
6c89f1c1 35#include "output.h"
b2ca4022 36#include "reader.h"
340ef489 37#include "conflicts.h"
11d82f03 38#include "muscle_tab.h"
1ff442ca 39
1ff442ca 40/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 41static int rline_allocated;
1ff442ca 42
a70083a3
AD
43typedef struct symbol_list
44{
45 struct symbol_list *next;
46 bucket *sym;
47 bucket *ruleprec;
48}
49symbol_list;
118fb205 50
1ff442ca 51int lineno;
1ff442ca 52char **tags;
d019d655 53short *user_toknums;
4a120d45
JT
54static symbol_list *grammar;
55static int start_flag;
56static bucket *startval;
1ff442ca
NF
57
58/* Nonzero if components of semantic values are used, implying
59 they must be unions. */
60static int value_components_used;
61
d7020c20
AD
62/* Nonzero if %union has been seen. */
63static int typed;
1ff442ca 64
d7020c20
AD
65/* Incremented for each %left, %right or %nonassoc seen */
66static int lastprec;
1ff442ca 67
1ff442ca 68static bucket *errtoken;
5b2e3c89 69static bucket *undeftoken;
0d533154 70\f
a70083a3 71
0d533154
AD
72/*===================\
73| Low level lexing. |
74\===================*/
943819bf
RS
75
76static void
118fb205 77skip_to_char (int target)
943819bf
RS
78{
79 int c;
80 if (target == '\n')
a0f6b076 81 complain (_(" Skipping to next \\n"));
943819bf 82 else
a0f6b076 83 complain (_(" Skipping to next %c"), target);
943819bf
RS
84
85 do
0d533154 86 c = skip_white_space ();
943819bf 87 while (c != target && c != EOF);
a083fbbf 88 if (c != EOF)
0d533154 89 ungetc (c, finput);
943819bf
RS
90}
91
92
0d533154
AD
/* Read an optionally `-'-prefixed run of decimal digits from STREAM
   and return its value.  The first character that is not part of
   the number is pushed back onto STREAM.  If no digit is present the
   result is 0 (a leading `-' is still consumed).  */

static inline int
read_signed_integer (FILE *stream)
{
  int magnitude = 0;
  int negative = 0;
  int ch = getc (stream);

  if (ch == '-')
    {
      negative = 1;
      ch = getc (stream);
    }

  for (; isdigit (ch); ch = getc (stream))
    magnitude = magnitude * 10 + (ch - '0');

  /* Give back the character that terminated the number.  */
  ungetc (ch, stream);

  return negative ? -magnitude : magnitude;
}
120\f
79282c5a
AD
121/*--------------------------------------------------------------.
122| Get the data type (alternative in the union) of the value for |
123| symbol N in rule RULE. |
124`--------------------------------------------------------------*/
125
126static char *
127get_type_name (int n, symbol_list * rule)
128{
129 int i;
130 symbol_list *rp;
131
132 if (n < 0)
133 {
134 complain (_("invalid $ value"));
135 return NULL;
136 }
137
138 rp = rule;
139 i = 0;
140
141 while (i < n)
142 {
143 rp = rp->next;
144 if (rp == NULL || rp->sym == NULL)
145 {
146 complain (_("invalid $ value"));
147 return NULL;
148 }
149 i++;
150 }
151
152 return rp->sym->type_name;
153}
154\f
337bab46
AD
155/*------------------------------------------------------------.
156| Dump the string from FIN to OOUT if non null. MATCH is the |
157| delimiter of the string (either ' or "). |
158`------------------------------------------------------------*/
ae3c3164
AD
159
160static inline void
b6610515 161copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
162{
163 int c;
164
b6610515
RA
165 if (store)
166 obstack_1grow (oout, match);
8c7ebe49 167
4a120d45 168 c = getc (fin);
ae3c3164
AD
169
170 while (c != match)
171 {
172 if (c == EOF)
173 fatal (_("unterminated string at end of file"));
174 if (c == '\n')
175 {
a0f6b076 176 complain (_("unterminated string"));
4a120d45 177 ungetc (c, fin);
ae3c3164
AD
178 c = match; /* invent terminator */
179 continue;
180 }
181
337bab46 182 obstack_1grow (oout, c);
ae3c3164
AD
183
184 if (c == '\\')
185 {
4a120d45 186 c = getc (fin);
ae3c3164
AD
187 if (c == EOF)
188 fatal (_("unterminated string at end of file"));
337bab46 189 obstack_1grow (oout, c);
8c7ebe49 190
ae3c3164
AD
191 if (c == '\n')
192 lineno++;
193 }
194
a70083a3 195 c = getc (fin);
ae3c3164
AD
196 }
197
b6610515
RA
198 if (store)
199 obstack_1grow (oout, c);
200}
201
/* Copy a string or character literal, delimiters included, from FIN
   to OOUT.  MATCH is the delimiter, which has already been read.
   FIXME (from the original author, unexplained).  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  /* Last argument: keep the delimiters in the output.  */
  copy_string2 (fin, oout, match, 1);
}
209
b6610515
RA
/* Copy the longest run of alphanumeric characters and underscores
   from FIN to OOUT, and push the character that ended the run back
   onto FIN.  FIXME (from the original author, unexplained).  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch;

  for (;;)
    {
      ch = getc (fin);
      if (!isalnum (ch) && ch != '_')
	break;
      obstack_1grow (oout, ch);
    }

  ungetc (ch, fin);
}
ae3c3164 222
337bab46
AD
223/*-----------------------------------------------------------------.
224| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
225| NULL). In fact we just saw a `/', which might or might not be a |
226| comment. In any case, copy what we saw. |
227| |
228| OUT2 might be NULL. |
229`-----------------------------------------------------------------*/
ae3c3164
AD
230
231static inline void
337bab46 232copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
233{
234 int cplus_comment;
a70083a3 235 int ended;
550a72a3
AD
236 int c;
237
238 /* We read a `/', output it. */
337bab46 239 obstack_1grow (oout1, '/');
896fe5c1
AD
240 if (oout2)
241 obstack_1grow (oout2, '/');
550a72a3
AD
242
243 switch ((c = getc (fin)))
244 {
245 case '/':
246 cplus_comment = 1;
247 break;
248 case '*':
249 cplus_comment = 0;
250 break;
251 default:
252 ungetc (c, fin);
253 return;
254 }
ae3c3164 255
337bab46 256 obstack_1grow (oout1, c);
896fe5c1
AD
257 if (oout2)
258 obstack_1grow (oout2, c);
550a72a3 259 c = getc (fin);
ae3c3164
AD
260
261 ended = 0;
262 while (!ended)
263 {
264 if (!cplus_comment && c == '*')
265 {
266 while (c == '*')
267 {
337bab46 268 obstack_1grow (oout1, c);
896fe5c1
AD
269 if (oout2)
270 obstack_1grow (oout2, c);
550a72a3 271 c = getc (fin);
ae3c3164
AD
272 }
273
274 if (c == '/')
275 {
337bab46 276 obstack_1grow (oout1, c);
896fe5c1
AD
277 if (oout2)
278 obstack_1grow (oout2, c);
ae3c3164
AD
279 ended = 1;
280 }
281 }
282 else if (c == '\n')
283 {
284 lineno++;
337bab46 285 obstack_1grow (oout1, c);
896fe5c1
AD
286 if (oout2)
287 obstack_1grow (oout2, c);
ae3c3164
AD
288 if (cplus_comment)
289 ended = 1;
290 else
550a72a3 291 c = getc (fin);
ae3c3164
AD
292 }
293 else if (c == EOF)
294 fatal (_("unterminated comment"));
295 else
296 {
337bab46 297 obstack_1grow (oout1, c);
896fe5c1
AD
298 if (oout2)
299 obstack_1grow (oout2, c);
550a72a3 300 c = getc (fin);
ae3c3164
AD
301 }
302 }
303}
304
305
550a72a3
AD
/* Copy the comment (more precisely, whatever starts with the `/'
   that was just read) from FIN to the single obstack OOUT.  */

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  /* No secondary output.  */
  copy_comment2 (fin, oout, NULL);
}
316
317
a70083a3 318/*-----------------------------------------------------------------.
337bab46 319| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
320| reference to this location. STACK_OFFSET is the number of values |
321| in the current rule so far, which says where to find `$0' with |
322| respect to the top of the stack. |
323`-----------------------------------------------------------------*/
1ff442ca 324
a70083a3 325static inline void
337bab46 326copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 327{
a70083a3 328 int c;
1ff442ca 329
a70083a3
AD
330 c = getc (fin);
331 if (c == '$')
1ff442ca 332 {
ff4423cc 333 obstack_sgrow (oout, "yyloc");
89cab50d 334 locations_flag = 1;
a70083a3
AD
335 }
336 else if (isdigit (c) || c == '-')
337 {
338 int n;
1ff442ca 339
a70083a3
AD
340 ungetc (c, fin);
341 n = read_signed_integer (fin);
943819bf 342
337bab46 343 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 344 locations_flag = 1;
1ff442ca 345 }
a70083a3 346 else
ff4a34be
AD
347 {
348 char buf[] = "@c";
349 buf[1] = c;
350 complain (_("%s is invalid"), quote (buf));
351 }
1ff442ca 352}
79282c5a
AD
353
354
355/*-------------------------------------------------------------------.
356| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
357| |
358| Possible inputs: $[<TYPENAME>]($|integer) |
359| |
337bab46 360| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
361| the number of values in the current rule so far, which says where |
362| to find `$0' with respect to the top of the stack. |
363`-------------------------------------------------------------------*/
364
365static inline void
337bab46 366copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
367 symbol_list *rule, int stack_offset)
368{
369 int c = getc (fin);
b0ce6046 370 const char *type_name = NULL;
79282c5a 371
f282676b 372 /* Get the type name if explicit. */
79282c5a
AD
373 if (c == '<')
374 {
f282676b 375 read_type_name (fin);
79282c5a
AD
376 type_name = token_buffer;
377 value_components_used = 1;
79282c5a
AD
378 c = getc (fin);
379 }
380
381 if (c == '$')
382 {
ff4423cc 383 obstack_sgrow (oout, "yyval");
8c7ebe49 384
79282c5a
AD
385 if (!type_name)
386 type_name = get_type_name (0, rule);
387 if (type_name)
337bab46 388 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
389 if (!type_name && typed)
390 complain (_("$$ of `%s' has no declared type"),
391 rule->sym->tag);
392 }
393 else if (isdigit (c) || c == '-')
394 {
395 int n;
396 ungetc (c, fin);
397 n = read_signed_integer (fin);
398
399 if (!type_name && n > 0)
400 type_name = get_type_name (n, rule);
401
337bab46 402 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 403
79282c5a 404 if (type_name)
337bab46 405 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
406 if (!type_name && typed)
407 complain (_("$%d of `%s' has no declared type"),
408 n, rule->sym->tag);
409 }
410 else
411 {
412 char buf[] = "$c";
413 buf[1] = c;
414 complain (_("%s is invalid"), quote (buf));
415 }
416}
a70083a3
AD
417\f
418/*-------------------------------------------------------------------.
419| Copy the contents of a `%{ ... %}' into the definitions file. The |
420| `%{' has already been read. Return after reading the `%}'. |
421`-------------------------------------------------------------------*/
1ff442ca 422
4a120d45 423static void
118fb205 424copy_definition (void)
1ff442ca 425{
a70083a3 426 int c;
ae3c3164 427 /* -1 while reading a character if prev char was %. */
a70083a3 428 int after_percent;
1ff442ca 429
b6610515 430#if 0
89cab50d 431 if (!no_lines_flag)
2a91a95e
AD
432 obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
433 lineno, quotearg_style (c_quoting_style, infile));
b6610515 434#endif
1ff442ca
NF
435
436 after_percent = 0;
437
ae3c3164 438 c = getc (finput);
1ff442ca
NF
439
440 for (;;)
441 {
442 switch (c)
443 {
444 case '\n':
dd60faec 445 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
446 lineno++;
447 break;
448
449 case '%':
a70083a3 450 after_percent = -1;
1ff442ca 451 break;
a083fbbf 452
1ff442ca
NF
453 case '\'':
454 case '"':
337bab46 455 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
456 break;
457
458 case '/':
337bab46 459 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
460 break;
461
462 case EOF:
a70083a3 463 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
464
465 default:
dd60faec 466 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
467 }
468
a70083a3 469 c = getc (finput);
1ff442ca
NF
470
471 if (after_percent)
472 {
473 if (c == '}')
474 return;
dd60faec 475 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
476 }
477 after_percent = 0;
1ff442ca 478 }
1ff442ca
NF
479}
480
481
d7020c20
AD
482/*-------------------------------------------------------------------.
483| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
484| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
485| are reversed. |
486`-------------------------------------------------------------------*/
1ff442ca 487
4a120d45 488static void
d7020c20 489parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 490{
f17bcd1f 491 token_t token = 0;
a70083a3 492 char *typename = 0;
1ff442ca 493
1e9798d5
AD
494 /* The symbol being defined. */
495 struct bucket *symbol = NULL;
496
497 /* After `%token' and `%nterm', any number of symbols maybe be
498 defined. */
1ff442ca
NF
499 for (;;)
500 {
e6011337
JT
501 int tmp_char = ungetc (skip_white_space (), finput);
502
1e9798d5
AD
503 /* `%' (for instance from `%token', or from `%%' etc.) is the
504 only valid means to end this declaration. */
e6011337 505 if (tmp_char == '%')
1ff442ca 506 return;
e6011337 507 if (tmp_char == EOF)
a0f6b076 508 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 509
a70083a3 510 token = lex ();
511e79b3 511 if (token == tok_comma)
943819bf
RS
512 {
513 symbol = NULL;
514 continue;
515 }
511e79b3 516 if (token == tok_typename)
1ff442ca 517 {
95e36146 518 typename = xstrdup (token_buffer);
1ff442ca 519 value_components_used = 1;
943819bf
RS
520 symbol = NULL;
521 }
511e79b3 522 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 523 {
8e03724b
AD
524 if (symval->alias)
525 warn (_("symbol `%s' used more than once as a literal string"),
526 symval->tag);
527 else if (symbol->alias)
528 warn (_("symbol `%s' given more than one literal string"),
529 symbol->tag);
530 else
531 {
532 symval->class = token_sym;
533 symval->type_name = typename;
534 symval->user_token_number = symbol->user_token_number;
535 symbol->user_token_number = SALIAS;
536 symval->alias = symbol;
537 symbol->alias = symval;
538 /* symbol and symval combined are only one symbol */
539 nsyms--;
540 }
943819bf 541 translations = 1;
8e03724b 542 symbol = NULL;
1ff442ca 543 }
511e79b3 544 else if (token == tok_identifier)
1ff442ca
NF
545 {
546 int oldclass = symval->class;
943819bf 547 symbol = symval;
1ff442ca 548
943819bf 549 if (symbol->class == what_is_not)
a0f6b076 550 complain (_("symbol %s redefined"), symbol->tag);
943819bf 551 symbol->class = what_is;
d7020c20 552 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 553 symbol->value = nvars++;
1ff442ca
NF
554
555 if (typename)
556 {
943819bf
RS
557 if (symbol->type_name == NULL)
558 symbol->type_name = typename;
a70083a3 559 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 560 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
561 }
562 }
511e79b3 563 else if (symbol && token == tok_number)
a70083a3 564 {
943819bf 565 symbol->user_token_number = numval;
1ff442ca 566 translations = 1;
a70083a3 567 }
1ff442ca 568 else
943819bf 569 {
a0f6b076 570 complain (_("`%s' is invalid in %s"),
d7020c20 571 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 572 skip_to_char ('%');
943819bf 573 }
1ff442ca
NF
574 }
575
576}
577
1ff442ca 578
d7020c20
AD
579/*------------------------------.
580| Parse what comes after %start |
581`------------------------------*/
1ff442ca 582
4a120d45 583static void
118fb205 584parse_start_decl (void)
1ff442ca
NF
585{
586 if (start_flag)
27821bff 587 complain (_("multiple %s declarations"), "%start");
511e79b3 588 if (lex () != tok_identifier)
27821bff 589 complain (_("invalid %s declaration"), "%start");
943819bf
RS
590 else
591 {
592 start_flag = 1;
593 startval = symval;
594 }
1ff442ca
NF
595}
596
a70083a3
AD
597/*-----------------------------------------------------------.
598| read in a %type declaration and record its information for |
599| get_type_name to access |
600`-----------------------------------------------------------*/
601
602static void
603parse_type_decl (void)
604{
a70083a3
AD
605 char *name;
606
511e79b3 607 if (lex () != tok_typename)
a70083a3
AD
608 {
609 complain ("%s", _("%type declaration has no <typename>"));
610 skip_to_char ('%');
611 return;
612 }
613
95e36146 614 name = xstrdup (token_buffer);
a70083a3
AD
615
616 for (;;)
617 {
f17bcd1f 618 token_t t;
a70083a3
AD
619 int tmp_char = ungetc (skip_white_space (), finput);
620
621 if (tmp_char == '%')
622 return;
623 if (tmp_char == EOF)
624 fatal (_("Premature EOF after %s"), token_buffer);
625
626 t = lex ();
627
628 switch (t)
1ff442ca
NF
629 {
630
511e79b3
AD
631 case tok_comma:
632 case tok_semicolon:
1ff442ca
NF
633 break;
634
511e79b3 635 case tok_identifier:
1ff442ca
NF
636 if (symval->type_name == NULL)
637 symval->type_name = name;
a70083a3 638 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 639 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
640
641 break;
642
643 default:
a0f6b076
AD
644 complain (_("invalid %%type declaration due to item: %s"),
645 token_buffer);
a70083a3 646 skip_to_char ('%');
1ff442ca
NF
647 }
648 }
649}
650
651
652
d7020c20
AD
653/*----------------------------------------------------------------.
654| Read in a %left, %right or %nonassoc declaration and record its |
655| information. |
656`----------------------------------------------------------------*/
1ff442ca 657
4a120d45 658static void
d7020c20 659parse_assoc_decl (associativity assoc)
1ff442ca 660{
a70083a3
AD
661 char *name = NULL;
662 int prev = 0;
1ff442ca 663
a70083a3 664 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 665
1ff442ca
NF
666 for (;;)
667 {
f17bcd1f 668 token_t t;
e6011337 669 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 670
e6011337 671 if (tmp_char == '%')
1ff442ca 672 return;
e6011337 673 if (tmp_char == EOF)
a0f6b076 674 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 675
a70083a3 676 t = lex ();
1ff442ca
NF
677
678 switch (t)
679 {
511e79b3 680 case tok_typename:
95e36146 681 name = xstrdup (token_buffer);
1ff442ca
NF
682 break;
683
511e79b3 684 case tok_comma:
1ff442ca
NF
685 break;
686
511e79b3 687 case tok_identifier:
1ff442ca 688 if (symval->prec != 0)
a0f6b076 689 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
690 symval->prec = lastprec;
691 symval->assoc = assoc;
d7020c20 692 if (symval->class == nterm_sym)
a0f6b076 693 complain (_("symbol %s redefined"), symval->tag);
d7020c20 694 symval->class = token_sym;
1ff442ca 695 if (name)
a70083a3 696 { /* record the type, if one is specified */
1ff442ca
NF
697 if (symval->type_name == NULL)
698 symval->type_name = name;
a70083a3 699 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 700 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
701 }
702 break;
703
511e79b3
AD
704 case tok_number:
705 if (prev == tok_identifier)
a70083a3 706 {
1ff442ca
NF
707 symval->user_token_number = numval;
708 translations = 1;
a70083a3
AD
709 }
710 else
711 {
712 complain (_
713 ("invalid text (%s) - number should be after identifier"),
714token_buffer);
715 skip_to_char ('%');
716 }
1ff442ca
NF
717 break;
718
511e79b3 719 case tok_semicolon:
1ff442ca
NF
720 return;
721
722 default:
a0f6b076 723 complain (_("unexpected item: %s"), token_buffer);
a70083a3 724 skip_to_char ('%');
1ff442ca
NF
725 }
726
727 prev = t;
728
729 }
730}
731
732
733
dd60faec
AD
734/*--------------------------------------------------------------.
735| Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
736| where it is made into the definition of YYSTYPE, the type of |
737| elements of the parser value stack. |
738`--------------------------------------------------------------*/
1ff442ca 739
4a120d45 740static void
118fb205 741parse_union_decl (void)
1ff442ca 742{
a70083a3
AD
743 int c;
744 int count = 0;
1ff442ca
NF
745
746 if (typed)
27821bff 747 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
748
749 typed = 1;
750
89cab50d 751 if (!no_lines_flag)
2a91a95e 752 obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
dda680cb 753 lineno, quotearg_style (c_quoting_style,
11d82f03 754 muscle_find("filename")));
1ff442ca 755 else
dd60faec 756 obstack_1grow (&attrs_obstack, '\n');
1ff442ca 757
ff4423cc 758 obstack_sgrow (&attrs_obstack, "typedef union");
896fe5c1 759 if (defines_flag)
ff4423cc 760 obstack_sgrow (&defines_obstack, "typedef union");
1ff442ca 761
27821bff 762 c = getc (finput);
1ff442ca
NF
763
764 while (c != EOF)
765 {
dd60faec 766 obstack_1grow (&attrs_obstack, c);
896fe5c1 767 if (defines_flag)
d7045ec6 768 obstack_1grow (&defines_obstack, c);
1ff442ca
NF
769
770 switch (c)
771 {
772 case '\n':
773 lineno++;
774 break;
775
776 case '/':
337bab46 777 copy_comment2 (finput, &defines_obstack, &attrs_obstack);
1ff442ca
NF
778 break;
779
1ff442ca
NF
780 case '{':
781 count++;
782 break;
783
784 case '}':
785 if (count == 0)
27821bff 786 complain (_("unmatched %s"), "`}'");
1ff442ca 787 count--;
943819bf 788 if (count <= 0)
1ff442ca 789 {
ff4423cc 790 obstack_sgrow (&attrs_obstack, " YYSTYPE;\n");
896fe5c1 791 if (defines_flag)
ff4423cc 792 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
1ff442ca 793 /* JF don't choke on trailing semi */
27821bff
AD
794 c = skip_white_space ();
795 if (c != ';')
a70083a3 796 ungetc (c, finput);
1ff442ca
NF
797 return;
798 }
799 }
800
27821bff 801 c = getc (finput);
1ff442ca
NF
802 }
803}
804
d7020c20
AD
805
806/*-------------------------------------------------------.
807| Parse the declaration %expect N which says to expect N |
808| shift-reduce conflicts. |
809`-------------------------------------------------------*/
1ff442ca 810
4a120d45 811static void
118fb205 812parse_expect_decl (void)
1ff442ca 813{
131e2fef 814 int c = skip_white_space ();
1ff442ca
NF
815 ungetc (c, finput);
816
131e2fef 817 if (!isdigit (c))
79282c5a 818 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
819 else
820 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
821}
822
a70083a3
AD
823
824/*-------------------------------------------------------------------.
825| Parse what comes after %thong. the full syntax is |
826| |
827| %thong <type> token number literal |
828| |
829| the <type> or number may be omitted. The number specifies the |
830| user_token_number. |
831| |
832| Two symbols are entered in the table, one for the token symbol and |
833| one for the literal. Both are given the <type>, if any, from the |
834| declaration. The ->user_token_number of the first is SALIAS and |
835| the ->user_token_number of the second is set to the number, if |
836| any, from the declaration. The two symbols are linked via |
837| pointers in their ->alias fields. |
838| |
839| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
840| only the literal string is retained it is the literal string that |
841| is output to yytname |
842`-------------------------------------------------------------------*/
843
844static void
845parse_thong_decl (void)
7b306f52 846{
f17bcd1f 847 token_t token;
a70083a3
AD
848 struct bucket *symbol;
849 char *typename = 0;
95e36146 850 int usrtoknum;
7b306f52 851
a70083a3
AD
852 translations = 1;
853 token = lex (); /* fetch typename or first token */
511e79b3 854 if (token == tok_typename)
7b306f52 855 {
95e36146 856 typename = xstrdup (token_buffer);
a70083a3
AD
857 value_components_used = 1;
858 token = lex (); /* fetch first token */
7b306f52 859 }
7b306f52 860
a70083a3 861 /* process first token */
7b306f52 862
511e79b3 863 if (token != tok_identifier)
a70083a3
AD
864 {
865 complain (_("unrecognized item %s, expected an identifier"),
866 token_buffer);
867 skip_to_char ('%');
868 return;
7b306f52 869 }
d7020c20 870 symval->class = token_sym;
a70083a3
AD
871 symval->type_name = typename;
872 symval->user_token_number = SALIAS;
873 symbol = symval;
7b306f52 874
a70083a3 875 token = lex (); /* get number or literal string */
1ff442ca 876
511e79b3 877 if (token == tok_number)
943819bf 878 {
a70083a3
AD
879 usrtoknum = numval;
880 token = lex (); /* okay, did number, now get literal */
943819bf 881 }
a70083a3
AD
882 else
883 usrtoknum = 0;
1ff442ca 884
a70083a3 885 /* process literal string token */
1ff442ca 886
511e79b3 887 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 888 {
a70083a3
AD
889 complain (_("expected string constant instead of %s"), token_buffer);
890 skip_to_char ('%');
891 return;
1ff442ca 892 }
d7020c20 893 symval->class = token_sym;
a70083a3
AD
894 symval->type_name = typename;
895 symval->user_token_number = usrtoknum;
1ff442ca 896
a70083a3
AD
897 symval->alias = symbol;
898 symbol->alias = symval;
1ff442ca 899
79282c5a
AD
900 /* symbol and symval combined are only one symbol. */
901 nsyms--;
a70083a3 902}
3cef001a 903
b6610515
RA
904/* FIXME. */
905
906static void
11d82f03 907parse_muscle_decl (void)
b6610515
RA
908{
909 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
910 char* muscle_key;
911 char* muscle_value;
b6610515
RA
912
913 /* Read key. */
914 if (!isalpha (ch) && ch != '_')
915 {
916 complain (_("invalid %s declaration"), "%define");
917 skip_to_char ('%');
918 return;
919 }
11d82f03
MA
920 copy_identifier (finput, &muscle_obstack);
921 obstack_1grow (&muscle_obstack, 0);
922 muscle_key = obstack_finish (&muscle_obstack);
b6610515
RA
923
924 /* Read value. */
925 ch = skip_white_space ();
926 if (ch != '"')
927 {
928 ungetc (ch, finput);
929 if (ch != EOF)
930 {
931 complain (_("invalid %s declaration"), "%define");
932 skip_to_char ('%');
933 return;
934 }
935 else
936 fatal (_("Premature EOF after %s"), "\"");
937 }
11d82f03
MA
938 copy_string2 (finput, &muscle_obstack, '"', 0);
939 obstack_1grow (&muscle_obstack, 0);
940 muscle_value = obstack_finish (&muscle_obstack);
b6610515 941
b6610515 942 /* Store the (key, value) pair in the environment. */
11d82f03 943 muscle_insert (muscle_key, muscle_value);
b6610515
RA
944}
945
2ba3b73c
MA
946
/* Parse what comes after %skeleton.  Currently a stub that consumes
   nothing.  */

void
parse_skel_decl (void)
{
  /* TODO: complete with parse_dquoted_param () on the CVS branch
     1.29.  */
}
956
7333d403
AD
957/*------------------------------------------.
958| Parse what comes after %header_extension. |
959`------------------------------------------*/
960
961static void
962parse_header_extension_decl (void)
963{
964 char buff[32];
3e3da797 965
7333d403
AD
966 if (header_extension)
967 complain (_("multiple %%header_extension declarations"));
968 fscanf (finput, "%s", buff);
3e3da797 969 header_extension = xstrdup (buff);
7333d403
AD
970}
971
972/*------------------------------------------.
973| Parse what comes after %source_extension. |
974`------------------------------------------*/
975
976static void
977parse_source_extension_decl (void)
978{
979 char buff[32];
3e3da797 980
7333d403
AD
981 if (src_extension)
982 complain (_("multiple %%source_extension declarations"));
983 fscanf (finput, "%s", buff);
3e3da797 984 src_extension = xstrdup (buff);
7333d403 985}
d7020c20 986
a70083a3
AD
987/*----------------------------------------------------------------.
988| Read from finput until `%%' is seen. Discard the `%%'. Handle |
989| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 990| groups to ATTRS_OBSTACK. |
a70083a3 991`----------------------------------------------------------------*/
1ff442ca 992
4a120d45 993static void
a70083a3 994read_declarations (void)
1ff442ca 995{
a70083a3
AD
996 int c;
997 int tok;
1ff442ca 998
a70083a3 999 for (;;)
1ff442ca 1000 {
a70083a3 1001 c = skip_white_space ();
1ff442ca 1002
a70083a3
AD
1003 if (c == '%')
1004 {
1005 tok = parse_percent_token ();
1ff442ca 1006
a70083a3 1007 switch (tok)
943819bf 1008 {
511e79b3 1009 case tok_two_percents:
a70083a3 1010 return;
1ff442ca 1011
511e79b3 1012 case tok_percent_left_curly:
a70083a3
AD
1013 copy_definition ();
1014 break;
1ff442ca 1015
511e79b3 1016 case tok_token:
d7020c20 1017 parse_token_decl (token_sym, nterm_sym);
a70083a3 1018 break;
1ff442ca 1019
511e79b3 1020 case tok_nterm:
d7020c20 1021 parse_token_decl (nterm_sym, token_sym);
a70083a3 1022 break;
1ff442ca 1023
511e79b3 1024 case tok_type:
a70083a3
AD
1025 parse_type_decl ();
1026 break;
1ff442ca 1027
511e79b3 1028 case tok_start:
a70083a3
AD
1029 parse_start_decl ();
1030 break;
118fb205 1031
511e79b3 1032 case tok_union:
a70083a3
AD
1033 parse_union_decl ();
1034 break;
1ff442ca 1035
511e79b3 1036 case tok_expect:
a70083a3
AD
1037 parse_expect_decl ();
1038 break;
6deb4447 1039
511e79b3 1040 case tok_thong:
a70083a3
AD
1041 parse_thong_decl ();
1042 break;
d7020c20 1043
511e79b3 1044 case tok_left:
d7020c20 1045 parse_assoc_decl (left_assoc);
a70083a3 1046 break;
1ff442ca 1047
511e79b3 1048 case tok_right:
d7020c20 1049 parse_assoc_decl (right_assoc);
a70083a3 1050 break;
1ff442ca 1051
511e79b3 1052 case tok_nonassoc:
d7020c20 1053 parse_assoc_decl (non_assoc);
a70083a3 1054 break;
1ff442ca 1055
7333d403 1056 case tok_hdrext:
09a6de7e 1057 parse_header_extension_decl ();
7333d403
AD
1058 break;
1059
1060 case tok_srcext:
09a6de7e 1061 parse_source_extension_decl ();
7333d403
AD
1062 break;
1063
b6610515 1064 case tok_define:
11d82f03 1065 parse_muscle_decl ();
b6610515 1066 break;
2ba3b73c
MA
1067
1068 case tok_skel:
1069 parse_skel_decl ();
1070 break;
b6610515 1071
511e79b3 1072 case tok_noop:
a70083a3 1073 break;
1ff442ca 1074
a70083a3
AD
1075 default:
1076 complain (_("unrecognized: %s"), token_buffer);
1077 skip_to_char ('%');
1078 }
1079 }
1080 else if (c == EOF)
1081 fatal (_("no input grammar"));
1082 else
1083 {
ff4a34be
AD
1084 char buf[] = "c";
1085 buf[0] = c;
1086 complain (_("unknown character: %s"), quote (buf));
a70083a3 1087 skip_to_char ('%');
1ff442ca 1088 }
1ff442ca 1089 }
1ff442ca 1090}
a70083a3
AD
1091\f
1092/*-------------------------------------------------------------------.
1093| Assuming that a `{' has just been seen, copy everything up to the |
1094| matching `}' into the actions file. STACK_OFFSET is the number of |
1095| values in the current rule so far, which says where to find `$0' |
1096| with respect to the top of the stack. |
1097`-------------------------------------------------------------------*/
1ff442ca 1098
4a120d45 1099static void
79282c5a 1100copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1101{
a70083a3 1102 int c;
a70083a3 1103 int count;
8c7ebe49 1104 char buf[4096];
1ff442ca
NF
1105
1106 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1107 if (semantic_parser)
1108 stack_offset = 0;
1ff442ca 1109
8c7ebe49
AD
1110 sprintf (buf, "\ncase %d:\n", nrules);
1111 obstack_grow (&action_obstack, buf, strlen (buf));
1112
89cab50d 1113 if (!no_lines_flag)
8c7ebe49 1114 {
2a91a95e 1115 sprintf (buf, "#line %d %s\n",
dda680cb 1116 lineno, quotearg_style (c_quoting_style,
11d82f03 1117 muscle_find ("filename")));
8c7ebe49
AD
1118 obstack_grow (&action_obstack, buf, strlen (buf));
1119 }
1120 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1121
1122 count = 1;
a70083a3 1123 c = getc (finput);
1ff442ca
NF
1124
1125 while (count > 0)
1126 {
1127 while (c != '}')
a70083a3
AD
1128 {
1129 switch (c)
1ff442ca
NF
1130 {
1131 case '\n':
8c7ebe49 1132 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1133 lineno++;
1134 break;
1135
1136 case '{':
8c7ebe49 1137 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1138 count++;
1139 break;
1140
1141 case '\'':
1142 case '"':
337bab46 1143 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1144 break;
1145
1146 case '/':
337bab46 1147 copy_comment (finput, &action_obstack);
1ff442ca
NF
1148 break;
1149
1150 case '$':
337bab46 1151 copy_dollar (finput, &action_obstack,
8c7ebe49 1152 rule, stack_offset);
1ff442ca
NF
1153 break;
1154
1155 case '@':
337bab46 1156 copy_at (finput, &action_obstack,
8c7ebe49 1157 stack_offset);
6666f98f 1158 break;
1ff442ca
NF
1159
1160 case EOF:
27821bff 1161 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1162
1163 default:
8c7ebe49 1164 obstack_1grow (&action_obstack, c);
a70083a3
AD
1165 }
1166
1167 c = getc (finput);
1168 }
1169
1170 /* above loop exits when c is '}' */
1171
1172 if (--count)
1173 {
8c7ebe49 1174 obstack_1grow (&action_obstack, c);
a70083a3
AD
1175 c = getc (finput);
1176 }
1177 }
1178
ff4423cc 1179 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1180}
1181\f
1182/*-------------------------------------------------------------------.
1183| After `%guard' is seen in the input file, copy the actual guard |
1184| into the guards file. If the guard is followed by an action, copy |
1185| that into the actions file. STACK_OFFSET is the number of values |
1186| in the current rule so far, which says where to find `$0' with |
1187| respect to the top of the stack, for the simple parser in which |
1188| the stack is not popped until after the guard is run. |
1189`-------------------------------------------------------------------*/
1190
1191static void
79282c5a 1192copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1193{
1194 int c;
a70083a3 1195 int count;
a70083a3
AD
1196 int brace_flag = 0;
1197
1198 /* offset is always 0 if parser has already popped the stack pointer */
1199 if (semantic_parser)
1200 stack_offset = 0;
1201
ea5607fd 1202 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1203 if (!no_lines_flag)
ea5607fd 1204 obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
682d48cd 1205 lineno, quotearg_style (c_quoting_style,
11d82f03 1206 muscle_find ("filename")));
ea5607fd 1207 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1208
1209 count = 0;
1210 c = getc (finput);
1211
1212 while (brace_flag ? (count > 0) : (c != ';'))
1213 {
1214 switch (c)
1215 {
1216 case '\n':
ea5607fd 1217 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1218 lineno++;
1219 break;
1220
1221 case '{':
ea5607fd 1222 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1223 brace_flag = 1;
1224 count++;
1225 break;
1226
1227 case '}':
ea5607fd 1228 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1229 if (count > 0)
1230 count--;
1231 else
1232 {
1233 complain (_("unmatched %s"), "`}'");
1234 c = getc (finput); /* skip it */
1235 }
1236 break;
1237
1238 case '\'':
1239 case '"':
337bab46 1240 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1241 break;
1242
1243 case '/':
337bab46 1244 copy_comment (finput, &guard_obstack);
a70083a3
AD
1245 break;
1246
1247 case '$':
337bab46 1248 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1249 break;
1ff442ca 1250
a70083a3 1251 case '@':
337bab46 1252 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1253 break;
1ff442ca 1254
a70083a3
AD
1255 case EOF:
1256 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1257
a70083a3 1258 default:
ea5607fd 1259 obstack_1grow (&guard_obstack, c);
1ff442ca 1260 }
a70083a3
AD
1261
1262 if (c != '}' || count != 0)
1263 c = getc (finput);
1ff442ca
NF
1264 }
1265
a70083a3
AD
1266 c = skip_white_space ();
1267
ff4423cc 1268 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1269 if (c == '{')
1270 copy_action (rule, stack_offset);
1271 else if (c == '=')
1272 {
1273 c = getc (finput); /* why not skip_white_space -wjh */
1274 if (c == '{')
1275 copy_action (rule, stack_offset);
1276 }
1277 else
1278 ungetc (c, finput);
1ff442ca 1279}
a70083a3
AD
1280\f
1281
1282static void
1283record_rule_line (void)
1284{
1285 /* Record each rule's source line number in rline table. */
1ff442ca 1286
a70083a3
AD
1287 if (nrules >= rline_allocated)
1288 {
1289 rline_allocated = nrules * 2;
d7913476 1290 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1291 }
1292 rline[nrules] = lineno;
1293}
1ff442ca
NF
1294
1295
a70083a3
AD
1296/*-------------------------------------------------------------------.
1297| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1298| with the user's names. |
1299`-------------------------------------------------------------------*/
1ff442ca 1300
4a120d45 1301static bucket *
118fb205 1302gensym (void)
1ff442ca 1303{
274d42ce
AD
1304 /* Incremented for each generated symbol */
1305 static int gensym_count = 0;
1306 static char buf[256];
1307
a70083a3 1308 bucket *sym;
1ff442ca 1309
274d42ce
AD
1310 sprintf (buf, "@%d", ++gensym_count);
1311 token_buffer = buf;
a70083a3 1312 sym = getsym (token_buffer);
d7020c20 1313 sym->class = nterm_sym;
1ff442ca 1314 sym->value = nvars++;
36281465 1315 return sym;
1ff442ca
NF
1316}
1317
a70083a3
AD
#if 0
/*------------------------------------------------------------------.
| read in a %type declaration and record its information for        |
| get_type_name to access.  This is unused.  It is only called from |
| the #if 0 part of readgram.                                       |
|                                                                   |
| Returns the first token that does not belong to the declaration:  |
| the token following a terminating `;', or the offending token     |
| itself otherwise.                                                 |
`------------------------------------------------------------------*/

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the unexpected token back to the caller.  */
      return token;
    }

  /* NAME is shared by every symbol typed by this declaration.  */
  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1369\f
1370/*------------------------------------------------------------------.
1371| Parse the input grammar into a one symbol_list structure. Each |
1372| rule is represented by a sequence of symbols: the left hand side |
1373| followed by the contents of the right hand side, followed by a |
1374| null pointer instead of a symbol to terminate the rule. The next |
1375| symbol is the lhs of the following rule. |
1376| |
1377| All guards and actions are copied out to the appropriate files, |
1378| labelled by the rule number they apply to. |
1379`------------------------------------------------------------------*/
1ff442ca 1380
4a120d45 1381static void
118fb205 1382readgram (void)
1ff442ca 1383{
f17bcd1f 1384 token_t t;
a70083a3
AD
1385 bucket *lhs = NULL;
1386 symbol_list *p;
1387 symbol_list *p1;
1388 bucket *bp;
1ff442ca 1389
ff4a34be
AD
1390 /* Points to first symbol_list of current rule. its symbol is the
1391 lhs of the rule. */
1392 symbol_list *crule;
1393 /* Points to the symbol_list preceding crule. */
1394 symbol_list *crule1;
1ff442ca
NF
1395
1396 p1 = NULL;
1397
a70083a3 1398 t = lex ();
1ff442ca 1399
511e79b3 1400 while (t != tok_two_percents && t != tok_eof)
1ff442ca 1401 {
511e79b3 1402 if (t == tok_identifier || t == tok_bar)
1ff442ca 1403 {
89cab50d 1404 int action_flag = 0;
ff4a34be
AD
1405 /* Number of symbols in rhs of this rule so far */
1406 int rulelength = 0;
1ff442ca
NF
1407 int xactions = 0; /* JF for error checking */
1408 bucket *first_rhs = 0;
1409
511e79b3 1410 if (t == tok_identifier)
1ff442ca
NF
1411 {
1412 lhs = symval;
943819bf
RS
1413
1414 if (!start_flag)
1415 {
1416 startval = lhs;
1417 start_flag = 1;
1418 }
a083fbbf 1419
a70083a3 1420 t = lex ();
511e79b3 1421 if (t != tok_colon)
943819bf 1422 {
a0f6b076 1423 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1424 unlex (t);
943819bf 1425 }
1ff442ca
NF
1426 }
1427
511e79b3 1428 if (nrules == 0 && t == tok_bar)
1ff442ca 1429 {
a0f6b076 1430 complain (_("grammar starts with vertical bar"));
943819bf 1431 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1432 }
1ff442ca
NF
1433 /* start a new rule and record its lhs. */
1434
1435 nrules++;
1436 nitems++;
1437
1438 record_rule_line ();
1439
d7913476 1440 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1441 p->sym = lhs;
1442
1443 crule1 = p1;
1444 if (p1)
1445 p1->next = p;
1446 else
1447 grammar = p;
1448
1449 p1 = p;
1450 crule = p;
1451
1452 /* mark the rule's lhs as a nonterminal if not already so. */
1453
d7020c20 1454 if (lhs->class == unknown_sym)
1ff442ca 1455 {
d7020c20 1456 lhs->class = nterm_sym;
1ff442ca
NF
1457 lhs->value = nvars;
1458 nvars++;
1459 }
d7020c20 1460 else if (lhs->class == token_sym)
a0f6b076 1461 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1462
1463 /* read the rhs of the rule. */
1464
1465 for (;;)
1466 {
a70083a3 1467 t = lex ();
511e79b3 1468 if (t == tok_prec)
943819bf 1469 {
a70083a3 1470 t = lex ();
943819bf 1471 crule->ruleprec = symval;
a70083a3 1472 t = lex ();
943819bf 1473 }
1ff442ca 1474
511e79b3 1475 if (!(t == tok_identifier || t == tok_left_curly))
a70083a3 1476 break;
1ff442ca
NF
1477
1478 /* If next token is an identifier, see if a colon follows it.
a70083a3 1479 If one does, exit this rule now. */
511e79b3 1480 if (t == tok_identifier)
1ff442ca 1481 {
a70083a3 1482 bucket *ssave;
f17bcd1f 1483 token_t t1;
1ff442ca
NF
1484
1485 ssave = symval;
a70083a3
AD
1486 t1 = lex ();
1487 unlex (t1);
1ff442ca 1488 symval = ssave;
511e79b3 1489 if (t1 == tok_colon)
a70083a3 1490 break;
1ff442ca 1491
a70083a3 1492 if (!first_rhs) /* JF */
1ff442ca
NF
1493 first_rhs = symval;
1494 /* Not followed by colon =>
1495 process as part of this rule's rhs. */
1496 }
1497
1498 /* If we just passed an action, that action was in the middle
a70083a3
AD
1499 of a rule, so make a dummy rule to reduce it to a
1500 non-terminal. */
89cab50d 1501 if (action_flag)
1ff442ca 1502 {
a70083a3 1503 bucket *sdummy;
1ff442ca 1504
f282676b
AD
1505 /* Since the action was written out with this rule's
1506 number, we must give the new rule this number by
1507 inserting the new rule before it. */
1ff442ca
NF
1508
1509 /* Make a dummy nonterminal, a gensym. */
a70083a3 1510 sdummy = gensym ();
1ff442ca
NF
1511
1512 /* Make a new rule, whose body is empty,
1513 before the current one, so that the action
1514 just read can belong to it. */
1515 nrules++;
1516 nitems++;
1517 record_rule_line ();
d7913476 1518 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1519 if (crule1)
1520 crule1->next = p;
a70083a3
AD
1521 else
1522 grammar = p;
1ff442ca 1523 p->sym = sdummy;
d7913476 1524 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1525 p->next = crule1;
1526 crule1->next = crule;
1527
f282676b
AD
1528 /* Insert the dummy generated by that rule into this
1529 rule. */
1ff442ca 1530 nitems++;
d7913476 1531 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1532 p->sym = sdummy;
1533 p1->next = p;
1534 p1 = p;
1535
89cab50d 1536 action_flag = 0;
1ff442ca
NF
1537 }
1538
511e79b3 1539 if (t == tok_identifier)
1ff442ca
NF
1540 {
1541 nitems++;
d7913476 1542 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1543 p->sym = symval;
1544 p1->next = p;
1545 p1 = p;
1546 }
a70083a3 1547 else /* handle an action. */
1ff442ca 1548 {
a70083a3 1549 copy_action (crule, rulelength);
89cab50d 1550 action_flag = 1;
1ff442ca
NF
1551 xactions++; /* JF */
1552 }
1553 rulelength++;
a70083a3 1554 } /* end of read rhs of rule */
1ff442ca
NF
1555
1556 /* Put an empty link in the list to mark the end of this rule */
d7913476 1557 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1558 p1->next = p;
1559 p1 = p;
1560
511e79b3 1561 if (t == tok_prec)
1ff442ca 1562 {
a0f6b076 1563 complain (_("two @prec's in a row"));
a70083a3 1564 t = lex ();
1ff442ca 1565 crule->ruleprec = symval;
a70083a3 1566 t = lex ();
1ff442ca 1567 }
511e79b3 1568 if (t == tok_guard)
1ff442ca 1569 {
a70083a3 1570 if (!semantic_parser)
ff4a34be 1571 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1572
a70083a3
AD
1573 copy_guard (crule, rulelength);
1574 t = lex ();
1ff442ca 1575 }
511e79b3 1576 else if (t == tok_left_curly)
1ff442ca 1577 {
a70083a3 1578 /* This case never occurs -wjh */
89cab50d 1579 if (action_flag)
a0f6b076 1580 complain (_("two actions at end of one rule"));
a70083a3 1581 copy_action (crule, rulelength);
89cab50d 1582 action_flag = 1;
943819bf 1583 xactions++; /* -wjh */
a70083a3 1584 t = lex ();
1ff442ca 1585 }
a0f6b076 1586 /* If $$ is being set in default way, report if any type
6666f98f
AD
1587 mismatch. */
1588 else if (!xactions
a70083a3 1589 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1590 {
6666f98f
AD
1591 if (lhs->type_name == 0
1592 || first_rhs->type_name == 0
a70083a3 1593 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1594 complain (_("type clash (`%s' `%s') on default action"),
1595 lhs->type_name ? lhs->type_name : "",
a70083a3 1596 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1597 }
1598 /* Warn if there is no default for $$ but we need one. */
1599 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1600 complain (_("empty rule for typed nonterminal, and no action"));
511e79b3 1601 if (t == tok_semicolon)
a70083a3 1602 t = lex ();
a083fbbf 1603 }
943819bf 1604#if 0
a70083a3 1605 /* these things can appear as alternatives to rules. */
943819bf
RS
1606/* NO, they cannot.
1607 a) none of the documentation allows them
1608 b) most of them scan forward until finding a next %
1609 thus they may swallow lots of intervening rules
1610*/
511e79b3 1611 else if (t == tok_token)
1ff442ca 1612 {
d7020c20 1613 parse_token_decl (token_sym, nterm_sym);
a70083a3 1614 t = lex ();
1ff442ca 1615 }
511e79b3 1616 else if (t == tok_nterm)
1ff442ca 1617 {
d7020c20 1618 parse_token_decl (nterm_sym, token_sym);
a70083a3 1619 t = lex ();
1ff442ca 1620 }
511e79b3 1621 else if (t == tok_type)
1ff442ca 1622 {
a70083a3 1623 t = get_type ();
1ff442ca 1624 }
511e79b3 1625 else if (t == tok_union)
1ff442ca 1626 {
a70083a3
AD
1627 parse_union_decl ();
1628 t = lex ();
1ff442ca 1629 }
511e79b3 1630 else if (t == tok_expect)
1ff442ca 1631 {
a70083a3
AD
1632 parse_expect_decl ();
1633 t = lex ();
1ff442ca 1634 }
511e79b3 1635 else if (t == tok_start)
1ff442ca 1636 {
a70083a3
AD
1637 parse_start_decl ();
1638 t = lex ();
1ff442ca 1639 }
943819bf
RS
1640#endif
1641
1ff442ca 1642 else
943819bf 1643 {
d01c415b 1644 complain (_("invalid input: %s"), quote (token_buffer));
a70083a3 1645 t = lex ();
943819bf 1646 }
1ff442ca
NF
1647 }
1648
943819bf
RS
1649 /* grammar has been read. Do some checking */
1650
1ff442ca 1651 if (nsyms > MAXSHORT)
a0f6b076
AD
1652 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1653 MAXSHORT);
1ff442ca 1654 if (nrules == 0)
a0f6b076 1655 fatal (_("no rules in the input grammar"));
1ff442ca 1656
1ff442ca
NF
1657 /* Report any undefined symbols and consider them nonterminals. */
1658
1659 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1660 if (bp->class == unknown_sym)
1ff442ca 1661 {
a70083a3
AD
1662 complain (_
1663 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1664 bp->tag);
d7020c20 1665 bp->class = nterm_sym;
1ff442ca
NF
1666 bp->value = nvars++;
1667 }
1668
1669 ntokens = nsyms - nvars;
1670}
ff48177d
MA
1671
1672/* At the end of the grammar file, some C source code must
63c2d5de 1673 be stored. It is going to be associated to the epilogue
ff48177d
MA
1674 directive. */
1675static void
1676read_additionnal_code (void)
1677{
1678 char c;
63c2d5de 1679 struct obstack el_obstack;
ff48177d 1680
63c2d5de 1681 obstack_init (&el_obstack);
ff48177d
MA
1682
1683 while ((c = getc (finput)) != EOF)
63c2d5de 1684 obstack_1grow (&el_obstack, c);
ff48177d 1685
63c2d5de 1686 obstack_1grow (&el_obstack, 0);
11d82f03 1687 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1688}
1689
a70083a3
AD
1690\f
1691/*--------------------------------------------------------------.
1692| For named tokens, but not literal ones, define the name. The |
1693| value is the user token number. |
1694`--------------------------------------------------------------*/
1ff442ca 1695
4a120d45 1696static void
896fe5c1 1697output_token_defines (struct obstack *oout)
1ff442ca 1698{
a70083a3
AD
1699 bucket *bp;
1700 char *cp, *symbol;
1701 char c;
1ff442ca 1702
a70083a3 1703 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1704 {
a70083a3
AD
1705 symbol = bp->tag; /* get symbol */
1706
1707 if (bp->value >= ntokens)
1708 continue;
1709 if (bp->user_token_number == SALIAS)
1710 continue;
1711 if ('\'' == *symbol)
1712 continue; /* skip literal character */
1713 if (bp == errtoken)
1714 continue; /* skip error token */
1715 if ('\"' == *symbol)
1ff442ca 1716 {
a70083a3
AD
1717 /* use literal string only if given a symbol with an alias */
1718 if (bp->alias)
1719 symbol = bp->alias->tag;
1720 else
1721 continue;
1722 }
1ff442ca 1723
a70083a3
AD
1724 /* Don't #define nonliteral tokens whose names contain periods. */
1725 cp = symbol;
1726 while ((c = *cp++) && c != '.');
1727 if (c != '\0')
1728 continue;
1ff442ca 1729
0b8afb77 1730 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
896fe5c1 1731 symbol,
62ab6972 1732 (translations ? bp->user_token_number : bp->value));
a70083a3 1733 if (semantic_parser)
0b8afb77 1734 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1735 }
1736}
1ff442ca
NF
1737
1738
a70083a3
AD
1739/*------------------------------------------------------------------.
1740| Assign symbol numbers, and write definition of token names into |
b2ca4022 1741| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1742| of symbols. |
1743`------------------------------------------------------------------*/
1ff442ca 1744
4a120d45 1745static void
118fb205 1746packsymbols (void)
1ff442ca 1747{
a70083a3
AD
1748 bucket *bp;
1749 int tokno = 1;
1750 int i;
1751 int last_user_token_number;
4a120d45 1752 static char DOLLAR[] = "$";
1ff442ca 1753
d7913476 1754 tags = XCALLOC (char *, nsyms + 1);
4a120d45 1755 tags[0] = DOLLAR;
d7913476 1756 user_toknums = XCALLOC (short, nsyms + 1);
943819bf 1757 user_toknums[0] = 0;
1ff442ca 1758
d7913476
AD
1759 sprec = XCALLOC (short, nsyms);
1760 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1761
1762 max_user_token_number = 256;
1763 last_user_token_number = 256;
1764
1765 for (bp = firstsymbol; bp; bp = bp->next)
1766 {
d7020c20 1767 if (bp->class == nterm_sym)
1ff442ca
NF
1768 {
1769 bp->value += ntokens;
1770 }
943819bf
RS
1771 else if (bp->alias)
1772 {
0a6384c4
AD
1773 /* this symbol and its alias are a single token defn.
1774 allocate a tokno, and assign to both check agreement of
1775 ->prec and ->assoc fields and make both the same */
1776 if (bp->value == 0)
1777 bp->value = bp->alias->value = tokno++;
943819bf 1778
0a6384c4
AD
1779 if (bp->prec != bp->alias->prec)
1780 {
1781 if (bp->prec != 0 && bp->alias->prec != 0
1782 && bp->user_token_number == SALIAS)
a0f6b076
AD
1783 complain (_("conflicting precedences for %s and %s"),
1784 bp->tag, bp->alias->tag);
0a6384c4
AD
1785 if (bp->prec != 0)
1786 bp->alias->prec = bp->prec;
1787 else
1788 bp->prec = bp->alias->prec;
1789 }
943819bf 1790
0a6384c4
AD
1791 if (bp->assoc != bp->alias->assoc)
1792 {
a0f6b076
AD
1793 if (bp->assoc != 0 && bp->alias->assoc != 0
1794 && bp->user_token_number == SALIAS)
1795 complain (_("conflicting assoc values for %s and %s"),
1796 bp->tag, bp->alias->tag);
1797 if (bp->assoc != 0)
1798 bp->alias->assoc = bp->assoc;
1799 else
1800 bp->assoc = bp->alias->assoc;
1801 }
0a6384c4
AD
1802
1803 if (bp->user_token_number == SALIAS)
a70083a3 1804 continue; /* do not do processing below for SALIASs */
943819bf 1805
a70083a3 1806 }
d7020c20 1807 else /* bp->class == token_sym */
943819bf
RS
1808 {
1809 bp->value = tokno++;
1810 }
1811
d7020c20 1812 if (bp->class == token_sym)
1ff442ca
NF
1813 {
1814 if (translations && !(bp->user_token_number))
1815 bp->user_token_number = ++last_user_token_number;
1816 if (bp->user_token_number > max_user_token_number)
1817 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1818 }
1819
1820 tags[bp->value] = bp->tag;
943819bf 1821 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1822 sprec[bp->value] = bp->prec;
1823 sassoc[bp->value] = bp->assoc;
1824
1825 }
1826
1827 if (translations)
1828 {
a70083a3 1829 int j;
1ff442ca 1830
d7913476 1831 token_translations = XCALLOC (short, max_user_token_number + 1);
1ff442ca 1832
0a6384c4 1833 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1834 token number for $undefined., which represents all invalid
1835 inputs. */
4a120d45 1836 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1837 token_translations[j] = 2;
1ff442ca 1838
943819bf 1839 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1840 {
1841 if (bp->value >= ntokens)
1842 continue; /* non-terminal */
1843 if (bp->user_token_number == SALIAS)
0a6384c4 1844 continue;
a70083a3 1845 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1846 complain (_("tokens %s and %s both assigned number %d"),
1847 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1848 bp->tag, bp->user_token_number);
1849 token_translations[bp->user_token_number] = bp->value;
1850 }
1ff442ca
NF
1851 }
1852
1853 error_token_number = errtoken->value;
1854
b6610515
RA
1855 output_token_defines (&output_obstack);
1856 obstack_1grow (&output_obstack, 0);
11d82f03 1857 muscle_insert ("tokendef", obstack_finish (&output_obstack));
b6610515 1858
d8cb5183
MA
1859#if 0
1860 if (!no_parser_flag)
1861 output_token_defines (&table_obstack);
1862#endif
1ff442ca 1863
d7020c20 1864 if (startval->class == unknown_sym)
a0f6b076 1865 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1866 else if (startval->class == token_sym)
a0f6b076 1867 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1868
1869 start_symbol = startval->value;
1870
89cab50d 1871 if (defines_flag)
1ff442ca 1872 {
896fe5c1 1873 output_token_defines (&defines_obstack);
1ff442ca
NF
1874
1875 if (!pure_parser)
1876 {
1877 if (spec_name_prefix)
896fe5c1
AD
1878 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1879 spec_name_prefix);
1ff442ca 1880 else
ff4423cc 1881 obstack_sgrow (&defines_obstack,
573c1d9f 1882 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1883 }
1884
1885 if (semantic_parser)
1886 for (i = ntokens; i < nsyms; i++)
1887 {
1888 /* don't make these for dummy nonterminals made by gensym. */
1889 if (*tags[i] != '@')
896fe5c1 1890 obstack_fgrow2 (&defines_obstack,
0b8afb77 1891 "# define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1892 }
1893#if 0
1894 /* `fdefines' is now a temporary file, so we need to copy its
1895 contents in `done', so we can't close it here. */
a70083a3 1896 fclose (fdefines);
1ff442ca
NF
1897 fdefines = NULL;
1898#endif
1899 }
1900}
a083fbbf 1901
1ff442ca 1902
a70083a3
AD
1903/*---------------------------------------------------------------.
1904| Convert the rules into the representation using RRHS, RLHS and |
1905| RITEMS. |
1906`---------------------------------------------------------------*/
1ff442ca 1907
4a120d45 1908static void
118fb205 1909packgram (void)
1ff442ca 1910{
a70083a3
AD
1911 int itemno;
1912 int ruleno;
1913 symbol_list *p;
1ff442ca
NF
1914
1915 bucket *ruleprec;
1916
d7913476
AD
1917 ritem = XCALLOC (short, nitems + 1);
1918 rlhs = XCALLOC (short, nrules) - 1;
1919 rrhs = XCALLOC (short, nrules) - 1;
1920 rprec = XCALLOC (short, nrules) - 1;
1921 rprecsym = XCALLOC (short, nrules) - 1;
1922 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1923
1924 itemno = 0;
1925 ruleno = 1;
1926
1927 p = grammar;
1928 while (p)
1929 {
1930 rlhs[ruleno] = p->sym->value;
1931 rrhs[ruleno] = itemno;
1932 ruleprec = p->ruleprec;
1933
1934 p = p->next;
1935 while (p && p->sym)
1936 {
1937 ritem[itemno++] = p->sym->value;
1938 /* A rule gets by default the precedence and associativity
1939 of the last token in it. */
d7020c20 1940 if (p->sym->class == token_sym)
1ff442ca
NF
1941 {
1942 rprec[ruleno] = p->sym->prec;
1943 rassoc[ruleno] = p->sym->assoc;
1944 }
a70083a3
AD
1945 if (p)
1946 p = p->next;
1ff442ca
NF
1947 }
1948
1949 /* If this rule has a %prec,
a70083a3 1950 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1951 if (ruleprec)
1952 {
a70083a3
AD
1953 rprec[ruleno] = ruleprec->prec;
1954 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1955 rprecsym[ruleno] = ruleprec->value;
1956 }
1957
1958 ritem[itemno++] = -ruleno;
1959 ruleno++;
1960
a70083a3
AD
1961 if (p)
1962 p = p->next;
1ff442ca
NF
1963 }
1964
1965 ritem[itemno] = 0;
1966}
a70083a3
AD
1967\f
1968/*-------------------------------------------------------------------.
1969| Read in the grammar specification and record it in the format |
ea5607fd 1970| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1971| and all actions into ACTION_OBSTACK, in each case forming the body |
1972| of a C function (YYGUARD or YYACTION) which contains a switch |
1973| statement to decide which guard or action to execute. |
a70083a3
AD
1974`-------------------------------------------------------------------*/
1975
1976void
1977reader (void)
1978{
1979 start_flag = 0;
1980 startval = NULL; /* start symbol not specified yet. */
1981
1982#if 0
1983 /* initially assume token number translation not needed. */
1984 translations = 0;
1985#endif
1986 /* Nowadays translations is always set to 1, since we give `error' a
1987 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1988 */
1989 translations = 1;
1990
1991 nsyms = 1;
1992 nvars = 0;
1993 nrules = 0;
1994 nitems = 0;
1995 rline_allocated = 10;
d7913476 1996 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1997
1998 typed = 0;
1999 lastprec = 0;
2000
a70083a3
AD
2001 semantic_parser = 0;
2002 pure_parser = 0;
a70083a3
AD
2003
2004 grammar = NULL;
2005
2006 init_lex ();
2007 lineno = 1;
2008
11d82f03
MA
2009 /* Initialize the muscle obstack. */
2010 obstack_init (&muscle_obstack);
82e236e2 2011
a70083a3
AD
2012 /* Initialize the symbol table. */
2013 tabinit ();
b6610515 2014
a70083a3
AD
2015 /* Construct the error token */
2016 errtoken = getsym ("error");
d7020c20 2017 errtoken->class = token_sym;
a70083a3 2018 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 2019
a70083a3
AD
2020 /* Construct a token that represents all undefined literal tokens.
2021 It is always token number 2. */
2022 undeftoken = getsym ("$undefined.");
d7020c20 2023 undeftoken->class = token_sym;
a70083a3
AD
2024 undeftoken->user_token_number = 2;
2025
896fe5c1
AD
2026 /* Read the declaration section. Copy %{ ... %} groups to
2027 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2028 etc. found there. */
a70083a3 2029 read_declarations ();
a70083a3
AD
2030 /* Read in the grammar, build grammar in list form. Write out
2031 guards and actions. */
2032 readgram ();
ff48177d
MA
2033 /* Some C code is given at the end of the grammar file. */
2034 read_additionnal_code ();
b0c4483e 2035
a70083a3 2036 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
2037 write its type into the .tab.h file.
2038 This is no longer need with header skeleton. */
2039
a70083a3
AD
2040 /* Assign the symbols their symbol numbers. Write #defines for the
2041 token symbols into FDEFINES if requested. */
2042 packsymbols ();
2043 /* Convert the grammar into the format described in gram.h. */
2044 packgram ();
2045 /* Free the symbol table data structure since symbols are now all
2046 referred to by symbol number. */
2047 free_symtab ();
2048}