]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/bison.simple: Don't hard code the skeleton line and filename.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
41 bucket *sym;
b29b2ed5 42 int line;
3f96f4dc
AD
43 /* The action is attached to the LHS of a rule. */
44 const char *action;
45 int action_line;
a70083a3
AD
46 bucket *ruleprec;
47}
48symbol_list;
118fb205 49
1ff442ca 50int lineno;
1ff442ca 51char **tags;
d019d655 52short *user_toknums;
4a120d45
JT
53static symbol_list *grammar;
54static int start_flag;
55static bucket *startval;
1ff442ca
NF
56
57/* Nonzero if components of semantic values are used, implying
58 they must be unions. */
59static int value_components_used;
60
d7020c20
AD
61/* Nonzero if %union has been seen. */
62static int typed;
1ff442ca 63
d7020c20
AD
64/* Incremented for each %left, %right or %nonassoc seen */
65static int lastprec;
1ff442ca 66
1ff442ca 67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
b29b2ed5
AD
69
70
6255b435 71static symbol_list *
b29b2ed5
AD
72symbol_list_new (bucket *sym)
73{
74 symbol_list *res = XMALLOC (symbol_list, 1);
75 res->next = NULL;
76 res->sym = sym;
77 res->line = lineno;
78 res->ruleprec = NULL;
79 return res;
80}
81
0d533154 82\f
a70083a3 83
0d533154
AD
84/*===================\
85| Low level lexing. |
86\===================*/
943819bf
RS
87
88static void
118fb205 89skip_to_char (int target)
943819bf
RS
90{
91 int c;
92 if (target == '\n')
a0f6b076 93 complain (_(" Skipping to next \\n"));
943819bf 94 else
a0f6b076 95 complain (_(" Skipping to next %c"), target);
943819bf
RS
96
97 do
0d533154 98 c = skip_white_space ();
943819bf 99 while (c != target && c != EOF);
a083fbbf 100 if (c != EOF)
0d533154 101 ungetc (c, finput);
943819bf
RS
102}
103
104
/* Read an optionally negative decimal integer from STREAM and return
   its value.  An optional leading `-' is consumed, followed by as many
   digits as are present; the first character that is not part of the
   number is pushed back onto STREAM.  No digits at all yields 0.

   The magnitude saturates at INT_MAX instead of overflowing: signed
   overflow is undefined behaviour, and an absurdly long digit string
   in a grammar must not trigger it.  All the digits are still
   consumed in that case.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  if (c == '-')
    {
      negative = 1;
      c = getc (stream);
    }

  for (; isdigit (c); c = getc (stream))
    {
      int digit = c - '0';

      /* True exactly when 10 * n + digit would exceed INT_MAX.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;
    }

  /* ungetc (EOF, ...) is a no-op, so no special case is needed.  */
  ungetc (c, stream);

  return negative ? -n : n;
}
132\f
79282c5a
AD
133/*--------------------------------------------------------------.
134| Get the data type (alternative in the union) of the value for |
135| symbol N in rule RULE. |
136`--------------------------------------------------------------*/
137
138static char *
b29b2ed5 139get_type_name (int n, symbol_list *rule)
79282c5a
AD
140{
141 int i;
142 symbol_list *rp;
143
144 if (n < 0)
145 {
146 complain (_("invalid $ value"));
147 return NULL;
148 }
149
150 rp = rule;
151 i = 0;
152
153 while (i < n)
154 {
155 rp = rp->next;
156 if (rp == NULL || rp->sym == NULL)
157 {
158 complain (_("invalid $ value"));
159 return NULL;
160 }
161 i++;
162 }
163
164 return rp->sym->type_name;
165}
166\f
337bab46
AD
167/*------------------------------------------------------------.
168| Dump the string from FIN to OOUT if non null. MATCH is the |
169| delimiter of the string (either ' or "). |
170`------------------------------------------------------------*/
ae3c3164
AD
171
172static inline void
b6610515 173copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
174{
175 int c;
176
b6610515
RA
177 if (store)
178 obstack_1grow (oout, match);
8c7ebe49 179
4a120d45 180 c = getc (fin);
ae3c3164
AD
181
182 while (c != match)
183 {
184 if (c == EOF)
185 fatal (_("unterminated string at end of file"));
186 if (c == '\n')
187 {
a0f6b076 188 complain (_("unterminated string"));
4a120d45 189 ungetc (c, fin);
ae3c3164
AD
190 c = match; /* invent terminator */
191 continue;
192 }
193
337bab46 194 obstack_1grow (oout, c);
ae3c3164
AD
195
196 if (c == '\\')
197 {
4a120d45 198 c = getc (fin);
ae3c3164
AD
199 if (c == EOF)
200 fatal (_("unterminated string at end of file"));
337bab46 201 obstack_1grow (oout, c);
8c7ebe49 202
ae3c3164
AD
203 if (c == '\n')
204 lineno++;
205 }
206
a70083a3 207 c = getc (fin);
ae3c3164
AD
208 }
209
b6610515
RA
210 if (store)
211 obstack_1grow (oout, c);
212}
213
/* Copy the string delimited by MATCH from FIN to OOUT, delimiters
   included.  The opening delimiter has already been read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
221
/* Copy from FIN to OOUT the longest run of alphanumeric characters
   and underscores, then push back the character that ended it.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch;

  for (;;)
    {
      ch = getc (fin);
      if (!isalnum (ch) && ch != '_')
        break;
      obstack_1grow (oout, ch);
    }

  ungetc (ch, fin);
}
ae3c3164 234
2666f928
AD
235
236/*------------------------------------------------------------------.
237| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
238| `/', which might or might not be a comment. In any case, copy |
239| what we saw. |
240`------------------------------------------------------------------*/
ae3c3164
AD
241
242static inline void
2666f928 243copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
244{
245 int cplus_comment;
a70083a3 246 int ended;
550a72a3
AD
247 int c;
248
249 /* We read a `/', output it. */
2666f928 250 obstack_1grow (oout, '/');
550a72a3
AD
251
252 switch ((c = getc (fin)))
253 {
254 case '/':
255 cplus_comment = 1;
256 break;
257 case '*':
258 cplus_comment = 0;
259 break;
260 default:
261 ungetc (c, fin);
262 return;
263 }
ae3c3164 264
2666f928 265 obstack_1grow (oout, c);
550a72a3 266 c = getc (fin);
ae3c3164
AD
267
268 ended = 0;
269 while (!ended)
270 {
271 if (!cplus_comment && c == '*')
272 {
273 while (c == '*')
274 {
2666f928 275 obstack_1grow (oout, c);
550a72a3 276 c = getc (fin);
ae3c3164
AD
277 }
278
279 if (c == '/')
280 {
2666f928 281 obstack_1grow (oout, c);
ae3c3164
AD
282 ended = 1;
283 }
284 }
285 else if (c == '\n')
286 {
287 lineno++;
2666f928 288 obstack_1grow (oout, c);
ae3c3164
AD
289 if (cplus_comment)
290 ended = 1;
291 else
550a72a3 292 c = getc (fin);
ae3c3164
AD
293 }
294 else if (c == EOF)
295 fatal (_("unterminated comment"));
296 else
297 {
2666f928 298 obstack_1grow (oout, c);
550a72a3 299 c = getc (fin);
ae3c3164
AD
300 }
301 }
302}
303
304
a70083a3 305/*-----------------------------------------------------------------.
337bab46 306| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
307| reference to this location. STACK_OFFSET is the number of values |
308| in the current rule so far, which says where to find `$0' with |
309| respect to the top of the stack. |
310`-----------------------------------------------------------------*/
1ff442ca 311
a70083a3 312static inline void
337bab46 313copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 314{
a70083a3 315 int c;
1ff442ca 316
a70083a3
AD
317 c = getc (fin);
318 if (c == '$')
1ff442ca 319 {
ff4423cc 320 obstack_sgrow (oout, "yyloc");
89cab50d 321 locations_flag = 1;
a70083a3
AD
322 }
323 else if (isdigit (c) || c == '-')
324 {
325 int n;
1ff442ca 326
a70083a3
AD
327 ungetc (c, fin);
328 n = read_signed_integer (fin);
943819bf 329
337bab46 330 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 331 locations_flag = 1;
1ff442ca 332 }
a70083a3 333 else
ff4a34be
AD
334 {
335 char buf[] = "@c";
336 buf[1] = c;
337 complain (_("%s is invalid"), quote (buf));
338 }
1ff442ca 339}
79282c5a
AD
340
341
342/*-------------------------------------------------------------------.
343| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
344| |
345| Possible inputs: $[<TYPENAME>]($|integer) |
346| |
337bab46 347| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
348| the number of values in the current rule so far, which says where |
349| to find `$0' with respect to the top of the stack. |
350`-------------------------------------------------------------------*/
351
352static inline void
337bab46 353copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
354 symbol_list *rule, int stack_offset)
355{
356 int c = getc (fin);
b0ce6046 357 const char *type_name = NULL;
79282c5a 358
f282676b 359 /* Get the type name if explicit. */
79282c5a
AD
360 if (c == '<')
361 {
f282676b 362 read_type_name (fin);
79282c5a
AD
363 type_name = token_buffer;
364 value_components_used = 1;
79282c5a
AD
365 c = getc (fin);
366 }
367
368 if (c == '$')
369 {
ff4423cc 370 obstack_sgrow (oout, "yyval");
8c7ebe49 371
79282c5a
AD
372 if (!type_name)
373 type_name = get_type_name (0, rule);
374 if (type_name)
337bab46 375 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
376 if (!type_name && typed)
377 complain (_("$$ of `%s' has no declared type"),
378 rule->sym->tag);
379 }
380 else if (isdigit (c) || c == '-')
381 {
382 int n;
383 ungetc (c, fin);
384 n = read_signed_integer (fin);
385
386 if (!type_name && n > 0)
387 type_name = get_type_name (n, rule);
388
337bab46 389 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 390
79282c5a 391 if (type_name)
337bab46 392 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
393 if (!type_name && typed)
394 complain (_("$%d of `%s' has no declared type"),
395 n, rule->sym->tag);
396 }
397 else
398 {
399 char buf[] = "$c";
400 buf[1] = c;
401 complain (_("%s is invalid"), quote (buf));
402 }
403}
a70083a3
AD
404\f
405/*-------------------------------------------------------------------.
406| Copy the contents of a `%{ ... %}' into the definitions file. The |
407| `%{' has already been read. Return after reading the `%}'. |
408`-------------------------------------------------------------------*/
1ff442ca 409
4a120d45 410static void
118fb205 411copy_definition (void)
1ff442ca 412{
a70083a3 413 int c;
ae3c3164 414 /* -1 while reading a character if prev char was %. */
a70083a3 415 int after_percent;
1ff442ca 416
89cab50d 417 if (!no_lines_flag)
25b222fa
MA
418 {
419 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 420 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
421 muscle_find("filename")));
422 }
1ff442ca
NF
423
424 after_percent = 0;
425
ae3c3164 426 c = getc (finput);
1ff442ca
NF
427
428 for (;;)
429 {
430 switch (c)
431 {
432 case '\n':
dd60faec 433 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
434 lineno++;
435 break;
436
437 case '%':
a70083a3 438 after_percent = -1;
1ff442ca 439 break;
a083fbbf 440
1ff442ca
NF
441 case '\'':
442 case '"':
337bab46 443 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
444 break;
445
446 case '/':
337bab46 447 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
448 break;
449
450 case EOF:
a70083a3 451 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
452
453 default:
dd60faec 454 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
455 }
456
a70083a3 457 c = getc (finput);
1ff442ca
NF
458
459 if (after_percent)
460 {
461 if (c == '}')
462 return;
dd60faec 463 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
464 }
465 after_percent = 0;
1ff442ca 466 }
1ff442ca
NF
467}
468
469
d7020c20
AD
470/*-------------------------------------------------------------------.
471| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
472| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
473| are reversed. |
474`-------------------------------------------------------------------*/
1ff442ca 475
4a120d45 476static void
d7020c20 477parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 478{
342b8b6e
AD
479 token_t token = tok_undef;
480 char *typename = NULL;
1ff442ca 481
1e9798d5
AD
482 /* The symbol being defined. */
483 struct bucket *symbol = NULL;
484
485 /* After `%token' and `%nterm', any number of symbols maybe be
486 defined. */
1ff442ca
NF
487 for (;;)
488 {
e6011337
JT
489 int tmp_char = ungetc (skip_white_space (), finput);
490
1e9798d5
AD
491 /* `%' (for instance from `%token', or from `%%' etc.) is the
492 only valid means to end this declaration. */
e6011337 493 if (tmp_char == '%')
1ff442ca 494 return;
e6011337 495 if (tmp_char == EOF)
a0f6b076 496 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 497
a70083a3 498 token = lex ();
511e79b3 499 if (token == tok_comma)
943819bf
RS
500 {
501 symbol = NULL;
502 continue;
503 }
511e79b3 504 if (token == tok_typename)
1ff442ca 505 {
95e36146 506 typename = xstrdup (token_buffer);
1ff442ca 507 value_components_used = 1;
943819bf
RS
508 symbol = NULL;
509 }
511e79b3 510 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 511 {
8e03724b
AD
512 if (symval->alias)
513 warn (_("symbol `%s' used more than once as a literal string"),
514 symval->tag);
515 else if (symbol->alias)
516 warn (_("symbol `%s' given more than one literal string"),
517 symbol->tag);
518 else
519 {
520 symval->class = token_sym;
521 symval->type_name = typename;
522 symval->user_token_number = symbol->user_token_number;
523 symbol->user_token_number = SALIAS;
524 symval->alias = symbol;
525 symbol->alias = symval;
526 /* symbol and symval combined are only one symbol */
527 nsyms--;
528 }
8e03724b 529 symbol = NULL;
1ff442ca 530 }
511e79b3 531 else if (token == tok_identifier)
1ff442ca
NF
532 {
533 int oldclass = symval->class;
943819bf 534 symbol = symval;
1ff442ca 535
943819bf 536 if (symbol->class == what_is_not)
a0f6b076 537 complain (_("symbol %s redefined"), symbol->tag);
943819bf 538 symbol->class = what_is;
d7020c20 539 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 540 symbol->value = nvars++;
1ff442ca
NF
541
542 if (typename)
543 {
943819bf
RS
544 if (symbol->type_name == NULL)
545 symbol->type_name = typename;
a70083a3 546 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 547 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
548 }
549 }
511e79b3 550 else if (symbol && token == tok_number)
a70083a3 551 {
943819bf 552 symbol->user_token_number = numval;
a70083a3 553 }
1ff442ca 554 else
943819bf 555 {
a0f6b076 556 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
557 token_buffer,
558 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 559 skip_to_char ('%');
943819bf 560 }
1ff442ca
NF
561 }
562
563}
564
1ff442ca 565
d7020c20
AD
566/*------------------------------.
567| Parse what comes after %start |
568`------------------------------*/
1ff442ca 569
4a120d45 570static void
118fb205 571parse_start_decl (void)
1ff442ca
NF
572{
573 if (start_flag)
27821bff 574 complain (_("multiple %s declarations"), "%start");
511e79b3 575 if (lex () != tok_identifier)
27821bff 576 complain (_("invalid %s declaration"), "%start");
943819bf
RS
577 else
578 {
579 start_flag = 1;
580 startval = symval;
581 }
1ff442ca
NF
582}
583
a70083a3
AD
584/*-----------------------------------------------------------.
585| read in a %type declaration and record its information for |
586| get_type_name to access |
587`-----------------------------------------------------------*/
588
589static void
590parse_type_decl (void)
591{
a70083a3
AD
592 char *name;
593
511e79b3 594 if (lex () != tok_typename)
a70083a3
AD
595 {
596 complain ("%s", _("%type declaration has no <typename>"));
597 skip_to_char ('%');
598 return;
599 }
600
95e36146 601 name = xstrdup (token_buffer);
a70083a3
AD
602
603 for (;;)
604 {
f17bcd1f 605 token_t t;
a70083a3
AD
606 int tmp_char = ungetc (skip_white_space (), finput);
607
608 if (tmp_char == '%')
609 return;
610 if (tmp_char == EOF)
611 fatal (_("Premature EOF after %s"), token_buffer);
612
613 t = lex ();
614
615 switch (t)
1ff442ca
NF
616 {
617
511e79b3
AD
618 case tok_comma:
619 case tok_semicolon:
1ff442ca
NF
620 break;
621
511e79b3 622 case tok_identifier:
1ff442ca
NF
623 if (symval->type_name == NULL)
624 symval->type_name = name;
a70083a3 625 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 626 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
627
628 break;
629
630 default:
a0f6b076
AD
631 complain (_("invalid %%type declaration due to item: %s"),
632 token_buffer);
a70083a3 633 skip_to_char ('%');
1ff442ca
NF
634 }
635 }
636}
637
638
639
d7020c20
AD
640/*----------------------------------------------------------------.
641| Read in a %left, %right or %nonassoc declaration and record its |
642| information. |
643`----------------------------------------------------------------*/
1ff442ca 644
4a120d45 645static void
d7020c20 646parse_assoc_decl (associativity assoc)
1ff442ca 647{
a70083a3
AD
648 char *name = NULL;
649 int prev = 0;
1ff442ca 650
a70083a3 651 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 652
1ff442ca
NF
653 for (;;)
654 {
f17bcd1f 655 token_t t;
e6011337 656 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 657
e6011337 658 if (tmp_char == '%')
1ff442ca 659 return;
e6011337 660 if (tmp_char == EOF)
a0f6b076 661 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 662
a70083a3 663 t = lex ();
1ff442ca
NF
664
665 switch (t)
666 {
511e79b3 667 case tok_typename:
95e36146 668 name = xstrdup (token_buffer);
1ff442ca
NF
669 break;
670
511e79b3 671 case tok_comma:
1ff442ca
NF
672 break;
673
511e79b3 674 case tok_identifier:
1ff442ca 675 if (symval->prec != 0)
a0f6b076 676 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
677 symval->prec = lastprec;
678 symval->assoc = assoc;
d7020c20 679 if (symval->class == nterm_sym)
a0f6b076 680 complain (_("symbol %s redefined"), symval->tag);
d7020c20 681 symval->class = token_sym;
1ff442ca 682 if (name)
a70083a3 683 { /* record the type, if one is specified */
1ff442ca
NF
684 if (symval->type_name == NULL)
685 symval->type_name = name;
a70083a3 686 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 687 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
688 }
689 break;
690
511e79b3
AD
691 case tok_number:
692 if (prev == tok_identifier)
a70083a3 693 {
1ff442ca 694 symval->user_token_number = numval;
a70083a3
AD
695 }
696 else
697 {
698 complain (_
699 ("invalid text (%s) - number should be after identifier"),
700token_buffer);
701 skip_to_char ('%');
702 }
1ff442ca
NF
703 break;
704
511e79b3 705 case tok_semicolon:
1ff442ca
NF
706 return;
707
708 default:
a0f6b076 709 complain (_("unexpected item: %s"), token_buffer);
a70083a3 710 skip_to_char ('%');
1ff442ca
NF
711 }
712
713 prev = t;
1ff442ca
NF
714 }
715}
716
717
718
dd60faec 719/*--------------------------------------------------------------.
180d45ba
PB
720| Copy the union declaration into the stype muscle |
721| (and fdefines), where it is made into the definition of |
722| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 723`--------------------------------------------------------------*/
1ff442ca 724
4a120d45 725static void
118fb205 726parse_union_decl (void)
1ff442ca 727{
a70083a3
AD
728 int c;
729 int count = 0;
428046f8 730 bool done = FALSE;
180d45ba 731 struct obstack union_obstack;
1ff442ca 732 if (typed)
27821bff 733 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
734
735 typed = 1;
736
180d45ba
PB
737 obstack_init (&union_obstack);
738 obstack_sgrow (&union_obstack, "union");
1ff442ca 739
428046f8 740 while (!done)
1ff442ca 741 {
428046f8
AD
742 c = xgetc (finput);
743
342b8b6e
AD
744 /* If C contains '/', it is output by copy_comment (). */
745 if (c != '/')
2666f928 746 obstack_1grow (&union_obstack, c);
1ff442ca
NF
747
748 switch (c)
749 {
750 case '\n':
751 lineno++;
752 break;
753
754 case '/':
2666f928 755 copy_comment (finput, &union_obstack);
1ff442ca
NF
756 break;
757
1ff442ca
NF
758 case '{':
759 count++;
760 break;
761
762 case '}':
428046f8 763 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 764 if (count == 0)
27821bff 765 complain (_("unmatched %s"), "`}'");
1ff442ca 766 count--;
428046f8
AD
767 if (!count)
768 done = TRUE;
769 break;
1ff442ca 770 }
1ff442ca 771 }
180d45ba 772
428046f8
AD
773 /* JF don't choke on trailing semi */
774 c = skip_white_space ();
775 if (c != ';')
776 ungetc (c, finput);
777 obstack_1grow (&union_obstack, 0);
778 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
779}
780
d7020c20
AD
781
782/*-------------------------------------------------------.
783| Parse the declaration %expect N which says to expect N |
784| shift-reduce conflicts. |
785`-------------------------------------------------------*/
1ff442ca 786
4a120d45 787static void
118fb205 788parse_expect_decl (void)
1ff442ca 789{
131e2fef 790 int c = skip_white_space ();
1ff442ca
NF
791 ungetc (c, finput);
792
131e2fef 793 if (!isdigit (c))
79282c5a 794 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
795 else
796 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
797}
798
a70083a3
AD
799
800/*-------------------------------------------------------------------.
801| Parse what comes after %thong. the full syntax is |
802| |
803| %thong <type> token number literal |
804| |
805| the <type> or number may be omitted. The number specifies the |
806| user_token_number. |
807| |
808| Two symbols are entered in the table, one for the token symbol and |
809| one for the literal. Both are given the <type>, if any, from the |
810| declaration. The ->user_token_number of the first is SALIAS and |
811| the ->user_token_number of the second is set to the number, if |
812| any, from the declaration. The two symbols are linked via |
813| pointers in their ->alias fields. |
814| |
815| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
816| only the literal string is retained it is the literal string that |
817| is output to yytname |
818`-------------------------------------------------------------------*/
819
820static void
821parse_thong_decl (void)
7b306f52 822{
f17bcd1f 823 token_t token;
a70083a3
AD
824 struct bucket *symbol;
825 char *typename = 0;
6b7e85b9 826 int usrtoknum = SUNDEF;
7b306f52 827
a70083a3 828 token = lex (); /* fetch typename or first token */
511e79b3 829 if (token == tok_typename)
7b306f52 830 {
95e36146 831 typename = xstrdup (token_buffer);
a70083a3
AD
832 value_components_used = 1;
833 token = lex (); /* fetch first token */
7b306f52 834 }
7b306f52 835
a70083a3 836 /* process first token */
7b306f52 837
511e79b3 838 if (token != tok_identifier)
a70083a3
AD
839 {
840 complain (_("unrecognized item %s, expected an identifier"),
841 token_buffer);
842 skip_to_char ('%');
843 return;
7b306f52 844 }
d7020c20 845 symval->class = token_sym;
a70083a3
AD
846 symval->type_name = typename;
847 symval->user_token_number = SALIAS;
848 symbol = symval;
7b306f52 849
a70083a3 850 token = lex (); /* get number or literal string */
1ff442ca 851
511e79b3 852 if (token == tok_number)
943819bf 853 {
a70083a3
AD
854 usrtoknum = numval;
855 token = lex (); /* okay, did number, now get literal */
943819bf 856 }
1ff442ca 857
a70083a3 858 /* process literal string token */
1ff442ca 859
511e79b3 860 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 861 {
a70083a3
AD
862 complain (_("expected string constant instead of %s"), token_buffer);
863 skip_to_char ('%');
864 return;
1ff442ca 865 }
d7020c20 866 symval->class = token_sym;
a70083a3
AD
867 symval->type_name = typename;
868 symval->user_token_number = usrtoknum;
1ff442ca 869
a70083a3
AD
870 symval->alias = symbol;
871 symbol->alias = symval;
1ff442ca 872
79282c5a
AD
873 /* symbol and symval combined are only one symbol. */
874 nsyms--;
a70083a3 875}
3cef001a 876
b6610515 877static void
11d82f03 878parse_muscle_decl (void)
b6610515
RA
879{
880 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
881 char* muscle_key;
882 char* muscle_value;
b6610515
RA
883
884 /* Read key. */
885 if (!isalpha (ch) && ch != '_')
886 {
887 complain (_("invalid %s declaration"), "%define");
888 skip_to_char ('%');
889 return;
890 }
11d82f03
MA
891 copy_identifier (finput, &muscle_obstack);
892 obstack_1grow (&muscle_obstack, 0);
893 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 894
b6610515
RA
895 /* Read value. */
896 ch = skip_white_space ();
897 if (ch != '"')
898 {
899 ungetc (ch, finput);
900 if (ch != EOF)
901 {
902 complain (_("invalid %s declaration"), "%define");
903 skip_to_char ('%');
904 return;
905 }
906 else
907 fatal (_("Premature EOF after %s"), "\"");
908 }
11d82f03
MA
909 copy_string2 (finput, &muscle_obstack, '"', 0);
910 obstack_1grow (&muscle_obstack, 0);
911 muscle_value = obstack_finish (&muscle_obstack);
b6610515 912
b6610515 913 /* Store the (key, value) pair in the environment. */
11d82f03 914 muscle_insert (muscle_key, muscle_value);
b6610515
RA
915}
916
2ba3b73c 917
426cf563
MA
918
919/*---------------------------------.
a870c567 920| Parse a double quoted parameter. |
426cf563
MA
921`---------------------------------*/
922
923static const char *
924parse_dquoted_param (const char *from)
925{
926 struct obstack param_obstack;
927 const char *param = NULL;
928 int c;
929
930 obstack_init (&param_obstack);
931 c = skip_white_space ();
932
933 if (c != '"')
934 {
935 complain (_("invalid %s declaration"), from);
936 ungetc (c, finput);
937 skip_to_char ('%');
938 return NULL;
939 }
940
2648a72d
AD
941 while ((c = literalchar ()) != '"')
942 obstack_1grow (&param_obstack, c);
a870c567 943
426cf563
MA
944 obstack_1grow (&param_obstack, '\0');
945 param = obstack_finish (&param_obstack);
946
947 if (c != '"' || strlen (param) == 0)
948 {
949 complain (_("invalid %s declaration"), from);
950 if (c != '"')
951 ungetc (c, finput);
952 skip_to_char ('%');
953 return NULL;
954 }
955
956 return param;
957}
958
2ba3b73c
MA
959/*----------------------------------.
960| Parse what comes after %skeleton. |
961`----------------------------------*/
962
a870c567 963static void
2ba3b73c
MA
964parse_skel_decl (void)
965{
426cf563 966 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
967}
968
a70083a3
AD
969/*----------------------------------------------------------------.
970| Read from finput until `%%' is seen. Discard the `%%'. Handle |
971| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 972| groups to ATTRS_OBSTACK. |
a70083a3 973`----------------------------------------------------------------*/
1ff442ca 974
4a120d45 975static void
a70083a3 976read_declarations (void)
1ff442ca 977{
a70083a3 978 for (;;)
1ff442ca 979 {
951366c1 980 int c = skip_white_space ();
1ff442ca 981
a70083a3
AD
982 if (c == '%')
983 {
951366c1 984 token_t tok = parse_percent_token ();
1ff442ca 985
a70083a3 986 switch (tok)
943819bf 987 {
511e79b3 988 case tok_two_percents:
a70083a3 989 return;
1ff442ca 990
511e79b3 991 case tok_percent_left_curly:
a70083a3
AD
992 copy_definition ();
993 break;
1ff442ca 994
511e79b3 995 case tok_token:
d7020c20 996 parse_token_decl (token_sym, nterm_sym);
a70083a3 997 break;
1ff442ca 998
511e79b3 999 case tok_nterm:
d7020c20 1000 parse_token_decl (nterm_sym, token_sym);
a70083a3 1001 break;
1ff442ca 1002
511e79b3 1003 case tok_type:
a70083a3
AD
1004 parse_type_decl ();
1005 break;
1ff442ca 1006
511e79b3 1007 case tok_start:
a70083a3
AD
1008 parse_start_decl ();
1009 break;
118fb205 1010
511e79b3 1011 case tok_union:
a70083a3
AD
1012 parse_union_decl ();
1013 break;
1ff442ca 1014
511e79b3 1015 case tok_expect:
a70083a3
AD
1016 parse_expect_decl ();
1017 break;
6deb4447 1018
511e79b3 1019 case tok_thong:
a70083a3
AD
1020 parse_thong_decl ();
1021 break;
d7020c20 1022
511e79b3 1023 case tok_left:
d7020c20 1024 parse_assoc_decl (left_assoc);
a70083a3 1025 break;
1ff442ca 1026
511e79b3 1027 case tok_right:
d7020c20 1028 parse_assoc_decl (right_assoc);
a70083a3 1029 break;
1ff442ca 1030
511e79b3 1031 case tok_nonassoc:
d7020c20 1032 parse_assoc_decl (non_assoc);
a70083a3 1033 break;
1ff442ca 1034
b6610515 1035 case tok_define:
11d82f03 1036 parse_muscle_decl ();
b6610515 1037 break;
342b8b6e 1038
2ba3b73c
MA
1039 case tok_skel:
1040 parse_skel_decl ();
1041 break;
b6610515 1042
511e79b3 1043 case tok_noop:
a70083a3 1044 break;
1ff442ca 1045
951366c1
AD
1046 case tok_stropt:
1047 case tok_intopt:
1048 case tok_obsolete:
951366c1
AD
1049 abort ();
1050 break;
1051
e0c40012 1052 case tok_illegal:
a70083a3
AD
1053 default:
1054 complain (_("unrecognized: %s"), token_buffer);
1055 skip_to_char ('%');
1056 }
1057 }
1058 else if (c == EOF)
1059 fatal (_("no input grammar"));
1060 else
1061 {
ff4a34be
AD
1062 char buf[] = "c";
1063 buf[0] = c;
1064 complain (_("unknown character: %s"), quote (buf));
a70083a3 1065 skip_to_char ('%');
1ff442ca 1066 }
1ff442ca 1067 }
1ff442ca 1068}
a70083a3
AD
1069\f
1070/*-------------------------------------------------------------------.
1071| Assuming that a `{' has just been seen, copy everything up to the |
1072| matching `}' into the actions file. STACK_OFFSET is the number of |
1073| values in the current rule so far, which says where to find `$0' |
1074| with respect to the top of the stack. |
1075`-------------------------------------------------------------------*/
1ff442ca 1076
4a120d45 1077static void
79282c5a 1078copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1079{
a70083a3 1080 int c;
a70083a3 1081 int count;
1ff442ca
NF
1082
1083 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1084 if (semantic_parser)
1085 stack_offset = 0;
1ff442ca 1086
1ff442ca 1087 count = 1;
a70083a3 1088 c = getc (finput);
1ff442ca
NF
1089
1090 while (count > 0)
1091 {
1092 while (c != '}')
a70083a3
AD
1093 {
1094 switch (c)
1ff442ca
NF
1095 {
1096 case '\n':
8c7ebe49 1097 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1098 lineno++;
1099 break;
1100
1101 case '{':
8c7ebe49 1102 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1103 count++;
1104 break;
1105
1106 case '\'':
1107 case '"':
337bab46 1108 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1109 break;
1110
1111 case '/':
337bab46 1112 copy_comment (finput, &action_obstack);
1ff442ca
NF
1113 break;
1114
1115 case '$':
337bab46 1116 copy_dollar (finput, &action_obstack,
8c7ebe49 1117 rule, stack_offset);
1ff442ca
NF
1118 break;
1119
1120 case '@':
337bab46 1121 copy_at (finput, &action_obstack,
8c7ebe49 1122 stack_offset);
6666f98f 1123 break;
1ff442ca
NF
1124
1125 case EOF:
27821bff 1126 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1127
1128 default:
8c7ebe49 1129 obstack_1grow (&action_obstack, c);
a70083a3
AD
1130 }
1131
1132 c = getc (finput);
1133 }
1134
1135 /* above loop exits when c is '}' */
1136
1137 if (--count)
1138 {
8c7ebe49 1139 obstack_1grow (&action_obstack, c);
a70083a3
AD
1140 c = getc (finput);
1141 }
1142 }
1143
3f96f4dc
AD
1144 obstack_1grow (&action_obstack, '\0');
1145 rule->action = obstack_finish (&action_obstack);
1146 rule->action_line = lineno;
a70083a3
AD
1147}
1148\f
/*-------------------------------------------------------------------.
| After `%guard' is seen in the input file, copy the actual guard    |
| into GUARD_OBSTACK.  If the guard is followed by an action, hand   |
| that over to copy_action.  STACK_OFFSET is the number of values    |
| in the current rule so far, which says where to find `$0' with     |
| respect to the top of the stack, for the simple parser in which    |
| the stack is not popped until after the guard is run.              |
|                                                                    |
| The guard text ends at the first `;' if no `{' has been seen, or   |
| at the `}' matching the first `{' otherwise.                       |
`-------------------------------------------------------------------*/

static void
copy_guard (symbol_list *rule, int stack_offset)
{
  int c;
  /* Current brace nesting depth inside the guard.  */
  int count;
  /* Set once a `{' has been seen: from then on the guard is
     terminated by brace matching instead of by `;'.  */
  int brace_flag = 0;

  /* offset is always 0 if parser has already popped the stack pointer */
  if (semantic_parser)
    stack_offset = 0;

  /* Open the switch case for this rule, preceded by a line directive
     referring to the line where the guard starts.  */
  obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
  if (!no_lines_flag)
    obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
		    lineno, quotearg_style (c_quoting_style,
					    muscle_find ("filename")));
  obstack_1grow (&guard_obstack, '{');

  count = 0;
  c = getc (finput);

  while (brace_flag ? (count > 0) : (c != ';'))
    {
      switch (c)
	{
	case '\n':
	  obstack_1grow (&guard_obstack, c);
	  lineno++;
	  break;

	case '{':
	  obstack_1grow (&guard_obstack, c);
	  brace_flag = 1;
	  count++;
	  break;

	case '}':
	  obstack_1grow (&guard_obstack, c);
	  if (count > 0)
	    count--;
	  else
	    {
	      complain (_("unmatched %s"), "`}'");
	      /* NOTE(review): the stray `}' has already been copied
		 above, and unless the character read here is itself
		 `}' the getc after the switch reads once more, so
		 that character is never copied -- confirm whether
		 this is intended.  */
	      c = getc (finput);	/* skip it */
	    }
	  break;

	case '\'':
	case '"':
	  copy_string (finput, &guard_obstack, c);
	  break;

	case '/':
	  copy_comment (finput, &guard_obstack);
	  break;

	case '$':
	  copy_dollar (finput, &guard_obstack, rule, stack_offset);
	  break;

	case '@':
	  copy_at (finput, &guard_obstack, stack_offset);
	  break;

	case EOF:
	  fatal ("%s", _("unterminated %guard clause"));

	default:
	  obstack_1grow (&guard_obstack, c);
	}

      /* Do not read past the `}' that closes the guard: the loop
	 condition must see COUNT back at zero and stop.  */
      if (c != '}' || count != 0)
	c = getc (finput);
    }

  c = skip_white_space ();

  /* Close the case opened above.  */
  obstack_sgrow (&guard_obstack, ";\n    break;}");
  if (c == '{')
    copy_action (rule, stack_offset);
  else if (c == '=')
    {
      /* NOTE(review): when the character after `=' is not `{' it is
	 neither copied nor pushed back.  */
      c = getc (finput);	/* why not skip_white_space -wjh */
      if (c == '{')
	copy_action (rule, stack_offset);
    }
  else
    ungetc (c, finput);
}
a70083a3
AD
1247\f
1248
a70083a3
AD
1249/*-------------------------------------------------------------------.
1250| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1251| with the user's names. |
1252`-------------------------------------------------------------------*/
1ff442ca 1253
4a120d45 1254static bucket *
118fb205 1255gensym (void)
1ff442ca 1256{
274d42ce
AD
1257 /* Incremented for each generated symbol */
1258 static int gensym_count = 0;
1259 static char buf[256];
1260
a70083a3 1261 bucket *sym;
1ff442ca 1262
274d42ce
AD
1263 sprintf (buf, "@%d", ++gensym_count);
1264 token_buffer = buf;
a70083a3 1265 sym = getsym (token_buffer);
d7020c20 1266 sym->class = nterm_sym;
1ff442ca 1267 sym->value = nvars++;
36281465 1268 return sym;
1ff442ca 1269}
a70083a3 1270\f
/*-------------------------------------------------------------------.
| Parse the input grammar into a single symbol_list structure.  Each |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| Guards are copied out by copy_guard and actions are attached to    |
| their rule by copy_action.                                         |
|                                                                    |
| Bison used to allow some %directives in the rules sections, but    |
| this is no longer considered appropriate: (i) the documented       |
| grammar doesn't claim it, (ii), it would promote bad style, (iii), |
| error recovery for %directives consists in skipping the junk until |
| a `%' is seen, which helps resynchronizing.  This scheme is        |
| definitely wrong in the rules section.                             |
`-------------------------------------------------------------------*/

static void
readgram (void)
{
  token_t t;
  /* Left hand side of the rule being read; kept across iterations so
     that a `|' alternative reuses the previous lhs.  */
  bucket *lhs = NULL;
  /* Most recently allocated list node.  */
  symbol_list *p = NULL;
  /* Last node of the whole list, where new nodes are appended.  */
  symbol_list *p1 = NULL;
  bucket *bp;

  /* Points to first symbol_list of current rule. its symbol is the
     lhs of the rule.  */
  symbol_list *crule = NULL;
  /* Points to the symbol_list preceding crule.  */
  symbol_list *crule1 = NULL;

  t = lex ();

  while (t != tok_two_percents && t != tok_eof)
    if (t == tok_identifier || t == tok_bar)
      {
	/* Set when the last thing read in the rhs was an action.  */
	int action_flag = 0;
	/* Number of symbols in rhs of this rule so far */
	int rulelength = 0;
	int xactions = 0;	/* JF for error checking */
	bucket *first_rhs = 0;

	if (t == tok_identifier)
	  {
	    lhs = symval;

	    /* Without an explicit start symbol, the lhs of the first
	       rule becomes the start symbol.  */
	    if (!start_flag)
	      {
		startval = lhs;
		start_flag = 1;
	      }

	    t = lex ();
	    if (t != tok_colon)
	      {
		complain (_("ill-formed rule: initial symbol not followed by colon"));
		unlex (t);
	      }
	  }

	if (nrules == 0 && t == tok_bar)
	  {
	    complain (_("grammar starts with vertical bar"));
	    lhs = symval;	/* BOGUS: use a random symval */
	  }
	/* start a new rule and record its lhs.  */

	nrules++;
	nitems++;

	p = symbol_list_new (lhs);

	crule1 = p1;
	if (p1)
	  p1->next = p;
	else
	  grammar = p;

	p1 = p;
	crule = p;

	/* mark the rule's lhs as a nonterminal if not already so.  */

	if (lhs->class == unknown_sym)
	  {
	    lhs->class = nterm_sym;
	    lhs->value = nvars;
	    nvars++;
	  }
	else if (lhs->class == token_sym)
	  complain (_("rule given for %s, which is a token"), lhs->tag);

	/* read the rhs of the rule.  */

	for (;;)
	  {
	    t = lex ();
	    if (t == tok_prec)
	      {
		/* The token after %prec names the symbol whose
		   precedence the rule takes.  */
		t = lex ();
		crule->ruleprec = symval;
		t = lex ();
	      }

	    if (!(t == tok_identifier || t == tok_left_curly))
	      break;

	    /* If next token is an identifier, see if a colon follows it.
	       If one does, exit this rule now.  */
	    if (t == tok_identifier)
	      {
		bucket *ssave;
		token_t t1;

		/* Peek one token ahead, restoring SYMVAL which the
		   lookahead may have overwritten.  */
		ssave = symval;
		t1 = lex ();
		unlex (t1);
		symval = ssave;
		if (t1 == tok_colon)
		  break;

		if (!first_rhs)	/* JF */
		  first_rhs = symval;
		/* Not followed by colon =>
		   process as part of this rule's rhs.  */
	      }

	    /* If we just passed an action, that action was in the middle
	       of a rule, so make a dummy rule to reduce it to a
	       non-terminal.  */
	    if (action_flag)
	      {
		/* Since the action was written out with this rule's
		   number, we must give the new rule this number by
		   inserting the new rule before it.  */

		/* Make a dummy nonterminal, a gensym.  */
		bucket *sdummy = gensym ();

		/* Make a new rule, whose body is empty, before the
		   current one, so that the action just read can
		   belong to it.  */
		nrules++;
		nitems++;
		p = symbol_list_new (sdummy);
		/* Attach its lineno to that of the host rule.  */
		p->line = crule->line;
		if (crule1)
		  crule1->next = p;
		else
		  grammar = p;
		/* End of the rule.  */
		crule1 = symbol_list_new (NULL);
		crule1->next = crule;

		p->next = crule1;

		/* Insert the dummy generated by that rule into this
		   rule.  */
		nitems++;
		p = symbol_list_new (sdummy);
		p1->next = p;
		p1 = p;

		action_flag = 0;
	      }

	    if (t == tok_identifier)
	      {
		nitems++;
		p = symbol_list_new (symval);
		p1->next = p;
		p1 = p;
	      }
	    else		/* handle an action.  */
	      {
		copy_action (crule, rulelength);
		action_flag = 1;
		xactions++;	/* JF */
	      }
	    rulelength++;
	  }			/* end of  read rhs of rule */

	/* Put an empty link in the list to mark the end of this rule  */
	p = symbol_list_new (NULL);
	p1->next = p;
	p1 = p;

	if (t == tok_prec)
	  {
	    complain (_("two @prec's in a row"));
	    t = lex ();
	    crule->ruleprec = symval;
	    t = lex ();
	  }
	if (t == tok_guard)
	  {
	    if (!semantic_parser)
	      complain (_("%%guard present but %%semantic_parser not specified"));

	    copy_guard (crule, rulelength);
	    t = lex ();
	  }
	else if (t == tok_left_curly)
	  {
	    /* This case never occurs -wjh */
	    if (action_flag)
	      complain (_("two actions at end of one rule"));
	    copy_action (crule, rulelength);
	    action_flag = 1;
	    xactions++;		/* -wjh */
	    t = lex ();
	  }
	/* If $$ is being set in default way, report if any type
	   mismatch.  */
	else if (!xactions
		 && first_rhs && lhs->type_name != first_rhs->type_name)
	  {
	    /* The pointers differ; complain unless both names exist
	       and spell the same type.  */
	    if (lhs->type_name == 0
		|| first_rhs->type_name == 0
		|| strcmp (lhs->type_name, first_rhs->type_name))
	      complain (_("type clash (`%s' `%s') on default action"),
			lhs->type_name ? lhs->type_name : "",
			first_rhs->type_name ? first_rhs->type_name : "");
	  }
	/* Warn if there is no default for $$ but we need one.  */
	else if (!xactions && !first_rhs && lhs->type_name != 0)
	  complain (_("empty rule for typed nonterminal, and no action"));
	if (t == tok_semicolon)
	  t = lex ();
      }
    else
      {
	complain (_("invalid input: %s"), quote (token_buffer));
	t = lex ();
      }


  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
	   MAXSHORT);
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == unknown_sym)
      {
	complain (_
		  ("symbol %s is used, but is not defined as a token and has no rules"),
		  bp->tag);
	bp->class = nterm_sym;
	bp->value = nvars++;
      }

  /* Every symbol that is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
ff48177d
MA
1533
1534/* At the end of the grammar file, some C source code must
63c2d5de 1535 be stored. It is going to be associated to the epilogue
ff48177d
MA
1536 directive. */
1537static void
1538read_additionnal_code (void)
1539{
1540 char c;
63c2d5de 1541 struct obstack el_obstack;
342b8b6e 1542
63c2d5de 1543 obstack_init (&el_obstack);
ff48177d 1544
710ddc4f
MA
1545 if (!no_lines_flag)
1546 {
1547 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1548 lineno, quotearg_style (c_quoting_style,
1549 muscle_find("filename")));
1550 }
1551
ff48177d 1552 while ((c = getc (finput)) != EOF)
63c2d5de 1553 obstack_1grow (&el_obstack, c);
342b8b6e 1554
63c2d5de 1555 obstack_1grow (&el_obstack, 0);
11d82f03 1556 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1557}
1558
a70083a3 1559\f
037ca2f1
AD
1560/*------------------------------------------------------------------.
1561| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1562| number. |
1563`------------------------------------------------------------------*/
1564
1565static void
1566token_translations_init (void)
1567{
1568 bucket *bp = NULL;
1569 int i;
1570
1571 token_translations = XCALLOC (short, max_user_token_number + 1);
1572
1573 /* Initialize all entries for literal tokens to 2, the internal
1574 token number for $undefined., which represents all invalid
1575 inputs. */
1576 for (i = 0; i <= max_user_token_number; i++)
1577 token_translations[i] = 2;
1578
1579 for (bp = firstsymbol; bp; bp = bp->next)
1580 {
1581 /* Non-terminal? */
1582 if (bp->value >= ntokens)
1583 continue;
1584 /* A token string alias? */
1585 if (bp->user_token_number == SALIAS)
1586 continue;
6b7e85b9
AD
1587
1588 assert (bp->user_token_number != SUNDEF);
1589
037ca2f1
AD
1590 /* A token which translation has already been set? */
1591 if (token_translations[bp->user_token_number] != 2)
1592 complain (_("tokens %s and %s both assigned number %d"),
1593 tags[token_translations[bp->user_token_number]],
1594 bp->tag, bp->user_token_number);
1595 token_translations[bp->user_token_number] = bp->value;
1596 }
1597}
1598
1599
/*------------------------------------------------------------------.
| Assign symbol numbers.  Set up the vectors TAGS, USER_TOKNUMS,    |
| SPREC and SASSOC of names, user token numbers, precedences and    |
| associativities of symbols, then build TOKEN_TRANSLATIONS and     |
| record the error token and start symbol numbers.                  |
`------------------------------------------------------------------*/

static void
packsymbols (void)
{
  bucket *bp = NULL;
  /* Next internal token number to hand out; 0 is the EOF token.  */
  int tokno = 1;
  /* Highest user token number assigned automatically so far.  */
  int last_user_token_number;
  static char DOLLAR[] = "$";

  tags = XCALLOC (char *, nsyms + 1);
  user_toknums = XCALLOC (short, nsyms + 1);

  sprec = XCALLOC (short, nsyms);
  sassoc = XCALLOC (short, nsyms);

  /* The EOF token. */
  tags[0] = DOLLAR;
  user_toknums[0] = 0;

  max_user_token_number = 256;
  last_user_token_number = 256;

  for (bp = firstsymbol; bp; bp = bp->next)
    {
      if (bp->class == nterm_sym)
	{
	  /* Nonterminals are numbered after all the tokens.  */
	  bp->value += ntokens;
	}
      else if (bp->alias)
	{
	  /* this symbol and its alias are a single token defn.
	     allocate a tokno, and assign to both check agreement of
	     ->prec and ->assoc fields and make both the same */
	  if (bp->value == 0)
	    bp->value = bp->alias->value = tokno++;

	  if (bp->prec != bp->alias->prec)
	    {
	      if (bp->prec != 0 && bp->alias->prec != 0
		  && bp->user_token_number == SALIAS)
		complain (_("conflicting precedences for %s and %s"),
			  bp->tag, bp->alias->tag);
	      /* Whichever side has a nonzero value wins.  */
	      if (bp->prec != 0)
		bp->alias->prec = bp->prec;
	      else
		bp->prec = bp->alias->prec;
	    }

	  if (bp->assoc != bp->alias->assoc)
	    {
	      if (bp->assoc != 0 && bp->alias->assoc != 0
		  && bp->user_token_number == SALIAS)
		complain (_("conflicting assoc values for %s and %s"),
			  bp->tag, bp->alias->tag);
	      if (bp->assoc != 0)
		bp->alias->assoc = bp->assoc;
	      else
		bp->assoc = bp->alias->assoc;
	    }

	  if (bp->user_token_number == SALIAS)
	    continue;		/* do not do processing below for SALIASs */

	}
      else			/* bp->class == token_sym */
	{
	  bp->value = tokno++;
	}

      if (bp->class == token_sym)
	{
	  /* Tokens without an explicit number get the next free one
	     above 256.  */
	  if (bp->user_token_number == SUNDEF)
	    bp->user_token_number = ++last_user_token_number;
	  if (bp->user_token_number > max_user_token_number)
	    max_user_token_number = bp->user_token_number;
	}

      /* Record the symbol in the tables indexed by symbol number.  */
      tags[bp->value] = bp->tag;
      user_toknums[bp->value] = bp->user_token_number;
      sprec[bp->value] = bp->prec;
      sassoc[bp->value] = bp->assoc;
    }

  /* Needs MAX_USER_TOKEN_NUMBER and TAGS as computed above.  */
  token_translations_init ();

  error_token_number = errtoken->value;

  if (startval->class == unknown_sym)
    fatal (_("the start symbol %s is undefined"), startval->tag);
  else if (startval->class == token_sym)
    fatal (_("the start symbol %s is a token"), startval->tag);

  start_symbol = startval->value;
}
1699
1700
93ede233
AD
1701/*---------------------------------------------------------------.
1702| Save the definition of token names in the `TOKENDEFS' muscle. |
1703`---------------------------------------------------------------*/
e3f1699f
AD
1704
1705static void
93ede233 1706symbols_save (void)
e3f1699f 1707{
93ede233
AD
1708 struct obstack tokendefs;
1709 bucket *bp;
1710 char *cp, *symbol;
1711 char c;
1712 obstack_init (&tokendefs);
1713
1714 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1715 {
93ede233 1716 symbol = bp->tag; /* get symbol */
1ff442ca 1717
93ede233
AD
1718 if (bp->value >= ntokens)
1719 continue;
1720 if (bp->user_token_number == SALIAS)
1721 continue;
1722 if ('\'' == *symbol)
1723 continue; /* skip literal character */
1724 if (bp == errtoken)
1725 continue; /* skip error token */
1726 if ('\"' == *symbol)
037ca2f1 1727 {
93ede233
AD
1728 /* use literal string only if given a symbol with an alias */
1729 if (bp->alias)
1730 symbol = bp->alias->tag;
1731 else
1732 continue;
037ca2f1 1733 }
93ede233
AD
1734
1735 /* Don't #define nonliteral tokens whose names contain periods. */
1736 cp = symbol;
1737 while ((c = *cp++) && c != '.');
1738 if (c != '\0')
1739 continue;
1740
1741 obstack_fgrow2 (&tokendefs, "# define\t%s\t%d\n",
1742 symbol, bp->user_token_number);
1743 if (semantic_parser)
1744 /* FIXME: This is probably wrong, and should be just as
1745 above. --akim. */
1746 obstack_fgrow2 (&tokendefs, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca 1747 }
93ede233
AD
1748
1749 obstack_1grow (&tokendefs, 0);
1750 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1751 obstack_free (&tokendefs, NULL);
1ff442ca 1752}
a083fbbf 1753
1ff442ca 1754
a70083a3
AD
1755/*---------------------------------------------------------------.
1756| Convert the rules into the representation using RRHS, RLHS and |
1757| RITEMS. |
1758`---------------------------------------------------------------*/
1ff442ca 1759
4a120d45 1760static void
118fb205 1761packgram (void)
1ff442ca 1762{
a70083a3
AD
1763 int itemno;
1764 int ruleno;
1765 symbol_list *p;
1ff442ca 1766
d7913476 1767 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1768 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1769
1770 itemno = 0;
1771 ruleno = 1;
1772
1773 p = grammar;
1774 while (p)
1775 {
b29b2ed5 1776 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1777 rule_table[ruleno].lhs = p->sym->value;
1778 rule_table[ruleno].rhs = itemno;
b29b2ed5 1779 rule_table[ruleno].line = p->line;
68f1e3ed 1780 rule_table[ruleno].useful = TRUE;
3f96f4dc
AD
1781 rule_table[ruleno].action = p->action;
1782 rule_table[ruleno].action_line = p->action_line;
1ff442ca
NF
1783
1784 p = p->next;
1785 while (p && p->sym)
1786 {
1787 ritem[itemno++] = p->sym->value;
1788 /* A rule gets by default the precedence and associativity
1789 of the last token in it. */
d7020c20 1790 if (p->sym->class == token_sym)
1ff442ca 1791 {
652a871c
AD
1792 rule_table[ruleno].prec = p->sym->prec;
1793 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1794 }
a70083a3
AD
1795 if (p)
1796 p = p->next;
1ff442ca
NF
1797 }
1798
1799 /* If this rule has a %prec,
a70083a3 1800 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1801 if (ruleprec)
1802 {
652a871c
AD
1803 rule_table[ruleno].prec = ruleprec->prec;
1804 rule_table[ruleno].assoc = ruleprec->assoc;
1805 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1806 }
1807
1808 ritem[itemno++] = -ruleno;
1809 ruleno++;
1810
a70083a3
AD
1811 if (p)
1812 p = p->next;
1ff442ca
NF
1813 }
1814
1815 ritem[itemno] = 0;
3067fbef
AD
1816
1817 if (trace_flag)
1818 ritem_print (stderr);
1ff442ca 1819}
a70083a3
AD
1820\f
/*-------------------------------------------------------------------.
| Read in the grammar specification and record it in the format      |
| described in gram.h.  All guards are copied into the GUARD_OBSTACK |
| and all actions into ACTION_OBSTACK, in each case forming the body |
| of a C function (YYGUARD or YYACTION) which contains a switch      |
| statement to decide which guard or action to execute.              |
|                                                                    |
| This is the entry point of the file: it resets the reader's state, |
| opens INFILE, runs the three reading passes, closes the input and  |
| then packs the symbols and the rules.                              |
`-------------------------------------------------------------------*/

void
reader (void)
{
  start_flag = 0;
  startval = NULL;		/* start symbol not specified yet. */

  /* Symbol number 0 is taken from the outset (packsymbols assigns it
     to the EOF token).  */
  nsyms = 1;
  nvars = 0;
  nrules = 0;
  nitems = 0;

  typed = 0;
  lastprec = 0;

  semantic_parser = 0;
  pure_parser = 0;

  grammar = NULL;

  lex_init ();
  lineno = 1;

  /* Initialize the muscle obstack.  */
  obstack_init (&muscle_obstack);

  /* Initialize the symbol table.  */
  tabinit ();

  /* Construct the error token */
  errtoken = getsym ("error");
  errtoken->class = token_sym;
  errtoken->user_token_number = 256;	/* Value specified by POSIX.  */

  /* Construct a token that represents all undefined literal tokens.
     It is always token number 2.  */
  undeftoken = getsym ("$undefined.");
  undeftoken->class = token_sym;
  undeftoken->user_token_number = 2;

  /* Initialize the obstacks. */
  obstack_init (&action_obstack);
  obstack_init (&attrs_obstack);
  obstack_init (&guard_obstack);
  obstack_init (&output_obstack);

  finput = xfopen (infile, "r");

  /* Read the declaration section.  Copy %{ ... %} groups to
     TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
     etc. found there.  */
  read_declarations ();
  /* Read in the grammar, build grammar in list form.  Write out
     guards and actions.  */
  readgram ();
  /* Some C code is given at the end of the grammar file. */
  read_additionnal_code ();

  lex_free ();
  xfclose (finput);

  /* Assign the symbols their symbol numbers.  */
  packsymbols ();
  /* Save the token definitions in the `tokendef' muscle.  */
  symbols_save ();

  /* Convert the grammar into the format described in gram.h.  */
  packgram ();
}