]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/print.c (print_actions): Output the handling of `$' as the
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 38
a70083a3
AD
39typedef struct symbol_list
40{
41 struct symbol_list *next;
42 bucket *sym;
b29b2ed5 43 int line;
a70083a3
AD
44 bucket *ruleprec;
45}
46symbol_list;
118fb205 47
1ff442ca 48int lineno;
1ff442ca 49char **tags;
d019d655 50short *user_toknums;
4a120d45
JT
51static symbol_list *grammar;
52static int start_flag;
53static bucket *startval;
1ff442ca
NF
54
55/* Nonzero if components of semantic values are used, implying
56 they must be unions. */
57static int value_components_used;
58
d7020c20
AD
59/* Nonzero if %union has been seen. */
60static int typed;
1ff442ca 61
d7020c20
AD
62/* Incremented for each %left, %right or %nonassoc seen */
63static int lastprec;
1ff442ca 64
1ff442ca 65static bucket *errtoken;
5b2e3c89 66static bucket *undeftoken;
b29b2ed5
AD
67
68
6255b435 69static symbol_list *
b29b2ed5
AD
70symbol_list_new (bucket *sym)
71{
72 symbol_list *res = XMALLOC (symbol_list, 1);
73 res->next = NULL;
74 res->sym = sym;
75 res->line = lineno;
76 res->ruleprec = NULL;
77 return res;
78}
79
0d533154 80\f
a70083a3 81
0d533154
AD
82/*===================\
83| Low level lexing. |
84\===================*/
943819bf
RS
85
86static void
118fb205 87skip_to_char (int target)
943819bf
RS
88{
89 int c;
90 if (target == '\n')
a0f6b076 91 complain (_(" Skipping to next \\n"));
943819bf 92 else
a0f6b076 93 complain (_(" Skipping to next %c"), target);
943819bf
RS
94
95 do
0d533154 96 c = skip_white_space ();
943819bf 97 while (c != target && c != EOF);
a083fbbf 98 if (c != EOF)
0d533154 99 ungetc (c, finput);
943819bf
RS
100}
101
102
0d533154
AD
/* Read a decimal integer, optionally preceded by `-', from STREAM and
   return its value.

   The first character that is not part of the number is pushed back
   onto STREAM.  If no digit is found, the result is 0.

   The magnitude saturates at INT_MAX instead of overflowing, so an
   absurdly long digit string in the grammar file cannot trigger
   undefined behavior; the excess digits are still consumed.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* `10 * n + digit' would exceed INT_MAX exactly when this test
	 holds; once saturated, N stays at INT_MAX.  */
      if (n > (INT_MAX - digit) / 10)
	n = INT_MAX;
      else
	n = 10 * n + digit;

      c = getc (stream);
    }

  ungetc (c, stream);

  return sign * n;
}
130\f
79282c5a
AD
131/*--------------------------------------------------------------.
132| Get the data type (alternative in the union) of the value for |
133| symbol N in rule RULE. |
134`--------------------------------------------------------------*/
135
136static char *
b29b2ed5 137get_type_name (int n, symbol_list *rule)
79282c5a
AD
138{
139 int i;
140 symbol_list *rp;
141
142 if (n < 0)
143 {
144 complain (_("invalid $ value"));
145 return NULL;
146 }
147
148 rp = rule;
149 i = 0;
150
151 while (i < n)
152 {
153 rp = rp->next;
154 if (rp == NULL || rp->sym == NULL)
155 {
156 complain (_("invalid $ value"));
157 return NULL;
158 }
159 i++;
160 }
161
162 return rp->sym->type_name;
163}
164\f
337bab46
AD
165/*------------------------------------------------------------.
166| Dump the string from FIN to OOUT if non null. MATCH is the |
167| delimiter of the string (either ' or "). |
168`------------------------------------------------------------*/
ae3c3164
AD
169
170static inline void
b6610515 171copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
172{
173 int c;
174
b6610515
RA
175 if (store)
176 obstack_1grow (oout, match);
8c7ebe49 177
4a120d45 178 c = getc (fin);
ae3c3164
AD
179
180 while (c != match)
181 {
182 if (c == EOF)
183 fatal (_("unterminated string at end of file"));
184 if (c == '\n')
185 {
a0f6b076 186 complain (_("unterminated string"));
4a120d45 187 ungetc (c, fin);
ae3c3164
AD
188 c = match; /* invent terminator */
189 continue;
190 }
191
337bab46 192 obstack_1grow (oout, c);
ae3c3164
AD
193
194 if (c == '\\')
195 {
4a120d45 196 c = getc (fin);
ae3c3164
AD
197 if (c == EOF)
198 fatal (_("unterminated string at end of file"));
337bab46 199 obstack_1grow (oout, c);
8c7ebe49 200
ae3c3164
AD
201 if (c == '\n')
202 lineno++;
203 }
204
a70083a3 205 c = getc (fin);
ae3c3164
AD
206 }
207
b6610515
RA
208 if (store)
209 obstack_1grow (oout, c);
210}
211
/* Copy a string or character literal, delimiters included, from FIN
   to OOUT.  MATCH is the opening delimiter, which has already been
   read.  This is copy_string2 with storing of the delimiters
   enabled.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int store_delimiters = 1;

  copy_string2 (fin, oout, match, store_delimiters);
}
219
b6610515
RA
/* Copy the longest run of letters, digits and underscores from FIN to
   OOUT.  The character that ends the run is pushed back onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (ch == '_' || isalnum (ch))
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
ae3c3164 232
337bab46
AD
233/*-----------------------------------------------------------------.
234| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
235| NULL). In fact we just saw a `/', which might or might not be a |
236| comment. In any case, copy what we saw. |
237| |
238| OUT2 might be NULL. |
239`-----------------------------------------------------------------*/
ae3c3164
AD
240
241static inline void
337bab46 242copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
243{
244 int cplus_comment;
a70083a3 245 int ended;
550a72a3
AD
246 int c;
247
248 /* We read a `/', output it. */
337bab46 249 obstack_1grow (oout1, '/');
896fe5c1
AD
250 if (oout2)
251 obstack_1grow (oout2, '/');
550a72a3
AD
252
253 switch ((c = getc (fin)))
254 {
255 case '/':
256 cplus_comment = 1;
257 break;
258 case '*':
259 cplus_comment = 0;
260 break;
261 default:
262 ungetc (c, fin);
263 return;
264 }
ae3c3164 265
337bab46 266 obstack_1grow (oout1, c);
896fe5c1
AD
267 if (oout2)
268 obstack_1grow (oout2, c);
550a72a3 269 c = getc (fin);
ae3c3164
AD
270
271 ended = 0;
272 while (!ended)
273 {
274 if (!cplus_comment && c == '*')
275 {
276 while (c == '*')
277 {
337bab46 278 obstack_1grow (oout1, c);
896fe5c1
AD
279 if (oout2)
280 obstack_1grow (oout2, c);
550a72a3 281 c = getc (fin);
ae3c3164
AD
282 }
283
284 if (c == '/')
285 {
337bab46 286 obstack_1grow (oout1, c);
896fe5c1
AD
287 if (oout2)
288 obstack_1grow (oout2, c);
ae3c3164
AD
289 ended = 1;
290 }
291 }
292 else if (c == '\n')
293 {
294 lineno++;
337bab46 295 obstack_1grow (oout1, c);
896fe5c1
AD
296 if (oout2)
297 obstack_1grow (oout2, c);
ae3c3164
AD
298 if (cplus_comment)
299 ended = 1;
300 else
550a72a3 301 c = getc (fin);
ae3c3164
AD
302 }
303 else if (c == EOF)
304 fatal (_("unterminated comment"));
305 else
306 {
337bab46 307 obstack_1grow (oout1, c);
896fe5c1
AD
308 if (oout2)
309 obstack_1grow (oout2, c);
550a72a3 310 c = getc (fin);
ae3c3164
AD
311 }
312 }
313}
314
315
550a72a3
AD
/* Copy the comment (actually, whatever starts with the `/' that has
   just been read) from FIN to OOUT.  This is copy_comment2 with no
   second output obstack.  */

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *no_second_output = NULL;

  copy_comment2 (fin, oout, no_second_output);
}
326
327
a70083a3 328/*-----------------------------------------------------------------.
337bab46 329| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
330| reference to this location. STACK_OFFSET is the number of values |
331| in the current rule so far, which says where to find `$0' with |
332| respect to the top of the stack. |
333`-----------------------------------------------------------------*/
1ff442ca 334
a70083a3 335static inline void
337bab46 336copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 337{
a70083a3 338 int c;
1ff442ca 339
a70083a3
AD
340 c = getc (fin);
341 if (c == '$')
1ff442ca 342 {
ff4423cc 343 obstack_sgrow (oout, "yyloc");
89cab50d 344 locations_flag = 1;
a70083a3
AD
345 }
346 else if (isdigit (c) || c == '-')
347 {
348 int n;
1ff442ca 349
a70083a3
AD
350 ungetc (c, fin);
351 n = read_signed_integer (fin);
943819bf 352
337bab46 353 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 354 locations_flag = 1;
1ff442ca 355 }
a70083a3 356 else
ff4a34be
AD
357 {
358 char buf[] = "@c";
359 buf[1] = c;
360 complain (_("%s is invalid"), quote (buf));
361 }
1ff442ca 362}
79282c5a
AD
363
364
365/*-------------------------------------------------------------------.
366| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
367| |
368| Possible inputs: $[<TYPENAME>]($|integer) |
369| |
337bab46 370| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
371| the number of values in the current rule so far, which says where |
372| to find `$0' with respect to the top of the stack. |
373`-------------------------------------------------------------------*/
374
375static inline void
337bab46 376copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
377 symbol_list *rule, int stack_offset)
378{
379 int c = getc (fin);
b0ce6046 380 const char *type_name = NULL;
79282c5a 381
f282676b 382 /* Get the type name if explicit. */
79282c5a
AD
383 if (c == '<')
384 {
f282676b 385 read_type_name (fin);
79282c5a
AD
386 type_name = token_buffer;
387 value_components_used = 1;
79282c5a
AD
388 c = getc (fin);
389 }
390
391 if (c == '$')
392 {
ff4423cc 393 obstack_sgrow (oout, "yyval");
8c7ebe49 394
79282c5a
AD
395 if (!type_name)
396 type_name = get_type_name (0, rule);
397 if (type_name)
337bab46 398 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
399 if (!type_name && typed)
400 complain (_("$$ of `%s' has no declared type"),
401 rule->sym->tag);
402 }
403 else if (isdigit (c) || c == '-')
404 {
405 int n;
406 ungetc (c, fin);
407 n = read_signed_integer (fin);
408
409 if (!type_name && n > 0)
410 type_name = get_type_name (n, rule);
411
337bab46 412 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 413
79282c5a 414 if (type_name)
337bab46 415 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
416 if (!type_name && typed)
417 complain (_("$%d of `%s' has no declared type"),
418 n, rule->sym->tag);
419 }
420 else
421 {
422 char buf[] = "$c";
423 buf[1] = c;
424 complain (_("%s is invalid"), quote (buf));
425 }
426}
a70083a3
AD
427\f
428/*-------------------------------------------------------------------.
429| Copy the contents of a `%{ ... %}' into the definitions file. The |
430| `%{' has already been read. Return after reading the `%}'. |
431`-------------------------------------------------------------------*/
1ff442ca 432
4a120d45 433static void
118fb205 434copy_definition (void)
1ff442ca 435{
a70083a3 436 int c;
ae3c3164 437 /* -1 while reading a character if prev char was %. */
a70083a3 438 int after_percent;
1ff442ca 439
89cab50d 440 if (!no_lines_flag)
25b222fa
MA
441 {
442 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 443 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
444 muscle_find("filename")));
445 }
1ff442ca
NF
446
447 after_percent = 0;
448
ae3c3164 449 c = getc (finput);
1ff442ca
NF
450
451 for (;;)
452 {
453 switch (c)
454 {
455 case '\n':
dd60faec 456 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
457 lineno++;
458 break;
459
460 case '%':
a70083a3 461 after_percent = -1;
1ff442ca 462 break;
a083fbbf 463
1ff442ca
NF
464 case '\'':
465 case '"':
337bab46 466 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
467 break;
468
469 case '/':
337bab46 470 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
471 break;
472
473 case EOF:
a70083a3 474 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
475
476 default:
dd60faec 477 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
478 }
479
a70083a3 480 c = getc (finput);
1ff442ca
NF
481
482 if (after_percent)
483 {
484 if (c == '}')
485 return;
dd60faec 486 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
487 }
488 after_percent = 0;
1ff442ca 489 }
1ff442ca
NF
490}
491
492
d7020c20
AD
493/*-------------------------------------------------------------------.
494| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
495| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
496| are reversed. |
497`-------------------------------------------------------------------*/
1ff442ca 498
4a120d45 499static void
d7020c20 500parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 501{
342b8b6e
AD
502 token_t token = tok_undef;
503 char *typename = NULL;
1ff442ca 504
1e9798d5
AD
505 /* The symbol being defined. */
506 struct bucket *symbol = NULL;
507
508 /* After `%token' and `%nterm', any number of symbols maybe be
509 defined. */
1ff442ca
NF
510 for (;;)
511 {
e6011337
JT
512 int tmp_char = ungetc (skip_white_space (), finput);
513
1e9798d5
AD
514 /* `%' (for instance from `%token', or from `%%' etc.) is the
515 only valid means to end this declaration. */
e6011337 516 if (tmp_char == '%')
1ff442ca 517 return;
e6011337 518 if (tmp_char == EOF)
a0f6b076 519 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 520
a70083a3 521 token = lex ();
511e79b3 522 if (token == tok_comma)
943819bf
RS
523 {
524 symbol = NULL;
525 continue;
526 }
511e79b3 527 if (token == tok_typename)
1ff442ca 528 {
95e36146 529 typename = xstrdup (token_buffer);
1ff442ca 530 value_components_used = 1;
943819bf
RS
531 symbol = NULL;
532 }
511e79b3 533 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 534 {
8e03724b
AD
535 if (symval->alias)
536 warn (_("symbol `%s' used more than once as a literal string"),
537 symval->tag);
538 else if (symbol->alias)
539 warn (_("symbol `%s' given more than one literal string"),
540 symbol->tag);
541 else
542 {
543 symval->class = token_sym;
544 symval->type_name = typename;
545 symval->user_token_number = symbol->user_token_number;
546 symbol->user_token_number = SALIAS;
547 symval->alias = symbol;
548 symbol->alias = symval;
549 /* symbol and symval combined are only one symbol */
550 nsyms--;
551 }
8e03724b 552 symbol = NULL;
1ff442ca 553 }
511e79b3 554 else if (token == tok_identifier)
1ff442ca
NF
555 {
556 int oldclass = symval->class;
943819bf 557 symbol = symval;
1ff442ca 558
943819bf 559 if (symbol->class == what_is_not)
a0f6b076 560 complain (_("symbol %s redefined"), symbol->tag);
943819bf 561 symbol->class = what_is;
d7020c20 562 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 563 symbol->value = nvars++;
1ff442ca
NF
564
565 if (typename)
566 {
943819bf
RS
567 if (symbol->type_name == NULL)
568 symbol->type_name = typename;
a70083a3 569 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 570 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
571 }
572 }
511e79b3 573 else if (symbol && token == tok_number)
a70083a3 574 {
943819bf 575 symbol->user_token_number = numval;
a70083a3 576 }
1ff442ca 577 else
943819bf 578 {
a0f6b076 579 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
580 token_buffer,
581 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 582 skip_to_char ('%');
943819bf 583 }
1ff442ca
NF
584 }
585
586}
587
1ff442ca 588
d7020c20
AD
589/*------------------------------.
590| Parse what comes after %start |
591`------------------------------*/
1ff442ca 592
4a120d45 593static void
118fb205 594parse_start_decl (void)
1ff442ca
NF
595{
596 if (start_flag)
27821bff 597 complain (_("multiple %s declarations"), "%start");
511e79b3 598 if (lex () != tok_identifier)
27821bff 599 complain (_("invalid %s declaration"), "%start");
943819bf
RS
600 else
601 {
602 start_flag = 1;
603 startval = symval;
604 }
1ff442ca
NF
605}
606
a70083a3
AD
607/*-----------------------------------------------------------.
608| read in a %type declaration and record its information for |
609| get_type_name to access |
610`-----------------------------------------------------------*/
611
612static void
613parse_type_decl (void)
614{
a70083a3
AD
615 char *name;
616
511e79b3 617 if (lex () != tok_typename)
a70083a3
AD
618 {
619 complain ("%s", _("%type declaration has no <typename>"));
620 skip_to_char ('%');
621 return;
622 }
623
95e36146 624 name = xstrdup (token_buffer);
a70083a3
AD
625
626 for (;;)
627 {
f17bcd1f 628 token_t t;
a70083a3
AD
629 int tmp_char = ungetc (skip_white_space (), finput);
630
631 if (tmp_char == '%')
632 return;
633 if (tmp_char == EOF)
634 fatal (_("Premature EOF after %s"), token_buffer);
635
636 t = lex ();
637
638 switch (t)
1ff442ca
NF
639 {
640
511e79b3
AD
641 case tok_comma:
642 case tok_semicolon:
1ff442ca
NF
643 break;
644
511e79b3 645 case tok_identifier:
1ff442ca
NF
646 if (symval->type_name == NULL)
647 symval->type_name = name;
a70083a3 648 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 649 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
650
651 break;
652
653 default:
a0f6b076
AD
654 complain (_("invalid %%type declaration due to item: %s"),
655 token_buffer);
a70083a3 656 skip_to_char ('%');
1ff442ca
NF
657 }
658 }
659}
660
661
662
d7020c20
AD
663/*----------------------------------------------------------------.
664| Read in a %left, %right or %nonassoc declaration and record its |
665| information. |
666`----------------------------------------------------------------*/
1ff442ca 667
4a120d45 668static void
d7020c20 669parse_assoc_decl (associativity assoc)
1ff442ca 670{
a70083a3
AD
671 char *name = NULL;
672 int prev = 0;
1ff442ca 673
a70083a3 674 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 675
1ff442ca
NF
676 for (;;)
677 {
f17bcd1f 678 token_t t;
e6011337 679 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 680
e6011337 681 if (tmp_char == '%')
1ff442ca 682 return;
e6011337 683 if (tmp_char == EOF)
a0f6b076 684 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 685
a70083a3 686 t = lex ();
1ff442ca
NF
687
688 switch (t)
689 {
511e79b3 690 case tok_typename:
95e36146 691 name = xstrdup (token_buffer);
1ff442ca
NF
692 break;
693
511e79b3 694 case tok_comma:
1ff442ca
NF
695 break;
696
511e79b3 697 case tok_identifier:
1ff442ca 698 if (symval->prec != 0)
a0f6b076 699 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
700 symval->prec = lastprec;
701 symval->assoc = assoc;
d7020c20 702 if (symval->class == nterm_sym)
a0f6b076 703 complain (_("symbol %s redefined"), symval->tag);
d7020c20 704 symval->class = token_sym;
1ff442ca 705 if (name)
a70083a3 706 { /* record the type, if one is specified */
1ff442ca
NF
707 if (symval->type_name == NULL)
708 symval->type_name = name;
a70083a3 709 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 710 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
711 }
712 break;
713
511e79b3
AD
714 case tok_number:
715 if (prev == tok_identifier)
a70083a3 716 {
1ff442ca 717 symval->user_token_number = numval;
a70083a3
AD
718 }
719 else
720 {
721 complain (_
722 ("invalid text (%s) - number should be after identifier"),
723token_buffer);
724 skip_to_char ('%');
725 }
1ff442ca
NF
726 break;
727
511e79b3 728 case tok_semicolon:
1ff442ca
NF
729 return;
730
731 default:
a0f6b076 732 complain (_("unexpected item: %s"), token_buffer);
a70083a3 733 skip_to_char ('%');
1ff442ca
NF
734 }
735
736 prev = t;
1ff442ca
NF
737 }
738}
739
740
741
dd60faec 742/*--------------------------------------------------------------.
180d45ba
PB
743| Copy the union declaration into the stype muscle |
744| (and fdefines), where it is made into the definition of |
745| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 746`--------------------------------------------------------------*/
1ff442ca 747
4a120d45 748static void
118fb205 749parse_union_decl (void)
1ff442ca 750{
a70083a3
AD
751 int c;
752 int count = 0;
180d45ba 753 struct obstack union_obstack;
5f7e0832
AD
754 const char *prologue = "\
755#ifndef YYSTYPE\n\
756typedef union";
757 const char *epilogue = "\
758 yystype;\n\
759# define YYSTYPE yystype\n\
760#endif\n";
1ff442ca
NF
761
762 if (typed)
27821bff 763 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
764
765 typed = 1;
766
f6ec6d13
AD
767 /* FIXME: I'm worried: are you sure attrs_obstack is properly
768 filled? */
5f7e0832
AD
769 /* I don't see any reasons to keep this line, because we should
770 create a special skeleton for this option. */
180d45ba 771 if (no_lines_flag)
dd60faec 772 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 773
180d45ba
PB
774 obstack_init (&union_obstack);
775 obstack_sgrow (&union_obstack, "union");
896fe5c1 776 if (defines_flag)
5f7e0832 777 obstack_sgrow (&defines_obstack, prologue);
1ff442ca 778
27821bff 779 c = getc (finput);
1ff442ca
NF
780
781 while (c != EOF)
782 {
342b8b6e
AD
783 /* If C contains '/', it is output by copy_comment (). */
784 if (c != '/')
785 {
f6ec6d13 786 obstack_1grow (&union_obstack, c);
342b8b6e
AD
787 if (defines_flag)
788 obstack_1grow (&defines_obstack, c);
789 }
1ff442ca
NF
790
791 switch (c)
792 {
793 case '\n':
794 lineno++;
795 break;
796
797 case '/':
180d45ba 798 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
799 break;
800
1ff442ca
NF
801 case '{':
802 count++;
803 break;
804
805 case '}':
806 if (count == 0)
27821bff 807 complain (_("unmatched %s"), "`}'");
1ff442ca 808 count--;
943819bf 809 if (count <= 0)
1ff442ca 810 {
896fe5c1 811 if (defines_flag)
5f7e0832 812 obstack_sgrow (&defines_obstack, epilogue);
1ff442ca 813 /* JF don't choke on trailing semi */
27821bff
AD
814 c = skip_white_space ();
815 if (c != ';')
a70083a3 816 ungetc (c, finput);
180d45ba
PB
817 obstack_1grow (&union_obstack, 0);
818 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
819 return;
820 }
821 }
822
27821bff 823 c = getc (finput);
1ff442ca 824 }
180d45ba 825
1ff442ca
NF
826}
827
d7020c20
AD
828
829/*-------------------------------------------------------.
830| Parse the declaration %expect N which says to expect N |
831| shift-reduce conflicts. |
832`-------------------------------------------------------*/
1ff442ca 833
4a120d45 834static void
118fb205 835parse_expect_decl (void)
1ff442ca 836{
131e2fef 837 int c = skip_white_space ();
1ff442ca
NF
838 ungetc (c, finput);
839
131e2fef 840 if (!isdigit (c))
79282c5a 841 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
842 else
843 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
844}
845
a70083a3
AD
846
847/*-------------------------------------------------------------------.
848| Parse what comes after %thong. the full syntax is |
849| |
850| %thong <type> token number literal |
851| |
852| the <type> or number may be omitted. The number specifies the |
853| user_token_number. |
854| |
855| Two symbols are entered in the table, one for the token symbol and |
856| one for the literal. Both are given the <type>, if any, from the |
857| declaration. The ->user_token_number of the first is SALIAS and |
858| the ->user_token_number of the second is set to the number, if |
859| any, from the declaration. The two symbols are linked via |
860| pointers in their ->alias fields. |
861| |
862| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
863| only the literal string is retained it is the literal string that |
864| is output to yytname |
865`-------------------------------------------------------------------*/
866
867static void
868parse_thong_decl (void)
7b306f52 869{
f17bcd1f 870 token_t token;
a70083a3
AD
871 struct bucket *symbol;
872 char *typename = 0;
6b7e85b9 873 int usrtoknum = SUNDEF;
7b306f52 874
a70083a3 875 token = lex (); /* fetch typename or first token */
511e79b3 876 if (token == tok_typename)
7b306f52 877 {
95e36146 878 typename = xstrdup (token_buffer);
a70083a3
AD
879 value_components_used = 1;
880 token = lex (); /* fetch first token */
7b306f52 881 }
7b306f52 882
a70083a3 883 /* process first token */
7b306f52 884
511e79b3 885 if (token != tok_identifier)
a70083a3
AD
886 {
887 complain (_("unrecognized item %s, expected an identifier"),
888 token_buffer);
889 skip_to_char ('%');
890 return;
7b306f52 891 }
d7020c20 892 symval->class = token_sym;
a70083a3
AD
893 symval->type_name = typename;
894 symval->user_token_number = SALIAS;
895 symbol = symval;
7b306f52 896
a70083a3 897 token = lex (); /* get number or literal string */
1ff442ca 898
511e79b3 899 if (token == tok_number)
943819bf 900 {
a70083a3
AD
901 usrtoknum = numval;
902 token = lex (); /* okay, did number, now get literal */
943819bf 903 }
1ff442ca 904
a70083a3 905 /* process literal string token */
1ff442ca 906
511e79b3 907 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 908 {
a70083a3
AD
909 complain (_("expected string constant instead of %s"), token_buffer);
910 skip_to_char ('%');
911 return;
1ff442ca 912 }
d7020c20 913 symval->class = token_sym;
a70083a3
AD
914 symval->type_name = typename;
915 symval->user_token_number = usrtoknum;
1ff442ca 916
a70083a3
AD
917 symval->alias = symbol;
918 symbol->alias = symval;
1ff442ca 919
79282c5a
AD
920 /* symbol and symval combined are only one symbol. */
921 nsyms--;
a70083a3 922}
3cef001a 923
b6610515 924static void
11d82f03 925parse_muscle_decl (void)
b6610515
RA
926{
927 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
928 char* muscle_key;
929 char* muscle_value;
b6610515
RA
930
931 /* Read key. */
932 if (!isalpha (ch) && ch != '_')
933 {
934 complain (_("invalid %s declaration"), "%define");
935 skip_to_char ('%');
936 return;
937 }
11d82f03
MA
938 copy_identifier (finput, &muscle_obstack);
939 obstack_1grow (&muscle_obstack, 0);
940 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 941
b6610515
RA
942 /* Read value. */
943 ch = skip_white_space ();
944 if (ch != '"')
945 {
946 ungetc (ch, finput);
947 if (ch != EOF)
948 {
949 complain (_("invalid %s declaration"), "%define");
950 skip_to_char ('%');
951 return;
952 }
953 else
954 fatal (_("Premature EOF after %s"), "\"");
955 }
11d82f03
MA
956 copy_string2 (finput, &muscle_obstack, '"', 0);
957 obstack_1grow (&muscle_obstack, 0);
958 muscle_value = obstack_finish (&muscle_obstack);
b6610515 959
b6610515 960 /* Store the (key, value) pair in the environment. */
11d82f03 961 muscle_insert (muscle_key, muscle_value);
b6610515
RA
962}
963
2ba3b73c 964
426cf563
MA
965
966/*---------------------------------.
a870c567 967| Parse a double quoted parameter. |
426cf563
MA
968`---------------------------------*/
969
970static const char *
971parse_dquoted_param (const char *from)
972{
973 struct obstack param_obstack;
974 const char *param = NULL;
975 int c;
976
977 obstack_init (&param_obstack);
978 c = skip_white_space ();
979
980 if (c != '"')
981 {
982 complain (_("invalid %s declaration"), from);
983 ungetc (c, finput);
984 skip_to_char ('%');
985 return NULL;
986 }
987
988 for (;;)
989 {
990 if (literalchar (NULL, &c, '\"'))
991 obstack_1grow (&param_obstack, c);
992 else
993 break;
994 }
a870c567 995
426cf563
MA
996 obstack_1grow (&param_obstack, '\0');
997 param = obstack_finish (&param_obstack);
998
999 if (c != '"' || strlen (param) == 0)
1000 {
1001 complain (_("invalid %s declaration"), from);
1002 if (c != '"')
1003 ungetc (c, finput);
1004 skip_to_char ('%');
1005 return NULL;
1006 }
1007
1008 return param;
1009}
1010
2ba3b73c
MA
1011/*----------------------------------.
1012| Parse what comes after %skeleton. |
1013`----------------------------------*/
1014
a870c567 1015static void
2ba3b73c
MA
1016parse_skel_decl (void)
1017{
426cf563 1018 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
1019}
1020
a70083a3
AD
1021/*----------------------------------------------------------------.
1022| Read from finput until `%%' is seen. Discard the `%%'. Handle |
1023| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 1024| groups to ATTRS_OBSTACK. |
a70083a3 1025`----------------------------------------------------------------*/
1ff442ca 1026
4a120d45 1027static void
a70083a3 1028read_declarations (void)
1ff442ca 1029{
a70083a3 1030 for (;;)
1ff442ca 1031 {
951366c1 1032 int c = skip_white_space ();
1ff442ca 1033
a70083a3
AD
1034 if (c == '%')
1035 {
951366c1 1036 token_t tok = parse_percent_token ();
1ff442ca 1037
a70083a3 1038 switch (tok)
943819bf 1039 {
511e79b3 1040 case tok_two_percents:
a70083a3 1041 return;
1ff442ca 1042
511e79b3 1043 case tok_percent_left_curly:
a70083a3
AD
1044 copy_definition ();
1045 break;
1ff442ca 1046
511e79b3 1047 case tok_token:
d7020c20 1048 parse_token_decl (token_sym, nterm_sym);
a70083a3 1049 break;
1ff442ca 1050
511e79b3 1051 case tok_nterm:
d7020c20 1052 parse_token_decl (nterm_sym, token_sym);
a70083a3 1053 break;
1ff442ca 1054
511e79b3 1055 case tok_type:
a70083a3
AD
1056 parse_type_decl ();
1057 break;
1ff442ca 1058
511e79b3 1059 case tok_start:
a70083a3
AD
1060 parse_start_decl ();
1061 break;
118fb205 1062
511e79b3 1063 case tok_union:
a70083a3
AD
1064 parse_union_decl ();
1065 break;
1ff442ca 1066
511e79b3 1067 case tok_expect:
a70083a3
AD
1068 parse_expect_decl ();
1069 break;
6deb4447 1070
511e79b3 1071 case tok_thong:
a70083a3
AD
1072 parse_thong_decl ();
1073 break;
d7020c20 1074
511e79b3 1075 case tok_left:
d7020c20 1076 parse_assoc_decl (left_assoc);
a70083a3 1077 break;
1ff442ca 1078
511e79b3 1079 case tok_right:
d7020c20 1080 parse_assoc_decl (right_assoc);
a70083a3 1081 break;
1ff442ca 1082
511e79b3 1083 case tok_nonassoc:
d7020c20 1084 parse_assoc_decl (non_assoc);
a70083a3 1085 break;
1ff442ca 1086
b6610515 1087 case tok_define:
11d82f03 1088 parse_muscle_decl ();
b6610515 1089 break;
342b8b6e 1090
2ba3b73c
MA
1091 case tok_skel:
1092 parse_skel_decl ();
1093 break;
b6610515 1094
511e79b3 1095 case tok_noop:
a70083a3 1096 break;
1ff442ca 1097
951366c1
AD
1098 case tok_stropt:
1099 case tok_intopt:
1100 case tok_obsolete:
951366c1
AD
1101 abort ();
1102 break;
1103
e0c40012 1104 case tok_illegal:
a70083a3
AD
1105 default:
1106 complain (_("unrecognized: %s"), token_buffer);
1107 skip_to_char ('%');
1108 }
1109 }
1110 else if (c == EOF)
1111 fatal (_("no input grammar"));
1112 else
1113 {
ff4a34be
AD
1114 char buf[] = "c";
1115 buf[0] = c;
1116 complain (_("unknown character: %s"), quote (buf));
a70083a3 1117 skip_to_char ('%');
1ff442ca 1118 }
1ff442ca 1119 }
1ff442ca 1120}
a70083a3
AD
1121\f
1122/*-------------------------------------------------------------------.
1123| Assuming that a `{' has just been seen, copy everything up to the |
1124| matching `}' into the actions file. STACK_OFFSET is the number of |
1125| values in the current rule so far, which says where to find `$0' |
1126| with respect to the top of the stack. |
1127`-------------------------------------------------------------------*/
1ff442ca 1128
4a120d45 1129static void
79282c5a 1130copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1131{
a70083a3 1132 int c;
a70083a3 1133 int count;
1ff442ca
NF
1134
1135 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1136 if (semantic_parser)
1137 stack_offset = 0;
1ff442ca 1138
25b222fa 1139 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
8c7ebe49 1140
89cab50d 1141 if (!no_lines_flag)
8c7ebe49 1142 {
25b222fa 1143 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
342b8b6e 1144 lineno, quotearg_style (c_quoting_style,
25b222fa 1145 muscle_find ("filename")));
8c7ebe49
AD
1146 }
1147 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1148
1149 count = 1;
a70083a3 1150 c = getc (finput);
1ff442ca
NF
1151
1152 while (count > 0)
1153 {
1154 while (c != '}')
a70083a3
AD
1155 {
1156 switch (c)
1ff442ca
NF
1157 {
1158 case '\n':
8c7ebe49 1159 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1160 lineno++;
1161 break;
1162
1163 case '{':
8c7ebe49 1164 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1165 count++;
1166 break;
1167
1168 case '\'':
1169 case '"':
337bab46 1170 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1171 break;
1172
1173 case '/':
337bab46 1174 copy_comment (finput, &action_obstack);
1ff442ca
NF
1175 break;
1176
1177 case '$':
337bab46 1178 copy_dollar (finput, &action_obstack,
8c7ebe49 1179 rule, stack_offset);
1ff442ca
NF
1180 break;
1181
1182 case '@':
337bab46 1183 copy_at (finput, &action_obstack,
8c7ebe49 1184 stack_offset);
6666f98f 1185 break;
1ff442ca
NF
1186
1187 case EOF:
27821bff 1188 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1189
1190 default:
8c7ebe49 1191 obstack_1grow (&action_obstack, c);
a70083a3
AD
1192 }
1193
1194 c = getc (finput);
1195 }
1196
1197 /* above loop exits when c is '}' */
1198
1199 if (--count)
1200 {
8c7ebe49 1201 obstack_1grow (&action_obstack, c);
a70083a3
AD
1202 c = getc (finput);
1203 }
1204 }
1205
ff4423cc 1206 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1207}
1208\f
1209/*-------------------------------------------------------------------.
1210| After `%guard' is seen in the input file, copy the actual guard |
1211| into the guards file. If the guard is followed by an action, copy |
1212| that into the actions file. STACK_OFFSET is the number of values |
1213| in the current rule so far, which says where to find `$0' with |
1214| respect to the top of the stack, for the simple parser in which |
1215| the stack is not popped until after the guard is run. |
1216`-------------------------------------------------------------------*/
1217
1218static void
79282c5a 1219copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1220{
1221 int c;
a70083a3 1222 int count;
a70083a3
AD
1223 int brace_flag = 0;
1224
1225 /* offset is always 0 if parser has already popped the stack pointer */
1226 if (semantic_parser)
1227 stack_offset = 0;
1228
ea5607fd 1229 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1230 if (!no_lines_flag)
25b222fa 1231 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1232 lineno, quotearg_style (c_quoting_style,
11d82f03 1233 muscle_find ("filename")));
ea5607fd 1234 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1235
1236 count = 0;
1237 c = getc (finput);
1238
1239 while (brace_flag ? (count > 0) : (c != ';'))
1240 {
1241 switch (c)
1242 {
1243 case '\n':
ea5607fd 1244 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1245 lineno++;
1246 break;
1247
1248 case '{':
ea5607fd 1249 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1250 brace_flag = 1;
1251 count++;
1252 break;
1253
1254 case '}':
ea5607fd 1255 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1256 if (count > 0)
1257 count--;
1258 else
1259 {
1260 complain (_("unmatched %s"), "`}'");
1261 c = getc (finput); /* skip it */
1262 }
1263 break;
1264
1265 case '\'':
1266 case '"':
337bab46 1267 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1268 break;
1269
1270 case '/':
337bab46 1271 copy_comment (finput, &guard_obstack);
a70083a3
AD
1272 break;
1273
1274 case '$':
337bab46 1275 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1276 break;
1ff442ca 1277
a70083a3 1278 case '@':
337bab46 1279 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1280 break;
1ff442ca 1281
a70083a3
AD
1282 case EOF:
1283 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1284
a70083a3 1285 default:
ea5607fd 1286 obstack_1grow (&guard_obstack, c);
1ff442ca 1287 }
a70083a3
AD
1288
1289 if (c != '}' || count != 0)
1290 c = getc (finput);
1ff442ca
NF
1291 }
1292
a70083a3
AD
1293 c = skip_white_space ();
1294
ff4423cc 1295 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1296 if (c == '{')
1297 copy_action (rule, stack_offset);
1298 else if (c == '=')
1299 {
1300 c = getc (finput); /* why not skip_white_space -wjh */
1301 if (c == '{')
1302 copy_action (rule, stack_offset);
1303 }
1304 else
1305 ungetc (c, finput);
1ff442ca 1306}
a70083a3
AD
1307\f
1308
a70083a3
AD
1309/*-------------------------------------------------------------------.
1310| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1311| with the user's names. |
1312`-------------------------------------------------------------------*/
1ff442ca 1313
4a120d45 1314static bucket *
118fb205 1315gensym (void)
1ff442ca 1316{
274d42ce
AD
1317 /* Incremented for each generated symbol */
1318 static int gensym_count = 0;
1319 static char buf[256];
1320
a70083a3 1321 bucket *sym;
1ff442ca 1322
274d42ce
AD
1323 sprintf (buf, "@%d", ++gensym_count);
1324 token_buffer = buf;
a70083a3 1325 sym = getsym (token_buffer);
d7020c20 1326 sym->class = nterm_sym;
1ff442ca 1327 sym->value = nvars++;
36281465 1328 return sym;
1ff442ca 1329}
a70083a3 1330\f
107f7dfb
AD
1331/*-------------------------------------------------------------------.
1332| Parse the input grammar into a single symbol_list structure. Each |
1333| rule is represented by a sequence of symbols: the left hand side |
1334| followed by the contents of the right hand side, followed by a |
1335| null pointer instead of a symbol to terminate the rule. The next |
1336| symbol is the lhs of the following rule. |
1337| |
1338| All guards and actions are copied out to the appropriate files, |
1339| labelled by the rule number they apply to. |
1340| |
1341| Bison used to allow some %directives in the rules sections, but |
1342| this is no longer considered appropriate: (i) the documented |
1343| grammar doesn't claim it, (ii) it would promote bad style, (iii) |
1344| error recovery for %directives consists in skipping the junk |
1345| until a `%' is seen, which helps synchronizing. This scheme is |
1346| definitely wrong in the rules section. |
1347`-------------------------------------------------------------------*/
1ff442ca 1348
/* Read the rules section token by token until `%%' or end of file,
   appending one symbol_list node per lhs, per rhs symbol and per
   end-of-rule marker (a node whose symbol is NULL) to the list headed
   by GRAMMAR.  Actions and guards met on the way are copied out by
   copy_action and copy_guard.  NRULES, NITEMS and NVARS are updated
   as rules and symbols are recorded; once reading is over, some
   sanity checks are run and NTOKENS is computed.  */
4a120d45 1349static void
118fb205 1350readgram (void)
1ff442ca 1351{
f17bcd1f 1352 token_t t;
a70083a3 1353 bucket *lhs = NULL;
107f7dfb
AD
1354 symbol_list *p = NULL;
1355 symbol_list *p1 = NULL;
a70083a3 1356 bucket *bp;
1ff442ca 1357
ff4a34be
AD
1358 /* Points to first symbol_list of current rule. its symbol is the
1359 lhs of the rule. */
107f7dfb 1360 symbol_list *crule = NULL;
ff4a34be 1361 /* Points to the symbol_list preceding crule. */
107f7dfb 1362 symbol_list *crule1 = NULL;
1ff442ca 1363
/* Prime the loop with the first token of the rules section. */
a70083a3 1364 t = lex ();
1ff442ca 1365
511e79b3 1366 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1367 if (t == tok_identifier || t == tok_bar)
1368 {
1369 int action_flag = 0;
1370 /* Number of symbols in rhs of this rule so far */
1371 int rulelength = 0;
1372 int xactions = 0; /* JF for error checking */
1373 bucket *first_rhs = 0;
1374
1375 if (t == tok_identifier)
1376 {
1377 lhs = symval;
1378
1379 if (!start_flag)
1380 {
1381 startval = lhs;
1382 start_flag = 1;
1383 }
1ff442ca 1384
107f7dfb
AD
1385 t = lex ();
1386 if (t != tok_colon)
1387 {
1388 complain (_("ill-formed rule: initial symbol not followed by colon"));
1389 unlex (t);
1390 }
1391 }
1392
1393 if (nrules == 0 && t == tok_bar)
1394 {
1395 complain (_("grammar starts with vertical bar"));
1396 lhs = symval; /* BOGUS: use a random symval */
1397 }
1398 /* start a new rule and record its lhs. */
1399
1400 nrules++;
1401 nitems++;
1402
1403 p = symbol_list_new (lhs);
1404
/* Append the lhs node to the list. crule1 keeps the node that
   preceded this rule (NULL when this is the first node), which is
   where a dummy rule for a mid-rule action gets spliced in below. */
1405 crule1 = p1;
1406 if (p1)
1407 p1->next = p;
1408 else
1409 grammar = p;
1ff442ca 1410
107f7dfb
AD
1411 p1 = p;
1412 crule = p;
1ff442ca 1413
107f7dfb 1414 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1415
107f7dfb
AD
1416 if (lhs->class == unknown_sym)
1417 {
1418 lhs->class = nterm_sym;
1419 lhs->value = nvars;
1420 nvars++;
1421 }
1422 else if (lhs->class == token_sym)
1423 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1424
107f7dfb 1425 /* read the rhs of the rule. */
1ff442ca 1426
107f7dfb
AD
1427 for (;;)
1428 {
1429 t = lex ();
/* On %prec, the next token is consumed and whatever symval holds
   afterwards is recorded as this rule's precedence symbol; the kind
   of that token is not checked here. */
1430 if (t == tok_prec)
1431 {
1432 t = lex ();
1433 crule->ruleprec = symval;
1434 t = lex ();
1435 }
1436
1437 if (!(t == tok_identifier || t == tok_left_curly))
1438 break;
1ff442ca 1439
107f7dfb
AD
1440 /* If next token is an identifier, see if a colon follows it.
1441 If one does, exit this rule now. */
1442 if (t == tok_identifier)
1443 {
1444 bucket *ssave;
1445 token_t t1;
1446
/* Peek one token ahead and push it back; symval is saved and
   restored around the peek. */
1447 ssave = symval;
1448 t1 = lex ();
1449 unlex (t1);
1450 symval = ssave;
1451 if (t1 == tok_colon)
1452 break;
1453
1454 if (!first_rhs) /* JF */
1455 first_rhs = symval;
1456 /* Not followed by colon =>
1457 process as part of this rule's rhs. */
1458 }
1459
1460 /* If we just passed an action, that action was in the middle
1461 of a rule, so make a dummy rule to reduce it to a
1462 non-terminal. */
1463 if (action_flag)
1464 {
1465 /* Since the action was written out with this rule's
1466 number, we must give the new rule this number by
1467 inserting the new rule before it. */
1468
1469 /* Make a dummy nonterminal, a gensym. */
1470 bucket *sdummy = gensym ();
1471
1472 /* Make a new rule, whose body is empty, before the
1473 current one, so that the action just read can
1474 belong to it. */
1475 nrules++;
1476 nitems++;
1477 p = symbol_list_new (sdummy);
1478 /* Attach its lineno to that of the host rule. */
1479 p->line = crule->line;
1480 if (crule1)
1481 crule1->next = p;
1482 else
1483 grammar = p;
1484 /* End of the rule. */
1485 crule1 = symbol_list_new (NULL);
1486 crule1->next = crule;
1487
1488 p->next = crule1;
1489
1490 /* Insert the dummy generated by that rule into this
1491 rule. */
1492 nitems++;
1493 p = symbol_list_new (sdummy);
1494 p1->next = p;
1495 p1 = p;
1496
1497 action_flag = 0;
1498 }
1499
1500 if (t == tok_identifier)
1501 {
1502 nitems++;
1503 p = symbol_list_new (symval);
1504 p1->next = p;
1505 p1 = p;
1506 }
1507 else /* handle an action. */
1508 {
1509 copy_action (crule, rulelength);
1510 action_flag = 1;
1511 xactions++; /* JF */
1512 }
/* Symbols and actions alike count toward the rule length. */
1513 rulelength++;
1514 } /* end of read rhs of rule */
1515
1516 /* Put an empty link in the list to mark the end of this rule */
1517 p = symbol_list_new (NULL);
1518 p1->next = p;
1519 p1 = p;
1520
1521 if (t == tok_prec)
1522 {
1523 complain (_("two @prec's in a row"));
1524 t = lex ();
1525 crule->ruleprec = symval;
1526 t = lex ();
1527 }
1528 if (t == tok_guard)
1529 {
1530 if (!semantic_parser)
1531 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1532
107f7dfb 1533 copy_guard (crule, rulelength);
a70083a3 1534 t = lex ();
107f7dfb
AD
1535 }
1536 else if (t == tok_left_curly)
1537 {
1538 /* This case never occurs -wjh */
1539 if (action_flag)
1540 complain (_("two actions at end of one rule"));
1541 copy_action (crule, rulelength);
1542 action_flag = 1;
1543 xactions++; /* -wjh */
1544 t = lex ();
1545 }
1546 /* If $$ is being set in default way, report if any type
1547 mismatch. */
1548 else if (!xactions
1549 && first_rhs && lhs->type_name != first_rhs->type_name)
1550 {
1551 if (lhs->type_name == 0
1552 || first_rhs->type_name == 0
1553 || strcmp (lhs->type_name, first_rhs->type_name))
1554 complain (_("type clash (`%s' `%s') on default action"),
1555 lhs->type_name ? lhs->type_name : "",
1556 first_rhs->type_name ? first_rhs->type_name : "");
1557 }
1558 /* Warn if there is no default for $$ but we need one. */
1559 else if (!xactions && !first_rhs && lhs->type_name != 0)
1560 complain (_("empty rule for typed nonterminal, and no action"));
1561 if (t == tok_semicolon)
a70083a3 1562 t = lex ();
107f7dfb
AD
1563 }
1564 else
1565 {
1566 complain (_("invalid input: %s"), quote (token_buffer));
1567 t = lex ();
1568 }
943819bf 1569
1ff442ca 1570
943819bf
RS
1571 /* grammar has been read. Do some checking */
1572
1ff442ca 1573 if (nsyms > MAXSHORT)
a0f6b076
AD
1574 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1575 MAXSHORT);
1ff442ca 1576 if (nrules == 0)
a0f6b076 1577 fatal (_("no rules in the input grammar"));
1ff442ca 1578
1ff442ca
NF
1579 /* Report any undefined symbols and consider them nonterminals. */
1580
1581 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1582 if (bp->class == unknown_sym)
1ff442ca 1583 {
a70083a3
AD
1584 complain (_
1585 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1586 bp->tag);
d7020c20 1587 bp->class = nterm_sym;
1ff442ca
NF
1588 bp->value = nvars++;
1589 }
1590
/* Every symbol that is not a nonterminal is counted as a token. */
1591 ntokens = nsyms - nvars;
1592}
ff48177d
MA
1593
1594/* At the end of the grammar file, some C source code must
63c2d5de 1595 be stored. It is going to be associated to the epilogue
ff48177d
MA
1596 directive. */
1597static void
1598read_additionnal_code (void)
1599{
1600 char c;
63c2d5de 1601 struct obstack el_obstack;
342b8b6e 1602
63c2d5de 1603 obstack_init (&el_obstack);
ff48177d 1604
710ddc4f
MA
1605 if (!no_lines_flag)
1606 {
1607 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1608 lineno, quotearg_style (c_quoting_style,
1609 muscle_find("filename")));
1610 }
1611
ff48177d 1612 while ((c = getc (finput)) != EOF)
63c2d5de 1613 obstack_1grow (&el_obstack, c);
342b8b6e 1614
63c2d5de 1615 obstack_1grow (&el_obstack, 0);
11d82f03 1616 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1617}
1618
a70083a3
AD
1619\f
1620/*--------------------------------------------------------------.
1621| For named tokens, but not literal ones, define the name. The |
1622| value is the user token number. |
1623`--------------------------------------------------------------*/
1ff442ca 1624
4a120d45 1625static void
896fe5c1 1626output_token_defines (struct obstack *oout)
1ff442ca 1627{
a70083a3
AD
1628 bucket *bp;
1629 char *cp, *symbol;
1630 char c;
1ff442ca 1631
a70083a3 1632 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1633 {
a70083a3
AD
1634 symbol = bp->tag; /* get symbol */
1635
1636 if (bp->value >= ntokens)
1637 continue;
1638 if (bp->user_token_number == SALIAS)
1639 continue;
1640 if ('\'' == *symbol)
1641 continue; /* skip literal character */
1642 if (bp == errtoken)
1643 continue; /* skip error token */
1644 if ('\"' == *symbol)
1ff442ca 1645 {
a70083a3
AD
1646 /* use literal string only if given a symbol with an alias */
1647 if (bp->alias)
1648 symbol = bp->alias->tag;
1649 else
1650 continue;
1651 }
1ff442ca 1652
a70083a3
AD
1653 /* Don't #define nonliteral tokens whose names contain periods. */
1654 cp = symbol;
1655 while ((c = *cp++) && c != '.');
1656 if (c != '\0')
1657 continue;
1ff442ca 1658
0b8afb77 1659 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1660 symbol, bp->user_token_number);
a70083a3 1661 if (semantic_parser)
342b8b6e
AD
1662 /* FIXME: This is certainly dead wrong, and should be just as
1663 above. --akim. */
0b8afb77 1664 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1665 }
1666}
1ff442ca
NF
1667
1668
037ca2f1
AD
1669/*------------------------------------------------------------------.
1670| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1671| number. |
1672`------------------------------------------------------------------*/
1673
1674static void
1675token_translations_init (void)
1676{
1677 bucket *bp = NULL;
1678 int i;
1679
1680 token_translations = XCALLOC (short, max_user_token_number + 1);
1681
1682 /* Initialize all entries for literal tokens to 2, the internal
1683 token number for $undefined., which represents all invalid
1684 inputs. */
1685 for (i = 0; i <= max_user_token_number; i++)
1686 token_translations[i] = 2;
1687
1688 for (bp = firstsymbol; bp; bp = bp->next)
1689 {
1690 /* Non-terminal? */
1691 if (bp->value >= ntokens)
1692 continue;
1693 /* A token string alias? */
1694 if (bp->user_token_number == SALIAS)
1695 continue;
6b7e85b9
AD
1696
1697 assert (bp->user_token_number != SUNDEF);
1698
037ca2f1
AD
1699 /* A token which translation has already been set? */
1700 if (token_translations[bp->user_token_number] != 2)
1701 complain (_("tokens %s and %s both assigned number %d"),
1702 tags[token_translations[bp->user_token_number]],
1703 bp->tag, bp->user_token_number);
1704 token_translations[bp->user_token_number] = bp->value;
1705 }
1706}
1707
1708
a70083a3
AD
1709/*------------------------------------------------------------------.
1710| Assign symbol numbers, and write definition of token names into |
b2ca4022 1711| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1712| of symbols. |
1713`------------------------------------------------------------------*/
1ff442ca 1714
4a120d45 1715static void
118fb205 1716packsymbols (void)
1ff442ca 1717{
342b8b6e 1718 bucket *bp = NULL;
a70083a3 1719 int tokno = 1;
a70083a3 1720 int last_user_token_number;
4a120d45 1721 static char DOLLAR[] = "$";
1ff442ca 1722
d7913476 1723 tags = XCALLOC (char *, nsyms + 1);
d7913476 1724 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1725
d7913476
AD
1726 sprec = XCALLOC (short, nsyms);
1727 sassoc = XCALLOC (short, nsyms);
1ff442ca 1728
037ca2f1
AD
1729 /* The EOF token. */
1730 tags[0] = DOLLAR;
1731 user_toknums[0] = 0;
1732
1ff442ca
NF
1733 max_user_token_number = 256;
1734 last_user_token_number = 256;
1735
1736 for (bp = firstsymbol; bp; bp = bp->next)
1737 {
d7020c20 1738 if (bp->class == nterm_sym)
1ff442ca
NF
1739 {
1740 bp->value += ntokens;
1741 }
943819bf
RS
1742 else if (bp->alias)
1743 {
0a6384c4
AD
1744 /* this symbol and its alias are a single token defn.
1745 allocate a tokno, and assign to both check agreement of
1746 ->prec and ->assoc fields and make both the same */
1747 if (bp->value == 0)
1748 bp->value = bp->alias->value = tokno++;
943819bf 1749
0a6384c4
AD
1750 if (bp->prec != bp->alias->prec)
1751 {
1752 if (bp->prec != 0 && bp->alias->prec != 0
1753 && bp->user_token_number == SALIAS)
a0f6b076
AD
1754 complain (_("conflicting precedences for %s and %s"),
1755 bp->tag, bp->alias->tag);
0a6384c4
AD
1756 if (bp->prec != 0)
1757 bp->alias->prec = bp->prec;
1758 else
1759 bp->prec = bp->alias->prec;
1760 }
943819bf 1761
0a6384c4
AD
1762 if (bp->assoc != bp->alias->assoc)
1763 {
a0f6b076
AD
1764 if (bp->assoc != 0 && bp->alias->assoc != 0
1765 && bp->user_token_number == SALIAS)
1766 complain (_("conflicting assoc values for %s and %s"),
1767 bp->tag, bp->alias->tag);
1768 if (bp->assoc != 0)
1769 bp->alias->assoc = bp->assoc;
1770 else
1771 bp->assoc = bp->alias->assoc;
1772 }
0a6384c4
AD
1773
1774 if (bp->user_token_number == SALIAS)
a70083a3 1775 continue; /* do not do processing below for SALIASs */
943819bf 1776
a70083a3 1777 }
d7020c20 1778 else /* bp->class == token_sym */
943819bf
RS
1779 {
1780 bp->value = tokno++;
1781 }
1782
d7020c20 1783 if (bp->class == token_sym)
1ff442ca 1784 {
6b7e85b9 1785 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1786 bp->user_token_number = ++last_user_token_number;
1787 if (bp->user_token_number > max_user_token_number)
1788 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1789 }
1790
1791 tags[bp->value] = bp->tag;
943819bf 1792 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1793 sprec[bp->value] = bp->prec;
1794 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1795 }
1796
037ca2f1 1797 token_translations_init ();
1ff442ca
NF
1798
1799 error_token_number = errtoken->value;
1800
e3f1699f
AD
1801 if (startval->class == unknown_sym)
1802 fatal (_("the start symbol %s is undefined"), startval->tag);
1803 else if (startval->class == token_sym)
1804 fatal (_("the start symbol %s is a token"), startval->tag);
1805
1806 start_symbol = startval->value;
1807}
1808
1809
1810/*-----------------------------------.
1811| Output definition of token names. |
1812`-----------------------------------*/
1813
1814static void
1815symbols_output (void)
1816{
342b8b6e
AD
1817 {
1818 struct obstack tokendefs;
1819 obstack_init (&tokendefs);
1820 output_token_defines (&tokendefs);
1821 obstack_1grow (&tokendefs, 0);
1822 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1823 obstack_free (&tokendefs, NULL);
1824 }
b6610515 1825
89cab50d 1826 if (defines_flag)
1ff442ca 1827 {
896fe5c1 1828 output_token_defines (&defines_obstack);
1ff442ca
NF
1829
1830 if (!pure_parser)
78af9bbc
AD
1831 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1832 spec_name_prefix);
1ff442ca 1833 if (semantic_parser)
037ca2f1
AD
1834 {
1835 int i;
1836
1837 for (i = ntokens; i < nsyms; i++)
1838 {
1839 /* don't make these for dummy nonterminals made by gensym. */
1840 if (*tags[i] != '@')
1841 obstack_fgrow2 (&defines_obstack,
1842 "# define\tNT%s\t%d\n", tags[i], i);
1843 }
1ff442ca 1844#if 0
037ca2f1
AD
1845 /* `fdefines' is now a temporary file, so we need to copy its
1846 contents in `done', so we can't close it here. */
1847 fclose (fdefines);
1848 fdefines = NULL;
1ff442ca 1849#endif
037ca2f1 1850 }
1ff442ca
NF
1851 }
1852}
a083fbbf 1853
1ff442ca 1854
a70083a3
AD
1855/*---------------------------------------------------------------.
1856| Convert the rules into the representation using RRHS, RLHS and |
1857| RITEMS. |
1858`---------------------------------------------------------------*/
1ff442ca 1859
4a120d45 1860static void
118fb205 1861packgram (void)
1ff442ca 1862{
a70083a3
AD
1863 int itemno;
1864 int ruleno;
1865 symbol_list *p;
1ff442ca 1866
d7913476 1867 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1868 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1869
1870 itemno = 0;
1871 ruleno = 1;
1872
1873 p = grammar;
1874 while (p)
1875 {
b29b2ed5 1876 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1877 rule_table[ruleno].lhs = p->sym->value;
1878 rule_table[ruleno].rhs = itemno;
b29b2ed5 1879 rule_table[ruleno].line = p->line;
68f1e3ed 1880 rule_table[ruleno].useful = TRUE;
1ff442ca
NF
1881
1882 p = p->next;
1883 while (p && p->sym)
1884 {
1885 ritem[itemno++] = p->sym->value;
1886 /* A rule gets by default the precedence and associativity
1887 of the last token in it. */
d7020c20 1888 if (p->sym->class == token_sym)
1ff442ca 1889 {
652a871c
AD
1890 rule_table[ruleno].prec = p->sym->prec;
1891 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1892 }
a70083a3
AD
1893 if (p)
1894 p = p->next;
1ff442ca
NF
1895 }
1896
1897 /* If this rule has a %prec,
a70083a3 1898 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1899 if (ruleprec)
1900 {
652a871c
AD
1901 rule_table[ruleno].prec = ruleprec->prec;
1902 rule_table[ruleno].assoc = ruleprec->assoc;
1903 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1904 }
1905
1906 ritem[itemno++] = -ruleno;
1907 ruleno++;
1908
a70083a3
AD
1909 if (p)
1910 p = p->next;
1ff442ca
NF
1911 }
1912
1913 ritem[itemno] = 0;
3067fbef
AD
1914
1915 if (trace_flag)
1916 ritem_print (stderr);
1ff442ca 1917}
a70083a3
AD
1918\f
1919/*-------------------------------------------------------------------.
1920| Read in the grammar specification and record it in the format |
ea5607fd 1921| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1922| and all actions into ACTION_OBSTACK, in each case forming the body |
1923| of a C function (YYGUARD or YYACTION) which contains a switch |
1924| statement to decide which guard or action to execute. |
a70083a3
AD
1925`-------------------------------------------------------------------*/
1926
1927void
1928reader (void)
1929{
1930 start_flag = 0;
1931 startval = NULL; /* start symbol not specified yet. */
1932
a70083a3
AD
1933 nsyms = 1;
1934 nvars = 0;
1935 nrules = 0;
1936 nitems = 0;
a70083a3
AD
1937
1938 typed = 0;
1939 lastprec = 0;
1940
a70083a3
AD
1941 semantic_parser = 0;
1942 pure_parser = 0;
a70083a3
AD
1943
1944 grammar = NULL;
1945
342b8b6e 1946 lex_init ();
a70083a3
AD
1947 lineno = 1;
1948
11d82f03
MA
1949 /* Initialize the muscle obstack. */
1950 obstack_init (&muscle_obstack);
82e236e2 1951
a70083a3
AD
1952 /* Initialize the symbol table. */
1953 tabinit ();
b6610515 1954
a70083a3
AD
1955 /* Construct the error token */
1956 errtoken = getsym ("error");
d7020c20 1957 errtoken->class = token_sym;
a70083a3 1958 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1959
a70083a3
AD
1960 /* Construct a token that represents all undefined literal tokens.
1961 It is always token number 2. */
1962 undeftoken = getsym ("$undefined.");
d7020c20 1963 undeftoken->class = token_sym;
a70083a3
AD
1964 undeftoken->user_token_number = 2;
1965
896fe5c1
AD
1966 /* Read the declaration section. Copy %{ ... %} groups to
1967 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1968 etc. found there. */
a70083a3 1969 read_declarations ();
a70083a3
AD
1970 /* Read in the grammar, build grammar in list form. Write out
1971 guards and actions. */
1972 readgram ();
ff48177d
MA
1973 /* Some C code is given at the end of the grammar file. */
1974 read_additionnal_code ();
b0c4483e 1975
a70083a3 1976 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
1977 write its type into the .tab.h file.
1978 This is no longer need with header skeleton. */
1979
a70083a3
AD
1980 /* Assign the symbols their symbol numbers. Write #defines for the
1981 token symbols into FDEFINES if requested. */
1982 packsymbols ();
1983 /* Convert the grammar into the format described in gram.h. */
1984 packgram ();
edad7067
AD
1985 /* Output the headers. */
1986 symbols_output ();
a70083a3 1987}