]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/system.h: Use intl/libgettext.h.
[bison.git] / src / reader.c
CommitLineData
/* Input parser for bison
   Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
   Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
8c7ebe49 24#include "obstack.h"
2a91a95e
AD
25#include "quotearg.h"
26#include "quote.h"
ceed8467 27#include "getargs.h"
1ff442ca 28#include "files.h"
1ff442ca 29#include "symtab.h"
82b6d266 30#include "options.h"
1ff442ca
NF
31#include "lex.h"
32#include "gram.h"
a0f6b076 33#include "complain.h"
6c89f1c1 34#include "output.h"
b2ca4022 35#include "reader.h"
340ef489 36#include "conflicts.h"
11d82f03 37#include "muscle_tab.h"
1ff442ca 38
a70083a3
AD
39typedef struct symbol_list
40{
41 struct symbol_list *next;
42 bucket *sym;
b29b2ed5 43 int line;
a70083a3
AD
44 bucket *ruleprec;
45}
46symbol_list;
118fb205 47
1ff442ca 48int lineno;
1ff442ca 49char **tags;
d019d655 50short *user_toknums;
4a120d45
JT
51static symbol_list *grammar;
52static int start_flag;
53static bucket *startval;
1ff442ca
NF
54
55/* Nonzero if components of semantic values are used, implying
56 they must be unions. */
57static int value_components_used;
58
d7020c20
AD
59/* Nonzero if %union has been seen. */
60static int typed;
1ff442ca 61
d7020c20
AD
62/* Incremented for each %left, %right or %nonassoc seen */
63static int lastprec;
1ff442ca 64
1ff442ca 65static bucket *errtoken;
5b2e3c89 66static bucket *undeftoken;
b29b2ed5
AD
67
68
6255b435 69static symbol_list *
b29b2ed5
AD
70symbol_list_new (bucket *sym)
71{
72 symbol_list *res = XMALLOC (symbol_list, 1);
73 res->next = NULL;
74 res->sym = sym;
75 res->line = lineno;
76 res->ruleprec = NULL;
77 return res;
78}
79
0d533154 80\f
a70083a3 81
0d533154
AD
82/*===================\
83| Low level lexing. |
84\===================*/
943819bf
RS
85
86static void
118fb205 87skip_to_char (int target)
943819bf
RS
88{
89 int c;
90 if (target == '\n')
a0f6b076 91 complain (_(" Skipping to next \\n"));
943819bf 92 else
a0f6b076 93 complain (_(" Skipping to next %c"), target);
943819bf
RS
94
95 do
0d533154 96 c = skip_white_space ();
943819bf 97 while (c != target && c != EOF);
a083fbbf 98 if (c != EOF)
0d533154 99 ungetc (c, finput);
943819bf
RS
100}
101
102
0d533154
AD
/* Read a signed integer from STREAM and return its value.

   An optional leading `-' is accepted, followed by decimal digits.
   The first character that is not part of the number is pushed back
   onto STREAM.  If no digit is present the result is 0.

   No overflow detection is performed: a digit string too long for an
   `int' overflows the accumulator.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      n = 10 * n + (c - '0');
      c = getc (stream);
    }

  /* Give back the character that ended the number.  */
  ungetc (c, stream);

  return sign * n;
}
130\f
79282c5a
AD
131/*--------------------------------------------------------------.
132| Get the data type (alternative in the union) of the value for |
133| symbol N in rule RULE. |
134`--------------------------------------------------------------*/
135
136static char *
b29b2ed5 137get_type_name (int n, symbol_list *rule)
79282c5a
AD
138{
139 int i;
140 symbol_list *rp;
141
142 if (n < 0)
143 {
144 complain (_("invalid $ value"));
145 return NULL;
146 }
147
148 rp = rule;
149 i = 0;
150
151 while (i < n)
152 {
153 rp = rp->next;
154 if (rp == NULL || rp->sym == NULL)
155 {
156 complain (_("invalid $ value"));
157 return NULL;
158 }
159 i++;
160 }
161
162 return rp->sym->type_name;
163}
164\f
337bab46
AD
165/*------------------------------------------------------------.
166| Dump the string from FIN to OOUT if non null. MATCH is the |
167| delimiter of the string (either ' or "). |
168`------------------------------------------------------------*/
ae3c3164
AD
169
170static inline void
b6610515 171copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
172{
173 int c;
174
b6610515
RA
175 if (store)
176 obstack_1grow (oout, match);
8c7ebe49 177
4a120d45 178 c = getc (fin);
ae3c3164
AD
179
180 while (c != match)
181 {
182 if (c == EOF)
183 fatal (_("unterminated string at end of file"));
184 if (c == '\n')
185 {
a0f6b076 186 complain (_("unterminated string"));
4a120d45 187 ungetc (c, fin);
ae3c3164
AD
188 c = match; /* invent terminator */
189 continue;
190 }
191
337bab46 192 obstack_1grow (oout, c);
ae3c3164
AD
193
194 if (c == '\\')
195 {
4a120d45 196 c = getc (fin);
ae3c3164
AD
197 if (c == EOF)
198 fatal (_("unterminated string at end of file"));
337bab46 199 obstack_1grow (oout, c);
8c7ebe49 200
ae3c3164
AD
201 if (c == '\n')
202 lineno++;
203 }
204
a70083a3 205 c = getc (fin);
ae3c3164
AD
206 }
207
b6610515
RA
208 if (store)
209 obstack_1grow (oout, c);
210}
211
/* Copy a string literal delimited by MATCH from FIN to OOUT,
   delimiters included.  The opening delimiter has already been
   read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  copy_string2 (fin, oout, match, 1);
}
219
b6610515
RA
/* Copy from FIN to OOUT the longest run of alphanumeric characters
   and underscores.  The character that ends the run is pushed back
   onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c;

  while (isalnum (c = getc (fin)) || c == '_')
    obstack_1grow (oout, c);

  ungetc (c, fin);
}
ae3c3164 232
337bab46
AD
233/*-----------------------------------------------------------------.
234| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
235| NULL). In fact we just saw a `/', which might or might not be a |
236| comment. In any case, copy what we saw. |
237| |
238| OUT2 might be NULL. |
239`-----------------------------------------------------------------*/
ae3c3164
AD
240
241static inline void
337bab46 242copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
243{
244 int cplus_comment;
a70083a3 245 int ended;
550a72a3
AD
246 int c;
247
248 /* We read a `/', output it. */
337bab46 249 obstack_1grow (oout1, '/');
896fe5c1
AD
250 if (oout2)
251 obstack_1grow (oout2, '/');
550a72a3
AD
252
253 switch ((c = getc (fin)))
254 {
255 case '/':
256 cplus_comment = 1;
257 break;
258 case '*':
259 cplus_comment = 0;
260 break;
261 default:
262 ungetc (c, fin);
263 return;
264 }
ae3c3164 265
337bab46 266 obstack_1grow (oout1, c);
896fe5c1
AD
267 if (oout2)
268 obstack_1grow (oout2, c);
550a72a3 269 c = getc (fin);
ae3c3164
AD
270
271 ended = 0;
272 while (!ended)
273 {
274 if (!cplus_comment && c == '*')
275 {
276 while (c == '*')
277 {
337bab46 278 obstack_1grow (oout1, c);
896fe5c1
AD
279 if (oout2)
280 obstack_1grow (oout2, c);
550a72a3 281 c = getc (fin);
ae3c3164
AD
282 }
283
284 if (c == '/')
285 {
337bab46 286 obstack_1grow (oout1, c);
896fe5c1
AD
287 if (oout2)
288 obstack_1grow (oout2, c);
ae3c3164
AD
289 ended = 1;
290 }
291 }
292 else if (c == '\n')
293 {
294 lineno++;
337bab46 295 obstack_1grow (oout1, c);
896fe5c1
AD
296 if (oout2)
297 obstack_1grow (oout2, c);
ae3c3164
AD
298 if (cplus_comment)
299 ended = 1;
300 else
550a72a3 301 c = getc (fin);
ae3c3164
AD
302 }
303 else if (c == EOF)
304 fatal (_("unterminated comment"));
305 else
306 {
337bab46 307 obstack_1grow (oout1, c);
896fe5c1
AD
308 if (oout2)
309 obstack_1grow (oout2, c);
550a72a3 310 c = getc (fin);
ae3c3164
AD
311 }
312 }
313}
314
315
550a72a3
AD
/* Dump the comment (actually the current string starting with a `/')
   from FIN to OOUT.  The `/' has already been read.  */

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  copy_comment2 (fin, oout, NULL);
}
326
327
a70083a3 328/*-----------------------------------------------------------------.
337bab46 329| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
330| reference to this location. STACK_OFFSET is the number of values |
331| in the current rule so far, which says where to find `$0' with |
332| respect to the top of the stack. |
333`-----------------------------------------------------------------*/
1ff442ca 334
a70083a3 335static inline void
337bab46 336copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 337{
a70083a3 338 int c;
1ff442ca 339
a70083a3
AD
340 c = getc (fin);
341 if (c == '$')
1ff442ca 342 {
ff4423cc 343 obstack_sgrow (oout, "yyloc");
89cab50d 344 locations_flag = 1;
a70083a3
AD
345 }
346 else if (isdigit (c) || c == '-')
347 {
348 int n;
1ff442ca 349
a70083a3
AD
350 ungetc (c, fin);
351 n = read_signed_integer (fin);
943819bf 352
337bab46 353 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 354 locations_flag = 1;
1ff442ca 355 }
a70083a3 356 else
ff4a34be
AD
357 {
358 char buf[] = "@c";
359 buf[1] = c;
360 complain (_("%s is invalid"), quote (buf));
361 }
1ff442ca 362}
79282c5a
AD
363
364
365/*-------------------------------------------------------------------.
366| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
367| |
368| Possible inputs: $[<TYPENAME>]($|integer) |
369| |
337bab46 370| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
371| the number of values in the current rule so far, which says where |
372| to find `$0' with respect to the top of the stack. |
373`-------------------------------------------------------------------*/
374
375static inline void
337bab46 376copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
377 symbol_list *rule, int stack_offset)
378{
379 int c = getc (fin);
b0ce6046 380 const char *type_name = NULL;
79282c5a 381
f282676b 382 /* Get the type name if explicit. */
79282c5a
AD
383 if (c == '<')
384 {
f282676b 385 read_type_name (fin);
79282c5a
AD
386 type_name = token_buffer;
387 value_components_used = 1;
79282c5a
AD
388 c = getc (fin);
389 }
390
391 if (c == '$')
392 {
ff4423cc 393 obstack_sgrow (oout, "yyval");
8c7ebe49 394
79282c5a
AD
395 if (!type_name)
396 type_name = get_type_name (0, rule);
397 if (type_name)
337bab46 398 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
399 if (!type_name && typed)
400 complain (_("$$ of `%s' has no declared type"),
401 rule->sym->tag);
402 }
403 else if (isdigit (c) || c == '-')
404 {
405 int n;
406 ungetc (c, fin);
407 n = read_signed_integer (fin);
408
409 if (!type_name && n > 0)
410 type_name = get_type_name (n, rule);
411
337bab46 412 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 413
79282c5a 414 if (type_name)
337bab46 415 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
416 if (!type_name && typed)
417 complain (_("$%d of `%s' has no declared type"),
418 n, rule->sym->tag);
419 }
420 else
421 {
422 char buf[] = "$c";
423 buf[1] = c;
424 complain (_("%s is invalid"), quote (buf));
425 }
426}
a70083a3
AD
427\f
428/*-------------------------------------------------------------------.
429| Copy the contents of a `%{ ... %}' into the definitions file. The |
430| `%{' has already been read. Return after reading the `%}'. |
431`-------------------------------------------------------------------*/
1ff442ca 432
4a120d45 433static void
118fb205 434copy_definition (void)
1ff442ca 435{
a70083a3 436 int c;
ae3c3164 437 /* -1 while reading a character if prev char was %. */
a70083a3 438 int after_percent;
1ff442ca 439
b6610515 440#if 0
89cab50d 441 if (!no_lines_flag)
25b222fa
MA
442 {
443 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 444 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
445 muscle_find("filename")));
446 }
b6610515 447#endif
1ff442ca
NF
448
449 after_percent = 0;
450
ae3c3164 451 c = getc (finput);
1ff442ca
NF
452
453 for (;;)
454 {
455 switch (c)
456 {
457 case '\n':
dd60faec 458 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
459 lineno++;
460 break;
461
462 case '%':
a70083a3 463 after_percent = -1;
1ff442ca 464 break;
a083fbbf 465
1ff442ca
NF
466 case '\'':
467 case '"':
337bab46 468 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
469 break;
470
471 case '/':
337bab46 472 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
473 break;
474
475 case EOF:
a70083a3 476 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
477
478 default:
dd60faec 479 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
480 }
481
a70083a3 482 c = getc (finput);
1ff442ca
NF
483
484 if (after_percent)
485 {
486 if (c == '}')
487 return;
dd60faec 488 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
489 }
490 after_percent = 0;
1ff442ca 491 }
1ff442ca
NF
492}
493
494
d7020c20
AD
495/*-------------------------------------------------------------------.
496| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
497| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
498| are reversed. |
499`-------------------------------------------------------------------*/
1ff442ca 500
4a120d45 501static void
d7020c20 502parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 503{
342b8b6e
AD
504 token_t token = tok_undef;
505 char *typename = NULL;
1ff442ca 506
1e9798d5
AD
507 /* The symbol being defined. */
508 struct bucket *symbol = NULL;
509
510 /* After `%token' and `%nterm', any number of symbols maybe be
511 defined. */
1ff442ca
NF
512 for (;;)
513 {
e6011337
JT
514 int tmp_char = ungetc (skip_white_space (), finput);
515
1e9798d5
AD
516 /* `%' (for instance from `%token', or from `%%' etc.) is the
517 only valid means to end this declaration. */
e6011337 518 if (tmp_char == '%')
1ff442ca 519 return;
e6011337 520 if (tmp_char == EOF)
a0f6b076 521 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 522
a70083a3 523 token = lex ();
511e79b3 524 if (token == tok_comma)
943819bf
RS
525 {
526 symbol = NULL;
527 continue;
528 }
511e79b3 529 if (token == tok_typename)
1ff442ca 530 {
95e36146 531 typename = xstrdup (token_buffer);
1ff442ca 532 value_components_used = 1;
943819bf
RS
533 symbol = NULL;
534 }
511e79b3 535 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 536 {
8e03724b
AD
537 if (symval->alias)
538 warn (_("symbol `%s' used more than once as a literal string"),
539 symval->tag);
540 else if (symbol->alias)
541 warn (_("symbol `%s' given more than one literal string"),
542 symbol->tag);
543 else
544 {
545 symval->class = token_sym;
546 symval->type_name = typename;
547 symval->user_token_number = symbol->user_token_number;
548 symbol->user_token_number = SALIAS;
549 symval->alias = symbol;
550 symbol->alias = symval;
551 /* symbol and symval combined are only one symbol */
552 nsyms--;
553 }
8e03724b 554 symbol = NULL;
1ff442ca 555 }
511e79b3 556 else if (token == tok_identifier)
1ff442ca
NF
557 {
558 int oldclass = symval->class;
943819bf 559 symbol = symval;
1ff442ca 560
943819bf 561 if (symbol->class == what_is_not)
a0f6b076 562 complain (_("symbol %s redefined"), symbol->tag);
943819bf 563 symbol->class = what_is;
d7020c20 564 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 565 symbol->value = nvars++;
1ff442ca
NF
566
567 if (typename)
568 {
943819bf
RS
569 if (symbol->type_name == NULL)
570 symbol->type_name = typename;
a70083a3 571 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 572 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
573 }
574 }
511e79b3 575 else if (symbol && token == tok_number)
a70083a3 576 {
943819bf 577 symbol->user_token_number = numval;
a70083a3 578 }
1ff442ca 579 else
943819bf 580 {
a0f6b076 581 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
582 token_buffer,
583 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 584 skip_to_char ('%');
943819bf 585 }
1ff442ca
NF
586 }
587
588}
589
1ff442ca 590
d7020c20
AD
591/*------------------------------.
592| Parse what comes after %start |
593`------------------------------*/
1ff442ca 594
4a120d45 595static void
118fb205 596parse_start_decl (void)
1ff442ca
NF
597{
598 if (start_flag)
27821bff 599 complain (_("multiple %s declarations"), "%start");
511e79b3 600 if (lex () != tok_identifier)
27821bff 601 complain (_("invalid %s declaration"), "%start");
943819bf
RS
602 else
603 {
604 start_flag = 1;
605 startval = symval;
606 }
1ff442ca
NF
607}
608
a70083a3
AD
609/*-----------------------------------------------------------.
610| read in a %type declaration and record its information for |
611| get_type_name to access |
612`-----------------------------------------------------------*/
613
614static void
615parse_type_decl (void)
616{
a70083a3
AD
617 char *name;
618
511e79b3 619 if (lex () != tok_typename)
a70083a3
AD
620 {
621 complain ("%s", _("%type declaration has no <typename>"));
622 skip_to_char ('%');
623 return;
624 }
625
95e36146 626 name = xstrdup (token_buffer);
a70083a3
AD
627
628 for (;;)
629 {
f17bcd1f 630 token_t t;
a70083a3
AD
631 int tmp_char = ungetc (skip_white_space (), finput);
632
633 if (tmp_char == '%')
634 return;
635 if (tmp_char == EOF)
636 fatal (_("Premature EOF after %s"), token_buffer);
637
638 t = lex ();
639
640 switch (t)
1ff442ca
NF
641 {
642
511e79b3
AD
643 case tok_comma:
644 case tok_semicolon:
1ff442ca
NF
645 break;
646
511e79b3 647 case tok_identifier:
1ff442ca
NF
648 if (symval->type_name == NULL)
649 symval->type_name = name;
a70083a3 650 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 651 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
652
653 break;
654
655 default:
a0f6b076
AD
656 complain (_("invalid %%type declaration due to item: %s"),
657 token_buffer);
a70083a3 658 skip_to_char ('%');
1ff442ca
NF
659 }
660 }
661}
662
663
664
d7020c20
AD
665/*----------------------------------------------------------------.
666| Read in a %left, %right or %nonassoc declaration and record its |
667| information. |
668`----------------------------------------------------------------*/
1ff442ca 669
4a120d45 670static void
d7020c20 671parse_assoc_decl (associativity assoc)
1ff442ca 672{
a70083a3
AD
673 char *name = NULL;
674 int prev = 0;
1ff442ca 675
a70083a3 676 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 677
1ff442ca
NF
678 for (;;)
679 {
f17bcd1f 680 token_t t;
e6011337 681 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 682
e6011337 683 if (tmp_char == '%')
1ff442ca 684 return;
e6011337 685 if (tmp_char == EOF)
a0f6b076 686 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 687
a70083a3 688 t = lex ();
1ff442ca
NF
689
690 switch (t)
691 {
511e79b3 692 case tok_typename:
95e36146 693 name = xstrdup (token_buffer);
1ff442ca
NF
694 break;
695
511e79b3 696 case tok_comma:
1ff442ca
NF
697 break;
698
511e79b3 699 case tok_identifier:
1ff442ca 700 if (symval->prec != 0)
a0f6b076 701 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
702 symval->prec = lastprec;
703 symval->assoc = assoc;
d7020c20 704 if (symval->class == nterm_sym)
a0f6b076 705 complain (_("symbol %s redefined"), symval->tag);
d7020c20 706 symval->class = token_sym;
1ff442ca 707 if (name)
a70083a3 708 { /* record the type, if one is specified */
1ff442ca
NF
709 if (symval->type_name == NULL)
710 symval->type_name = name;
a70083a3 711 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 712 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
713 }
714 break;
715
511e79b3
AD
716 case tok_number:
717 if (prev == tok_identifier)
a70083a3 718 {
1ff442ca 719 symval->user_token_number = numval;
a70083a3
AD
720 }
721 else
722 {
723 complain (_
724 ("invalid text (%s) - number should be after identifier"),
725token_buffer);
726 skip_to_char ('%');
727 }
1ff442ca
NF
728 break;
729
511e79b3 730 case tok_semicolon:
1ff442ca
NF
731 return;
732
733 default:
a0f6b076 734 complain (_("unexpected item: %s"), token_buffer);
a70083a3 735 skip_to_char ('%');
1ff442ca
NF
736 }
737
738 prev = t;
1ff442ca
NF
739 }
740}
741
742
743
dd60faec 744/*--------------------------------------------------------------.
180d45ba
PB
745| Copy the union declaration into the stype muscle |
746| (and fdefines), where it is made into the definition of |
747| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 748`--------------------------------------------------------------*/
1ff442ca 749
4a120d45 750static void
118fb205 751parse_union_decl (void)
1ff442ca 752{
a70083a3
AD
753 int c;
754 int count = 0;
180d45ba 755 struct obstack union_obstack;
5f7e0832
AD
756 const char *prologue = "\
757#ifndef YYSTYPE\n\
758typedef union";
759 const char *epilogue = "\
760 yystype;\n\
761# define YYSTYPE yystype\n\
762#endif\n";
1ff442ca
NF
763
764 if (typed)
27821bff 765 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
766
767 typed = 1;
768
f6ec6d13
AD
769 /* FIXME: I'm worried: are you sure attrs_obstack is properly
770 filled? */
5f7e0832
AD
771 /* I don't see any reasons to keep this line, because we should
772 create a special skeleton for this option. */
180d45ba 773 if (no_lines_flag)
dd60faec 774 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 775
180d45ba
PB
776 obstack_init (&union_obstack);
777 obstack_sgrow (&union_obstack, "union");
896fe5c1 778 if (defines_flag)
5f7e0832 779 obstack_sgrow (&defines_obstack, prologue);
1ff442ca 780
27821bff 781 c = getc (finput);
1ff442ca
NF
782
783 while (c != EOF)
784 {
342b8b6e
AD
785 /* If C contains '/', it is output by copy_comment (). */
786 if (c != '/')
787 {
f6ec6d13 788 obstack_1grow (&union_obstack, c);
342b8b6e
AD
789 if (defines_flag)
790 obstack_1grow (&defines_obstack, c);
791 }
1ff442ca
NF
792
793 switch (c)
794 {
795 case '\n':
796 lineno++;
797 break;
798
799 case '/':
180d45ba 800 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
801 break;
802
1ff442ca
NF
803 case '{':
804 count++;
805 break;
806
807 case '}':
808 if (count == 0)
27821bff 809 complain (_("unmatched %s"), "`}'");
1ff442ca 810 count--;
943819bf 811 if (count <= 0)
1ff442ca 812 {
896fe5c1 813 if (defines_flag)
5f7e0832 814 obstack_sgrow (&defines_obstack, epilogue);
1ff442ca 815 /* JF don't choke on trailing semi */
27821bff
AD
816 c = skip_white_space ();
817 if (c != ';')
a70083a3 818 ungetc (c, finput);
180d45ba
PB
819 obstack_1grow (&union_obstack, 0);
820 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
821 return;
822 }
823 }
824
27821bff 825 c = getc (finput);
1ff442ca 826 }
180d45ba 827
1ff442ca
NF
828}
829
d7020c20
AD
830
831/*-------------------------------------------------------.
832| Parse the declaration %expect N which says to expect N |
833| shift-reduce conflicts. |
834`-------------------------------------------------------*/
1ff442ca 835
4a120d45 836static void
118fb205 837parse_expect_decl (void)
1ff442ca 838{
131e2fef 839 int c = skip_white_space ();
1ff442ca
NF
840 ungetc (c, finput);
841
131e2fef 842 if (!isdigit (c))
79282c5a 843 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
844 else
845 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
846}
847
a70083a3
AD
848
849/*-------------------------------------------------------------------.
850| Parse what comes after %thong. the full syntax is |
851| |
852| %thong <type> token number literal |
853| |
854| the <type> or number may be omitted. The number specifies the |
855| user_token_number. |
856| |
857| Two symbols are entered in the table, one for the token symbol and |
858| one for the literal. Both are given the <type>, if any, from the |
859| declaration. The ->user_token_number of the first is SALIAS and |
860| the ->user_token_number of the second is set to the number, if |
861| any, from the declaration. The two symbols are linked via |
862| pointers in their ->alias fields. |
863| |
864| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
865| only the literal string is retained it is the literal string that |
866| is output to yytname |
867`-------------------------------------------------------------------*/
868
869static void
870parse_thong_decl (void)
7b306f52 871{
f17bcd1f 872 token_t token;
a70083a3
AD
873 struct bucket *symbol;
874 char *typename = 0;
6b7e85b9 875 int usrtoknum = SUNDEF;
7b306f52 876
a70083a3 877 token = lex (); /* fetch typename or first token */
511e79b3 878 if (token == tok_typename)
7b306f52 879 {
95e36146 880 typename = xstrdup (token_buffer);
a70083a3
AD
881 value_components_used = 1;
882 token = lex (); /* fetch first token */
7b306f52 883 }
7b306f52 884
a70083a3 885 /* process first token */
7b306f52 886
511e79b3 887 if (token != tok_identifier)
a70083a3
AD
888 {
889 complain (_("unrecognized item %s, expected an identifier"),
890 token_buffer);
891 skip_to_char ('%');
892 return;
7b306f52 893 }
d7020c20 894 symval->class = token_sym;
a70083a3
AD
895 symval->type_name = typename;
896 symval->user_token_number = SALIAS;
897 symbol = symval;
7b306f52 898
a70083a3 899 token = lex (); /* get number or literal string */
1ff442ca 900
511e79b3 901 if (token == tok_number)
943819bf 902 {
a70083a3
AD
903 usrtoknum = numval;
904 token = lex (); /* okay, did number, now get literal */
943819bf 905 }
1ff442ca 906
a70083a3 907 /* process literal string token */
1ff442ca 908
511e79b3 909 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 910 {
a70083a3
AD
911 complain (_("expected string constant instead of %s"), token_buffer);
912 skip_to_char ('%');
913 return;
1ff442ca 914 }
d7020c20 915 symval->class = token_sym;
a70083a3
AD
916 symval->type_name = typename;
917 symval->user_token_number = usrtoknum;
1ff442ca 918
a70083a3
AD
919 symval->alias = symbol;
920 symbol->alias = symval;
1ff442ca 921
79282c5a
AD
922 /* symbol and symval combined are only one symbol. */
923 nsyms--;
a70083a3 924}
3cef001a 925
b6610515 926static void
11d82f03 927parse_muscle_decl (void)
b6610515
RA
928{
929 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
930 char* muscle_key;
931 char* muscle_value;
b6610515
RA
932
933 /* Read key. */
934 if (!isalpha (ch) && ch != '_')
935 {
936 complain (_("invalid %s declaration"), "%define");
937 skip_to_char ('%');
938 return;
939 }
11d82f03
MA
940 copy_identifier (finput, &muscle_obstack);
941 obstack_1grow (&muscle_obstack, 0);
942 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 943
b6610515
RA
944 /* Read value. */
945 ch = skip_white_space ();
946 if (ch != '"')
947 {
948 ungetc (ch, finput);
949 if (ch != EOF)
950 {
951 complain (_("invalid %s declaration"), "%define");
952 skip_to_char ('%');
953 return;
954 }
955 else
956 fatal (_("Premature EOF after %s"), "\"");
957 }
11d82f03
MA
958 copy_string2 (finput, &muscle_obstack, '"', 0);
959 obstack_1grow (&muscle_obstack, 0);
960 muscle_value = obstack_finish (&muscle_obstack);
b6610515 961
b6610515 962 /* Store the (key, value) pair in the environment. */
11d82f03 963 muscle_insert (muscle_key, muscle_value);
b6610515
RA
964}
965
2ba3b73c
MA
966
/* Parse what comes after %skeleton.  Currently a stub: nothing is
   read from the input.  */

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
}
976
f6bd5427
MA
/* Parse what comes after %skeleton_path.  Currently a stub: nothing
   is read from the input.

   NOTE(review): read_declarations () calls this function for
   tok_include -- confirm which directive name is the right one.  */

void
parse_include_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
}
986
a70083a3
AD
987/*----------------------------------------------------------------.
988| Read from finput until `%%' is seen. Discard the `%%'. Handle |
989| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 990| groups to ATTRS_OBSTACK. |
a70083a3 991`----------------------------------------------------------------*/
1ff442ca 992
4a120d45 993static void
a70083a3 994read_declarations (void)
1ff442ca 995{
a70083a3 996 for (;;)
1ff442ca 997 {
951366c1 998 int c = skip_white_space ();
1ff442ca 999
a70083a3
AD
1000 if (c == '%')
1001 {
951366c1 1002 token_t tok = parse_percent_token ();
1ff442ca 1003
a70083a3 1004 switch (tok)
943819bf 1005 {
511e79b3 1006 case tok_two_percents:
a70083a3 1007 return;
1ff442ca 1008
511e79b3 1009 case tok_percent_left_curly:
a70083a3
AD
1010 copy_definition ();
1011 break;
1ff442ca 1012
511e79b3 1013 case tok_token:
d7020c20 1014 parse_token_decl (token_sym, nterm_sym);
a70083a3 1015 break;
1ff442ca 1016
511e79b3 1017 case tok_nterm:
d7020c20 1018 parse_token_decl (nterm_sym, token_sym);
a70083a3 1019 break;
1ff442ca 1020
511e79b3 1021 case tok_type:
a70083a3
AD
1022 parse_type_decl ();
1023 break;
1ff442ca 1024
511e79b3 1025 case tok_start:
a70083a3
AD
1026 parse_start_decl ();
1027 break;
118fb205 1028
511e79b3 1029 case tok_union:
a70083a3
AD
1030 parse_union_decl ();
1031 break;
1ff442ca 1032
511e79b3 1033 case tok_expect:
a70083a3
AD
1034 parse_expect_decl ();
1035 break;
6deb4447 1036
511e79b3 1037 case tok_thong:
a70083a3
AD
1038 parse_thong_decl ();
1039 break;
d7020c20 1040
511e79b3 1041 case tok_left:
d7020c20 1042 parse_assoc_decl (left_assoc);
a70083a3 1043 break;
1ff442ca 1044
511e79b3 1045 case tok_right:
d7020c20 1046 parse_assoc_decl (right_assoc);
a70083a3 1047 break;
1ff442ca 1048
511e79b3 1049 case tok_nonassoc:
d7020c20 1050 parse_assoc_decl (non_assoc);
a70083a3 1051 break;
1ff442ca 1052
b6610515 1053 case tok_define:
11d82f03 1054 parse_muscle_decl ();
b6610515 1055 break;
342b8b6e 1056
2ba3b73c
MA
1057 case tok_skel:
1058 parse_skel_decl ();
1059 break;
b6610515 1060
f6bd5427
MA
1061 case tok_include:
1062 parse_include_decl ();
1063 break;
1064
511e79b3 1065 case tok_noop:
a70083a3 1066 break;
1ff442ca 1067
951366c1
AD
1068 case tok_stropt:
1069 case tok_intopt:
1070 case tok_obsolete:
951366c1
AD
1071 abort ();
1072 break;
1073
e0c40012 1074 case tok_illegal:
a70083a3
AD
1075 default:
1076 complain (_("unrecognized: %s"), token_buffer);
1077 skip_to_char ('%');
1078 }
1079 }
1080 else if (c == EOF)
1081 fatal (_("no input grammar"));
1082 else
1083 {
ff4a34be
AD
1084 char buf[] = "c";
1085 buf[0] = c;
1086 complain (_("unknown character: %s"), quote (buf));
a70083a3 1087 skip_to_char ('%');
1ff442ca 1088 }
1ff442ca 1089 }
1ff442ca 1090}
a70083a3
AD
1091\f
1092/*-------------------------------------------------------------------.
1093| Assuming that a `{' has just been seen, copy everything up to the |
1094| matching `}' into the actions file. STACK_OFFSET is the number of |
1095| values in the current rule so far, which says where to find `$0' |
1096| with respect to the top of the stack. |
1097`-------------------------------------------------------------------*/
1ff442ca 1098
4a120d45 1099static void
79282c5a 1100copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1101{
a70083a3 1102 int c;
a70083a3 1103 int count;
8c7ebe49 1104 char buf[4096];
1ff442ca
NF
1105
1106 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1107 if (semantic_parser)
1108 stack_offset = 0;
1ff442ca 1109
25b222fa 1110 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
8c7ebe49 1111
89cab50d 1112 if (!no_lines_flag)
8c7ebe49 1113 {
25b222fa 1114 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
342b8b6e 1115 lineno, quotearg_style (c_quoting_style,
25b222fa 1116 muscle_find ("filename")));
8c7ebe49
AD
1117 }
1118 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1119
1120 count = 1;
a70083a3 1121 c = getc (finput);
1ff442ca
NF
1122
1123 while (count > 0)
1124 {
1125 while (c != '}')
a70083a3
AD
1126 {
1127 switch (c)
1ff442ca
NF
1128 {
1129 case '\n':
8c7ebe49 1130 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1131 lineno++;
1132 break;
1133
1134 case '{':
8c7ebe49 1135 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1136 count++;
1137 break;
1138
1139 case '\'':
1140 case '"':
337bab46 1141 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1142 break;
1143
1144 case '/':
337bab46 1145 copy_comment (finput, &action_obstack);
1ff442ca
NF
1146 break;
1147
1148 case '$':
337bab46 1149 copy_dollar (finput, &action_obstack,
8c7ebe49 1150 rule, stack_offset);
1ff442ca
NF
1151 break;
1152
1153 case '@':
337bab46 1154 copy_at (finput, &action_obstack,
8c7ebe49 1155 stack_offset);
6666f98f 1156 break;
1ff442ca
NF
1157
1158 case EOF:
27821bff 1159 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1160
1161 default:
8c7ebe49 1162 obstack_1grow (&action_obstack, c);
a70083a3
AD
1163 }
1164
1165 c = getc (finput);
1166 }
1167
1168 /* above loop exits when c is '}' */
1169
1170 if (--count)
1171 {
8c7ebe49 1172 obstack_1grow (&action_obstack, c);
a70083a3
AD
1173 c = getc (finput);
1174 }
1175 }
1176
ff4423cc 1177 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1178}
1179\f
1180/*-------------------------------------------------------------------.
1181| After `%guard' is seen in the input file, copy the actual guard |
1182| into the guards file. If the guard is followed by an action, copy |
1183| that into the actions file. STACK_OFFSET is the number of values |
1184| in the current rule so far, which says where to find `$0' with |
1185| respect to the top of the stack, for the simple parser in which |
1186| the stack is not popped until after the guard is run. |
1187`-------------------------------------------------------------------*/
1188
1189static void
79282c5a 1190copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1191{
1192 int c;
a70083a3 1193 int count;
a70083a3
AD
1194 int brace_flag = 0;
1195
1196 /* offset is always 0 if parser has already popped the stack pointer */
1197 if (semantic_parser)
1198 stack_offset = 0;
1199
ea5607fd 1200 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1201 if (!no_lines_flag)
25b222fa 1202 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1203 lineno, quotearg_style (c_quoting_style,
11d82f03 1204 muscle_find ("filename")));
ea5607fd 1205 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1206
1207 count = 0;
1208 c = getc (finput);
1209
1210 while (brace_flag ? (count > 0) : (c != ';'))
1211 {
1212 switch (c)
1213 {
1214 case '\n':
ea5607fd 1215 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1216 lineno++;
1217 break;
1218
1219 case '{':
ea5607fd 1220 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1221 brace_flag = 1;
1222 count++;
1223 break;
1224
1225 case '}':
ea5607fd 1226 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1227 if (count > 0)
1228 count--;
1229 else
1230 {
1231 complain (_("unmatched %s"), "`}'");
1232 c = getc (finput); /* skip it */
1233 }
1234 break;
1235
1236 case '\'':
1237 case '"':
337bab46 1238 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1239 break;
1240
1241 case '/':
337bab46 1242 copy_comment (finput, &guard_obstack);
a70083a3
AD
1243 break;
1244
1245 case '$':
337bab46 1246 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1247 break;
1ff442ca 1248
a70083a3 1249 case '@':
337bab46 1250 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1251 break;
1ff442ca 1252
a70083a3
AD
1253 case EOF:
1254 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1255
a70083a3 1256 default:
ea5607fd 1257 obstack_1grow (&guard_obstack, c);
1ff442ca 1258 }
a70083a3
AD
1259
1260 if (c != '}' || count != 0)
1261 c = getc (finput);
1ff442ca
NF
1262 }
1263
a70083a3
AD
1264 c = skip_white_space ();
1265
ff4423cc 1266 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1267 if (c == '{')
1268 copy_action (rule, stack_offset);
1269 else if (c == '=')
1270 {
1271 c = getc (finput); /* why not skip_white_space -wjh */
1272 if (c == '{')
1273 copy_action (rule, stack_offset);
1274 }
1275 else
1276 ungetc (c, finput);
1ff442ca 1277}
a70083a3
AD
1278\f
1279
a70083a3
AD
1280/*-------------------------------------------------------------------.
1281| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1282| with the user's names. |
1283`-------------------------------------------------------------------*/
1ff442ca 1284
4a120d45 1285static bucket *
118fb205 1286gensym (void)
1ff442ca 1287{
274d42ce
AD
1288 /* Incremented for each generated symbol */
1289 static int gensym_count = 0;
1290 static char buf[256];
1291
a70083a3 1292 bucket *sym;
1ff442ca 1293
274d42ce
AD
1294 sprintf (buf, "@%d", ++gensym_count);
1295 token_buffer = buf;
a70083a3 1296 sym = getsym (token_buffer);
d7020c20 1297 sym->class = nterm_sym;
1ff442ca 1298 sym->value = nvars++;
36281465 1299 return sym;
1ff442ca
NF
1300}
1301
a70083a3
AD
#if 0
/*------------------------------------------------------------------.
| read in a %type declaration and record its information for        |
| get_type_name to access.  This is unused.  It is only called from |
| the #if 0 part of readgram.                                       |
|                                                                   |
| Return the first token that does not belong to the declaration.   |
`------------------------------------------------------------------*/

static int
get_type (void)
{
  int k;
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the offending token back to the caller.  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
	{
	case tok_semicolon:
	  return lex ();

	case tok_comma:
	  break;

	case tok_identifier:
	  if (symval->type_name == NULL)
	    symval->type_name = name;
	  else if (strcmp (name, symval->type_name) != 0)
	    complain (_("type redeclaration for %s"), symval->tag);

	  break;

	default:
	  return token;
	}
    }
}

#endif
1353\f
1354/*------------------------------------------------------------------.
1355| Parse the input grammar into a one symbol_list structure. Each |
1356| rule is represented by a sequence of symbols: the left hand side |
1357| followed by the contents of the right hand side, followed by a |
1358| null pointer instead of a symbol to terminate the rule. The next |
1359| symbol is the lhs of the following rule. |
1360| |
1361| All guards and actions are copied out to the appropriate files, |
1362| labelled by the rule number they apply to. |
1363`------------------------------------------------------------------*/
1ff442ca 1364
4a120d45 1365static void
118fb205 1366readgram (void)
1ff442ca 1367{
f17bcd1f 1368 token_t t;
a70083a3
AD
1369 bucket *lhs = NULL;
1370 symbol_list *p;
1371 symbol_list *p1;
1372 bucket *bp;
1ff442ca 1373
ff4a34be
AD
1374 /* Points to first symbol_list of current rule. its symbol is the
1375 lhs of the rule. */
1376 symbol_list *crule;
1377 /* Points to the symbol_list preceding crule. */
1378 symbol_list *crule1;
1ff442ca
NF
1379
1380 p1 = NULL;
1381
a70083a3 1382 t = lex ();
1ff442ca 1383
511e79b3 1384 while (t != tok_two_percents && t != tok_eof)
1ff442ca 1385 {
511e79b3 1386 if (t == tok_identifier || t == tok_bar)
1ff442ca 1387 {
89cab50d 1388 int action_flag = 0;
ff4a34be
AD
1389 /* Number of symbols in rhs of this rule so far */
1390 int rulelength = 0;
1ff442ca
NF
1391 int xactions = 0; /* JF for error checking */
1392 bucket *first_rhs = 0;
1393
511e79b3 1394 if (t == tok_identifier)
1ff442ca
NF
1395 {
1396 lhs = symval;
943819bf
RS
1397
1398 if (!start_flag)
1399 {
1400 startval = lhs;
1401 start_flag = 1;
1402 }
a083fbbf 1403
a70083a3 1404 t = lex ();
511e79b3 1405 if (t != tok_colon)
943819bf 1406 {
a0f6b076 1407 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1408 unlex (t);
943819bf 1409 }
1ff442ca
NF
1410 }
1411
511e79b3 1412 if (nrules == 0 && t == tok_bar)
1ff442ca 1413 {
a0f6b076 1414 complain (_("grammar starts with vertical bar"));
943819bf 1415 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1416 }
1ff442ca
NF
1417 /* start a new rule and record its lhs. */
1418
1419 nrules++;
1420 nitems++;
1421
b29b2ed5 1422 p = symbol_list_new (lhs);
1ff442ca
NF
1423
1424 crule1 = p1;
1425 if (p1)
1426 p1->next = p;
1427 else
1428 grammar = p;
1429
1430 p1 = p;
1431 crule = p;
1432
1433 /* mark the rule's lhs as a nonterminal if not already so. */
1434
d7020c20 1435 if (lhs->class == unknown_sym)
1ff442ca 1436 {
d7020c20 1437 lhs->class = nterm_sym;
1ff442ca
NF
1438 lhs->value = nvars;
1439 nvars++;
1440 }
d7020c20 1441 else if (lhs->class == token_sym)
a0f6b076 1442 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1443
1444 /* read the rhs of the rule. */
1445
1446 for (;;)
1447 {
a70083a3 1448 t = lex ();
511e79b3 1449 if (t == tok_prec)
943819bf 1450 {
a70083a3 1451 t = lex ();
943819bf 1452 crule->ruleprec = symval;
a70083a3 1453 t = lex ();
943819bf 1454 }
1ff442ca 1455
511e79b3 1456 if (!(t == tok_identifier || t == tok_left_curly))
a70083a3 1457 break;
1ff442ca
NF
1458
1459 /* If next token is an identifier, see if a colon follows it.
a70083a3 1460 If one does, exit this rule now. */
511e79b3 1461 if (t == tok_identifier)
1ff442ca 1462 {
a70083a3 1463 bucket *ssave;
f17bcd1f 1464 token_t t1;
1ff442ca
NF
1465
1466 ssave = symval;
a70083a3
AD
1467 t1 = lex ();
1468 unlex (t1);
1ff442ca 1469 symval = ssave;
511e79b3 1470 if (t1 == tok_colon)
a70083a3 1471 break;
1ff442ca 1472
a70083a3 1473 if (!first_rhs) /* JF */
1ff442ca
NF
1474 first_rhs = symval;
1475 /* Not followed by colon =>
1476 process as part of this rule's rhs. */
1477 }
1478
1479 /* If we just passed an action, that action was in the middle
a70083a3
AD
1480 of a rule, so make a dummy rule to reduce it to a
1481 non-terminal. */
89cab50d 1482 if (action_flag)
1ff442ca 1483 {
f282676b
AD
1484 /* Since the action was written out with this rule's
1485 number, we must give the new rule this number by
1486 inserting the new rule before it. */
1ff442ca
NF
1487
1488 /* Make a dummy nonterminal, a gensym. */
b29b2ed5 1489 bucket *sdummy = gensym ();
1ff442ca 1490
2ca209c1
AD
1491 /* Make a new rule, whose body is empty, before the
1492 current one, so that the action just read can
1493 belong to it. */
1ff442ca
NF
1494 nrules++;
1495 nitems++;
b29b2ed5 1496 p = symbol_list_new (sdummy);
2ca209c1
AD
1497 /* Attach its lineno to that of the host rule. */
1498 p->line = crule->line;
1ff442ca
NF
1499 if (crule1)
1500 crule1->next = p;
a70083a3
AD
1501 else
1502 grammar = p;
b29b2ed5
AD
1503 /* End of the rule. */
1504 crule1 = symbol_list_new (NULL);
1ff442ca
NF
1505 crule1->next = crule;
1506
e41dc700
AD
1507 p->next = crule1;
1508
f282676b
AD
1509 /* Insert the dummy generated by that rule into this
1510 rule. */
1ff442ca 1511 nitems++;
b29b2ed5 1512 p = symbol_list_new (sdummy);
1ff442ca
NF
1513 p1->next = p;
1514 p1 = p;
1515
89cab50d 1516 action_flag = 0;
1ff442ca
NF
1517 }
1518
511e79b3 1519 if (t == tok_identifier)
1ff442ca
NF
1520 {
1521 nitems++;
b29b2ed5 1522 p = symbol_list_new (symval);
1ff442ca
NF
1523 p1->next = p;
1524 p1 = p;
1525 }
a70083a3 1526 else /* handle an action. */
1ff442ca 1527 {
a70083a3 1528 copy_action (crule, rulelength);
89cab50d 1529 action_flag = 1;
1ff442ca
NF
1530 xactions++; /* JF */
1531 }
1532 rulelength++;
a70083a3 1533 } /* end of read rhs of rule */
1ff442ca
NF
1534
1535 /* Put an empty link in the list to mark the end of this rule */
b29b2ed5 1536 p = symbol_list_new (NULL);
1ff442ca
NF
1537 p1->next = p;
1538 p1 = p;
1539
511e79b3 1540 if (t == tok_prec)
1ff442ca 1541 {
a0f6b076 1542 complain (_("two @prec's in a row"));
a70083a3 1543 t = lex ();
1ff442ca 1544 crule->ruleprec = symval;
a70083a3 1545 t = lex ();
1ff442ca 1546 }
511e79b3 1547 if (t == tok_guard)
1ff442ca 1548 {
a70083a3 1549 if (!semantic_parser)
ff4a34be 1550 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1551
a70083a3
AD
1552 copy_guard (crule, rulelength);
1553 t = lex ();
1ff442ca 1554 }
511e79b3 1555 else if (t == tok_left_curly)
1ff442ca 1556 {
a70083a3 1557 /* This case never occurs -wjh */
89cab50d 1558 if (action_flag)
a0f6b076 1559 complain (_("two actions at end of one rule"));
a70083a3 1560 copy_action (crule, rulelength);
89cab50d 1561 action_flag = 1;
943819bf 1562 xactions++; /* -wjh */
a70083a3 1563 t = lex ();
1ff442ca 1564 }
a0f6b076 1565 /* If $$ is being set in default way, report if any type
6666f98f
AD
1566 mismatch. */
1567 else if (!xactions
a70083a3 1568 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1569 {
6666f98f
AD
1570 if (lhs->type_name == 0
1571 || first_rhs->type_name == 0
a70083a3 1572 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1573 complain (_("type clash (`%s' `%s') on default action"),
1574 lhs->type_name ? lhs->type_name : "",
a70083a3 1575 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1576 }
1577 /* Warn if there is no default for $$ but we need one. */
1578 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1579 complain (_("empty rule for typed nonterminal, and no action"));
511e79b3 1580 if (t == tok_semicolon)
a70083a3 1581 t = lex ();
a083fbbf 1582 }
943819bf 1583#if 0
a70083a3 1584 /* these things can appear as alternatives to rules. */
943819bf
RS
1585/* NO, they cannot.
1586 a) none of the documentation allows them
1587 b) most of them scan forward until finding a next %
1588 thus they may swallow lots of intervening rules
1589*/
511e79b3 1590 else if (t == tok_token)
1ff442ca 1591 {
d7020c20 1592 parse_token_decl (token_sym, nterm_sym);
a70083a3 1593 t = lex ();
1ff442ca 1594 }
511e79b3 1595 else if (t == tok_nterm)
1ff442ca 1596 {
d7020c20 1597 parse_token_decl (nterm_sym, token_sym);
a70083a3 1598 t = lex ();
1ff442ca 1599 }
511e79b3 1600 else if (t == tok_type)
1ff442ca 1601 {
a70083a3 1602 t = get_type ();
1ff442ca 1603 }
511e79b3 1604 else if (t == tok_union)
1ff442ca 1605 {
a70083a3
AD
1606 parse_union_decl ();
1607 t = lex ();
1ff442ca 1608 }
511e79b3 1609 else if (t == tok_expect)
1ff442ca 1610 {
a70083a3
AD
1611 parse_expect_decl ();
1612 t = lex ();
1ff442ca 1613 }
511e79b3 1614 else if (t == tok_start)
1ff442ca 1615 {
a70083a3
AD
1616 parse_start_decl ();
1617 t = lex ();
1ff442ca 1618 }
943819bf
RS
1619#endif
1620
1ff442ca 1621 else
943819bf 1622 {
d01c415b 1623 complain (_("invalid input: %s"), quote (token_buffer));
a70083a3 1624 t = lex ();
943819bf 1625 }
1ff442ca
NF
1626 }
1627
943819bf
RS
1628 /* grammar has been read. Do some checking */
1629
1ff442ca 1630 if (nsyms > MAXSHORT)
a0f6b076
AD
1631 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1632 MAXSHORT);
1ff442ca 1633 if (nrules == 0)
a0f6b076 1634 fatal (_("no rules in the input grammar"));
1ff442ca 1635
1ff442ca
NF
1636 /* Report any undefined symbols and consider them nonterminals. */
1637
1638 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1639 if (bp->class == unknown_sym)
1ff442ca 1640 {
a70083a3
AD
1641 complain (_
1642 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1643 bp->tag);
d7020c20 1644 bp->class = nterm_sym;
1ff442ca
NF
1645 bp->value = nvars++;
1646 }
1647
1648 ntokens = nsyms - nvars;
1649}
ff48177d
MA
1650
1651/* At the end of the grammar file, some C source code must
63c2d5de 1652 be stored. It is going to be associated to the epilogue
ff48177d
MA
1653 directive. */
1654static void
1655read_additionnal_code (void)
1656{
1657 char c;
63c2d5de 1658 struct obstack el_obstack;
342b8b6e 1659
63c2d5de 1660 obstack_init (&el_obstack);
ff48177d
MA
1661
1662 while ((c = getc (finput)) != EOF)
63c2d5de 1663 obstack_1grow (&el_obstack, c);
342b8b6e 1664
63c2d5de 1665 obstack_1grow (&el_obstack, 0);
11d82f03 1666 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1667}
1668
a70083a3
AD
1669\f
1670/*--------------------------------------------------------------.
1671| For named tokens, but not literal ones, define the name. The |
1672| value is the user token number. |
1673`--------------------------------------------------------------*/
1ff442ca 1674
4a120d45 1675static void
896fe5c1 1676output_token_defines (struct obstack *oout)
1ff442ca 1677{
a70083a3
AD
1678 bucket *bp;
1679 char *cp, *symbol;
1680 char c;
1ff442ca 1681
a70083a3 1682 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1683 {
a70083a3
AD
1684 symbol = bp->tag; /* get symbol */
1685
1686 if (bp->value >= ntokens)
1687 continue;
1688 if (bp->user_token_number == SALIAS)
1689 continue;
1690 if ('\'' == *symbol)
1691 continue; /* skip literal character */
1692 if (bp == errtoken)
1693 continue; /* skip error token */
1694 if ('\"' == *symbol)
1ff442ca 1695 {
a70083a3
AD
1696 /* use literal string only if given a symbol with an alias */
1697 if (bp->alias)
1698 symbol = bp->alias->tag;
1699 else
1700 continue;
1701 }
1ff442ca 1702
a70083a3
AD
1703 /* Don't #define nonliteral tokens whose names contain periods. */
1704 cp = symbol;
1705 while ((c = *cp++) && c != '.');
1706 if (c != '\0')
1707 continue;
1ff442ca 1708
0b8afb77 1709 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1710 symbol, bp->user_token_number);
a70083a3 1711 if (semantic_parser)
342b8b6e
AD
1712 /* FIXME: This is certainly dead wrong, and should be just as
1713 above. --akim. */
0b8afb77 1714 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1715 }
1716}
1ff442ca
NF
1717
1718
037ca2f1
AD
1719/*------------------------------------------------------------------.
1720| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1721| number. |
1722`------------------------------------------------------------------*/
1723
1724static void
1725token_translations_init (void)
1726{
1727 bucket *bp = NULL;
1728 int i;
1729
1730 token_translations = XCALLOC (short, max_user_token_number + 1);
1731
1732 /* Initialize all entries for literal tokens to 2, the internal
1733 token number for $undefined., which represents all invalid
1734 inputs. */
1735 for (i = 0; i <= max_user_token_number; i++)
1736 token_translations[i] = 2;
1737
1738 for (bp = firstsymbol; bp; bp = bp->next)
1739 {
1740 /* Non-terminal? */
1741 if (bp->value >= ntokens)
1742 continue;
1743 /* A token string alias? */
1744 if (bp->user_token_number == SALIAS)
1745 continue;
6b7e85b9
AD
1746
1747 assert (bp->user_token_number != SUNDEF);
1748
037ca2f1
AD
1749 /* A token which translation has already been set? */
1750 if (token_translations[bp->user_token_number] != 2)
1751 complain (_("tokens %s and %s both assigned number %d"),
1752 tags[token_translations[bp->user_token_number]],
1753 bp->tag, bp->user_token_number);
1754 token_translations[bp->user_token_number] = bp->value;
1755 }
1756}
1757
1758
a70083a3
AD
1759/*------------------------------------------------------------------.
1760| Assign symbol numbers, and write definition of token names into |
b2ca4022 1761| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1762| of symbols. |
1763`------------------------------------------------------------------*/
1ff442ca 1764
4a120d45 1765static void
118fb205 1766packsymbols (void)
1ff442ca 1767{
342b8b6e 1768 bucket *bp = NULL;
a70083a3 1769 int tokno = 1;
a70083a3 1770 int last_user_token_number;
4a120d45 1771 static char DOLLAR[] = "$";
1ff442ca 1772
d7913476 1773 tags = XCALLOC (char *, nsyms + 1);
d7913476 1774 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1775
d7913476
AD
1776 sprec = XCALLOC (short, nsyms);
1777 sassoc = XCALLOC (short, nsyms);
1ff442ca 1778
037ca2f1
AD
1779 /* The EOF token. */
1780 tags[0] = DOLLAR;
1781 user_toknums[0] = 0;
1782
1ff442ca
NF
1783 max_user_token_number = 256;
1784 last_user_token_number = 256;
1785
1786 for (bp = firstsymbol; bp; bp = bp->next)
1787 {
d7020c20 1788 if (bp->class == nterm_sym)
1ff442ca
NF
1789 {
1790 bp->value += ntokens;
1791 }
943819bf
RS
1792 else if (bp->alias)
1793 {
0a6384c4
AD
1794 /* this symbol and its alias are a single token defn.
1795 allocate a tokno, and assign to both check agreement of
1796 ->prec and ->assoc fields and make both the same */
1797 if (bp->value == 0)
1798 bp->value = bp->alias->value = tokno++;
943819bf 1799
0a6384c4
AD
1800 if (bp->prec != bp->alias->prec)
1801 {
1802 if (bp->prec != 0 && bp->alias->prec != 0
1803 && bp->user_token_number == SALIAS)
a0f6b076
AD
1804 complain (_("conflicting precedences for %s and %s"),
1805 bp->tag, bp->alias->tag);
0a6384c4
AD
1806 if (bp->prec != 0)
1807 bp->alias->prec = bp->prec;
1808 else
1809 bp->prec = bp->alias->prec;
1810 }
943819bf 1811
0a6384c4
AD
1812 if (bp->assoc != bp->alias->assoc)
1813 {
a0f6b076
AD
1814 if (bp->assoc != 0 && bp->alias->assoc != 0
1815 && bp->user_token_number == SALIAS)
1816 complain (_("conflicting assoc values for %s and %s"),
1817 bp->tag, bp->alias->tag);
1818 if (bp->assoc != 0)
1819 bp->alias->assoc = bp->assoc;
1820 else
1821 bp->assoc = bp->alias->assoc;
1822 }
0a6384c4
AD
1823
1824 if (bp->user_token_number == SALIAS)
a70083a3 1825 continue; /* do not do processing below for SALIASs */
943819bf 1826
a70083a3 1827 }
d7020c20 1828 else /* bp->class == token_sym */
943819bf
RS
1829 {
1830 bp->value = tokno++;
1831 }
1832
d7020c20 1833 if (bp->class == token_sym)
1ff442ca 1834 {
6b7e85b9 1835 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1836 bp->user_token_number = ++last_user_token_number;
1837 if (bp->user_token_number > max_user_token_number)
1838 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1839 }
1840
1841 tags[bp->value] = bp->tag;
943819bf 1842 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1843 sprec[bp->value] = bp->prec;
1844 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1845 }
1846
037ca2f1 1847 token_translations_init ();
1ff442ca
NF
1848
1849 error_token_number = errtoken->value;
1850
e3f1699f
AD
1851 if (startval->class == unknown_sym)
1852 fatal (_("the start symbol %s is undefined"), startval->tag);
1853 else if (startval->class == token_sym)
1854 fatal (_("the start symbol %s is a token"), startval->tag);
1855
1856 start_symbol = startval->value;
1857}
1858
1859
1860/*-----------------------------------.
1861| Output definition of token names. |
1862`-----------------------------------*/
1863
1864static void
1865symbols_output (void)
1866{
342b8b6e
AD
1867 {
1868 struct obstack tokendefs;
1869 obstack_init (&tokendefs);
1870 output_token_defines (&tokendefs);
1871 obstack_1grow (&tokendefs, 0);
1872 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1873 obstack_free (&tokendefs, NULL);
1874 }
b6610515 1875
d8cb5183
MA
1876#if 0
1877 if (!no_parser_flag)
1878 output_token_defines (&table_obstack);
1879#endif
1ff442ca 1880
89cab50d 1881 if (defines_flag)
1ff442ca 1882 {
896fe5c1 1883 output_token_defines (&defines_obstack);
1ff442ca
NF
1884
1885 if (!pure_parser)
1886 {
1887 if (spec_name_prefix)
896fe5c1
AD
1888 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1889 spec_name_prefix);
1ff442ca 1890 else
ff4423cc 1891 obstack_sgrow (&defines_obstack,
573c1d9f 1892 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1893 }
1894
1895 if (semantic_parser)
037ca2f1
AD
1896 {
1897 int i;
1898
1899 for (i = ntokens; i < nsyms; i++)
1900 {
1901 /* don't make these for dummy nonterminals made by gensym. */
1902 if (*tags[i] != '@')
1903 obstack_fgrow2 (&defines_obstack,
1904 "# define\tNT%s\t%d\n", tags[i], i);
1905 }
1ff442ca 1906#if 0
037ca2f1
AD
1907 /* `fdefines' is now a temporary file, so we need to copy its
1908 contents in `done', so we can't close it here. */
1909 fclose (fdefines);
1910 fdefines = NULL;
1ff442ca 1911#endif
037ca2f1 1912 }
1ff442ca
NF
1913 }
1914}
a083fbbf 1915
1ff442ca 1916
a70083a3
AD
1917/*---------------------------------------------------------------.
1918| Convert the rules into the representation using RRHS, RLHS and |
1919| RITEMS. |
1920`---------------------------------------------------------------*/
1ff442ca 1921
4a120d45 1922static void
118fb205 1923packgram (void)
1ff442ca 1924{
a70083a3
AD
1925 int itemno;
1926 int ruleno;
1927 symbol_list *p;
1ff442ca 1928
d7913476 1929 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1930 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1931
1932 itemno = 0;
1933 ruleno = 1;
1934
1935 p = grammar;
1936 while (p)
1937 {
b29b2ed5 1938 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1939 rule_table[ruleno].lhs = p->sym->value;
1940 rule_table[ruleno].rhs = itemno;
b29b2ed5 1941 rule_table[ruleno].line = p->line;
1ff442ca
NF
1942
1943 p = p->next;
1944 while (p && p->sym)
1945 {
1946 ritem[itemno++] = p->sym->value;
1947 /* A rule gets by default the precedence and associativity
1948 of the last token in it. */
d7020c20 1949 if (p->sym->class == token_sym)
1ff442ca 1950 {
652a871c
AD
1951 rule_table[ruleno].prec = p->sym->prec;
1952 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1953 }
a70083a3
AD
1954 if (p)
1955 p = p->next;
1ff442ca
NF
1956 }
1957
1958 /* If this rule has a %prec,
a70083a3 1959 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1960 if (ruleprec)
1961 {
652a871c
AD
1962 rule_table[ruleno].prec = ruleprec->prec;
1963 rule_table[ruleno].assoc = ruleprec->assoc;
1964 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1965 }
1966
1967 ritem[itemno++] = -ruleno;
1968 ruleno++;
1969
a70083a3
AD
1970 if (p)
1971 p = p->next;
1ff442ca
NF
1972 }
1973
1974 ritem[itemno] = 0;
1975}
a70083a3
AD
1976\f
1977/*-------------------------------------------------------------------.
1978| Read in the grammar specification and record it in the format |
ea5607fd 1979| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1980| and all actions into ACTION_OBSTACK, in each case forming the body |
1981| of a C function (YYGUARD or YYACTION) which contains a switch |
1982| statement to decide which guard or action to execute. |
a70083a3
AD
1983`-------------------------------------------------------------------*/
1984
1985void
1986reader (void)
1987{
1988 start_flag = 0;
1989 startval = NULL; /* start symbol not specified yet. */
1990
a70083a3
AD
1991 nsyms = 1;
1992 nvars = 0;
1993 nrules = 0;
1994 nitems = 0;
a70083a3
AD
1995
1996 typed = 0;
1997 lastprec = 0;
1998
a70083a3
AD
1999 semantic_parser = 0;
2000 pure_parser = 0;
a70083a3
AD
2001
2002 grammar = NULL;
2003
342b8b6e 2004 lex_init ();
a70083a3
AD
2005 lineno = 1;
2006
11d82f03
MA
2007 /* Initialize the muscle obstack. */
2008 obstack_init (&muscle_obstack);
82e236e2 2009
a70083a3
AD
2010 /* Initialize the symbol table. */
2011 tabinit ();
b6610515 2012
a70083a3
AD
2013 /* Construct the error token */
2014 errtoken = getsym ("error");
d7020c20 2015 errtoken->class = token_sym;
a70083a3 2016 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 2017
a70083a3
AD
2018 /* Construct a token that represents all undefined literal tokens.
2019 It is always token number 2. */
2020 undeftoken = getsym ("$undefined.");
d7020c20 2021 undeftoken->class = token_sym;
a70083a3
AD
2022 undeftoken->user_token_number = 2;
2023
896fe5c1
AD
2024 /* Read the declaration section. Copy %{ ... %} groups to
2025 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2026 etc. found there. */
a70083a3 2027 read_declarations ();
a70083a3
AD
2028 /* Read in the grammar, build grammar in list form. Write out
2029 guards and actions. */
2030 readgram ();
ff48177d
MA
2031 /* Some C code is given at the end of the grammar file. */
2032 read_additionnal_code ();
b0c4483e 2033
a70083a3 2034 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
2035 write its type into the .tab.h file.
2036 This is no longer need with header skeleton. */
2037
a70083a3
AD
2038 /* Assign the symbols their symbol numbers. Write #defines for the
2039 token symbols into FDEFINES if requested. */
2040 packsymbols ();
e3f1699f 2041 symbols_output ();
a70083a3
AD
2042 /* Convert the grammar into the format described in gram.h. */
2043 packgram ();
edad7067
AD
2044 /* Output the headers. */
2045 symbols_output ();
a70083a3 2046}