]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/output.h: And put its extern declaration here.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 38
a70083a3
AD
/* One node of a singly linked list of symbols.  */
typedef struct symbol_list
{
  /* Following node; symbol_list_new () starts it out as NULL.  */
  struct symbol_list *next;
  /* The symbol carried by this node.  */
  bucket *sym;
  /* Value of `lineno' at the time the node was created.  */
  int line;
  /* NOTE(review): presumably the symbol whose precedence applies to
     the rule; starts out NULL -- confirm against the rule reader.  */
  bucket *ruleprec;
}
symbol_list;
118fb205 47
/* Current line of the input; incremented whenever a newline is
   consumed by the copying routines in this file.  */
int lineno;
char **tags;
short *user_toknums;
static symbol_list *grammar;
/* Nonzero once a well-formed %start declaration has been read.  */
static int start_flag;
/* The symbol named by the %start declaration.  */
static bucket *startval;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

/* Nonzero if %union has been seen.  */
static int typed;

/* Incremented for each %left, %right or %nonassoc seen.  */
static int lastprec;

static bucket *errtoken;
static bucket *undeftoken;
b29b2ed5
AD
67
68
6255b435 69static symbol_list *
b29b2ed5
AD
70symbol_list_new (bucket *sym)
71{
72 symbol_list *res = XMALLOC (symbol_list, 1);
73 res->next = NULL;
74 res->sym = sym;
75 res->line = lineno;
76 res->ruleprec = NULL;
77 return res;
78}
79
0d533154 80\f
a70083a3 81
0d533154
AD
82/*===================\
83| Low level lexing. |
84\===================*/
943819bf
RS
85
86static void
118fb205 87skip_to_char (int target)
943819bf
RS
88{
89 int c;
90 if (target == '\n')
a0f6b076 91 complain (_(" Skipping to next \\n"));
943819bf 92 else
a0f6b076 93 complain (_(" Skipping to next %c"), target);
943819bf
RS
94
95 do
0d533154 96 c = skip_white_space ();
943819bf 97 while (c != target && c != EOF);
a083fbbf 98 if (c != EOF)
0d533154 99 ungetc (c, finput);
943819bf
RS
100}
101
102
0d533154
AD
/*-----------------------------------------------------------------.
| Read a signed integer from STREAM and return its value.          |
|                                                                  |
| An optional leading `-' is accepted, followed by decimal digits. |
| The first character that is not part of the number is pushed     |
| back onto STREAM.  If no digit is present the result is 0.       |
| Magnitudes too large for an int saturate at INT_MAX instead of   |
| overflowing (signed overflow is undefined behavior).             |
`-----------------------------------------------------------------*/

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would 10 * n + digit exceed INT_MAX?  Once saturated, N
	 stays at INT_MAX for the remaining digits.  */
      if (n > (INT_MAX - digit) / 10)
	n = INT_MAX;
      else
	n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a no-op, so skip it explicitly.  */
  if (c != EOF)
    ungetc (c, stream);

  return sign * n;
}
130\f
79282c5a
AD
131/*--------------------------------------------------------------.
132| Get the data type (alternative in the union) of the value for |
133| symbol N in rule RULE. |
134`--------------------------------------------------------------*/
135
136static char *
b29b2ed5 137get_type_name (int n, symbol_list *rule)
79282c5a
AD
138{
139 int i;
140 symbol_list *rp;
141
142 if (n < 0)
143 {
144 complain (_("invalid $ value"));
145 return NULL;
146 }
147
148 rp = rule;
149 i = 0;
150
151 while (i < n)
152 {
153 rp = rp->next;
154 if (rp == NULL || rp->sym == NULL)
155 {
156 complain (_("invalid $ value"));
157 return NULL;
158 }
159 i++;
160 }
161
162 return rp->sym->type_name;
163}
164\f
337bab46
AD
165/*------------------------------------------------------------.
166| Dump the string from FIN to OOUT if non null. MATCH is the |
167| delimiter of the string (either ' or "). |
168`------------------------------------------------------------*/
ae3c3164
AD
169
170static inline void
b6610515 171copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
172{
173 int c;
174
b6610515
RA
175 if (store)
176 obstack_1grow (oout, match);
8c7ebe49 177
4a120d45 178 c = getc (fin);
ae3c3164
AD
179
180 while (c != match)
181 {
182 if (c == EOF)
183 fatal (_("unterminated string at end of file"));
184 if (c == '\n')
185 {
a0f6b076 186 complain (_("unterminated string"));
4a120d45 187 ungetc (c, fin);
ae3c3164
AD
188 c = match; /* invent terminator */
189 continue;
190 }
191
337bab46 192 obstack_1grow (oout, c);
ae3c3164
AD
193
194 if (c == '\\')
195 {
4a120d45 196 c = getc (fin);
ae3c3164
AD
197 if (c == EOF)
198 fatal (_("unterminated string at end of file"));
337bab46 199 obstack_1grow (oout, c);
8c7ebe49 200
ae3c3164
AD
201 if (c == '\n')
202 lineno++;
203 }
204
a70083a3 205 c = getc (fin);
ae3c3164
AD
206 }
207
b6610515
RA
208 if (store)
209 obstack_1grow (oout, c);
210}
211
/* Copy a string from FIN to OOUT, delimiters included.  MATCH is the
   delimiter; this simply calls copy_string2 () with STORE set to 1.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  copy_string2 (fin, oout, match, 1);
}
219
b6610515
RA
/* Copy the longest run of alphanumeric characters and underscores
   from FIN to OOUT.  The character that ends the run is pushed back
   onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (isalnum (ch) || ch == '_')
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
ae3c3164 232
337bab46
AD
233/*-----------------------------------------------------------------.
234| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
235| NULL). In fact we just saw a `/', which might or might not be a |
236| comment. In any case, copy what we saw. |
237| |
238| OUT2 might be NULL. |
239`-----------------------------------------------------------------*/
ae3c3164
AD
240
241static inline void
337bab46 242copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
243{
244 int cplus_comment;
a70083a3 245 int ended;
550a72a3
AD
246 int c;
247
248 /* We read a `/', output it. */
337bab46 249 obstack_1grow (oout1, '/');
896fe5c1
AD
250 if (oout2)
251 obstack_1grow (oout2, '/');
550a72a3
AD
252
253 switch ((c = getc (fin)))
254 {
255 case '/':
256 cplus_comment = 1;
257 break;
258 case '*':
259 cplus_comment = 0;
260 break;
261 default:
262 ungetc (c, fin);
263 return;
264 }
ae3c3164 265
337bab46 266 obstack_1grow (oout1, c);
896fe5c1
AD
267 if (oout2)
268 obstack_1grow (oout2, c);
550a72a3 269 c = getc (fin);
ae3c3164
AD
270
271 ended = 0;
272 while (!ended)
273 {
274 if (!cplus_comment && c == '*')
275 {
276 while (c == '*')
277 {
337bab46 278 obstack_1grow (oout1, c);
896fe5c1
AD
279 if (oout2)
280 obstack_1grow (oout2, c);
550a72a3 281 c = getc (fin);
ae3c3164
AD
282 }
283
284 if (c == '/')
285 {
337bab46 286 obstack_1grow (oout1, c);
896fe5c1
AD
287 if (oout2)
288 obstack_1grow (oout2, c);
ae3c3164
AD
289 ended = 1;
290 }
291 }
292 else if (c == '\n')
293 {
294 lineno++;
337bab46 295 obstack_1grow (oout1, c);
896fe5c1
AD
296 if (oout2)
297 obstack_1grow (oout2, c);
ae3c3164
AD
298 if (cplus_comment)
299 ended = 1;
300 else
550a72a3 301 c = getc (fin);
ae3c3164
AD
302 }
303 else if (c == EOF)
304 fatal (_("unterminated comment"));
305 else
306 {
337bab46 307 obstack_1grow (oout1, c);
896fe5c1
AD
308 if (oout2)
309 obstack_1grow (oout2, c);
550a72a3 310 c = getc (fin);
ae3c3164
AD
311 }
312 }
313}
314
315
550a72a3
AD
/*-------------------------------------------------------------------.
| Dump the comment (actually the current string starting with a `/') |
| from FIN to OOUT.  This calls copy_comment2 () with no second      |
| output obstack.                                                    |
`-------------------------------------------------------------------*/

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  copy_comment2 (fin, oout, NULL);
}
326
327
a70083a3 328/*-----------------------------------------------------------------.
337bab46 329| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
330| reference to this location. STACK_OFFSET is the number of values |
331| in the current rule so far, which says where to find `$0' with |
332| respect to the top of the stack. |
333`-----------------------------------------------------------------*/
1ff442ca 334
a70083a3 335static inline void
337bab46 336copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 337{
a70083a3 338 int c;
1ff442ca 339
a70083a3
AD
340 c = getc (fin);
341 if (c == '$')
1ff442ca 342 {
ff4423cc 343 obstack_sgrow (oout, "yyloc");
89cab50d 344 locations_flag = 1;
a70083a3
AD
345 }
346 else if (isdigit (c) || c == '-')
347 {
348 int n;
1ff442ca 349
a70083a3
AD
350 ungetc (c, fin);
351 n = read_signed_integer (fin);
943819bf 352
337bab46 353 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 354 locations_flag = 1;
1ff442ca 355 }
a70083a3 356 else
ff4a34be
AD
357 {
358 char buf[] = "@c";
359 buf[1] = c;
360 complain (_("%s is invalid"), quote (buf));
361 }
1ff442ca 362}
79282c5a
AD
363
364
365/*-------------------------------------------------------------------.
366| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
367| |
368| Possible inputs: $[<TYPENAME>]($|integer) |
369| |
337bab46 370| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
371| the number of values in the current rule so far, which says where |
372| to find `$0' with respect to the top of the stack. |
373`-------------------------------------------------------------------*/
374
375static inline void
337bab46 376copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
377 symbol_list *rule, int stack_offset)
378{
379 int c = getc (fin);
b0ce6046 380 const char *type_name = NULL;
79282c5a 381
f282676b 382 /* Get the type name if explicit. */
79282c5a
AD
383 if (c == '<')
384 {
f282676b 385 read_type_name (fin);
79282c5a
AD
386 type_name = token_buffer;
387 value_components_used = 1;
79282c5a
AD
388 c = getc (fin);
389 }
390
391 if (c == '$')
392 {
ff4423cc 393 obstack_sgrow (oout, "yyval");
8c7ebe49 394
79282c5a
AD
395 if (!type_name)
396 type_name = get_type_name (0, rule);
397 if (type_name)
337bab46 398 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
399 if (!type_name && typed)
400 complain (_("$$ of `%s' has no declared type"),
401 rule->sym->tag);
402 }
403 else if (isdigit (c) || c == '-')
404 {
405 int n;
406 ungetc (c, fin);
407 n = read_signed_integer (fin);
408
409 if (!type_name && n > 0)
410 type_name = get_type_name (n, rule);
411
337bab46 412 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 413
79282c5a 414 if (type_name)
337bab46 415 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
416 if (!type_name && typed)
417 complain (_("$%d of `%s' has no declared type"),
418 n, rule->sym->tag);
419 }
420 else
421 {
422 char buf[] = "$c";
423 buf[1] = c;
424 complain (_("%s is invalid"), quote (buf));
425 }
426}
a70083a3
AD
427\f
428/*-------------------------------------------------------------------.
429| Copy the contents of a `%{ ... %}' into the definitions file. The |
430| `%{' has already been read. Return after reading the `%}'. |
431`-------------------------------------------------------------------*/
1ff442ca 432
4a120d45 433static void
118fb205 434copy_definition (void)
1ff442ca 435{
a70083a3 436 int c;
ae3c3164 437 /* -1 while reading a character if prev char was %. */
a70083a3 438 int after_percent;
1ff442ca 439
b6610515 440#if 0
89cab50d 441 if (!no_lines_flag)
25b222fa
MA
442 {
443 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 444 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
445 muscle_find("filename")));
446 }
b6610515 447#endif
1ff442ca
NF
448
449 after_percent = 0;
450
ae3c3164 451 c = getc (finput);
1ff442ca
NF
452
453 for (;;)
454 {
455 switch (c)
456 {
457 case '\n':
dd60faec 458 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
459 lineno++;
460 break;
461
462 case '%':
a70083a3 463 after_percent = -1;
1ff442ca 464 break;
a083fbbf 465
1ff442ca
NF
466 case '\'':
467 case '"':
337bab46 468 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
469 break;
470
471 case '/':
337bab46 472 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
473 break;
474
475 case EOF:
a70083a3 476 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
477
478 default:
dd60faec 479 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
480 }
481
a70083a3 482 c = getc (finput);
1ff442ca
NF
483
484 if (after_percent)
485 {
486 if (c == '}')
487 return;
dd60faec 488 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
489 }
490 after_percent = 0;
1ff442ca 491 }
1ff442ca
NF
492}
493
494
d7020c20
AD
495/*-------------------------------------------------------------------.
496| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
497| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
498| are reversed. |
499`-------------------------------------------------------------------*/
1ff442ca 500
4a120d45 501static void
d7020c20 502parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 503{
342b8b6e
AD
504 token_t token = tok_undef;
505 char *typename = NULL;
1ff442ca 506
1e9798d5
AD
507 /* The symbol being defined. */
508 struct bucket *symbol = NULL;
509
510 /* After `%token' and `%nterm', any number of symbols maybe be
511 defined. */
1ff442ca
NF
512 for (;;)
513 {
e6011337
JT
514 int tmp_char = ungetc (skip_white_space (), finput);
515
1e9798d5
AD
516 /* `%' (for instance from `%token', or from `%%' etc.) is the
517 only valid means to end this declaration. */
e6011337 518 if (tmp_char == '%')
1ff442ca 519 return;
e6011337 520 if (tmp_char == EOF)
a0f6b076 521 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 522
a70083a3 523 token = lex ();
511e79b3 524 if (token == tok_comma)
943819bf
RS
525 {
526 symbol = NULL;
527 continue;
528 }
511e79b3 529 if (token == tok_typename)
1ff442ca 530 {
95e36146 531 typename = xstrdup (token_buffer);
1ff442ca 532 value_components_used = 1;
943819bf
RS
533 symbol = NULL;
534 }
511e79b3 535 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 536 {
8e03724b
AD
537 if (symval->alias)
538 warn (_("symbol `%s' used more than once as a literal string"),
539 symval->tag);
540 else if (symbol->alias)
541 warn (_("symbol `%s' given more than one literal string"),
542 symbol->tag);
543 else
544 {
545 symval->class = token_sym;
546 symval->type_name = typename;
547 symval->user_token_number = symbol->user_token_number;
548 symbol->user_token_number = SALIAS;
549 symval->alias = symbol;
550 symbol->alias = symval;
551 /* symbol and symval combined are only one symbol */
552 nsyms--;
553 }
8e03724b 554 symbol = NULL;
1ff442ca 555 }
511e79b3 556 else if (token == tok_identifier)
1ff442ca
NF
557 {
558 int oldclass = symval->class;
943819bf 559 symbol = symval;
1ff442ca 560
943819bf 561 if (symbol->class == what_is_not)
a0f6b076 562 complain (_("symbol %s redefined"), symbol->tag);
943819bf 563 symbol->class = what_is;
d7020c20 564 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 565 symbol->value = nvars++;
1ff442ca
NF
566
567 if (typename)
568 {
943819bf
RS
569 if (symbol->type_name == NULL)
570 symbol->type_name = typename;
a70083a3 571 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 572 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
573 }
574 }
511e79b3 575 else if (symbol && token == tok_number)
a70083a3 576 {
943819bf 577 symbol->user_token_number = numval;
a70083a3 578 }
1ff442ca 579 else
943819bf 580 {
a0f6b076 581 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
582 token_buffer,
583 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 584 skip_to_char ('%');
943819bf 585 }
1ff442ca
NF
586 }
587
588}
589
1ff442ca 590
d7020c20
AD
591/*------------------------------.
592| Parse what comes after %start |
593`------------------------------*/
1ff442ca 594
4a120d45 595static void
118fb205 596parse_start_decl (void)
1ff442ca
NF
597{
598 if (start_flag)
27821bff 599 complain (_("multiple %s declarations"), "%start");
511e79b3 600 if (lex () != tok_identifier)
27821bff 601 complain (_("invalid %s declaration"), "%start");
943819bf
RS
602 else
603 {
604 start_flag = 1;
605 startval = symval;
606 }
1ff442ca
NF
607}
608
a70083a3
AD
609/*-----------------------------------------------------------.
610| read in a %type declaration and record its information for |
611| get_type_name to access |
612`-----------------------------------------------------------*/
613
614static void
615parse_type_decl (void)
616{
a70083a3
AD
617 char *name;
618
511e79b3 619 if (lex () != tok_typename)
a70083a3
AD
620 {
621 complain ("%s", _("%type declaration has no <typename>"));
622 skip_to_char ('%');
623 return;
624 }
625
95e36146 626 name = xstrdup (token_buffer);
a70083a3
AD
627
628 for (;;)
629 {
f17bcd1f 630 token_t t;
a70083a3
AD
631 int tmp_char = ungetc (skip_white_space (), finput);
632
633 if (tmp_char == '%')
634 return;
635 if (tmp_char == EOF)
636 fatal (_("Premature EOF after %s"), token_buffer);
637
638 t = lex ();
639
640 switch (t)
1ff442ca
NF
641 {
642
511e79b3
AD
643 case tok_comma:
644 case tok_semicolon:
1ff442ca
NF
645 break;
646
511e79b3 647 case tok_identifier:
1ff442ca
NF
648 if (symval->type_name == NULL)
649 symval->type_name = name;
a70083a3 650 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 651 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
652
653 break;
654
655 default:
a0f6b076
AD
656 complain (_("invalid %%type declaration due to item: %s"),
657 token_buffer);
a70083a3 658 skip_to_char ('%');
1ff442ca
NF
659 }
660 }
661}
662
663
664
d7020c20
AD
665/*----------------------------------------------------------------.
666| Read in a %left, %right or %nonassoc declaration and record its |
667| information. |
668`----------------------------------------------------------------*/
1ff442ca 669
4a120d45 670static void
d7020c20 671parse_assoc_decl (associativity assoc)
1ff442ca 672{
a70083a3
AD
673 char *name = NULL;
674 int prev = 0;
1ff442ca 675
a70083a3 676 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 677
1ff442ca
NF
678 for (;;)
679 {
f17bcd1f 680 token_t t;
e6011337 681 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 682
e6011337 683 if (tmp_char == '%')
1ff442ca 684 return;
e6011337 685 if (tmp_char == EOF)
a0f6b076 686 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 687
a70083a3 688 t = lex ();
1ff442ca
NF
689
690 switch (t)
691 {
511e79b3 692 case tok_typename:
95e36146 693 name = xstrdup (token_buffer);
1ff442ca
NF
694 break;
695
511e79b3 696 case tok_comma:
1ff442ca
NF
697 break;
698
511e79b3 699 case tok_identifier:
1ff442ca 700 if (symval->prec != 0)
a0f6b076 701 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
702 symval->prec = lastprec;
703 symval->assoc = assoc;
d7020c20 704 if (symval->class == nterm_sym)
a0f6b076 705 complain (_("symbol %s redefined"), symval->tag);
d7020c20 706 symval->class = token_sym;
1ff442ca 707 if (name)
a70083a3 708 { /* record the type, if one is specified */
1ff442ca
NF
709 if (symval->type_name == NULL)
710 symval->type_name = name;
a70083a3 711 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 712 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
713 }
714 break;
715
511e79b3
AD
716 case tok_number:
717 if (prev == tok_identifier)
a70083a3 718 {
1ff442ca 719 symval->user_token_number = numval;
a70083a3
AD
720 }
721 else
722 {
723 complain (_
724 ("invalid text (%s) - number should be after identifier"),
725token_buffer);
726 skip_to_char ('%');
727 }
1ff442ca
NF
728 break;
729
511e79b3 730 case tok_semicolon:
1ff442ca
NF
731 return;
732
733 default:
a0f6b076 734 complain (_("unexpected item: %s"), token_buffer);
a70083a3 735 skip_to_char ('%');
1ff442ca
NF
736 }
737
738 prev = t;
1ff442ca
NF
739 }
740}
741
742
743
dd60faec 744/*--------------------------------------------------------------.
180d45ba
PB
745| Copy the union declaration into the stype muscle |
746| (and fdefines), where it is made into the definition of |
747| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 748`--------------------------------------------------------------*/
1ff442ca 749
4a120d45 750static void
118fb205 751parse_union_decl (void)
1ff442ca 752{
a70083a3
AD
753 int c;
754 int count = 0;
180d45ba 755 struct obstack union_obstack;
5f7e0832
AD
756 const char *prologue = "\
757#ifndef YYSTYPE\n\
758typedef union";
759 const char *epilogue = "\
760 yystype;\n\
761# define YYSTYPE yystype\n\
762#endif\n";
1ff442ca
NF
763
764 if (typed)
27821bff 765 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
766
767 typed = 1;
768
f6ec6d13
AD
769 /* FIXME: I'm worried: are you sure attrs_obstack is properly
770 filled? */
5f7e0832
AD
771 /* I don't see any reasons to keep this line, because we should
772 create a special skeleton for this option. */
180d45ba 773 if (no_lines_flag)
dd60faec 774 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 775
180d45ba
PB
776 obstack_init (&union_obstack);
777 obstack_sgrow (&union_obstack, "union");
896fe5c1 778 if (defines_flag)
5f7e0832 779 obstack_sgrow (&defines_obstack, prologue);
1ff442ca 780
27821bff 781 c = getc (finput);
1ff442ca
NF
782
783 while (c != EOF)
784 {
342b8b6e
AD
785 /* If C contains '/', it is output by copy_comment (). */
786 if (c != '/')
787 {
f6ec6d13 788 obstack_1grow (&union_obstack, c);
342b8b6e
AD
789 if (defines_flag)
790 obstack_1grow (&defines_obstack, c);
791 }
1ff442ca
NF
792
793 switch (c)
794 {
795 case '\n':
796 lineno++;
797 break;
798
799 case '/':
180d45ba 800 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
801 break;
802
1ff442ca
NF
803 case '{':
804 count++;
805 break;
806
807 case '}':
808 if (count == 0)
27821bff 809 complain (_("unmatched %s"), "`}'");
1ff442ca 810 count--;
943819bf 811 if (count <= 0)
1ff442ca 812 {
896fe5c1 813 if (defines_flag)
5f7e0832 814 obstack_sgrow (&defines_obstack, epilogue);
1ff442ca 815 /* JF don't choke on trailing semi */
27821bff
AD
816 c = skip_white_space ();
817 if (c != ';')
a70083a3 818 ungetc (c, finput);
180d45ba
PB
819 obstack_1grow (&union_obstack, 0);
820 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
821 return;
822 }
823 }
824
27821bff 825 c = getc (finput);
1ff442ca 826 }
180d45ba 827
1ff442ca
NF
828}
829
d7020c20
AD
830
831/*-------------------------------------------------------.
832| Parse the declaration %expect N which says to expect N |
833| shift-reduce conflicts. |
834`-------------------------------------------------------*/
1ff442ca 835
4a120d45 836static void
118fb205 837parse_expect_decl (void)
1ff442ca 838{
131e2fef 839 int c = skip_white_space ();
1ff442ca
NF
840 ungetc (c, finput);
841
131e2fef 842 if (!isdigit (c))
79282c5a 843 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
844 else
845 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
846}
847
a70083a3
AD
848
849/*-------------------------------------------------------------------.
850| Parse what comes after %thong. the full syntax is |
851| |
852| %thong <type> token number literal |
853| |
854| the <type> or number may be omitted. The number specifies the |
855| user_token_number. |
856| |
857| Two symbols are entered in the table, one for the token symbol and |
858| one for the literal. Both are given the <type>, if any, from the |
859| declaration. The ->user_token_number of the first is SALIAS and |
860| the ->user_token_number of the second is set to the number, if |
861| any, from the declaration. The two symbols are linked via |
862| pointers in their ->alias fields. |
863| |
864| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
865| only the literal string is retained it is the literal string that |
866| is output to yytname |
867`-------------------------------------------------------------------*/
868
869static void
870parse_thong_decl (void)
7b306f52 871{
f17bcd1f 872 token_t token;
a70083a3
AD
873 struct bucket *symbol;
874 char *typename = 0;
6b7e85b9 875 int usrtoknum = SUNDEF;
7b306f52 876
a70083a3 877 token = lex (); /* fetch typename or first token */
511e79b3 878 if (token == tok_typename)
7b306f52 879 {
95e36146 880 typename = xstrdup (token_buffer);
a70083a3
AD
881 value_components_used = 1;
882 token = lex (); /* fetch first token */
7b306f52 883 }
7b306f52 884
a70083a3 885 /* process first token */
7b306f52 886
511e79b3 887 if (token != tok_identifier)
a70083a3
AD
888 {
889 complain (_("unrecognized item %s, expected an identifier"),
890 token_buffer);
891 skip_to_char ('%');
892 return;
7b306f52 893 }
d7020c20 894 symval->class = token_sym;
a70083a3
AD
895 symval->type_name = typename;
896 symval->user_token_number = SALIAS;
897 symbol = symval;
7b306f52 898
a70083a3 899 token = lex (); /* get number or literal string */
1ff442ca 900
511e79b3 901 if (token == tok_number)
943819bf 902 {
a70083a3
AD
903 usrtoknum = numval;
904 token = lex (); /* okay, did number, now get literal */
943819bf 905 }
1ff442ca 906
a70083a3 907 /* process literal string token */
1ff442ca 908
511e79b3 909 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 910 {
a70083a3
AD
911 complain (_("expected string constant instead of %s"), token_buffer);
912 skip_to_char ('%');
913 return;
1ff442ca 914 }
d7020c20 915 symval->class = token_sym;
a70083a3
AD
916 symval->type_name = typename;
917 symval->user_token_number = usrtoknum;
1ff442ca 918
a70083a3
AD
919 symval->alias = symbol;
920 symbol->alias = symval;
1ff442ca 921
79282c5a
AD
922 /* symbol and symval combined are only one symbol. */
923 nsyms--;
a70083a3 924}
3cef001a 925
b6610515 926static void
11d82f03 927parse_muscle_decl (void)
b6610515
RA
928{
929 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
930 char* muscle_key;
931 char* muscle_value;
b6610515
RA
932
933 /* Read key. */
934 if (!isalpha (ch) && ch != '_')
935 {
936 complain (_("invalid %s declaration"), "%define");
937 skip_to_char ('%');
938 return;
939 }
11d82f03
MA
940 copy_identifier (finput, &muscle_obstack);
941 obstack_1grow (&muscle_obstack, 0);
942 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 943
b6610515
RA
944 /* Read value. */
945 ch = skip_white_space ();
946 if (ch != '"')
947 {
948 ungetc (ch, finput);
949 if (ch != EOF)
950 {
951 complain (_("invalid %s declaration"), "%define");
952 skip_to_char ('%');
953 return;
954 }
955 else
956 fatal (_("Premature EOF after %s"), "\"");
957 }
11d82f03
MA
958 copy_string2 (finput, &muscle_obstack, '"', 0);
959 obstack_1grow (&muscle_obstack, 0);
960 muscle_value = obstack_finish (&muscle_obstack);
b6610515 961
b6610515 962 /* Store the (key, value) pair in the environment. */
11d82f03 963 muscle_insert (muscle_key, muscle_value);
b6610515
RA
964}
965
2ba3b73c
MA
966
/*-------------------------------------------------------------------.
| Parse what comes after %skeleton.  Currently a stub: the body is   |
| empty, so nothing is read from the input.                          |
`-------------------------------------------------------------------*/

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
}
976
a70083a3
AD
977/*----------------------------------------------------------------.
978| Read from finput until `%%' is seen. Discard the `%%'. Handle |
979| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 980| groups to ATTRS_OBSTACK. |
a70083a3 981`----------------------------------------------------------------*/
1ff442ca 982
4a120d45 983static void
a70083a3 984read_declarations (void)
1ff442ca 985{
a70083a3 986 for (;;)
1ff442ca 987 {
951366c1 988 int c = skip_white_space ();
1ff442ca 989
a70083a3
AD
990 if (c == '%')
991 {
951366c1 992 token_t tok = parse_percent_token ();
1ff442ca 993
a70083a3 994 switch (tok)
943819bf 995 {
511e79b3 996 case tok_two_percents:
a70083a3 997 return;
1ff442ca 998
511e79b3 999 case tok_percent_left_curly:
a70083a3
AD
1000 copy_definition ();
1001 break;
1ff442ca 1002
511e79b3 1003 case tok_token:
d7020c20 1004 parse_token_decl (token_sym, nterm_sym);
a70083a3 1005 break;
1ff442ca 1006
511e79b3 1007 case tok_nterm:
d7020c20 1008 parse_token_decl (nterm_sym, token_sym);
a70083a3 1009 break;
1ff442ca 1010
511e79b3 1011 case tok_type:
a70083a3
AD
1012 parse_type_decl ();
1013 break;
1ff442ca 1014
511e79b3 1015 case tok_start:
a70083a3
AD
1016 parse_start_decl ();
1017 break;
118fb205 1018
511e79b3 1019 case tok_union:
a70083a3
AD
1020 parse_union_decl ();
1021 break;
1ff442ca 1022
511e79b3 1023 case tok_expect:
a70083a3
AD
1024 parse_expect_decl ();
1025 break;
6deb4447 1026
511e79b3 1027 case tok_thong:
a70083a3
AD
1028 parse_thong_decl ();
1029 break;
d7020c20 1030
511e79b3 1031 case tok_left:
d7020c20 1032 parse_assoc_decl (left_assoc);
a70083a3 1033 break;
1ff442ca 1034
511e79b3 1035 case tok_right:
d7020c20 1036 parse_assoc_decl (right_assoc);
a70083a3 1037 break;
1ff442ca 1038
511e79b3 1039 case tok_nonassoc:
d7020c20 1040 parse_assoc_decl (non_assoc);
a70083a3 1041 break;
1ff442ca 1042
b6610515 1043 case tok_define:
11d82f03 1044 parse_muscle_decl ();
b6610515 1045 break;
342b8b6e 1046
2ba3b73c
MA
1047 case tok_skel:
1048 parse_skel_decl ();
1049 break;
b6610515 1050
511e79b3 1051 case tok_noop:
a70083a3 1052 break;
1ff442ca 1053
951366c1
AD
1054 case tok_stropt:
1055 case tok_intopt:
1056 case tok_obsolete:
951366c1
AD
1057 abort ();
1058 break;
1059
e0c40012 1060 case tok_illegal:
a70083a3
AD
1061 default:
1062 complain (_("unrecognized: %s"), token_buffer);
1063 skip_to_char ('%');
1064 }
1065 }
1066 else if (c == EOF)
1067 fatal (_("no input grammar"));
1068 else
1069 {
ff4a34be
AD
1070 char buf[] = "c";
1071 buf[0] = c;
1072 complain (_("unknown character: %s"), quote (buf));
a70083a3 1073 skip_to_char ('%');
1ff442ca 1074 }
1ff442ca 1075 }
1ff442ca 1076}
a70083a3
AD
1077\f
1078/*-------------------------------------------------------------------.
1079| Assuming that a `{' has just been seen, copy everything up to the |
1080| matching `}' into the actions file. STACK_OFFSET is the number of |
1081| values in the current rule so far, which says where to find `$0' |
1082| with respect to the top of the stack. |
1083`-------------------------------------------------------------------*/
1ff442ca 1084
4a120d45 1085static void
79282c5a 1086copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1087{
a70083a3 1088 int c;
a70083a3 1089 int count;
8c7ebe49 1090 char buf[4096];
1ff442ca
NF
1091
1092 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1093 if (semantic_parser)
1094 stack_offset = 0;
1ff442ca 1095
25b222fa 1096 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
8c7ebe49 1097
89cab50d 1098 if (!no_lines_flag)
8c7ebe49 1099 {
25b222fa 1100 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
342b8b6e 1101 lineno, quotearg_style (c_quoting_style,
25b222fa 1102 muscle_find ("filename")));
8c7ebe49
AD
1103 }
1104 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1105
1106 count = 1;
a70083a3 1107 c = getc (finput);
1ff442ca
NF
1108
1109 while (count > 0)
1110 {
1111 while (c != '}')
a70083a3
AD
1112 {
1113 switch (c)
1ff442ca
NF
1114 {
1115 case '\n':
8c7ebe49 1116 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1117 lineno++;
1118 break;
1119
1120 case '{':
8c7ebe49 1121 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1122 count++;
1123 break;
1124
1125 case '\'':
1126 case '"':
337bab46 1127 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1128 break;
1129
1130 case '/':
337bab46 1131 copy_comment (finput, &action_obstack);
1ff442ca
NF
1132 break;
1133
1134 case '$':
337bab46 1135 copy_dollar (finput, &action_obstack,
8c7ebe49 1136 rule, stack_offset);
1ff442ca
NF
1137 break;
1138
1139 case '@':
337bab46 1140 copy_at (finput, &action_obstack,
8c7ebe49 1141 stack_offset);
6666f98f 1142 break;
1ff442ca
NF
1143
1144 case EOF:
27821bff 1145 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1146
1147 default:
8c7ebe49 1148 obstack_1grow (&action_obstack, c);
a70083a3
AD
1149 }
1150
1151 c = getc (finput);
1152 }
1153
1154 /* above loop exits when c is '}' */
1155
1156 if (--count)
1157 {
8c7ebe49 1158 obstack_1grow (&action_obstack, c);
a70083a3
AD
1159 c = getc (finput);
1160 }
1161 }
1162
ff4423cc 1163 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1164}
1165\f
1166/*-------------------------------------------------------------------.
1167| After `%guard' is seen in the input file, copy the actual guard |
1168| into the guards file. If the guard is followed by an action, copy |
1169| that into the actions file. STACK_OFFSET is the number of values |
1170| in the current rule so far, which says where to find `$0' with |
1171| respect to the top of the stack, for the simple parser in which |
1172| the stack is not popped until after the guard is run. |
1173`-------------------------------------------------------------------*/
1174
1175static void
79282c5a 1176copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1177{
1178 int c;
a70083a3 1179 int count;
a70083a3
AD
1180 int brace_flag = 0;
1181
1182 /* offset is always 0 if parser has already popped the stack pointer */
1183 if (semantic_parser)
1184 stack_offset = 0;
1185
ea5607fd 1186 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1187 if (!no_lines_flag)
25b222fa 1188 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1189 lineno, quotearg_style (c_quoting_style,
11d82f03 1190 muscle_find ("filename")));
ea5607fd 1191 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1192
1193 count = 0;
1194 c = getc (finput);
1195
1196 while (brace_flag ? (count > 0) : (c != ';'))
1197 {
1198 switch (c)
1199 {
1200 case '\n':
ea5607fd 1201 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1202 lineno++;
1203 break;
1204
1205 case '{':
ea5607fd 1206 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1207 brace_flag = 1;
1208 count++;
1209 break;
1210
1211 case '}':
ea5607fd 1212 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1213 if (count > 0)
1214 count--;
1215 else
1216 {
1217 complain (_("unmatched %s"), "`}'");
1218 c = getc (finput); /* skip it */
1219 }
1220 break;
1221
1222 case '\'':
1223 case '"':
337bab46 1224 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1225 break;
1226
1227 case '/':
337bab46 1228 copy_comment (finput, &guard_obstack);
a70083a3
AD
1229 break;
1230
1231 case '$':
337bab46 1232 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1233 break;
1ff442ca 1234
a70083a3 1235 case '@':
337bab46 1236 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1237 break;
1ff442ca 1238
a70083a3
AD
1239 case EOF:
1240 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1241
a70083a3 1242 default:
ea5607fd 1243 obstack_1grow (&guard_obstack, c);
1ff442ca 1244 }
a70083a3
AD
1245
1246 if (c != '}' || count != 0)
1247 c = getc (finput);
1ff442ca
NF
1248 }
1249
a70083a3
AD
1250 c = skip_white_space ();
1251
ff4423cc 1252 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1253 if (c == '{')
1254 copy_action (rule, stack_offset);
1255 else if (c == '=')
1256 {
1257 c = getc (finput); /* why not skip_white_space -wjh */
1258 if (c == '{')
1259 copy_action (rule, stack_offset);
1260 }
1261 else
1262 ungetc (c, finput);
1ff442ca 1263}
a70083a3
AD
1264\f
1265
a70083a3
AD
1266/*-------------------------------------------------------------------.
1267| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1268| with the user's names. |
1269`-------------------------------------------------------------------*/
1ff442ca 1270
4a120d45 1271static bucket *
118fb205 1272gensym (void)
1ff442ca 1273{
274d42ce
AD
1274 /* Incremented for each generated symbol */
1275 static int gensym_count = 0;
1276 static char buf[256];
1277
a70083a3 1278 bucket *sym;
1ff442ca 1279
274d42ce
AD
1280 sprintf (buf, "@%d", ++gensym_count);
1281 token_buffer = buf;
a70083a3 1282 sym = getsym (token_buffer);
d7020c20 1283 sym->class = nterm_sym;
1ff442ca 1284 sym->value = nvars++;
36281465 1285 return sym;
1ff442ca
NF
1286}
1287
a70083a3
AD
1288#if 0
1289/*------------------------------------------------------------------.
1290| read in a %type declaration and record its information for |
1291| get_type_name to access. This is unused. It is only called from |
1292| the #if 0 part of readgram |
1293`------------------------------------------------------------------*/
1294
1295static int
1296get_type (void)
1297{
1298 int k;
f17bcd1f 1299 token_t token;
a70083a3
AD
1300 char *name;
1301
f17bcd1f 1302 token = lex ();
a70083a3 1303
f17bcd1f 1304 if (token != tok_typename)
a70083a3
AD
1305 {
1306 complain (_("invalid %s declaration"), "%type");
1307 return t;
1308 }
1309
95e36146 1310 name = xstrdup (token_buffer);
a70083a3
AD
1311
1312 for (;;)
1313 {
f17bcd1f 1314 token = lex ();
a70083a3 1315
f17bcd1f 1316 switch (token)
a70083a3 1317 {
511e79b3 1318 case tok_semicolon:
a70083a3
AD
1319 return lex ();
1320
511e79b3 1321 case tok_comma:
a70083a3
AD
1322 break;
1323
511e79b3 1324 case tok_identifier:
a70083a3
AD
1325 if (symval->type_name == NULL)
1326 symval->type_name = name;
1327 else if (strcmp (name, symval->type_name) != 0)
1328 complain (_("type redeclaration for %s"), symval->tag);
1329
1330 break;
1331
1332 default:
f17bcd1f 1333 return token;
a70083a3
AD
1334 }
1335 }
1336}
1ff442ca 1337
a70083a3
AD
1338#endif
1339\f
1340/*------------------------------------------------------------------.
1341| Parse the input grammar into a one symbol_list structure. Each |
1342| rule is represented by a sequence of symbols: the left hand side |
1343| followed by the contents of the right hand side, followed by a |
1344| null pointer instead of a symbol to terminate the rule. The next |
1345| symbol is the lhs of the following rule. |
1346| |
1347| All guards and actions are copied out to the appropriate files, |
1348| labelled by the rule number they apply to. |
1349`------------------------------------------------------------------*/
1ff442ca 1350
4a120d45 1351static void
118fb205 1352readgram (void)
1ff442ca 1353{
f17bcd1f 1354 token_t t;
a70083a3
AD
1355 bucket *lhs = NULL;
1356 symbol_list *p;
1357 symbol_list *p1;
1358 bucket *bp;
1ff442ca 1359
ff4a34be
AD
1360 /* Points to first symbol_list of current rule. its symbol is the
1361 lhs of the rule. */
1362 symbol_list *crule;
1363 /* Points to the symbol_list preceding crule. */
1364 symbol_list *crule1;
1ff442ca
NF
1365
1366 p1 = NULL;
1367
a70083a3 1368 t = lex ();
1ff442ca 1369
511e79b3 1370 while (t != tok_two_percents && t != tok_eof)
1ff442ca 1371 {
511e79b3 1372 if (t == tok_identifier || t == tok_bar)
1ff442ca 1373 {
89cab50d 1374 int action_flag = 0;
ff4a34be
AD
1375 /* Number of symbols in rhs of this rule so far */
1376 int rulelength = 0;
1ff442ca
NF
1377 int xactions = 0; /* JF for error checking */
1378 bucket *first_rhs = 0;
1379
511e79b3 1380 if (t == tok_identifier)
1ff442ca
NF
1381 {
1382 lhs = symval;
943819bf
RS
1383
1384 if (!start_flag)
1385 {
1386 startval = lhs;
1387 start_flag = 1;
1388 }
a083fbbf 1389
a70083a3 1390 t = lex ();
511e79b3 1391 if (t != tok_colon)
943819bf 1392 {
a0f6b076 1393 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1394 unlex (t);
943819bf 1395 }
1ff442ca
NF
1396 }
1397
511e79b3 1398 if (nrules == 0 && t == tok_bar)
1ff442ca 1399 {
a0f6b076 1400 complain (_("grammar starts with vertical bar"));
943819bf 1401 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1402 }
1ff442ca
NF
1403 /* start a new rule and record its lhs. */
1404
1405 nrules++;
1406 nitems++;
1407
b29b2ed5 1408 p = symbol_list_new (lhs);
1ff442ca
NF
1409
1410 crule1 = p1;
1411 if (p1)
1412 p1->next = p;
1413 else
1414 grammar = p;
1415
1416 p1 = p;
1417 crule = p;
1418
1419 /* mark the rule's lhs as a nonterminal if not already so. */
1420
d7020c20 1421 if (lhs->class == unknown_sym)
1ff442ca 1422 {
d7020c20 1423 lhs->class = nterm_sym;
1ff442ca
NF
1424 lhs->value = nvars;
1425 nvars++;
1426 }
d7020c20 1427 else if (lhs->class == token_sym)
a0f6b076 1428 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1429
1430 /* read the rhs of the rule. */
1431
1432 for (;;)
1433 {
a70083a3 1434 t = lex ();
511e79b3 1435 if (t == tok_prec)
943819bf 1436 {
a70083a3 1437 t = lex ();
943819bf 1438 crule->ruleprec = symval;
a70083a3 1439 t = lex ();
943819bf 1440 }
1ff442ca 1441
511e79b3 1442 if (!(t == tok_identifier || t == tok_left_curly))
a70083a3 1443 break;
1ff442ca
NF
1444
1445 /* If next token is an identifier, see if a colon follows it.
a70083a3 1446 If one does, exit this rule now. */
511e79b3 1447 if (t == tok_identifier)
1ff442ca 1448 {
a70083a3 1449 bucket *ssave;
f17bcd1f 1450 token_t t1;
1ff442ca
NF
1451
1452 ssave = symval;
a70083a3
AD
1453 t1 = lex ();
1454 unlex (t1);
1ff442ca 1455 symval = ssave;
511e79b3 1456 if (t1 == tok_colon)
a70083a3 1457 break;
1ff442ca 1458
a70083a3 1459 if (!first_rhs) /* JF */
1ff442ca
NF
1460 first_rhs = symval;
1461 /* Not followed by colon =>
1462 process as part of this rule's rhs. */
1463 }
1464
1465 /* If we just passed an action, that action was in the middle
a70083a3
AD
1466 of a rule, so make a dummy rule to reduce it to a
1467 non-terminal. */
89cab50d 1468 if (action_flag)
1ff442ca 1469 {
f282676b
AD
1470 /* Since the action was written out with this rule's
1471 number, we must give the new rule this number by
1472 inserting the new rule before it. */
1ff442ca
NF
1473
1474 /* Make a dummy nonterminal, a gensym. */
b29b2ed5 1475 bucket *sdummy = gensym ();
1ff442ca 1476
2ca209c1
AD
1477 /* Make a new rule, whose body is empty, before the
1478 current one, so that the action just read can
1479 belong to it. */
1ff442ca
NF
1480 nrules++;
1481 nitems++;
b29b2ed5 1482 p = symbol_list_new (sdummy);
2ca209c1
AD
1483 /* Attach its lineno to that of the host rule. */
1484 p->line = crule->line;
1ff442ca
NF
1485 if (crule1)
1486 crule1->next = p;
a70083a3
AD
1487 else
1488 grammar = p;
b29b2ed5
AD
1489 /* End of the rule. */
1490 crule1 = symbol_list_new (NULL);
1ff442ca
NF
1491 crule1->next = crule;
1492
e41dc700
AD
1493 p->next = crule1;
1494
f282676b
AD
1495 /* Insert the dummy generated by that rule into this
1496 rule. */
1ff442ca 1497 nitems++;
b29b2ed5 1498 p = symbol_list_new (sdummy);
1ff442ca
NF
1499 p1->next = p;
1500 p1 = p;
1501
89cab50d 1502 action_flag = 0;
1ff442ca
NF
1503 }
1504
511e79b3 1505 if (t == tok_identifier)
1ff442ca
NF
1506 {
1507 nitems++;
b29b2ed5 1508 p = symbol_list_new (symval);
1ff442ca
NF
1509 p1->next = p;
1510 p1 = p;
1511 }
a70083a3 1512 else /* handle an action. */
1ff442ca 1513 {
a70083a3 1514 copy_action (crule, rulelength);
89cab50d 1515 action_flag = 1;
1ff442ca
NF
1516 xactions++; /* JF */
1517 }
1518 rulelength++;
a70083a3 1519 } /* end of read rhs of rule */
1ff442ca
NF
1520
1521 /* Put an empty link in the list to mark the end of this rule */
b29b2ed5 1522 p = symbol_list_new (NULL);
1ff442ca
NF
1523 p1->next = p;
1524 p1 = p;
1525
511e79b3 1526 if (t == tok_prec)
1ff442ca 1527 {
a0f6b076 1528 complain (_("two @prec's in a row"));
a70083a3 1529 t = lex ();
1ff442ca 1530 crule->ruleprec = symval;
a70083a3 1531 t = lex ();
1ff442ca 1532 }
511e79b3 1533 if (t == tok_guard)
1ff442ca 1534 {
a70083a3 1535 if (!semantic_parser)
ff4a34be 1536 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1537
a70083a3
AD
1538 copy_guard (crule, rulelength);
1539 t = lex ();
1ff442ca 1540 }
511e79b3 1541 else if (t == tok_left_curly)
1ff442ca 1542 {
a70083a3 1543 /* This case never occurs -wjh */
89cab50d 1544 if (action_flag)
a0f6b076 1545 complain (_("two actions at end of one rule"));
a70083a3 1546 copy_action (crule, rulelength);
89cab50d 1547 action_flag = 1;
943819bf 1548 xactions++; /* -wjh */
a70083a3 1549 t = lex ();
1ff442ca 1550 }
a0f6b076 1551 /* If $$ is being set in default way, report if any type
6666f98f
AD
1552 mismatch. */
1553 else if (!xactions
a70083a3 1554 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1555 {
6666f98f
AD
1556 if (lhs->type_name == 0
1557 || first_rhs->type_name == 0
a70083a3 1558 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1559 complain (_("type clash (`%s' `%s') on default action"),
1560 lhs->type_name ? lhs->type_name : "",
a70083a3 1561 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1562 }
1563 /* Warn if there is no default for $$ but we need one. */
1564 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1565 complain (_("empty rule for typed nonterminal, and no action"));
511e79b3 1566 if (t == tok_semicolon)
a70083a3 1567 t = lex ();
a083fbbf 1568 }
943819bf 1569#if 0
a70083a3 1570 /* these things can appear as alternatives to rules. */
943819bf
RS
1571/* NO, they cannot.
1572 a) none of the documentation allows them
1573 b) most of them scan forward until finding a next %
1574 thus they may swallow lots of intervening rules
1575*/
511e79b3 1576 else if (t == tok_token)
1ff442ca 1577 {
d7020c20 1578 parse_token_decl (token_sym, nterm_sym);
a70083a3 1579 t = lex ();
1ff442ca 1580 }
511e79b3 1581 else if (t == tok_nterm)
1ff442ca 1582 {
d7020c20 1583 parse_token_decl (nterm_sym, token_sym);
a70083a3 1584 t = lex ();
1ff442ca 1585 }
511e79b3 1586 else if (t == tok_type)
1ff442ca 1587 {
a70083a3 1588 t = get_type ();
1ff442ca 1589 }
511e79b3 1590 else if (t == tok_union)
1ff442ca 1591 {
a70083a3
AD
1592 parse_union_decl ();
1593 t = lex ();
1ff442ca 1594 }
511e79b3 1595 else if (t == tok_expect)
1ff442ca 1596 {
a70083a3
AD
1597 parse_expect_decl ();
1598 t = lex ();
1ff442ca 1599 }
511e79b3 1600 else if (t == tok_start)
1ff442ca 1601 {
a70083a3
AD
1602 parse_start_decl ();
1603 t = lex ();
1ff442ca 1604 }
943819bf
RS
1605#endif
1606
1ff442ca 1607 else
943819bf 1608 {
d01c415b 1609 complain (_("invalid input: %s"), quote (token_buffer));
a70083a3 1610 t = lex ();
943819bf 1611 }
1ff442ca
NF
1612 }
1613
943819bf
RS
1614 /* grammar has been read. Do some checking */
1615
1ff442ca 1616 if (nsyms > MAXSHORT)
a0f6b076
AD
1617 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1618 MAXSHORT);
1ff442ca 1619 if (nrules == 0)
a0f6b076 1620 fatal (_("no rules in the input grammar"));
1ff442ca 1621
1ff442ca
NF
1622 /* Report any undefined symbols and consider them nonterminals. */
1623
1624 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1625 if (bp->class == unknown_sym)
1ff442ca 1626 {
a70083a3
AD
1627 complain (_
1628 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1629 bp->tag);
d7020c20 1630 bp->class = nterm_sym;
1ff442ca
NF
1631 bp->value = nvars++;
1632 }
1633
1634 ntokens = nsyms - nvars;
1635}
ff48177d
MA
1636
1637/* At the end of the grammar file, some C source code must
63c2d5de 1638 be stored. It is going to be associated to the epilogue
ff48177d
MA
1639 directive. */
1640static void
1641read_additionnal_code (void)
1642{
1643 char c;
63c2d5de 1644 struct obstack el_obstack;
342b8b6e 1645
63c2d5de 1646 obstack_init (&el_obstack);
ff48177d
MA
1647
1648 while ((c = getc (finput)) != EOF)
63c2d5de 1649 obstack_1grow (&el_obstack, c);
342b8b6e 1650
63c2d5de 1651 obstack_1grow (&el_obstack, 0);
11d82f03 1652 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1653}
1654
a70083a3
AD
1655\f
1656/*--------------------------------------------------------------.
1657| For named tokens, but not literal ones, define the name. The |
1658| value is the user token number. |
1659`--------------------------------------------------------------*/
1ff442ca 1660
4a120d45 1661static void
896fe5c1 1662output_token_defines (struct obstack *oout)
1ff442ca 1663{
a70083a3
AD
1664 bucket *bp;
1665 char *cp, *symbol;
1666 char c;
1ff442ca 1667
a70083a3 1668 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1669 {
a70083a3
AD
1670 symbol = bp->tag; /* get symbol */
1671
1672 if (bp->value >= ntokens)
1673 continue;
1674 if (bp->user_token_number == SALIAS)
1675 continue;
1676 if ('\'' == *symbol)
1677 continue; /* skip literal character */
1678 if (bp == errtoken)
1679 continue; /* skip error token */
1680 if ('\"' == *symbol)
1ff442ca 1681 {
a70083a3
AD
1682 /* use literal string only if given a symbol with an alias */
1683 if (bp->alias)
1684 symbol = bp->alias->tag;
1685 else
1686 continue;
1687 }
1ff442ca 1688
a70083a3
AD
1689 /* Don't #define nonliteral tokens whose names contain periods. */
1690 cp = symbol;
1691 while ((c = *cp++) && c != '.');
1692 if (c != '\0')
1693 continue;
1ff442ca 1694
0b8afb77 1695 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1696 symbol, bp->user_token_number);
a70083a3 1697 if (semantic_parser)
342b8b6e
AD
1698 /* FIXME: This is certainly dead wrong, and should be just as
1699 above. --akim. */
0b8afb77 1700 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1701 }
1702}
1ff442ca
NF
1703
1704
037ca2f1
AD
1705/*------------------------------------------------------------------.
1706| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1707| number. |
1708`------------------------------------------------------------------*/
1709
1710static void
1711token_translations_init (void)
1712{
1713 bucket *bp = NULL;
1714 int i;
1715
1716 token_translations = XCALLOC (short, max_user_token_number + 1);
1717
1718 /* Initialize all entries for literal tokens to 2, the internal
1719 token number for $undefined., which represents all invalid
1720 inputs. */
1721 for (i = 0; i <= max_user_token_number; i++)
1722 token_translations[i] = 2;
1723
1724 for (bp = firstsymbol; bp; bp = bp->next)
1725 {
1726 /* Non-terminal? */
1727 if (bp->value >= ntokens)
1728 continue;
1729 /* A token string alias? */
1730 if (bp->user_token_number == SALIAS)
1731 continue;
6b7e85b9
AD
1732
1733 assert (bp->user_token_number != SUNDEF);
1734
037ca2f1
AD
1735 /* A token which translation has already been set? */
1736 if (token_translations[bp->user_token_number] != 2)
1737 complain (_("tokens %s and %s both assigned number %d"),
1738 tags[token_translations[bp->user_token_number]],
1739 bp->tag, bp->user_token_number);
1740 token_translations[bp->user_token_number] = bp->value;
1741 }
1742}
1743
1744
a70083a3
AD
1745/*------------------------------------------------------------------.
1746| Assign symbol numbers, and write definition of token names into |
b2ca4022 1747| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1748| of symbols. |
1749`------------------------------------------------------------------*/
1ff442ca 1750
4a120d45 1751static void
118fb205 1752packsymbols (void)
1ff442ca 1753{
342b8b6e 1754 bucket *bp = NULL;
a70083a3 1755 int tokno = 1;
a70083a3 1756 int last_user_token_number;
4a120d45 1757 static char DOLLAR[] = "$";
1ff442ca 1758
d7913476 1759 tags = XCALLOC (char *, nsyms + 1);
d7913476 1760 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1761
d7913476
AD
1762 sprec = XCALLOC (short, nsyms);
1763 sassoc = XCALLOC (short, nsyms);
1ff442ca 1764
037ca2f1
AD
1765 /* The EOF token. */
1766 tags[0] = DOLLAR;
1767 user_toknums[0] = 0;
1768
1ff442ca
NF
1769 max_user_token_number = 256;
1770 last_user_token_number = 256;
1771
1772 for (bp = firstsymbol; bp; bp = bp->next)
1773 {
d7020c20 1774 if (bp->class == nterm_sym)
1ff442ca
NF
1775 {
1776 bp->value += ntokens;
1777 }
943819bf
RS
1778 else if (bp->alias)
1779 {
0a6384c4
AD
1780 /* this symbol and its alias are a single token defn.
1781 allocate a tokno, and assign to both check agreement of
1782 ->prec and ->assoc fields and make both the same */
1783 if (bp->value == 0)
1784 bp->value = bp->alias->value = tokno++;
943819bf 1785
0a6384c4
AD
1786 if (bp->prec != bp->alias->prec)
1787 {
1788 if (bp->prec != 0 && bp->alias->prec != 0
1789 && bp->user_token_number == SALIAS)
a0f6b076
AD
1790 complain (_("conflicting precedences for %s and %s"),
1791 bp->tag, bp->alias->tag);
0a6384c4
AD
1792 if (bp->prec != 0)
1793 bp->alias->prec = bp->prec;
1794 else
1795 bp->prec = bp->alias->prec;
1796 }
943819bf 1797
0a6384c4
AD
1798 if (bp->assoc != bp->alias->assoc)
1799 {
a0f6b076
AD
1800 if (bp->assoc != 0 && bp->alias->assoc != 0
1801 && bp->user_token_number == SALIAS)
1802 complain (_("conflicting assoc values for %s and %s"),
1803 bp->tag, bp->alias->tag);
1804 if (bp->assoc != 0)
1805 bp->alias->assoc = bp->assoc;
1806 else
1807 bp->assoc = bp->alias->assoc;
1808 }
0a6384c4
AD
1809
1810 if (bp->user_token_number == SALIAS)
a70083a3 1811 continue; /* do not do processing below for SALIASs */
943819bf 1812
a70083a3 1813 }
d7020c20 1814 else /* bp->class == token_sym */
943819bf
RS
1815 {
1816 bp->value = tokno++;
1817 }
1818
d7020c20 1819 if (bp->class == token_sym)
1ff442ca 1820 {
6b7e85b9 1821 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1822 bp->user_token_number = ++last_user_token_number;
1823 if (bp->user_token_number > max_user_token_number)
1824 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1825 }
1826
1827 tags[bp->value] = bp->tag;
943819bf 1828 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1829 sprec[bp->value] = bp->prec;
1830 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1831 }
1832
037ca2f1 1833 token_translations_init ();
1ff442ca
NF
1834
1835 error_token_number = errtoken->value;
1836
e3f1699f
AD
1837 if (startval->class == unknown_sym)
1838 fatal (_("the start symbol %s is undefined"), startval->tag);
1839 else if (startval->class == token_sym)
1840 fatal (_("the start symbol %s is a token"), startval->tag);
1841
1842 start_symbol = startval->value;
1843}
1844
1845
1846/*-----------------------------------.
1847| Output definition of token names. |
1848`-----------------------------------*/
1849
1850static void
1851symbols_output (void)
1852{
342b8b6e
AD
1853 {
1854 struct obstack tokendefs;
1855 obstack_init (&tokendefs);
1856 output_token_defines (&tokendefs);
1857 obstack_1grow (&tokendefs, 0);
1858 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1859 obstack_free (&tokendefs, NULL);
1860 }
b6610515 1861
d8cb5183
MA
1862#if 0
1863 if (!no_parser_flag)
1864 output_token_defines (&table_obstack);
1865#endif
1ff442ca 1866
89cab50d 1867 if (defines_flag)
1ff442ca 1868 {
896fe5c1 1869 output_token_defines (&defines_obstack);
1ff442ca
NF
1870
1871 if (!pure_parser)
1872 {
1873 if (spec_name_prefix)
896fe5c1
AD
1874 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1875 spec_name_prefix);
1ff442ca 1876 else
ff4423cc 1877 obstack_sgrow (&defines_obstack,
573c1d9f 1878 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1879 }
1880
1881 if (semantic_parser)
037ca2f1
AD
1882 {
1883 int i;
1884
1885 for (i = ntokens; i < nsyms; i++)
1886 {
1887 /* don't make these for dummy nonterminals made by gensym. */
1888 if (*tags[i] != '@')
1889 obstack_fgrow2 (&defines_obstack,
1890 "# define\tNT%s\t%d\n", tags[i], i);
1891 }
1ff442ca 1892#if 0
037ca2f1
AD
1893 /* `fdefines' is now a temporary file, so we need to copy its
1894 contents in `done', so we can't close it here. */
1895 fclose (fdefines);
1896 fdefines = NULL;
1ff442ca 1897#endif
037ca2f1 1898 }
1ff442ca
NF
1899 }
1900}
a083fbbf 1901
1ff442ca 1902
a70083a3
AD
1903/*---------------------------------------------------------------.
1904| Convert the rules into the representation using RRHS, RLHS and |
1905| RITEMS. |
1906`---------------------------------------------------------------*/
1ff442ca 1907
4a120d45 1908static void
118fb205 1909packgram (void)
1ff442ca 1910{
a70083a3
AD
1911 int itemno;
1912 int ruleno;
1913 symbol_list *p;
1ff442ca 1914
d7913476 1915 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1916 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1917
1918 itemno = 0;
1919 ruleno = 1;
1920
1921 p = grammar;
1922 while (p)
1923 {
b29b2ed5 1924 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1925 rule_table[ruleno].lhs = p->sym->value;
1926 rule_table[ruleno].rhs = itemno;
b29b2ed5 1927 rule_table[ruleno].line = p->line;
1ff442ca
NF
1928
1929 p = p->next;
1930 while (p && p->sym)
1931 {
1932 ritem[itemno++] = p->sym->value;
1933 /* A rule gets by default the precedence and associativity
1934 of the last token in it. */
d7020c20 1935 if (p->sym->class == token_sym)
1ff442ca 1936 {
652a871c
AD
1937 rule_table[ruleno].prec = p->sym->prec;
1938 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1939 }
a70083a3
AD
1940 if (p)
1941 p = p->next;
1ff442ca
NF
1942 }
1943
1944 /* If this rule has a %prec,
a70083a3 1945 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1946 if (ruleprec)
1947 {
652a871c
AD
1948 rule_table[ruleno].prec = ruleprec->prec;
1949 rule_table[ruleno].assoc = ruleprec->assoc;
1950 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1951 }
1952
1953 ritem[itemno++] = -ruleno;
1954 ruleno++;
1955
a70083a3
AD
1956 if (p)
1957 p = p->next;
1ff442ca
NF
1958 }
1959
1960 ritem[itemno] = 0;
1961}
a70083a3
AD
1962\f
1963/*-------------------------------------------------------------------.
1964| Read in the grammar specification and record it in the format |
ea5607fd 1965| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1966| and all actions into ACTION_OBSTACK, in each case forming the body |
1967| of a C function (YYGUARD or YYACTION) which contains a switch |
1968| statement to decide which guard or action to execute. |
a70083a3
AD
1969`-------------------------------------------------------------------*/
1970
1971void
1972reader (void)
1973{
1974 start_flag = 0;
1975 startval = NULL; /* start symbol not specified yet. */
1976
a70083a3
AD
1977 nsyms = 1;
1978 nvars = 0;
1979 nrules = 0;
1980 nitems = 0;
a70083a3
AD
1981
1982 typed = 0;
1983 lastprec = 0;
1984
a70083a3
AD
1985 semantic_parser = 0;
1986 pure_parser = 0;
a70083a3
AD
1987
1988 grammar = NULL;
1989
342b8b6e 1990 lex_init ();
a70083a3
AD
1991 lineno = 1;
1992
11d82f03
MA
1993 /* Initialize the muscle obstack. */
1994 obstack_init (&muscle_obstack);
82e236e2 1995
a70083a3
AD
1996 /* Initialize the symbol table. */
1997 tabinit ();
b6610515 1998
a70083a3
AD
1999 /* Construct the error token */
2000 errtoken = getsym ("error");
d7020c20 2001 errtoken->class = token_sym;
a70083a3 2002 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 2003
a70083a3
AD
2004 /* Construct a token that represents all undefined literal tokens.
2005 It is always token number 2. */
2006 undeftoken = getsym ("$undefined.");
d7020c20 2007 undeftoken->class = token_sym;
a70083a3
AD
2008 undeftoken->user_token_number = 2;
2009
896fe5c1
AD
2010 /* Read the declaration section. Copy %{ ... %} groups to
2011 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2012 etc. found there. */
a70083a3 2013 read_declarations ();
a70083a3
AD
2014 /* Read in the grammar, build grammar in list form. Write out
2015 guards and actions. */
2016 readgram ();
ff48177d
MA
2017 /* Some C code is given at the end of the grammar file. */
2018 read_additionnal_code ();
b0c4483e 2019
a70083a3 2020 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
2021 write its type into the .tab.h file.
2022 This is no longer need with header skeleton. */
2023
a70083a3
AD
2024 /* Assign the symbols their symbol numbers. Write #defines for the
2025 token symbols into FDEFINES if requested. */
2026 packsymbols ();
e3f1699f 2027 symbols_output ();
a70083a3
AD
2028 /* Convert the grammar into the format described in gram.h. */
2029 packgram ();
edad7067
AD
2030 /* Output the headers. */
2031 symbols_output ();
a70083a3 2032}