]> git.saurik.com Git - bison.git/blame - src/reader.c
* configure.in: Invoke AC_FUNC_OBSTACK and AC_FUNC_ERROR_AT_LINE.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 38
a70083a3
AD
39typedef struct symbol_list
40{
41 struct symbol_list *next;
42 bucket *sym;
b29b2ed5 43 int line;
a70083a3
AD
44 bucket *ruleprec;
45}
46symbol_list;
118fb205 47
1ff442ca 48int lineno;
1ff442ca 49char **tags;
d019d655 50short *user_toknums;
4a120d45
JT
51static symbol_list *grammar;
52static int start_flag;
53static bucket *startval;
1ff442ca
NF
54
55/* Nonzero if components of semantic values are used, implying
56 they must be unions. */
57static int value_components_used;
58
d7020c20
AD
59/* Nonzero if %union has been seen. */
60static int typed;
1ff442ca 61
d7020c20
AD
62/* Incremented for each %left, %right or %nonassoc seen */
63static int lastprec;
1ff442ca 64
1ff442ca 65static bucket *errtoken;
5b2e3c89 66static bucket *undeftoken;
b29b2ed5
AD
67
68
6255b435 69static symbol_list *
b29b2ed5
AD
70symbol_list_new (bucket *sym)
71{
72 symbol_list *res = XMALLOC (symbol_list, 1);
73 res->next = NULL;
74 res->sym = sym;
75 res->line = lineno;
76 res->ruleprec = NULL;
77 return res;
78}
79
0d533154 80\f
a70083a3 81
0d533154
AD
82/*===================\
83| Low level lexing. |
84\===================*/
943819bf
RS
85
86static void
118fb205 87skip_to_char (int target)
943819bf
RS
88{
89 int c;
90 if (target == '\n')
a0f6b076 91 complain (_(" Skipping to next \\n"));
943819bf 92 else
a0f6b076 93 complain (_(" Skipping to next %c"), target);
943819bf
RS
94
95 do
0d533154 96 c = skip_white_space ();
943819bf 97 while (c != target && c != EOF);
a083fbbf 98 if (c != EOF)
0d533154 99 ungetc (c, finput);
943819bf
RS
100}
101
102
0d533154
AD
/*----------------------------------------------------------------.
| Read a signed integer from STREAM and return its value.         |
|                                                                 |
| An optional leading `-' is accepted, followed by any number of  |
| decimal digits (possibly none, in which case 0 is returned).    |
| The first character that is not part of the number is pushed    |
| back onto STREAM.  A magnitude too large for an `int' saturates |
| at INT_MAX instead of overflowing.                              |
`----------------------------------------------------------------*/

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  if (c == '-')
    {
      negative = 1;
      c = getc (stream);
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* `10 * n + digit' would be undefined behavior on signed
         overflow; clamp instead.  Once clamped, N stays clamped.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a no-op, so this is safe at end of file.  */
  ungetc (c, stream);

  return negative ? -n : n;
}
130\f
79282c5a
AD
131/*--------------------------------------------------------------.
132| Get the data type (alternative in the union) of the value for |
133| symbol N in rule RULE. |
134`--------------------------------------------------------------*/
135
136static char *
b29b2ed5 137get_type_name (int n, symbol_list *rule)
79282c5a
AD
138{
139 int i;
140 symbol_list *rp;
141
142 if (n < 0)
143 {
144 complain (_("invalid $ value"));
145 return NULL;
146 }
147
148 rp = rule;
149 i = 0;
150
151 while (i < n)
152 {
153 rp = rp->next;
154 if (rp == NULL || rp->sym == NULL)
155 {
156 complain (_("invalid $ value"));
157 return NULL;
158 }
159 i++;
160 }
161
162 return rp->sym->type_name;
163}
164\f
337bab46
AD
165/*------------------------------------------------------------.
166| Dump the string from FIN to OOUT if non null. MATCH is the |
167| delimiter of the string (either ' or "). |
168`------------------------------------------------------------*/
ae3c3164
AD
169
170static inline void
b6610515 171copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
172{
173 int c;
174
b6610515
RA
175 if (store)
176 obstack_1grow (oout, match);
8c7ebe49 177
4a120d45 178 c = getc (fin);
ae3c3164
AD
179
180 while (c != match)
181 {
182 if (c == EOF)
183 fatal (_("unterminated string at end of file"));
184 if (c == '\n')
185 {
a0f6b076 186 complain (_("unterminated string"));
4a120d45 187 ungetc (c, fin);
ae3c3164
AD
188 c = match; /* invent terminator */
189 continue;
190 }
191
337bab46 192 obstack_1grow (oout, c);
ae3c3164
AD
193
194 if (c == '\\')
195 {
4a120d45 196 c = getc (fin);
ae3c3164
AD
197 if (c == EOF)
198 fatal (_("unterminated string at end of file"));
337bab46 199 obstack_1grow (oout, c);
8c7ebe49 200
ae3c3164
AD
201 if (c == '\n')
202 lineno++;
203 }
204
a70083a3 205 c = getc (fin);
ae3c3164
AD
206 }
207
b6610515
RA
208 if (store)
209 obstack_1grow (oout, c);
210}
211
/* Copy a string literal delimited by MATCH from FIN to OOUT,
   delimiters included.  The opening delimiter has already been
   read.  FIXME.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int store_delimiters = 1;

  copy_string2 (fin, oout, match, store_delimiters);
}
219
b6610515
RA
/* Copy the longest run of alphanumerics and underscores found at the
   head of FIN to OOUT.  The character ending the run is pushed back.
   FIXME.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (ch == '_' || isalnum (ch))
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
ae3c3164 232
337bab46
AD
233/*-----------------------------------------------------------------.
234| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
235| NULL). In fact we just saw a `/', which might or might not be a |
236| comment. In any case, copy what we saw. |
237| |
238| OUT2 might be NULL. |
239`-----------------------------------------------------------------*/
ae3c3164
AD
240
241static inline void
337bab46 242copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
243{
244 int cplus_comment;
a70083a3 245 int ended;
550a72a3
AD
246 int c;
247
248 /* We read a `/', output it. */
337bab46 249 obstack_1grow (oout1, '/');
896fe5c1
AD
250 if (oout2)
251 obstack_1grow (oout2, '/');
550a72a3
AD
252
253 switch ((c = getc (fin)))
254 {
255 case '/':
256 cplus_comment = 1;
257 break;
258 case '*':
259 cplus_comment = 0;
260 break;
261 default:
262 ungetc (c, fin);
263 return;
264 }
ae3c3164 265
337bab46 266 obstack_1grow (oout1, c);
896fe5c1
AD
267 if (oout2)
268 obstack_1grow (oout2, c);
550a72a3 269 c = getc (fin);
ae3c3164
AD
270
271 ended = 0;
272 while (!ended)
273 {
274 if (!cplus_comment && c == '*')
275 {
276 while (c == '*')
277 {
337bab46 278 obstack_1grow (oout1, c);
896fe5c1
AD
279 if (oout2)
280 obstack_1grow (oout2, c);
550a72a3 281 c = getc (fin);
ae3c3164
AD
282 }
283
284 if (c == '/')
285 {
337bab46 286 obstack_1grow (oout1, c);
896fe5c1
AD
287 if (oout2)
288 obstack_1grow (oout2, c);
ae3c3164
AD
289 ended = 1;
290 }
291 }
292 else if (c == '\n')
293 {
294 lineno++;
337bab46 295 obstack_1grow (oout1, c);
896fe5c1
AD
296 if (oout2)
297 obstack_1grow (oout2, c);
ae3c3164
AD
298 if (cplus_comment)
299 ended = 1;
300 else
550a72a3 301 c = getc (fin);
ae3c3164
AD
302 }
303 else if (c == EOF)
304 fatal (_("unterminated comment"));
305 else
306 {
337bab46 307 obstack_1grow (oout1, c);
896fe5c1
AD
308 if (oout2)
309 obstack_1grow (oout2, c);
550a72a3 310 c = getc (fin);
ae3c3164
AD
311 }
312 }
313}
314
315
550a72a3
AD
/*-------------------------------------------------------------------.
| Copy the comment (actually, whatever starts with the `/' that was  |
| just read) from FIN to the single obstack OOUT.                    |
`-------------------------------------------------------------------*/

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *no_second_output = NULL;

  copy_comment2 (fin, oout, no_second_output);
}
326
327
a70083a3 328/*-----------------------------------------------------------------.
337bab46 329| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
330| reference to this location. STACK_OFFSET is the number of values |
331| in the current rule so far, which says where to find `$0' with |
332| respect to the top of the stack. |
333`-----------------------------------------------------------------*/
1ff442ca 334
a70083a3 335static inline void
337bab46 336copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 337{
a70083a3 338 int c;
1ff442ca 339
a70083a3
AD
340 c = getc (fin);
341 if (c == '$')
1ff442ca 342 {
ff4423cc 343 obstack_sgrow (oout, "yyloc");
89cab50d 344 locations_flag = 1;
a70083a3
AD
345 }
346 else if (isdigit (c) || c == '-')
347 {
348 int n;
1ff442ca 349
a70083a3
AD
350 ungetc (c, fin);
351 n = read_signed_integer (fin);
943819bf 352
337bab46 353 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 354 locations_flag = 1;
1ff442ca 355 }
a70083a3 356 else
ff4a34be
AD
357 {
358 char buf[] = "@c";
359 buf[1] = c;
360 complain (_("%s is invalid"), quote (buf));
361 }
1ff442ca 362}
79282c5a
AD
363
364
365/*-------------------------------------------------------------------.
366| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
367| |
368| Possible inputs: $[<TYPENAME>]($|integer) |
369| |
337bab46 370| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
371| the number of values in the current rule so far, which says where |
372| to find `$0' with respect to the top of the stack. |
373`-------------------------------------------------------------------*/
374
375static inline void
337bab46 376copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
377 symbol_list *rule, int stack_offset)
378{
379 int c = getc (fin);
b0ce6046 380 const char *type_name = NULL;
79282c5a 381
f282676b 382 /* Get the type name if explicit. */
79282c5a
AD
383 if (c == '<')
384 {
f282676b 385 read_type_name (fin);
79282c5a
AD
386 type_name = token_buffer;
387 value_components_used = 1;
79282c5a
AD
388 c = getc (fin);
389 }
390
391 if (c == '$')
392 {
ff4423cc 393 obstack_sgrow (oout, "yyval");
8c7ebe49 394
79282c5a
AD
395 if (!type_name)
396 type_name = get_type_name (0, rule);
397 if (type_name)
337bab46 398 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
399 if (!type_name && typed)
400 complain (_("$$ of `%s' has no declared type"),
401 rule->sym->tag);
402 }
403 else if (isdigit (c) || c == '-')
404 {
405 int n;
406 ungetc (c, fin);
407 n = read_signed_integer (fin);
408
409 if (!type_name && n > 0)
410 type_name = get_type_name (n, rule);
411
337bab46 412 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 413
79282c5a 414 if (type_name)
337bab46 415 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
416 if (!type_name && typed)
417 complain (_("$%d of `%s' has no declared type"),
418 n, rule->sym->tag);
419 }
420 else
421 {
422 char buf[] = "$c";
423 buf[1] = c;
424 complain (_("%s is invalid"), quote (buf));
425 }
426}
a70083a3
AD
427\f
428/*-------------------------------------------------------------------.
429| Copy the contents of a `%{ ... %}' into the definitions file. The |
430| `%{' has already been read. Return after reading the `%}'. |
431`-------------------------------------------------------------------*/
1ff442ca 432
4a120d45 433static void
118fb205 434copy_definition (void)
1ff442ca 435{
a70083a3 436 int c;
ae3c3164 437 /* -1 while reading a character if prev char was %. */
a70083a3 438 int after_percent;
1ff442ca 439
b6610515 440#if 0
89cab50d 441 if (!no_lines_flag)
25b222fa
MA
442 {
443 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 444 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
445 muscle_find("filename")));
446 }
b6610515 447#endif
1ff442ca
NF
448
449 after_percent = 0;
450
ae3c3164 451 c = getc (finput);
1ff442ca
NF
452
453 for (;;)
454 {
455 switch (c)
456 {
457 case '\n':
dd60faec 458 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
459 lineno++;
460 break;
461
462 case '%':
a70083a3 463 after_percent = -1;
1ff442ca 464 break;
a083fbbf 465
1ff442ca
NF
466 case '\'':
467 case '"':
337bab46 468 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
469 break;
470
471 case '/':
337bab46 472 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
473 break;
474
475 case EOF:
a70083a3 476 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
477
478 default:
dd60faec 479 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
480 }
481
a70083a3 482 c = getc (finput);
1ff442ca
NF
483
484 if (after_percent)
485 {
486 if (c == '}')
487 return;
dd60faec 488 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
489 }
490 after_percent = 0;
1ff442ca 491 }
1ff442ca
NF
492}
493
494
d7020c20
AD
495/*-------------------------------------------------------------------.
496| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
497| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
498| are reversed. |
499`-------------------------------------------------------------------*/
1ff442ca 500
4a120d45 501static void
d7020c20 502parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 503{
342b8b6e
AD
504 token_t token = tok_undef;
505 char *typename = NULL;
1ff442ca 506
1e9798d5
AD
507 /* The symbol being defined. */
508 struct bucket *symbol = NULL;
509
510 /* After `%token' and `%nterm', any number of symbols maybe be
511 defined. */
1ff442ca
NF
512 for (;;)
513 {
e6011337
JT
514 int tmp_char = ungetc (skip_white_space (), finput);
515
1e9798d5
AD
516 /* `%' (for instance from `%token', or from `%%' etc.) is the
517 only valid means to end this declaration. */
e6011337 518 if (tmp_char == '%')
1ff442ca 519 return;
e6011337 520 if (tmp_char == EOF)
a0f6b076 521 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 522
a70083a3 523 token = lex ();
511e79b3 524 if (token == tok_comma)
943819bf
RS
525 {
526 symbol = NULL;
527 continue;
528 }
511e79b3 529 if (token == tok_typename)
1ff442ca 530 {
95e36146 531 typename = xstrdup (token_buffer);
1ff442ca 532 value_components_used = 1;
943819bf
RS
533 symbol = NULL;
534 }
511e79b3 535 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 536 {
8e03724b
AD
537 if (symval->alias)
538 warn (_("symbol `%s' used more than once as a literal string"),
539 symval->tag);
540 else if (symbol->alias)
541 warn (_("symbol `%s' given more than one literal string"),
542 symbol->tag);
543 else
544 {
545 symval->class = token_sym;
546 symval->type_name = typename;
547 symval->user_token_number = symbol->user_token_number;
548 symbol->user_token_number = SALIAS;
549 symval->alias = symbol;
550 symbol->alias = symval;
551 /* symbol and symval combined are only one symbol */
552 nsyms--;
553 }
8e03724b 554 symbol = NULL;
1ff442ca 555 }
511e79b3 556 else if (token == tok_identifier)
1ff442ca
NF
557 {
558 int oldclass = symval->class;
943819bf 559 symbol = symval;
1ff442ca 560
943819bf 561 if (symbol->class == what_is_not)
a0f6b076 562 complain (_("symbol %s redefined"), symbol->tag);
943819bf 563 symbol->class = what_is;
d7020c20 564 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 565 symbol->value = nvars++;
1ff442ca
NF
566
567 if (typename)
568 {
943819bf
RS
569 if (symbol->type_name == NULL)
570 symbol->type_name = typename;
a70083a3 571 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 572 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
573 }
574 }
511e79b3 575 else if (symbol && token == tok_number)
a70083a3 576 {
943819bf 577 symbol->user_token_number = numval;
a70083a3 578 }
1ff442ca 579 else
943819bf 580 {
a0f6b076 581 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
582 token_buffer,
583 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 584 skip_to_char ('%');
943819bf 585 }
1ff442ca
NF
586 }
587
588}
589
1ff442ca 590
d7020c20
AD
591/*------------------------------.
592| Parse what comes after %start |
593`------------------------------*/
1ff442ca 594
4a120d45 595static void
118fb205 596parse_start_decl (void)
1ff442ca
NF
597{
598 if (start_flag)
27821bff 599 complain (_("multiple %s declarations"), "%start");
511e79b3 600 if (lex () != tok_identifier)
27821bff 601 complain (_("invalid %s declaration"), "%start");
943819bf
RS
602 else
603 {
604 start_flag = 1;
605 startval = symval;
606 }
1ff442ca
NF
607}
608
a70083a3
AD
609/*-----------------------------------------------------------.
610| read in a %type declaration and record its information for |
611| get_type_name to access |
612`-----------------------------------------------------------*/
613
614static void
615parse_type_decl (void)
616{
a70083a3
AD
617 char *name;
618
511e79b3 619 if (lex () != tok_typename)
a70083a3
AD
620 {
621 complain ("%s", _("%type declaration has no <typename>"));
622 skip_to_char ('%');
623 return;
624 }
625
95e36146 626 name = xstrdup (token_buffer);
a70083a3
AD
627
628 for (;;)
629 {
f17bcd1f 630 token_t t;
a70083a3
AD
631 int tmp_char = ungetc (skip_white_space (), finput);
632
633 if (tmp_char == '%')
634 return;
635 if (tmp_char == EOF)
636 fatal (_("Premature EOF after %s"), token_buffer);
637
638 t = lex ();
639
640 switch (t)
1ff442ca
NF
641 {
642
511e79b3
AD
643 case tok_comma:
644 case tok_semicolon:
1ff442ca
NF
645 break;
646
511e79b3 647 case tok_identifier:
1ff442ca
NF
648 if (symval->type_name == NULL)
649 symval->type_name = name;
a70083a3 650 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 651 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
652
653 break;
654
655 default:
a0f6b076
AD
656 complain (_("invalid %%type declaration due to item: %s"),
657 token_buffer);
a70083a3 658 skip_to_char ('%');
1ff442ca
NF
659 }
660 }
661}
662
663
664
d7020c20
AD
665/*----------------------------------------------------------------.
666| Read in a %left, %right or %nonassoc declaration and record its |
667| information. |
668`----------------------------------------------------------------*/
1ff442ca 669
4a120d45 670static void
d7020c20 671parse_assoc_decl (associativity assoc)
1ff442ca 672{
a70083a3
AD
673 char *name = NULL;
674 int prev = 0;
1ff442ca 675
a70083a3 676 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 677
1ff442ca
NF
678 for (;;)
679 {
f17bcd1f 680 token_t t;
e6011337 681 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 682
e6011337 683 if (tmp_char == '%')
1ff442ca 684 return;
e6011337 685 if (tmp_char == EOF)
a0f6b076 686 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 687
a70083a3 688 t = lex ();
1ff442ca
NF
689
690 switch (t)
691 {
511e79b3 692 case tok_typename:
95e36146 693 name = xstrdup (token_buffer);
1ff442ca
NF
694 break;
695
511e79b3 696 case tok_comma:
1ff442ca
NF
697 break;
698
511e79b3 699 case tok_identifier:
1ff442ca 700 if (symval->prec != 0)
a0f6b076 701 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
702 symval->prec = lastprec;
703 symval->assoc = assoc;
d7020c20 704 if (symval->class == nterm_sym)
a0f6b076 705 complain (_("symbol %s redefined"), symval->tag);
d7020c20 706 symval->class = token_sym;
1ff442ca 707 if (name)
a70083a3 708 { /* record the type, if one is specified */
1ff442ca
NF
709 if (symval->type_name == NULL)
710 symval->type_name = name;
a70083a3 711 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 712 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
713 }
714 break;
715
511e79b3
AD
716 case tok_number:
717 if (prev == tok_identifier)
a70083a3 718 {
1ff442ca 719 symval->user_token_number = numval;
a70083a3
AD
720 }
721 else
722 {
723 complain (_
724 ("invalid text (%s) - number should be after identifier"),
725token_buffer);
726 skip_to_char ('%');
727 }
1ff442ca
NF
728 break;
729
511e79b3 730 case tok_semicolon:
1ff442ca
NF
731 return;
732
733 default:
a0f6b076 734 complain (_("unexpected item: %s"), token_buffer);
a70083a3 735 skip_to_char ('%');
1ff442ca
NF
736 }
737
738 prev = t;
1ff442ca
NF
739 }
740}
741
742
743
dd60faec 744/*--------------------------------------------------------------.
180d45ba
PB
745| Copy the union declaration into the stype muscle |
746| (and fdefines), where it is made into the definition of |
747| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 748`--------------------------------------------------------------*/
1ff442ca 749
4a120d45 750static void
118fb205 751parse_union_decl (void)
1ff442ca 752{
a70083a3
AD
753 int c;
754 int count = 0;
180d45ba 755 struct obstack union_obstack;
5f7e0832
AD
756 const char *prologue = "\
757#ifndef YYSTYPE\n\
758typedef union";
759 const char *epilogue = "\
760 yystype;\n\
761# define YYSTYPE yystype\n\
762#endif\n";
1ff442ca
NF
763
764 if (typed)
27821bff 765 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
766
767 typed = 1;
768
f6ec6d13
AD
769 /* FIXME: I'm worried: are you sure attrs_obstack is properly
770 filled? */
5f7e0832
AD
771 /* I don't see any reasons to keep this line, because we should
772 create a special skeleton for this option. */
180d45ba 773 if (no_lines_flag)
dd60faec 774 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 775
180d45ba
PB
776 obstack_init (&union_obstack);
777 obstack_sgrow (&union_obstack, "union");
896fe5c1 778 if (defines_flag)
5f7e0832 779 obstack_sgrow (&defines_obstack, prologue);
1ff442ca 780
27821bff 781 c = getc (finput);
1ff442ca
NF
782
783 while (c != EOF)
784 {
342b8b6e
AD
785 /* If C contains '/', it is output by copy_comment (). */
786 if (c != '/')
787 {
f6ec6d13 788 obstack_1grow (&union_obstack, c);
342b8b6e
AD
789 if (defines_flag)
790 obstack_1grow (&defines_obstack, c);
791 }
1ff442ca
NF
792
793 switch (c)
794 {
795 case '\n':
796 lineno++;
797 break;
798
799 case '/':
180d45ba 800 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
801 break;
802
1ff442ca
NF
803 case '{':
804 count++;
805 break;
806
807 case '}':
808 if (count == 0)
27821bff 809 complain (_("unmatched %s"), "`}'");
1ff442ca 810 count--;
943819bf 811 if (count <= 0)
1ff442ca 812 {
896fe5c1 813 if (defines_flag)
5f7e0832 814 obstack_sgrow (&defines_obstack, epilogue);
1ff442ca 815 /* JF don't choke on trailing semi */
27821bff
AD
816 c = skip_white_space ();
817 if (c != ';')
a70083a3 818 ungetc (c, finput);
180d45ba
PB
819 obstack_1grow (&union_obstack, 0);
820 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
821 return;
822 }
823 }
824
27821bff 825 c = getc (finput);
1ff442ca 826 }
180d45ba 827
1ff442ca
NF
828}
829
d7020c20
AD
830
831/*-------------------------------------------------------.
832| Parse the declaration %expect N which says to expect N |
833| shift-reduce conflicts. |
834`-------------------------------------------------------*/
1ff442ca 835
4a120d45 836static void
118fb205 837parse_expect_decl (void)
1ff442ca 838{
131e2fef 839 int c = skip_white_space ();
1ff442ca
NF
840 ungetc (c, finput);
841
131e2fef 842 if (!isdigit (c))
79282c5a 843 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
844 else
845 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
846}
847
a70083a3
AD
848
849/*-------------------------------------------------------------------.
850| Parse what comes after %thong. the full syntax is |
851| |
852| %thong <type> token number literal |
853| |
854| the <type> or number may be omitted. The number specifies the |
855| user_token_number. |
856| |
857| Two symbols are entered in the table, one for the token symbol and |
858| one for the literal. Both are given the <type>, if any, from the |
859| declaration. The ->user_token_number of the first is SALIAS and |
860| the ->user_token_number of the second is set to the number, if |
861| any, from the declaration. The two symbols are linked via |
862| pointers in their ->alias fields. |
863| |
864| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
865| only the literal string is retained it is the literal string that |
866| is output to yytname |
867`-------------------------------------------------------------------*/
868
869static void
870parse_thong_decl (void)
7b306f52 871{
f17bcd1f 872 token_t token;
a70083a3
AD
873 struct bucket *symbol;
874 char *typename = 0;
6b7e85b9 875 int usrtoknum = SUNDEF;
7b306f52 876
a70083a3 877 token = lex (); /* fetch typename or first token */
511e79b3 878 if (token == tok_typename)
7b306f52 879 {
95e36146 880 typename = xstrdup (token_buffer);
a70083a3
AD
881 value_components_used = 1;
882 token = lex (); /* fetch first token */
7b306f52 883 }
7b306f52 884
a70083a3 885 /* process first token */
7b306f52 886
511e79b3 887 if (token != tok_identifier)
a70083a3
AD
888 {
889 complain (_("unrecognized item %s, expected an identifier"),
890 token_buffer);
891 skip_to_char ('%');
892 return;
7b306f52 893 }
d7020c20 894 symval->class = token_sym;
a70083a3
AD
895 symval->type_name = typename;
896 symval->user_token_number = SALIAS;
897 symbol = symval;
7b306f52 898
a70083a3 899 token = lex (); /* get number or literal string */
1ff442ca 900
511e79b3 901 if (token == tok_number)
943819bf 902 {
a70083a3
AD
903 usrtoknum = numval;
904 token = lex (); /* okay, did number, now get literal */
943819bf 905 }
1ff442ca 906
a70083a3 907 /* process literal string token */
1ff442ca 908
511e79b3 909 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 910 {
a70083a3
AD
911 complain (_("expected string constant instead of %s"), token_buffer);
912 skip_to_char ('%');
913 return;
1ff442ca 914 }
d7020c20 915 symval->class = token_sym;
a70083a3
AD
916 symval->type_name = typename;
917 symval->user_token_number = usrtoknum;
1ff442ca 918
a70083a3
AD
919 symval->alias = symbol;
920 symbol->alias = symval;
1ff442ca 921
79282c5a
AD
922 /* symbol and symval combined are only one symbol. */
923 nsyms--;
a70083a3 924}
3cef001a 925
b6610515 926static void
11d82f03 927parse_muscle_decl (void)
b6610515
RA
928{
929 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
930 char* muscle_key;
931 char* muscle_value;
b6610515
RA
932
933 /* Read key. */
934 if (!isalpha (ch) && ch != '_')
935 {
936 complain (_("invalid %s declaration"), "%define");
937 skip_to_char ('%');
938 return;
939 }
11d82f03
MA
940 copy_identifier (finput, &muscle_obstack);
941 obstack_1grow (&muscle_obstack, 0);
942 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 943
b6610515
RA
944 /* Read value. */
945 ch = skip_white_space ();
946 if (ch != '"')
947 {
948 ungetc (ch, finput);
949 if (ch != EOF)
950 {
951 complain (_("invalid %s declaration"), "%define");
952 skip_to_char ('%');
953 return;
954 }
955 else
956 fatal (_("Premature EOF after %s"), "\"");
957 }
11d82f03
MA
958 copy_string2 (finput, &muscle_obstack, '"', 0);
959 obstack_1grow (&muscle_obstack, 0);
960 muscle_value = obstack_finish (&muscle_obstack);
b6610515 961
b6610515 962 /* Store the (key, value) pair in the environment. */
11d82f03 963 muscle_insert (muscle_key, muscle_value);
b6610515
RA
964}
965
2ba3b73c
MA
966
/*-------------------------------------------------------------.
| Parse what comes after %skeleton.  Currently a stub: nothing |
| is read from the input.                                      |
`-------------------------------------------------------------*/

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
  return;
}
976
a70083a3
AD
977/*----------------------------------------------------------------.
978| Read from finput until `%%' is seen. Discard the `%%'. Handle |
979| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 980| groups to ATTRS_OBSTACK. |
a70083a3 981`----------------------------------------------------------------*/
1ff442ca 982
4a120d45 983static void
a70083a3 984read_declarations (void)
1ff442ca 985{
a70083a3 986 for (;;)
1ff442ca 987 {
951366c1 988 int c = skip_white_space ();
1ff442ca 989
a70083a3
AD
990 if (c == '%')
991 {
951366c1 992 token_t tok = parse_percent_token ();
1ff442ca 993
a70083a3 994 switch (tok)
943819bf 995 {
511e79b3 996 case tok_two_percents:
a70083a3 997 return;
1ff442ca 998
511e79b3 999 case tok_percent_left_curly:
a70083a3
AD
1000 copy_definition ();
1001 break;
1ff442ca 1002
511e79b3 1003 case tok_token:
d7020c20 1004 parse_token_decl (token_sym, nterm_sym);
a70083a3 1005 break;
1ff442ca 1006
511e79b3 1007 case tok_nterm:
d7020c20 1008 parse_token_decl (nterm_sym, token_sym);
a70083a3 1009 break;
1ff442ca 1010
511e79b3 1011 case tok_type:
a70083a3
AD
1012 parse_type_decl ();
1013 break;
1ff442ca 1014
511e79b3 1015 case tok_start:
a70083a3
AD
1016 parse_start_decl ();
1017 break;
118fb205 1018
511e79b3 1019 case tok_union:
a70083a3
AD
1020 parse_union_decl ();
1021 break;
1ff442ca 1022
511e79b3 1023 case tok_expect:
a70083a3
AD
1024 parse_expect_decl ();
1025 break;
6deb4447 1026
511e79b3 1027 case tok_thong:
a70083a3
AD
1028 parse_thong_decl ();
1029 break;
d7020c20 1030
511e79b3 1031 case tok_left:
d7020c20 1032 parse_assoc_decl (left_assoc);
a70083a3 1033 break;
1ff442ca 1034
511e79b3 1035 case tok_right:
d7020c20 1036 parse_assoc_decl (right_assoc);
a70083a3 1037 break;
1ff442ca 1038
511e79b3 1039 case tok_nonassoc:
d7020c20 1040 parse_assoc_decl (non_assoc);
a70083a3 1041 break;
1ff442ca 1042
b6610515 1043 case tok_define:
11d82f03 1044 parse_muscle_decl ();
b6610515 1045 break;
342b8b6e 1046
2ba3b73c
MA
1047 case tok_skel:
1048 parse_skel_decl ();
1049 break;
b6610515 1050
511e79b3 1051 case tok_noop:
a70083a3 1052 break;
1ff442ca 1053
951366c1
AD
1054 case tok_stropt:
1055 case tok_intopt:
1056 case tok_obsolete:
1057 case tok_illegal:
1058 abort ();
1059 break;
1060
a70083a3
AD
1061 default:
1062 complain (_("unrecognized: %s"), token_buffer);
1063 skip_to_char ('%');
1064 }
1065 }
1066 else if (c == EOF)
1067 fatal (_("no input grammar"));
1068 else
1069 {
ff4a34be
AD
1070 char buf[] = "c";
1071 buf[0] = c;
1072 complain (_("unknown character: %s"), quote (buf));
a70083a3 1073 skip_to_char ('%');
1ff442ca 1074 }
1ff442ca 1075 }
1ff442ca 1076}
a70083a3
AD
1077\f
1078/*-------------------------------------------------------------------.
1079| Assuming that a `{' has just been seen, copy everything up to the |
1080| matching `}' into the actions file. STACK_OFFSET is the number of |
1081| values in the current rule so far, which says where to find `$0' |
1082| with respect to the top of the stack. |
1083`-------------------------------------------------------------------*/
1ff442ca 1084
4a120d45 1085static void
79282c5a 1086copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1087{
a70083a3 1088 int c;
a70083a3 1089 int count;
8c7ebe49 1090 char buf[4096];
1ff442ca
NF
1091
1092 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1093 if (semantic_parser)
1094 stack_offset = 0;
1ff442ca 1095
25b222fa 1096 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
8c7ebe49 1097
89cab50d 1098 if (!no_lines_flag)
8c7ebe49 1099 {
25b222fa 1100 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
342b8b6e 1101 lineno, quotearg_style (c_quoting_style,
25b222fa 1102 muscle_find ("filename")));
8c7ebe49
AD
1103 }
1104 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1105
1106 count = 1;
a70083a3 1107 c = getc (finput);
1ff442ca
NF
1108
1109 while (count > 0)
1110 {
1111 while (c != '}')
a70083a3
AD
1112 {
1113 switch (c)
1ff442ca
NF
1114 {
1115 case '\n':
8c7ebe49 1116 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1117 lineno++;
1118 break;
1119
1120 case '{':
8c7ebe49 1121 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1122 count++;
1123 break;
1124
1125 case '\'':
1126 case '"':
337bab46 1127 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1128 break;
1129
1130 case '/':
337bab46 1131 copy_comment (finput, &action_obstack);
1ff442ca
NF
1132 break;
1133
1134 case '$':
337bab46 1135 copy_dollar (finput, &action_obstack,
8c7ebe49 1136 rule, stack_offset);
1ff442ca
NF
1137 break;
1138
1139 case '@':
337bab46 1140 copy_at (finput, &action_obstack,
8c7ebe49 1141 stack_offset);
6666f98f 1142 break;
1ff442ca
NF
1143
1144 case EOF:
27821bff 1145 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1146
1147 default:
8c7ebe49 1148 obstack_1grow (&action_obstack, c);
a70083a3
AD
1149 }
1150
1151 c = getc (finput);
1152 }
1153
1154 /* above loop exits when c is '}' */
1155
1156 if (--count)
1157 {
8c7ebe49 1158 obstack_1grow (&action_obstack, c);
a70083a3
AD
1159 c = getc (finput);
1160 }
1161 }
1162
ff4423cc 1163 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1164}
1165\f
1166/*-------------------------------------------------------------------.
1167| After `%guard' is seen in the input file, copy the actual guard |
1168| into the guards file. If the guard is followed by an action, copy |
1169| that into the actions file. STACK_OFFSET is the number of values |
1170| in the current rule so far, which says where to find `$0' with |
1171| respect to the top of the stack, for the simple parser in which |
1172| the stack is not popped until after the guard is run. |
1173`-------------------------------------------------------------------*/
1174
1175static void
79282c5a 1176copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1177{
1178 int c;
a70083a3 1179 int count;
a70083a3
AD
1180 int brace_flag = 0;
1181
1182 /* offset is always 0 if parser has already popped the stack pointer */
1183 if (semantic_parser)
1184 stack_offset = 0;
1185
ea5607fd 1186 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1187 if (!no_lines_flag)
25b222fa 1188 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1189 lineno, quotearg_style (c_quoting_style,
11d82f03 1190 muscle_find ("filename")));
ea5607fd 1191 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1192
1193 count = 0;
1194 c = getc (finput);
1195
1196 while (brace_flag ? (count > 0) : (c != ';'))
1197 {
1198 switch (c)
1199 {
1200 case '\n':
ea5607fd 1201 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1202 lineno++;
1203 break;
1204
1205 case '{':
ea5607fd 1206 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1207 brace_flag = 1;
1208 count++;
1209 break;
1210
1211 case '}':
ea5607fd 1212 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1213 if (count > 0)
1214 count--;
1215 else
1216 {
1217 complain (_("unmatched %s"), "`}'");
1218 c = getc (finput); /* skip it */
1219 }
1220 break;
1221
1222 case '\'':
1223 case '"':
337bab46 1224 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1225 break;
1226
1227 case '/':
337bab46 1228 copy_comment (finput, &guard_obstack);
a70083a3
AD
1229 break;
1230
1231 case '$':
337bab46 1232 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1233 break;
1ff442ca 1234
a70083a3 1235 case '@':
337bab46 1236 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1237 break;
1ff442ca 1238
a70083a3
AD
1239 case EOF:
1240 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1241
a70083a3 1242 default:
ea5607fd 1243 obstack_1grow (&guard_obstack, c);
1ff442ca 1244 }
a70083a3
AD
1245
1246 if (c != '}' || count != 0)
1247 c = getc (finput);
1ff442ca
NF
1248 }
1249
a70083a3
AD
1250 c = skip_white_space ();
1251
ff4423cc 1252 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1253 if (c == '{')
1254 copy_action (rule, stack_offset);
1255 else if (c == '=')
1256 {
1257 c = getc (finput); /* why not skip_white_space -wjh */
1258 if (c == '{')
1259 copy_action (rule, stack_offset);
1260 }
1261 else
1262 ungetc (c, finput);
1ff442ca 1263}
a70083a3
AD
1264\f
1265
a70083a3
AD
1266/*-------------------------------------------------------------------.
1267| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1268| with the user's names. |
1269`-------------------------------------------------------------------*/
1ff442ca 1270
4a120d45 1271static bucket *
118fb205 1272gensym (void)
1ff442ca 1273{
274d42ce
AD
1274 /* Incremented for each generated symbol */
1275 static int gensym_count = 0;
1276 static char buf[256];
1277
a70083a3 1278 bucket *sym;
1ff442ca 1279
274d42ce
AD
1280 sprintf (buf, "@%d", ++gensym_count);
1281 token_buffer = buf;
a70083a3 1282 sym = getsym (token_buffer);
d7020c20 1283 sym->class = nterm_sym;
1ff442ca 1284 sym->value = nvars++;
36281465 1285 return sym;
1ff442ca
NF
1286}
1287
a70083a3
AD
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  This is unused.  It is only called from
   the #if 0 part of readgram.

   The type name is attached to every identifier that follows, up to
   a `;' or to any token that is neither `,' nor an identifier.
   Return the token following the declaration: the one after `;', or
   the unexpected token itself.  */

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the offending token back to the caller.  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1339\f
1340/*------------------------------------------------------------------.
1341| Parse the input grammar into a one symbol_list structure. Each |
1342| rule is represented by a sequence of symbols: the left hand side |
1343| followed by the contents of the right hand side, followed by a |
1344| null pointer instead of a symbol to terminate the rule. The next |
1345| symbol is the lhs of the following rule. |
1346| |
1347| All guards and actions are copied out to the appropriate files, |
1348| labelled by the rule number they apply to. |
1349`------------------------------------------------------------------*/
1ff442ca 1350
4a120d45 1351static void
118fb205 1352readgram (void)
1ff442ca 1353{
f17bcd1f 1354 token_t t;
a70083a3
AD
1355 bucket *lhs = NULL;
1356 symbol_list *p;
1357 symbol_list *p1;
1358 bucket *bp;
1ff442ca 1359
ff4a34be
AD
1360 /* Points to first symbol_list of current rule. its symbol is the
1361 lhs of the rule. */
1362 symbol_list *crule;
1363 /* Points to the symbol_list preceding crule. */
1364 symbol_list *crule1;
1ff442ca
NF
1365
1366 p1 = NULL;
1367
a70083a3 1368 t = lex ();
1ff442ca 1369
511e79b3 1370 while (t != tok_two_percents && t != tok_eof)
1ff442ca 1371 {
511e79b3 1372 if (t == tok_identifier || t == tok_bar)
1ff442ca 1373 {
89cab50d 1374 int action_flag = 0;
ff4a34be
AD
1375 /* Number of symbols in rhs of this rule so far */
1376 int rulelength = 0;
1ff442ca
NF
1377 int xactions = 0; /* JF for error checking */
1378 bucket *first_rhs = 0;
1379
511e79b3 1380 if (t == tok_identifier)
1ff442ca
NF
1381 {
1382 lhs = symval;
943819bf
RS
1383
1384 if (!start_flag)
1385 {
1386 startval = lhs;
1387 start_flag = 1;
1388 }
a083fbbf 1389
a70083a3 1390 t = lex ();
511e79b3 1391 if (t != tok_colon)
943819bf 1392 {
a0f6b076 1393 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1394 unlex (t);
943819bf 1395 }
1ff442ca
NF
1396 }
1397
511e79b3 1398 if (nrules == 0 && t == tok_bar)
1ff442ca 1399 {
a0f6b076 1400 complain (_("grammar starts with vertical bar"));
943819bf 1401 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1402 }
1ff442ca
NF
1403 /* start a new rule and record its lhs. */
1404
1405 nrules++;
1406 nitems++;
1407
b29b2ed5 1408 p = symbol_list_new (lhs);
1ff442ca
NF
1409
1410 crule1 = p1;
1411 if (p1)
1412 p1->next = p;
1413 else
1414 grammar = p;
1415
1416 p1 = p;
1417 crule = p;
1418
1419 /* mark the rule's lhs as a nonterminal if not already so. */
1420
d7020c20 1421 if (lhs->class == unknown_sym)
1ff442ca 1422 {
d7020c20 1423 lhs->class = nterm_sym;
1ff442ca
NF
1424 lhs->value = nvars;
1425 nvars++;
1426 }
d7020c20 1427 else if (lhs->class == token_sym)
a0f6b076 1428 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1429
1430 /* read the rhs of the rule. */
1431
1432 for (;;)
1433 {
a70083a3 1434 t = lex ();
511e79b3 1435 if (t == tok_prec)
943819bf 1436 {
a70083a3 1437 t = lex ();
943819bf 1438 crule->ruleprec = symval;
a70083a3 1439 t = lex ();
943819bf 1440 }
1ff442ca 1441
511e79b3 1442 if (!(t == tok_identifier || t == tok_left_curly))
a70083a3 1443 break;
1ff442ca
NF
1444
1445 /* If next token is an identifier, see if a colon follows it.
a70083a3 1446 If one does, exit this rule now. */
511e79b3 1447 if (t == tok_identifier)
1ff442ca 1448 {
a70083a3 1449 bucket *ssave;
f17bcd1f 1450 token_t t1;
1ff442ca
NF
1451
1452 ssave = symval;
a70083a3
AD
1453 t1 = lex ();
1454 unlex (t1);
1ff442ca 1455 symval = ssave;
511e79b3 1456 if (t1 == tok_colon)
a70083a3 1457 break;
1ff442ca 1458
a70083a3 1459 if (!first_rhs) /* JF */
1ff442ca
NF
1460 first_rhs = symval;
1461 /* Not followed by colon =>
1462 process as part of this rule's rhs. */
1463 }
1464
1465 /* If we just passed an action, that action was in the middle
a70083a3
AD
1466 of a rule, so make a dummy rule to reduce it to a
1467 non-terminal. */
89cab50d 1468 if (action_flag)
1ff442ca 1469 {
f282676b
AD
1470 /* Since the action was written out with this rule's
1471 number, we must give the new rule this number by
1472 inserting the new rule before it. */
1ff442ca
NF
1473
1474 /* Make a dummy nonterminal, a gensym. */
b29b2ed5 1475 bucket *sdummy = gensym ();
1ff442ca
NF
1476
1477 /* Make a new rule, whose body is empty,
1478 before the current one, so that the action
1479 just read can belong to it. */
1480 nrules++;
1481 nitems++;
b29b2ed5 1482 p = symbol_list_new (sdummy);
1ff442ca
NF
1483 if (crule1)
1484 crule1->next = p;
a70083a3
AD
1485 else
1486 grammar = p;
b29b2ed5
AD
1487 /* End of the rule. */
1488 crule1 = symbol_list_new (NULL);
1ff442ca
NF
1489 crule1->next = crule;
1490
e41dc700
AD
1491 p->next = crule1;
1492
f282676b
AD
1493 /* Insert the dummy generated by that rule into this
1494 rule. */
1ff442ca 1495 nitems++;
b29b2ed5 1496 p = symbol_list_new (sdummy);
1ff442ca
NF
1497 p1->next = p;
1498 p1 = p;
1499
89cab50d 1500 action_flag = 0;
1ff442ca
NF
1501 }
1502
511e79b3 1503 if (t == tok_identifier)
1ff442ca
NF
1504 {
1505 nitems++;
b29b2ed5 1506 p = symbol_list_new (symval);
1ff442ca
NF
1507 p1->next = p;
1508 p1 = p;
1509 }
a70083a3 1510 else /* handle an action. */
1ff442ca 1511 {
a70083a3 1512 copy_action (crule, rulelength);
89cab50d 1513 action_flag = 1;
1ff442ca
NF
1514 xactions++; /* JF */
1515 }
1516 rulelength++;
a70083a3 1517 } /* end of read rhs of rule */
1ff442ca
NF
1518
1519 /* Put an empty link in the list to mark the end of this rule */
b29b2ed5 1520 p = symbol_list_new (NULL);
1ff442ca
NF
1521 p1->next = p;
1522 p1 = p;
1523
511e79b3 1524 if (t == tok_prec)
1ff442ca 1525 {
a0f6b076 1526 complain (_("two @prec's in a row"));
a70083a3 1527 t = lex ();
1ff442ca 1528 crule->ruleprec = symval;
a70083a3 1529 t = lex ();
1ff442ca 1530 }
511e79b3 1531 if (t == tok_guard)
1ff442ca 1532 {
a70083a3 1533 if (!semantic_parser)
ff4a34be 1534 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1535
a70083a3
AD
1536 copy_guard (crule, rulelength);
1537 t = lex ();
1ff442ca 1538 }
511e79b3 1539 else if (t == tok_left_curly)
1ff442ca 1540 {
a70083a3 1541 /* This case never occurs -wjh */
89cab50d 1542 if (action_flag)
a0f6b076 1543 complain (_("two actions at end of one rule"));
a70083a3 1544 copy_action (crule, rulelength);
89cab50d 1545 action_flag = 1;
943819bf 1546 xactions++; /* -wjh */
a70083a3 1547 t = lex ();
1ff442ca 1548 }
a0f6b076 1549 /* If $$ is being set in default way, report if any type
6666f98f
AD
1550 mismatch. */
1551 else if (!xactions
a70083a3 1552 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1553 {
6666f98f
AD
1554 if (lhs->type_name == 0
1555 || first_rhs->type_name == 0
a70083a3 1556 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1557 complain (_("type clash (`%s' `%s') on default action"),
1558 lhs->type_name ? lhs->type_name : "",
a70083a3 1559 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1560 }
1561 /* Warn if there is no default for $$ but we need one. */
1562 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1563 complain (_("empty rule for typed nonterminal, and no action"));
511e79b3 1564 if (t == tok_semicolon)
a70083a3 1565 t = lex ();
a083fbbf 1566 }
943819bf 1567#if 0
a70083a3 1568 /* these things can appear as alternatives to rules. */
943819bf
RS
1569/* NO, they cannot.
1570 a) none of the documentation allows them
1571 b) most of them scan forward until finding a next %
1572 thus they may swallow lots of intervening rules
1573*/
511e79b3 1574 else if (t == tok_token)
1ff442ca 1575 {
d7020c20 1576 parse_token_decl (token_sym, nterm_sym);
a70083a3 1577 t = lex ();
1ff442ca 1578 }
511e79b3 1579 else if (t == tok_nterm)
1ff442ca 1580 {
d7020c20 1581 parse_token_decl (nterm_sym, token_sym);
a70083a3 1582 t = lex ();
1ff442ca 1583 }
511e79b3 1584 else if (t == tok_type)
1ff442ca 1585 {
a70083a3 1586 t = get_type ();
1ff442ca 1587 }
511e79b3 1588 else if (t == tok_union)
1ff442ca 1589 {
a70083a3
AD
1590 parse_union_decl ();
1591 t = lex ();
1ff442ca 1592 }
511e79b3 1593 else if (t == tok_expect)
1ff442ca 1594 {
a70083a3
AD
1595 parse_expect_decl ();
1596 t = lex ();
1ff442ca 1597 }
511e79b3 1598 else if (t == tok_start)
1ff442ca 1599 {
a70083a3
AD
1600 parse_start_decl ();
1601 t = lex ();
1ff442ca 1602 }
943819bf
RS
1603#endif
1604
1ff442ca 1605 else
943819bf 1606 {
d01c415b 1607 complain (_("invalid input: %s"), quote (token_buffer));
a70083a3 1608 t = lex ();
943819bf 1609 }
1ff442ca
NF
1610 }
1611
943819bf
RS
1612 /* grammar has been read. Do some checking */
1613
1ff442ca 1614 if (nsyms > MAXSHORT)
a0f6b076
AD
1615 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1616 MAXSHORT);
1ff442ca 1617 if (nrules == 0)
a0f6b076 1618 fatal (_("no rules in the input grammar"));
1ff442ca 1619
1ff442ca
NF
1620 /* Report any undefined symbols and consider them nonterminals. */
1621
1622 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1623 if (bp->class == unknown_sym)
1ff442ca 1624 {
a70083a3
AD
1625 complain (_
1626 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1627 bp->tag);
d7020c20 1628 bp->class = nterm_sym;
1ff442ca
NF
1629 bp->value = nvars++;
1630 }
1631
1632 ntokens = nsyms - nvars;
1633}
ff48177d
MA
1634
1635/* At the end of the grammar file, some C source code must
63c2d5de 1636 be stored. It is going to be associated to the epilogue
ff48177d
MA
1637 directive. */
1638static void
1639read_additionnal_code (void)
1640{
1641 char c;
63c2d5de 1642 struct obstack el_obstack;
342b8b6e 1643
63c2d5de 1644 obstack_init (&el_obstack);
ff48177d
MA
1645
1646 while ((c = getc (finput)) != EOF)
63c2d5de 1647 obstack_1grow (&el_obstack, c);
342b8b6e 1648
63c2d5de 1649 obstack_1grow (&el_obstack, 0);
11d82f03 1650 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1651}
1652
a70083a3
AD
1653\f
1654/*--------------------------------------------------------------.
1655| For named tokens, but not literal ones, define the name. The |
1656| value is the user token number. |
1657`--------------------------------------------------------------*/
1ff442ca 1658
4a120d45 1659static void
896fe5c1 1660output_token_defines (struct obstack *oout)
1ff442ca 1661{
a70083a3
AD
1662 bucket *bp;
1663 char *cp, *symbol;
1664 char c;
1ff442ca 1665
a70083a3 1666 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1667 {
a70083a3
AD
1668 symbol = bp->tag; /* get symbol */
1669
1670 if (bp->value >= ntokens)
1671 continue;
1672 if (bp->user_token_number == SALIAS)
1673 continue;
1674 if ('\'' == *symbol)
1675 continue; /* skip literal character */
1676 if (bp == errtoken)
1677 continue; /* skip error token */
1678 if ('\"' == *symbol)
1ff442ca 1679 {
a70083a3
AD
1680 /* use literal string only if given a symbol with an alias */
1681 if (bp->alias)
1682 symbol = bp->alias->tag;
1683 else
1684 continue;
1685 }
1ff442ca 1686
a70083a3
AD
1687 /* Don't #define nonliteral tokens whose names contain periods. */
1688 cp = symbol;
1689 while ((c = *cp++) && c != '.');
1690 if (c != '\0')
1691 continue;
1ff442ca 1692
0b8afb77 1693 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1694 symbol, bp->user_token_number);
a70083a3 1695 if (semantic_parser)
342b8b6e
AD
1696 /* FIXME: This is certainly dead wrong, and should be just as
1697 above. --akim. */
0b8afb77 1698 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1699 }
1700}
1ff442ca
NF
1701
1702
037ca2f1
AD
1703/*------------------------------------------------------------------.
1704| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1705| number. |
1706`------------------------------------------------------------------*/
1707
1708static void
1709token_translations_init (void)
1710{
1711 bucket *bp = NULL;
1712 int i;
1713
1714 token_translations = XCALLOC (short, max_user_token_number + 1);
1715
1716 /* Initialize all entries for literal tokens to 2, the internal
1717 token number for $undefined., which represents all invalid
1718 inputs. */
1719 for (i = 0; i <= max_user_token_number; i++)
1720 token_translations[i] = 2;
1721
1722 for (bp = firstsymbol; bp; bp = bp->next)
1723 {
1724 /* Non-terminal? */
1725 if (bp->value >= ntokens)
1726 continue;
1727 /* A token string alias? */
1728 if (bp->user_token_number == SALIAS)
1729 continue;
6b7e85b9
AD
1730
1731 assert (bp->user_token_number != SUNDEF);
1732
037ca2f1
AD
1733 /* A token which translation has already been set? */
1734 if (token_translations[bp->user_token_number] != 2)
1735 complain (_("tokens %s and %s both assigned number %d"),
1736 tags[token_translations[bp->user_token_number]],
1737 bp->tag, bp->user_token_number);
1738 token_translations[bp->user_token_number] = bp->value;
1739 }
1740}
1741
1742
a70083a3
AD
1743/*------------------------------------------------------------------.
1744| Assign symbol numbers, and write definition of token names into |
b2ca4022 1745| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1746| of symbols. |
1747`------------------------------------------------------------------*/
1ff442ca 1748
4a120d45 1749static void
118fb205 1750packsymbols (void)
1ff442ca 1751{
342b8b6e 1752 bucket *bp = NULL;
a70083a3 1753 int tokno = 1;
a70083a3 1754 int last_user_token_number;
4a120d45 1755 static char DOLLAR[] = "$";
1ff442ca 1756
d7913476 1757 tags = XCALLOC (char *, nsyms + 1);
d7913476 1758 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1759
d7913476
AD
1760 sprec = XCALLOC (short, nsyms);
1761 sassoc = XCALLOC (short, nsyms);
1ff442ca 1762
037ca2f1
AD
1763 /* The EOF token. */
1764 tags[0] = DOLLAR;
1765 user_toknums[0] = 0;
1766
1ff442ca
NF
1767 max_user_token_number = 256;
1768 last_user_token_number = 256;
1769
1770 for (bp = firstsymbol; bp; bp = bp->next)
1771 {
d7020c20 1772 if (bp->class == nterm_sym)
1ff442ca
NF
1773 {
1774 bp->value += ntokens;
1775 }
943819bf
RS
1776 else if (bp->alias)
1777 {
0a6384c4
AD
1778 /* this symbol and its alias are a single token defn.
1779 allocate a tokno, and assign to both check agreement of
1780 ->prec and ->assoc fields and make both the same */
1781 if (bp->value == 0)
1782 bp->value = bp->alias->value = tokno++;
943819bf 1783
0a6384c4
AD
1784 if (bp->prec != bp->alias->prec)
1785 {
1786 if (bp->prec != 0 && bp->alias->prec != 0
1787 && bp->user_token_number == SALIAS)
a0f6b076
AD
1788 complain (_("conflicting precedences for %s and %s"),
1789 bp->tag, bp->alias->tag);
0a6384c4
AD
1790 if (bp->prec != 0)
1791 bp->alias->prec = bp->prec;
1792 else
1793 bp->prec = bp->alias->prec;
1794 }
943819bf 1795
0a6384c4
AD
1796 if (bp->assoc != bp->alias->assoc)
1797 {
a0f6b076
AD
1798 if (bp->assoc != 0 && bp->alias->assoc != 0
1799 && bp->user_token_number == SALIAS)
1800 complain (_("conflicting assoc values for %s and %s"),
1801 bp->tag, bp->alias->tag);
1802 if (bp->assoc != 0)
1803 bp->alias->assoc = bp->assoc;
1804 else
1805 bp->assoc = bp->alias->assoc;
1806 }
0a6384c4
AD
1807
1808 if (bp->user_token_number == SALIAS)
a70083a3 1809 continue; /* do not do processing below for SALIASs */
943819bf 1810
a70083a3 1811 }
d7020c20 1812 else /* bp->class == token_sym */
943819bf
RS
1813 {
1814 bp->value = tokno++;
1815 }
1816
d7020c20 1817 if (bp->class == token_sym)
1ff442ca 1818 {
6b7e85b9 1819 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1820 bp->user_token_number = ++last_user_token_number;
1821 if (bp->user_token_number > max_user_token_number)
1822 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1823 }
1824
1825 tags[bp->value] = bp->tag;
943819bf 1826 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1827 sprec[bp->value] = bp->prec;
1828 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1829 }
1830
037ca2f1 1831 token_translations_init ();
1ff442ca
NF
1832
1833 error_token_number = errtoken->value;
1834
e3f1699f
AD
1835 if (startval->class == unknown_sym)
1836 fatal (_("the start symbol %s is undefined"), startval->tag);
1837 else if (startval->class == token_sym)
1838 fatal (_("the start symbol %s is a token"), startval->tag);
1839
1840 start_symbol = startval->value;
1841}
1842
1843
1844/*-----------------------------------.
1845| Output definition of token names. |
1846`-----------------------------------*/
1847
1848static void
1849symbols_output (void)
1850{
342b8b6e
AD
1851 {
1852 struct obstack tokendefs;
1853 obstack_init (&tokendefs);
1854 output_token_defines (&tokendefs);
1855 obstack_1grow (&tokendefs, 0);
1856 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1857 obstack_free (&tokendefs, NULL);
1858 }
b6610515 1859
d8cb5183
MA
1860#if 0
1861 if (!no_parser_flag)
1862 output_token_defines (&table_obstack);
1863#endif
1ff442ca 1864
89cab50d 1865 if (defines_flag)
1ff442ca 1866 {
896fe5c1 1867 output_token_defines (&defines_obstack);
1ff442ca
NF
1868
1869 if (!pure_parser)
1870 {
1871 if (spec_name_prefix)
896fe5c1
AD
1872 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1873 spec_name_prefix);
1ff442ca 1874 else
ff4423cc 1875 obstack_sgrow (&defines_obstack,
573c1d9f 1876 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1877 }
1878
1879 if (semantic_parser)
037ca2f1
AD
1880 {
1881 int i;
1882
1883 for (i = ntokens; i < nsyms; i++)
1884 {
1885 /* don't make these for dummy nonterminals made by gensym. */
1886 if (*tags[i] != '@')
1887 obstack_fgrow2 (&defines_obstack,
1888 "# define\tNT%s\t%d\n", tags[i], i);
1889 }
1ff442ca 1890#if 0
037ca2f1
AD
1891 /* `fdefines' is now a temporary file, so we need to copy its
1892 contents in `done', so we can't close it here. */
1893 fclose (fdefines);
1894 fdefines = NULL;
1ff442ca 1895#endif
037ca2f1 1896 }
1ff442ca
NF
1897 }
1898}
a083fbbf 1899
1ff442ca 1900
a70083a3
AD
1901/*---------------------------------------------------------------.
1902| Convert the rules into the representation using RRHS, RLHS and |
1903| RITEMS. |
1904`---------------------------------------------------------------*/
1ff442ca 1905
4a120d45 1906static void
118fb205 1907packgram (void)
1ff442ca 1908{
a70083a3
AD
1909 int itemno;
1910 int ruleno;
1911 symbol_list *p;
1ff442ca 1912
d7913476 1913 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1914 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1915
1916 itemno = 0;
1917 ruleno = 1;
1918
1919 p = grammar;
1920 while (p)
1921 {
b29b2ed5 1922 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1923 rule_table[ruleno].lhs = p->sym->value;
1924 rule_table[ruleno].rhs = itemno;
b29b2ed5 1925 rule_table[ruleno].line = p->line;
1ff442ca
NF
1926
1927 p = p->next;
1928 while (p && p->sym)
1929 {
1930 ritem[itemno++] = p->sym->value;
1931 /* A rule gets by default the precedence and associativity
1932 of the last token in it. */
d7020c20 1933 if (p->sym->class == token_sym)
1ff442ca 1934 {
652a871c
AD
1935 rule_table[ruleno].prec = p->sym->prec;
1936 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1937 }
a70083a3
AD
1938 if (p)
1939 p = p->next;
1ff442ca
NF
1940 }
1941
1942 /* If this rule has a %prec,
a70083a3 1943 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1944 if (ruleprec)
1945 {
652a871c
AD
1946 rule_table[ruleno].prec = ruleprec->prec;
1947 rule_table[ruleno].assoc = ruleprec->assoc;
1948 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1949 }
1950
1951 ritem[itemno++] = -ruleno;
1952 ruleno++;
1953
a70083a3
AD
1954 if (p)
1955 p = p->next;
1ff442ca
NF
1956 }
1957
1958 ritem[itemno] = 0;
1959}
a70083a3
AD
1960\f
1961/*-------------------------------------------------------------------.
1962| Read in the grammar specification and record it in the format |
ea5607fd 1963| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1964| and all actions into ACTION_OBSTACK, in each case forming the body |
1965| of a C function (YYGUARD or YYACTION) which contains a switch |
1966| statement to decide which guard or action to execute. |
a70083a3
AD
1967`-------------------------------------------------------------------*/
1968
1969void
1970reader (void)
1971{
1972 start_flag = 0;
1973 startval = NULL; /* start symbol not specified yet. */
1974
a70083a3
AD
1975 nsyms = 1;
1976 nvars = 0;
1977 nrules = 0;
1978 nitems = 0;
a70083a3
AD
1979
1980 typed = 0;
1981 lastprec = 0;
1982
a70083a3
AD
1983 semantic_parser = 0;
1984 pure_parser = 0;
a70083a3
AD
1985
1986 grammar = NULL;
1987
342b8b6e 1988 lex_init ();
a70083a3
AD
1989 lineno = 1;
1990
11d82f03
MA
1991 /* Initialize the muscle obstack. */
1992 obstack_init (&muscle_obstack);
82e236e2 1993
a70083a3
AD
1994 /* Initialize the symbol table. */
1995 tabinit ();
b6610515 1996
a70083a3
AD
1997 /* Construct the error token */
1998 errtoken = getsym ("error");
d7020c20 1999 errtoken->class = token_sym;
a70083a3 2000 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 2001
a70083a3
AD
2002 /* Construct a token that represents all undefined literal tokens.
2003 It is always token number 2. */
2004 undeftoken = getsym ("$undefined.");
d7020c20 2005 undeftoken->class = token_sym;
a70083a3
AD
2006 undeftoken->user_token_number = 2;
2007
896fe5c1
AD
2008 /* Read the declaration section. Copy %{ ... %} groups to
2009 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2010 etc. found there. */
a70083a3 2011 read_declarations ();
a70083a3
AD
2012 /* Read in the grammar, build grammar in list form. Write out
2013 guards and actions. */
2014 readgram ();
ff48177d
MA
2015 /* Some C code is given at the end of the grammar file. */
2016 read_additionnal_code ();
b0c4483e 2017
a70083a3 2018 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
2019 write its type into the .tab.h file.
2020 This is no longer need with header skeleton. */
2021
a70083a3
AD
2022 /* Assign the symbols their symbol numbers. Write #defines for the
2023 token symbols into FDEFINES if requested. */
2024 packsymbols ();
e3f1699f 2025 symbols_output ();
a70083a3
AD
2026 /* Convert the grammar into the format described in gram.h. */
2027 packgram ();
edad7067
AD
2028 /* Output the headers. */
2029 symbols_output ();
a70083a3 2030}