]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/output.c (output_parser): Compute the `#line' lines when
[bison.git] / src / reader.c
CommitLineData
/* Input parser for bison
   Copyright 1984, 1986, 1989, 1992, 1998, 2000
   Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
8c7ebe49 24#include "obstack.h"
2a91a95e
AD
25#include "quotearg.h"
26#include "quote.h"
ceed8467 27#include "getargs.h"
1ff442ca 28#include "files.h"
d7913476 29#include "xalloc.h"
1ff442ca
NF
30#include "symtab.h"
31#include "lex.h"
32#include "gram.h"
a0f6b076 33#include "complain.h"
6c89f1c1 34#include "output.h"
b2ca4022 35#include "reader.h"
340ef489 36#include "conflicts.h"
1ff442ca 37
1ff442ca 38/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 39static int rline_allocated;
1ff442ca 40
a70083a3
AD
41typedef struct symbol_list
42{
43 struct symbol_list *next;
44 bucket *sym;
45 bucket *ruleprec;
46}
47symbol_list;
118fb205 48
1ff442ca 49int lineno;
1ff442ca 50char **tags;
d019d655 51short *user_toknums;
4a120d45
JT
52static symbol_list *grammar;
53static int start_flag;
54static bucket *startval;
1ff442ca
NF
55
56/* Nonzero if components of semantic values are used, implying
57 they must be unions. */
58static int value_components_used;
59
d7020c20
AD
60/* Nonzero if %union has been seen. */
61static int typed;
1ff442ca 62
d7020c20
AD
63/* Incremented for each %left, %right or %nonassoc seen */
64static int lastprec;
1ff442ca 65
d7020c20
AD
66/* Incremented for each generated symbol */
67static int gensym_count;
1ff442ca
NF
68
69static bucket *errtoken;
5b2e3c89 70static bucket *undeftoken;
0d533154 71\f
a70083a3 72
0d533154
AD
73/*===================\
74| Low level lexing. |
75\===================*/
943819bf
RS
76
77static void
118fb205 78skip_to_char (int target)
943819bf
RS
79{
80 int c;
81 if (target == '\n')
a0f6b076 82 complain (_(" Skipping to next \\n"));
943819bf 83 else
a0f6b076 84 complain (_(" Skipping to next %c"), target);
943819bf
RS
85
86 do
0d533154 87 c = skip_white_space ();
943819bf 88 while (c != target && c != EOF);
a083fbbf 89 if (c != EOF)
0d533154 90 ungetc (c, finput);
943819bf
RS
91}
92
93
0d533154
AD
/* Read an optionally negative decimal integer from STREAM and return
   its value.  An optional leading `-' is accepted, followed by zero
   or more digits; the first character that is not part of the number
   is pushed back onto STREAM.  When no digit is present the result
   is 0.  */

static inline int
read_signed_integer (FILE *stream)
{
  int sign = 1;
  int value = 0;
  int ch = getc (stream);

  if (ch == '-')
    {
      sign = -1;
      ch = getc (stream);
    }

  for (; isdigit (ch); ch = getc (stream))
    value = value * 10 + (ch - '0');

  /* Give back the character that ended the number.  */
  ungetc (ch, stream);

  return sign * value;
}
121\f
79282c5a
AD
122/*--------------------------------------------------------------.
123| Get the data type (alternative in the union) of the value for |
124| symbol N in rule RULE. |
125`--------------------------------------------------------------*/
126
127static char *
128get_type_name (int n, symbol_list * rule)
129{
130 int i;
131 symbol_list *rp;
132
133 if (n < 0)
134 {
135 complain (_("invalid $ value"));
136 return NULL;
137 }
138
139 rp = rule;
140 i = 0;
141
142 while (i < n)
143 {
144 rp = rp->next;
145 if (rp == NULL || rp->sym == NULL)
146 {
147 complain (_("invalid $ value"));
148 return NULL;
149 }
150 i++;
151 }
152
153 return rp->sym->type_name;
154}
155\f
8c7ebe49
AD
156/*-----------------------------------------------------------------.
157| Dump the string from FIN to FOUT and OOUT if non null. MATCH is |
158| the delimiter of the string (either ' or "). |
159`-----------------------------------------------------------------*/
ae3c3164
AD
160
161static inline void
8c7ebe49 162copy_string (FILE *fin, FILE *fout, struct obstack *oout, int match)
ae3c3164
AD
163{
164 int c;
165
8c7ebe49
AD
166 if (fout)
167 putc (match, fout);
168 if (oout)
169 obstack_1grow (oout, match);
170
4a120d45 171 c = getc (fin);
ae3c3164
AD
172
173 while (c != match)
174 {
175 if (c == EOF)
176 fatal (_("unterminated string at end of file"));
177 if (c == '\n')
178 {
a0f6b076 179 complain (_("unterminated string"));
4a120d45 180 ungetc (c, fin);
ae3c3164
AD
181 c = match; /* invent terminator */
182 continue;
183 }
184
8c7ebe49
AD
185 if (fout)
186 putc (c, fout);
187 if (oout)
188 obstack_1grow (oout, c);
ae3c3164
AD
189
190 if (c == '\\')
191 {
4a120d45 192 c = getc (fin);
ae3c3164
AD
193 if (c == EOF)
194 fatal (_("unterminated string at end of file"));
8c7ebe49
AD
195 if (fout)
196 putc (c, fout);
197 if (oout)
198 obstack_1grow (oout, c);
199
ae3c3164
AD
200 if (c == '\n')
201 lineno++;
202 }
203
a70083a3 204 c = getc (fin);
ae3c3164
AD
205 }
206
8c7ebe49
AD
207 if (fout)
208 putc (c, fout);
209 if (oout)
210 obstack_1grow (oout, c);
ae3c3164
AD
211}
212
213
550a72a3
AD
214/*----------------------------------------------------------------.
215| Dump the wannabee comment from IN to OUT1 and OUT2. In fact we |
216| just saw a `/', which might or might not be a comment. In any |
217| case, copy what we saw. |
218| |
219| OUT2 might be NULL. |
220`----------------------------------------------------------------*/
ae3c3164
AD
221
222static inline void
896fe5c1
AD
223copy_comment2 (FILE *fin, FILE *out1,
224 struct obstack *oout2, struct obstack *oout)
ae3c3164
AD
225{
226 int cplus_comment;
a70083a3 227 int ended;
550a72a3
AD
228 int c;
229
230 /* We read a `/', output it. */
8c7ebe49
AD
231 if (out1)
232 putc ('/', out1);
8c7ebe49
AD
233 if (oout)
234 obstack_1grow (oout, '/');
896fe5c1
AD
235 if (oout2)
236 obstack_1grow (oout2, '/');
550a72a3
AD
237
238 switch ((c = getc (fin)))
239 {
240 case '/':
241 cplus_comment = 1;
242 break;
243 case '*':
244 cplus_comment = 0;
245 break;
246 default:
247 ungetc (c, fin);
248 return;
249 }
ae3c3164 250
8c7ebe49
AD
251 if (out1)
252 putc (c, out1);
8c7ebe49
AD
253 if (oout)
254 obstack_1grow (oout, c);
896fe5c1
AD
255 if (oout2)
256 obstack_1grow (oout2, c);
550a72a3 257 c = getc (fin);
ae3c3164
AD
258
259 ended = 0;
260 while (!ended)
261 {
262 if (!cplus_comment && c == '*')
263 {
264 while (c == '*')
265 {
8c7ebe49
AD
266 if (out1)
267 putc (c, out1);
8c7ebe49
AD
268 if (oout)
269 obstack_1grow (oout, c);
896fe5c1
AD
270 if (oout2)
271 obstack_1grow (oout2, c);
550a72a3 272 c = getc (fin);
ae3c3164
AD
273 }
274
275 if (c == '/')
276 {
8c7ebe49
AD
277 if (out1)
278 putc (c, out1);
8c7ebe49
AD
279 if (oout)
280 obstack_1grow (oout, c);
896fe5c1
AD
281 if (oout2)
282 obstack_1grow (oout2, c);
ae3c3164
AD
283 ended = 1;
284 }
285 }
286 else if (c == '\n')
287 {
288 lineno++;
8c7ebe49
AD
289 if (out1)
290 putc (c, out1);
8c7ebe49
AD
291 if (oout)
292 obstack_1grow (oout, c);
896fe5c1
AD
293 if (oout2)
294 obstack_1grow (oout2, c);
ae3c3164
AD
295 if (cplus_comment)
296 ended = 1;
297 else
550a72a3 298 c = getc (fin);
ae3c3164
AD
299 }
300 else if (c == EOF)
301 fatal (_("unterminated comment"));
302 else
303 {
8c7ebe49
AD
304 if (out1)
305 putc (c, out1);
8c7ebe49
AD
306 if (oout)
307 obstack_1grow (oout, c);
896fe5c1
AD
308 if (oout2)
309 obstack_1grow (oout2, c);
550a72a3 310 c = getc (fin);
ae3c3164
AD
311 }
312 }
313}
314
315
550a72a3
AD
/* Dump the comment (actually the current text starting with a `/',
   which the caller has already read) from FIN to FOUT and OOUT.
   This is copy_comment2 with no second obstack.  */

static inline void
copy_comment (FILE *fin, FILE *fout, struct obstack *oout)
{
  struct obstack *no_second_obstack = NULL;

  copy_comment2 (fin, fout, no_second_obstack, oout);
}
326
327
a70083a3
AD
328/*-----------------------------------------------------------------.
329| FIN is pointing to a location (i.e., a `@'). Output to FOUT a |
330| reference to this location. STACK_OFFSET is the number of values |
331| in the current rule so far, which says where to find `$0' with |
332| respect to the top of the stack. |
333`-----------------------------------------------------------------*/
1ff442ca 334
a70083a3 335static inline void
8c7ebe49 336copy_at (FILE *fin, FILE *fout, struct obstack *oout, int stack_offset)
1ff442ca 337{
a70083a3 338 int c;
1ff442ca 339
a70083a3
AD
340 c = getc (fin);
341 if (c == '$')
1ff442ca 342 {
8c7ebe49
AD
343 if (fout)
344 fprintf (fout, "yyloc");
345 if (oout)
dd60faec 346 obstack_grow_literal_string (oout, "yyloc");
89cab50d 347 locations_flag = 1;
a70083a3
AD
348 }
349 else if (isdigit (c) || c == '-')
350 {
351 int n;
8c7ebe49 352 char buf[4096];
1ff442ca 353
a70083a3
AD
354 ungetc (c, fin);
355 n = read_signed_integer (fin);
943819bf 356
8c7ebe49
AD
357 sprintf (buf, "yylsp[%d]", n - stack_offset);
358 if (fout)
359 fputs (buf, fout);
360 if (oout)
361 obstack_grow (oout, buf, strlen (buf));
89cab50d 362 locations_flag = 1;
1ff442ca 363 }
a70083a3 364 else
ff4a34be
AD
365 {
366 char buf[] = "@c";
367 buf[1] = c;
368 complain (_("%s is invalid"), quote (buf));
369 }
1ff442ca 370}
79282c5a
AD
371
372
373/*-------------------------------------------------------------------.
374| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
375| |
376| Possible inputs: $[<TYPENAME>]($|integer) |
377| |
378| Output to FOUT a reference to this semantic value. STACK_OFFSET is |
379| the number of values in the current rule so far, which says where |
380| to find `$0' with respect to the top of the stack. |
381`-------------------------------------------------------------------*/
382
383static inline void
8c7ebe49 384copy_dollar (FILE *fin, FILE *fout, struct obstack *oout,
79282c5a
AD
385 symbol_list *rule, int stack_offset)
386{
387 int c = getc (fin);
388 char *type_name = NULL;
389
f282676b 390 /* Get the type name if explicit. */
79282c5a
AD
391 if (c == '<')
392 {
f282676b 393 read_type_name (fin);
79282c5a
AD
394 type_name = token_buffer;
395 value_components_used = 1;
79282c5a
AD
396 c = getc (fin);
397 }
398
399 if (c == '$')
400 {
8c7ebe49
AD
401 if (fout)
402 fputs ("yyval", fout);
403 if (oout)
404 obstack_grow_literal_string (oout, "yyval");
405
79282c5a
AD
406 if (!type_name)
407 type_name = get_type_name (0, rule);
408 if (type_name)
8c7ebe49
AD
409 {
410 if (fout)
411 fprintf (fout, ".%s", type_name);
412 if (oout)
413 obstack_fgrow1 (oout, ".%s", type_name);
414 }
79282c5a
AD
415 if (!type_name && typed)
416 complain (_("$$ of `%s' has no declared type"),
417 rule->sym->tag);
418 }
419 else if (isdigit (c) || c == '-')
420 {
421 int n;
422 ungetc (c, fin);
423 n = read_signed_integer (fin);
424
425 if (!type_name && n > 0)
426 type_name = get_type_name (n, rule);
427
8c7ebe49
AD
428 if (fout)
429 fprintf (fout, "yyvsp[%d]", n - stack_offset);
430 if (oout)
431 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
432
79282c5a 433 if (type_name)
8c7ebe49
AD
434 {
435 if (fout)
436 fprintf (fout, ".%s", type_name);
437 if (oout)
438 obstack_fgrow1 (oout, ".%s", type_name);
439 }
79282c5a
AD
440 if (!type_name && typed)
441 complain (_("$%d of `%s' has no declared type"),
442 n, rule->sym->tag);
443 }
444 else
445 {
446 char buf[] = "$c";
447 buf[1] = c;
448 complain (_("%s is invalid"), quote (buf));
449 }
450}
a70083a3
AD
451\f
452/*-------------------------------------------------------------------.
453| Copy the contents of a `%{ ... %}' into the definitions file. The |
454| `%{' has already been read. Return after reading the `%}'. |
455`-------------------------------------------------------------------*/
1ff442ca 456
4a120d45 457static void
118fb205 458copy_definition (void)
1ff442ca 459{
a70083a3 460 int c;
ae3c3164 461 /* -1 while reading a character if prev char was %. */
a70083a3 462 int after_percent;
1ff442ca 463
89cab50d 464 if (!no_lines_flag)
2a91a95e
AD
465 obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
466 lineno, quotearg_style (c_quoting_style, infile));
1ff442ca
NF
467
468 after_percent = 0;
469
ae3c3164 470 c = getc (finput);
1ff442ca
NF
471
472 for (;;)
473 {
474 switch (c)
475 {
476 case '\n':
dd60faec 477 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
478 lineno++;
479 break;
480
481 case '%':
a70083a3 482 after_percent = -1;
1ff442ca 483 break;
a083fbbf 484
1ff442ca
NF
485 case '\'':
486 case '"':
dd60faec 487 copy_string (finput, 0, &attrs_obstack, c);
1ff442ca
NF
488 break;
489
490 case '/':
dd60faec 491 copy_comment (finput, 0, &attrs_obstack);
1ff442ca
NF
492 break;
493
494 case EOF:
a70083a3 495 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
496
497 default:
dd60faec 498 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
499 }
500
a70083a3 501 c = getc (finput);
1ff442ca
NF
502
503 if (after_percent)
504 {
505 if (c == '}')
506 return;
dd60faec 507 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
508 }
509 after_percent = 0;
1ff442ca 510 }
1ff442ca
NF
511}
512
513
d7020c20
AD
514/*-------------------------------------------------------------------.
515| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
516| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
517| are reversed. |
518`-------------------------------------------------------------------*/
1ff442ca 519
4a120d45 520static void
d7020c20 521parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 522{
a70083a3
AD
523 int token = 0;
524 char *typename = 0;
1ff442ca 525
1e9798d5
AD
526 /* The symbol being defined. */
527 struct bucket *symbol = NULL;
528
529 /* After `%token' and `%nterm', any number of symbols maybe be
530 defined. */
1ff442ca
NF
531 for (;;)
532 {
e6011337
JT
533 int tmp_char = ungetc (skip_white_space (), finput);
534
1e9798d5
AD
535 /* `%' (for instance from `%token', or from `%%' etc.) is the
536 only valid means to end this declaration. */
e6011337 537 if (tmp_char == '%')
1ff442ca 538 return;
e6011337 539 if (tmp_char == EOF)
a0f6b076 540 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 541
a70083a3 542 token = lex ();
1ff442ca 543 if (token == COMMA)
943819bf
RS
544 {
545 symbol = NULL;
546 continue;
547 }
1ff442ca
NF
548 if (token == TYPENAME)
549 {
95e36146 550 typename = xstrdup (token_buffer);
1ff442ca 551 value_components_used = 1;
943819bf
RS
552 symbol = NULL;
553 }
a70083a3 554 else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
943819bf 555 {
8e03724b
AD
556 if (symval->alias)
557 warn (_("symbol `%s' used more than once as a literal string"),
558 symval->tag);
559 else if (symbol->alias)
560 warn (_("symbol `%s' given more than one literal string"),
561 symbol->tag);
562 else
563 {
564 symval->class = token_sym;
565 symval->type_name = typename;
566 symval->user_token_number = symbol->user_token_number;
567 symbol->user_token_number = SALIAS;
568 symval->alias = symbol;
569 symbol->alias = symval;
570 /* symbol and symval combined are only one symbol */
571 nsyms--;
572 }
943819bf 573 translations = 1;
8e03724b 574 symbol = NULL;
1ff442ca
NF
575 }
576 else if (token == IDENTIFIER)
577 {
578 int oldclass = symval->class;
943819bf 579 symbol = symval;
1ff442ca 580
943819bf 581 if (symbol->class == what_is_not)
a0f6b076 582 complain (_("symbol %s redefined"), symbol->tag);
943819bf 583 symbol->class = what_is;
d7020c20 584 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 585 symbol->value = nvars++;
1ff442ca
NF
586
587 if (typename)
588 {
943819bf
RS
589 if (symbol->type_name == NULL)
590 symbol->type_name = typename;
a70083a3 591 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 592 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
593 }
594 }
943819bf 595 else if (symbol && token == NUMBER)
a70083a3 596 {
943819bf 597 symbol->user_token_number = numval;
1ff442ca 598 translations = 1;
a70083a3 599 }
1ff442ca 600 else
943819bf 601 {
a0f6b076 602 complain (_("`%s' is invalid in %s"),
d7020c20 603 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 604 skip_to_char ('%');
943819bf 605 }
1ff442ca
NF
606 }
607
608}
609
1ff442ca 610
d7020c20
AD
611/*------------------------------.
612| Parse what comes after %start |
613`------------------------------*/
1ff442ca 614
4a120d45 615static void
118fb205 616parse_start_decl (void)
1ff442ca
NF
617{
618 if (start_flag)
27821bff
AD
619 complain (_("multiple %s declarations"), "%start");
620 if (lex () != IDENTIFIER)
621 complain (_("invalid %s declaration"), "%start");
943819bf
RS
622 else
623 {
624 start_flag = 1;
625 startval = symval;
626 }
1ff442ca
NF
627}
628
a70083a3
AD
629/*-----------------------------------------------------------.
630| read in a %type declaration and record its information for |
631| get_type_name to access |
632`-----------------------------------------------------------*/
633
634static void
635parse_type_decl (void)
636{
a70083a3
AD
637 char *name;
638
639 if (lex () != TYPENAME)
640 {
641 complain ("%s", _("%type declaration has no <typename>"));
642 skip_to_char ('%');
643 return;
644 }
645
95e36146 646 name = xstrdup (token_buffer);
a70083a3
AD
647
648 for (;;)
649 {
650 int t;
651 int tmp_char = ungetc (skip_white_space (), finput);
652
653 if (tmp_char == '%')
654 return;
655 if (tmp_char == EOF)
656 fatal (_("Premature EOF after %s"), token_buffer);
657
658 t = lex ();
659
660 switch (t)
1ff442ca
NF
661 {
662
663 case COMMA:
664 case SEMICOLON:
665 break;
666
667 case IDENTIFIER:
668 if (symval->type_name == NULL)
669 symval->type_name = name;
a70083a3 670 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 671 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
672
673 break;
674
675 default:
a0f6b076
AD
676 complain (_("invalid %%type declaration due to item: %s"),
677 token_buffer);
a70083a3 678 skip_to_char ('%');
1ff442ca
NF
679 }
680 }
681}
682
683
684
d7020c20
AD
685/*----------------------------------------------------------------.
686| Read in a %left, %right or %nonassoc declaration and record its |
687| information. |
688`----------------------------------------------------------------*/
1ff442ca 689
4a120d45 690static void
d7020c20 691parse_assoc_decl (associativity assoc)
1ff442ca 692{
a70083a3
AD
693 char *name = NULL;
694 int prev = 0;
1ff442ca 695
a70083a3 696 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 697
1ff442ca
NF
698 for (;;)
699 {
a70083a3 700 int t;
e6011337 701 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 702
e6011337 703 if (tmp_char == '%')
1ff442ca 704 return;
e6011337 705 if (tmp_char == EOF)
a0f6b076 706 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 707
a70083a3 708 t = lex ();
1ff442ca
NF
709
710 switch (t)
711 {
1ff442ca 712 case TYPENAME:
95e36146 713 name = xstrdup (token_buffer);
1ff442ca
NF
714 break;
715
716 case COMMA:
717 break;
718
719 case IDENTIFIER:
720 if (symval->prec != 0)
a0f6b076 721 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
722 symval->prec = lastprec;
723 symval->assoc = assoc;
d7020c20 724 if (symval->class == nterm_sym)
a0f6b076 725 complain (_("symbol %s redefined"), symval->tag);
d7020c20 726 symval->class = token_sym;
1ff442ca 727 if (name)
a70083a3 728 { /* record the type, if one is specified */
1ff442ca
NF
729 if (symval->type_name == NULL)
730 symval->type_name = name;
a70083a3 731 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 732 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
733 }
734 break;
735
736 case NUMBER:
737 if (prev == IDENTIFIER)
a70083a3 738 {
1ff442ca
NF
739 symval->user_token_number = numval;
740 translations = 1;
a70083a3
AD
741 }
742 else
743 {
744 complain (_
745 ("invalid text (%s) - number should be after identifier"),
746token_buffer);
747 skip_to_char ('%');
748 }
1ff442ca
NF
749 break;
750
751 case SEMICOLON:
752 return;
753
754 default:
a0f6b076 755 complain (_("unexpected item: %s"), token_buffer);
a70083a3 756 skip_to_char ('%');
1ff442ca
NF
757 }
758
759 prev = t;
760
761 }
762}
763
764
765
dd60faec
AD
766/*--------------------------------------------------------------.
767| Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
768| where it is made into the definition of YYSTYPE, the type of |
769| elements of the parser value stack. |
770`--------------------------------------------------------------*/
1ff442ca 771
4a120d45 772static void
118fb205 773parse_union_decl (void)
1ff442ca 774{
a70083a3
AD
775 int c;
776 int count = 0;
1ff442ca
NF
777
778 if (typed)
27821bff 779 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
780
781 typed = 1;
782
89cab50d 783 if (!no_lines_flag)
2a91a95e
AD
784 obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
785 lineno, quotearg_style (c_quoting_style, infile));
1ff442ca 786 else
dd60faec 787 obstack_1grow (&attrs_obstack, '\n');
1ff442ca 788
dd60faec 789 obstack_grow_literal_string (&attrs_obstack, "typedef union");
896fe5c1
AD
790 if (defines_flag)
791 obstack_grow_literal_string (&defines_obstack, "typedef union");
1ff442ca 792
27821bff 793 c = getc (finput);
1ff442ca
NF
794
795 while (c != EOF)
796 {
dd60faec 797 obstack_1grow (&attrs_obstack, c);
896fe5c1 798 if (defines_flag)
d7045ec6 799 obstack_1grow (&defines_obstack, c);
1ff442ca
NF
800
801 switch (c)
802 {
803 case '\n':
804 lineno++;
805 break;
806
807 case '/':
896fe5c1 808 copy_comment2 (finput, 0, &defines_obstack, &attrs_obstack);
1ff442ca
NF
809 break;
810
1ff442ca
NF
811 case '{':
812 count++;
813 break;
814
815 case '}':
816 if (count == 0)
27821bff 817 complain (_("unmatched %s"), "`}'");
1ff442ca 818 count--;
943819bf 819 if (count <= 0)
1ff442ca 820 {
dd60faec 821 obstack_grow_literal_string (&attrs_obstack, " YYSTYPE;\n");
896fe5c1
AD
822 if (defines_flag)
823 obstack_grow_literal_string (&defines_obstack, " YYSTYPE;\n");
1ff442ca 824 /* JF don't choke on trailing semi */
27821bff
AD
825 c = skip_white_space ();
826 if (c != ';')
a70083a3 827 ungetc (c, finput);
1ff442ca
NF
828 return;
829 }
830 }
831
27821bff 832 c = getc (finput);
1ff442ca
NF
833 }
834}
835
d7020c20
AD
836
837/*-------------------------------------------------------.
838| Parse the declaration %expect N which says to expect N |
839| shift-reduce conflicts. |
840`-------------------------------------------------------*/
1ff442ca 841
4a120d45 842static void
118fb205 843parse_expect_decl (void)
1ff442ca 844{
131e2fef 845 int c = skip_white_space ();
1ff442ca
NF
846 ungetc (c, finput);
847
131e2fef 848 if (!isdigit (c))
79282c5a 849 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
850 else
851 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
852}
853
a70083a3
AD
854
855/*-------------------------------------------------------------------.
856| Parse what comes after %thong. the full syntax is |
857| |
858| %thong <type> token number literal |
859| |
860| the <type> or number may be omitted. The number specifies the |
861| user_token_number. |
862| |
863| Two symbols are entered in the table, one for the token symbol and |
864| one for the literal. Both are given the <type>, if any, from the |
865| declaration. The ->user_token_number of the first is SALIAS and |
866| the ->user_token_number of the second is set to the number, if |
867| any, from the declaration. The two symbols are linked via |
868| pointers in their ->alias fields. |
869| |
870| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
871| only the literal string is retained it is the literal string that |
872| is output to yytname |
873`-------------------------------------------------------------------*/
874
875static void
876parse_thong_decl (void)
7b306f52 877{
a70083a3
AD
878 int token;
879 struct bucket *symbol;
880 char *typename = 0;
95e36146 881 int usrtoknum;
7b306f52 882
a70083a3
AD
883 translations = 1;
884 token = lex (); /* fetch typename or first token */
885 if (token == TYPENAME)
7b306f52 886 {
95e36146 887 typename = xstrdup (token_buffer);
a70083a3
AD
888 value_components_used = 1;
889 token = lex (); /* fetch first token */
7b306f52 890 }
7b306f52 891
a70083a3 892 /* process first token */
7b306f52 893
a70083a3
AD
894 if (token != IDENTIFIER)
895 {
896 complain (_("unrecognized item %s, expected an identifier"),
897 token_buffer);
898 skip_to_char ('%');
899 return;
7b306f52 900 }
d7020c20 901 symval->class = token_sym;
a70083a3
AD
902 symval->type_name = typename;
903 symval->user_token_number = SALIAS;
904 symbol = symval;
7b306f52 905
a70083a3 906 token = lex (); /* get number or literal string */
1ff442ca 907
a70083a3 908 if (token == NUMBER)
943819bf 909 {
a70083a3
AD
910 usrtoknum = numval;
911 token = lex (); /* okay, did number, now get literal */
943819bf 912 }
a70083a3
AD
913 else
914 usrtoknum = 0;
1ff442ca 915
a70083a3 916 /* process literal string token */
1ff442ca 917
a70083a3 918 if (token != IDENTIFIER || *symval->tag != '\"')
1ff442ca 919 {
a70083a3
AD
920 complain (_("expected string constant instead of %s"), token_buffer);
921 skip_to_char ('%');
922 return;
1ff442ca 923 }
d7020c20 924 symval->class = token_sym;
a70083a3
AD
925 symval->type_name = typename;
926 symval->user_token_number = usrtoknum;
1ff442ca 927
a70083a3
AD
928 symval->alias = symbol;
929 symbol->alias = symval;
1ff442ca 930
79282c5a
AD
931 /* symbol and symval combined are only one symbol. */
932 nsyms--;
a70083a3 933}
3cef001a 934
d7020c20 935
a70083a3
AD
936/*----------------------------------------------------------------.
937| Read from finput until `%%' is seen. Discard the `%%'. Handle |
938| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 939| groups to ATTRS_OBSTACK. |
a70083a3 940`----------------------------------------------------------------*/
1ff442ca 941
4a120d45 942static void
a70083a3 943read_declarations (void)
1ff442ca 944{
a70083a3
AD
945 int c;
946 int tok;
1ff442ca 947
a70083a3 948 for (;;)
1ff442ca 949 {
a70083a3 950 c = skip_white_space ();
1ff442ca 951
a70083a3
AD
952 if (c == '%')
953 {
954 tok = parse_percent_token ();
1ff442ca 955
a70083a3 956 switch (tok)
943819bf 957 {
a70083a3
AD
958 case TWO_PERCENTS:
959 return;
1ff442ca 960
a70083a3
AD
961 case PERCENT_LEFT_CURLY:
962 copy_definition ();
963 break;
1ff442ca 964
a70083a3 965 case TOKEN:
d7020c20 966 parse_token_decl (token_sym, nterm_sym);
a70083a3 967 break;
1ff442ca 968
a70083a3 969 case NTERM:
d7020c20 970 parse_token_decl (nterm_sym, token_sym);
a70083a3 971 break;
1ff442ca 972
a70083a3
AD
973 case TYPE:
974 parse_type_decl ();
975 break;
1ff442ca 976
a70083a3
AD
977 case START:
978 parse_start_decl ();
979 break;
118fb205 980
a70083a3
AD
981 case UNION:
982 parse_union_decl ();
983 break;
1ff442ca 984
a70083a3
AD
985 case EXPECT:
986 parse_expect_decl ();
987 break;
988 case THONG:
989 parse_thong_decl ();
990 break;
d7020c20 991
a70083a3 992 case LEFT:
d7020c20 993 parse_assoc_decl (left_assoc);
a70083a3 994 break;
1ff442ca 995
a70083a3 996 case RIGHT:
d7020c20 997 parse_assoc_decl (right_assoc);
a70083a3 998 break;
1ff442ca 999
a70083a3 1000 case NONASSOC:
d7020c20 1001 parse_assoc_decl (non_assoc);
a70083a3 1002 break;
1ff442ca 1003
a70083a3 1004 case SEMANTIC_PARSER:
ff61dabd
AD
1005 if (!semantic_parser)
1006 fguard = xfopen (guardfile, "w");
1007 semantic_parser = 1;
a70083a3 1008 break;
1ff442ca 1009
a70083a3
AD
1010 case PURE_PARSER:
1011 pure_parser = 1;
1012 break;
1ff442ca 1013
a70083a3
AD
1014 case NOOP:
1015 break;
1ff442ca 1016
a70083a3
AD
1017 default:
1018 complain (_("unrecognized: %s"), token_buffer);
1019 skip_to_char ('%');
1020 }
1021 }
1022 else if (c == EOF)
1023 fatal (_("no input grammar"));
1024 else
1025 {
ff4a34be
AD
1026 char buf[] = "c";
1027 buf[0] = c;
1028 complain (_("unknown character: %s"), quote (buf));
a70083a3 1029 skip_to_char ('%');
1ff442ca 1030 }
1ff442ca 1031 }
1ff442ca 1032}
a70083a3
AD
1033\f
1034/*-------------------------------------------------------------------.
1035| Assuming that a `{' has just been seen, copy everything up to the |
1036| matching `}' into the actions file. STACK_OFFSET is the number of |
1037| values in the current rule so far, which says where to find `$0' |
1038| with respect to the top of the stack. |
1039`-------------------------------------------------------------------*/
1ff442ca 1040
4a120d45 1041static void
79282c5a 1042copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1043{
a70083a3 1044 int c;
a70083a3 1045 int count;
8c7ebe49 1046 char buf[4096];
1ff442ca
NF
1047
1048 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1049 if (semantic_parser)
1050 stack_offset = 0;
1ff442ca 1051
8c7ebe49
AD
1052 sprintf (buf, "\ncase %d:\n", nrules);
1053 obstack_grow (&action_obstack, buf, strlen (buf));
1054
89cab50d 1055 if (!no_lines_flag)
8c7ebe49 1056 {
2a91a95e
AD
1057 sprintf (buf, "#line %d %s\n",
1058 lineno, quotearg_style (c_quoting_style, infile));
8c7ebe49
AD
1059 obstack_grow (&action_obstack, buf, strlen (buf));
1060 }
1061 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1062
1063 count = 1;
a70083a3 1064 c = getc (finput);
1ff442ca
NF
1065
1066 while (count > 0)
1067 {
1068 while (c != '}')
a70083a3
AD
1069 {
1070 switch (c)
1ff442ca
NF
1071 {
1072 case '\n':
8c7ebe49 1073 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1074 lineno++;
1075 break;
1076
1077 case '{':
8c7ebe49 1078 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1079 count++;
1080 break;
1081
1082 case '\'':
1083 case '"':
8c7ebe49 1084 copy_string (finput, 0, &action_obstack, c);
1ff442ca
NF
1085 break;
1086
1087 case '/':
8c7ebe49 1088 copy_comment (finput, 0, &action_obstack);
1ff442ca
NF
1089 break;
1090
1091 case '$':
8c7ebe49
AD
1092 copy_dollar (finput, 0, &action_obstack,
1093 rule, stack_offset);
1ff442ca
NF
1094 break;
1095
1096 case '@':
8c7ebe49
AD
1097 copy_at (finput, 0, &action_obstack,
1098 stack_offset);
6666f98f 1099 break;
1ff442ca
NF
1100
1101 case EOF:
27821bff 1102 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1103
1104 default:
8c7ebe49 1105 obstack_1grow (&action_obstack, c);
a70083a3
AD
1106 }
1107
1108 c = getc (finput);
1109 }
1110
1111 /* above loop exits when c is '}' */
1112
1113 if (--count)
1114 {
8c7ebe49 1115 obstack_1grow (&action_obstack, c);
a70083a3
AD
1116 c = getc (finput);
1117 }
1118 }
1119
8c7ebe49
AD
1120 obstack_grow_literal_string (&action_obstack,
1121 ";\n break;}");
a70083a3
AD
1122}
1123\f
1124/*-------------------------------------------------------------------.
1125| After `%guard' is seen in the input file, copy the actual guard |
1126| into the guards file. If the guard is followed by an action, copy |
1127| that into the actions file. STACK_OFFSET is the number of values |
1128| in the current rule so far, which says where to find `$0' with |
1129| respect to the top of the stack, for the simple parser in which |
1130| the stack is not popped until after the guard is run. |
1131`-------------------------------------------------------------------*/
1132
1133static void
79282c5a 1134copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1135{
1136 int c;
a70083a3 1137 int count;
a70083a3
AD
1138 int brace_flag = 0;
1139
1140 /* offset is always 0 if parser has already popped the stack pointer */
1141 if (semantic_parser)
1142 stack_offset = 0;
1143
1144 fprintf (fguard, "\ncase %d:\n", nrules);
89cab50d 1145 if (!no_lines_flag)
2a91a95e
AD
1146 fprintf (fguard, "#line %d %s\n",
1147 lineno, quotearg_style (c_quoting_style, infile));
a70083a3
AD
1148 putc ('{', fguard);
1149
1150 count = 0;
1151 c = getc (finput);
1152
1153 while (brace_flag ? (count > 0) : (c != ';'))
1154 {
1155 switch (c)
1156 {
1157 case '\n':
1158 putc (c, fguard);
1159 lineno++;
1160 break;
1161
1162 case '{':
1163 putc (c, fguard);
1164 brace_flag = 1;
1165 count++;
1166 break;
1167
1168 case '}':
1169 putc (c, fguard);
1170 if (count > 0)
1171 count--;
1172 else
1173 {
1174 complain (_("unmatched %s"), "`}'");
1175 c = getc (finput); /* skip it */
1176 }
1177 break;
1178
1179 case '\'':
1180 case '"':
8c7ebe49 1181 copy_string (finput, fguard, 0, c);
a70083a3
AD
1182 break;
1183
1184 case '/':
8c7ebe49 1185 copy_comment (finput, fguard, 0);
a70083a3
AD
1186 break;
1187
1188 case '$':
8c7ebe49 1189 copy_dollar (finput, fguard, 0, rule, stack_offset);
a70083a3 1190 break;
1ff442ca 1191
a70083a3 1192 case '@':
8c7ebe49 1193 copy_at (finput, fguard, 0, stack_offset);
a70083a3 1194 break;
1ff442ca 1195
a70083a3
AD
1196 case EOF:
1197 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1198
a70083a3
AD
1199 default:
1200 putc (c, fguard);
1ff442ca 1201 }
a70083a3
AD
1202
1203 if (c != '}' || count != 0)
1204 c = getc (finput);
1ff442ca
NF
1205 }
1206
a70083a3
AD
1207 c = skip_white_space ();
1208
1209 fprintf (fguard, ";\n break;}");
1210 if (c == '{')
1211 copy_action (rule, stack_offset);
1212 else if (c == '=')
1213 {
1214 c = getc (finput); /* why not skip_white_space -wjh */
1215 if (c == '{')
1216 copy_action (rule, stack_offset);
1217 }
1218 else
1219 ungetc (c, finput);
1ff442ca 1220}
a70083a3
AD
1221\f
1222
1223static void
1224record_rule_line (void)
1225{
1226 /* Record each rule's source line number in rline table. */
1ff442ca 1227
a70083a3
AD
1228 if (nrules >= rline_allocated)
1229 {
1230 rline_allocated = nrules * 2;
d7913476 1231 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1232 }
1233 rline[nrules] = lineno;
1234}
1ff442ca
NF
1235
1236
a70083a3
AD
1237/*-------------------------------------------------------------------.
1238| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1239| with the user's names. |
1240`-------------------------------------------------------------------*/
1ff442ca 1241
4a120d45 1242static bucket *
118fb205 1243gensym (void)
1ff442ca 1244{
a70083a3 1245 bucket *sym;
1ff442ca
NF
1246
1247 sprintf (token_buffer, "@%d", ++gensym_count);
a70083a3 1248 sym = getsym (token_buffer);
d7020c20 1249 sym->class = nterm_sym;
1ff442ca 1250 sym->value = nvars++;
36281465 1251 return sym;
1ff442ca
NF
1252}
1253
a70083a3
AD
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  This is unused.  It is only called from
   the #if 0 part of readgram.

   Returns the first token that is not part of the declaration: the
   offending token when the declaration is malformed, otherwise the
   token following the closing semicolon or the first token that is
   neither a comma nor an identifier.  */

static int
get_type (void)
{
  int tok;
  char *type_name;

  tok = lex ();
  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  type_name = xstrdup (token_buffer);

  while (1)
    {
      tok = lex ();

      if (tok == SEMICOLON)
        return lex ();
      if (tok == COMMA)
        continue;
      if (tok != IDENTIFIER)
        return tok;

      /* An identifier: give it the type, or check that the type it
         already has agrees with this one.  */
      if (symval->type_name == NULL)
        symval->type_name = type_name;
      else if (strcmp (type_name, symval->type_name) != 0)
        complain (_("type redeclaration for %s"), symval->tag);
    }
}

#endif
1305\f
1306/*------------------------------------------------------------------.
1307| Parse the input grammar into a one symbol_list structure. Each |
1308| rule is represented by a sequence of symbols: the left hand side |
1309| followed by the contents of the right hand side, followed by a |
1310| null pointer instead of a symbol to terminate the rule. The next |
1311| symbol is the lhs of the following rule. |
1312| |
1313| All guards and actions are copied out to the appropriate files, |
1314| labelled by the rule number they apply to. |
1315`------------------------------------------------------------------*/
1ff442ca 1316
4a120d45 1317static void
118fb205 1318readgram (void)
1ff442ca 1319{
a70083a3
AD
1320 int t;
1321 bucket *lhs = NULL;
1322 symbol_list *p;
1323 symbol_list *p1;
1324 bucket *bp;
1ff442ca 1325
ff4a34be
AD
1326 /* Points to first symbol_list of current rule. its symbol is the
1327 lhs of the rule. */
1328 symbol_list *crule;
1329 /* Points to the symbol_list preceding crule. */
1330 symbol_list *crule1;
1ff442ca
NF
1331
1332 p1 = NULL;
1333
a70083a3 1334 t = lex ();
1ff442ca
NF
1335
1336 while (t != TWO_PERCENTS && t != ENDFILE)
1337 {
1338 if (t == IDENTIFIER || t == BAR)
1339 {
89cab50d 1340 int action_flag = 0;
ff4a34be
AD
1341 /* Number of symbols in rhs of this rule so far */
1342 int rulelength = 0;
1ff442ca
NF
1343 int xactions = 0; /* JF for error checking */
1344 bucket *first_rhs = 0;
1345
1346 if (t == IDENTIFIER)
1347 {
1348 lhs = symval;
943819bf
RS
1349
1350 if (!start_flag)
1351 {
1352 startval = lhs;
1353 start_flag = 1;
1354 }
a083fbbf 1355
a70083a3 1356 t = lex ();
1ff442ca 1357 if (t != COLON)
943819bf 1358 {
a0f6b076 1359 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1360 unlex (t);
943819bf 1361 }
1ff442ca
NF
1362 }
1363
943819bf 1364 if (nrules == 0 && t == BAR)
1ff442ca 1365 {
a0f6b076 1366 complain (_("grammar starts with vertical bar"));
943819bf 1367 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1368 }
1ff442ca
NF
1369 /* start a new rule and record its lhs. */
1370
1371 nrules++;
1372 nitems++;
1373
1374 record_rule_line ();
1375
d7913476 1376 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1377 p->sym = lhs;
1378
1379 crule1 = p1;
1380 if (p1)
1381 p1->next = p;
1382 else
1383 grammar = p;
1384
1385 p1 = p;
1386 crule = p;
1387
1388 /* mark the rule's lhs as a nonterminal if not already so. */
1389
d7020c20 1390 if (lhs->class == unknown_sym)
1ff442ca 1391 {
d7020c20 1392 lhs->class = nterm_sym;
1ff442ca
NF
1393 lhs->value = nvars;
1394 nvars++;
1395 }
d7020c20 1396 else if (lhs->class == token_sym)
a0f6b076 1397 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1398
1399 /* read the rhs of the rule. */
1400
1401 for (;;)
1402 {
a70083a3 1403 t = lex ();
943819bf
RS
1404 if (t == PREC)
1405 {
a70083a3 1406 t = lex ();
943819bf 1407 crule->ruleprec = symval;
a70083a3 1408 t = lex ();
943819bf 1409 }
1ff442ca 1410
a70083a3
AD
1411 if (!(t == IDENTIFIER || t == LEFT_CURLY))
1412 break;
1ff442ca
NF
1413
1414 /* If next token is an identifier, see if a colon follows it.
a70083a3 1415 If one does, exit this rule now. */
1ff442ca
NF
1416 if (t == IDENTIFIER)
1417 {
a70083a3
AD
1418 bucket *ssave;
1419 int t1;
1ff442ca
NF
1420
1421 ssave = symval;
a70083a3
AD
1422 t1 = lex ();
1423 unlex (t1);
1ff442ca 1424 symval = ssave;
a70083a3
AD
1425 if (t1 == COLON)
1426 break;
1ff442ca 1427
a70083a3 1428 if (!first_rhs) /* JF */
1ff442ca
NF
1429 first_rhs = symval;
1430 /* Not followed by colon =>
1431 process as part of this rule's rhs. */
1432 }
1433
1434 /* If we just passed an action, that action was in the middle
a70083a3
AD
1435 of a rule, so make a dummy rule to reduce it to a
1436 non-terminal. */
89cab50d 1437 if (action_flag)
1ff442ca 1438 {
a70083a3 1439 bucket *sdummy;
1ff442ca 1440
f282676b
AD
1441 /* Since the action was written out with this rule's
1442 number, we must give the new rule this number by
1443 inserting the new rule before it. */
1ff442ca
NF
1444
1445 /* Make a dummy nonterminal, a gensym. */
a70083a3 1446 sdummy = gensym ();
1ff442ca
NF
1447
1448 /* Make a new rule, whose body is empty,
1449 before the current one, so that the action
1450 just read can belong to it. */
1451 nrules++;
1452 nitems++;
1453 record_rule_line ();
d7913476 1454 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1455 if (crule1)
1456 crule1->next = p;
a70083a3
AD
1457 else
1458 grammar = p;
1ff442ca 1459 p->sym = sdummy;
d7913476 1460 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1461 p->next = crule1;
1462 crule1->next = crule;
1463
f282676b
AD
1464 /* Insert the dummy generated by that rule into this
1465 rule. */
1ff442ca 1466 nitems++;
d7913476 1467 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1468 p->sym = sdummy;
1469 p1->next = p;
1470 p1 = p;
1471
89cab50d 1472 action_flag = 0;
1ff442ca
NF
1473 }
1474
1475 if (t == IDENTIFIER)
1476 {
1477 nitems++;
d7913476 1478 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1479 p->sym = symval;
1480 p1->next = p;
1481 p1 = p;
1482 }
a70083a3 1483 else /* handle an action. */
1ff442ca 1484 {
a70083a3 1485 copy_action (crule, rulelength);
89cab50d 1486 action_flag = 1;
1ff442ca
NF
1487 xactions++; /* JF */
1488 }
1489 rulelength++;
a70083a3 1490 } /* end of read rhs of rule */
1ff442ca
NF
1491
1492 /* Put an empty link in the list to mark the end of this rule */
d7913476 1493 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1494 p1->next = p;
1495 p1 = p;
1496
1497 if (t == PREC)
1498 {
a0f6b076 1499 complain (_("two @prec's in a row"));
a70083a3 1500 t = lex ();
1ff442ca 1501 crule->ruleprec = symval;
a70083a3 1502 t = lex ();
1ff442ca
NF
1503 }
1504 if (t == GUARD)
1505 {
a70083a3 1506 if (!semantic_parser)
ff4a34be 1507 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1508
a70083a3
AD
1509 copy_guard (crule, rulelength);
1510 t = lex ();
1ff442ca
NF
1511 }
1512 else if (t == LEFT_CURLY)
1513 {
a70083a3 1514 /* This case never occurs -wjh */
89cab50d 1515 if (action_flag)
a0f6b076 1516 complain (_("two actions at end of one rule"));
a70083a3 1517 copy_action (crule, rulelength);
89cab50d 1518 action_flag = 1;
943819bf 1519 xactions++; /* -wjh */
a70083a3 1520 t = lex ();
1ff442ca 1521 }
a0f6b076 1522 /* If $$ is being set in default way, report if any type
6666f98f
AD
1523 mismatch. */
1524 else if (!xactions
a70083a3 1525 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1526 {
6666f98f
AD
1527 if (lhs->type_name == 0
1528 || first_rhs->type_name == 0
a70083a3 1529 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1530 complain (_("type clash (`%s' `%s') on default action"),
1531 lhs->type_name ? lhs->type_name : "",
a70083a3 1532 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1533 }
1534 /* Warn if there is no default for $$ but we need one. */
1535 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1536 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca 1537 if (t == SEMICOLON)
a70083a3 1538 t = lex ();
a083fbbf 1539 }
943819bf 1540#if 0
a70083a3 1541 /* these things can appear as alternatives to rules. */
943819bf
RS
1542/* NO, they cannot.
1543 a) none of the documentation allows them
1544 b) most of them scan forward until finding a next %
1545 thus they may swallow lots of intervening rules
1546*/
1ff442ca
NF
1547 else if (t == TOKEN)
1548 {
d7020c20 1549 parse_token_decl (token_sym, nterm_sym);
a70083a3 1550 t = lex ();
1ff442ca
NF
1551 }
1552 else if (t == NTERM)
1553 {
d7020c20 1554 parse_token_decl (nterm_sym, token_sym);
a70083a3 1555 t = lex ();
1ff442ca
NF
1556 }
1557 else if (t == TYPE)
1558 {
a70083a3 1559 t = get_type ();
1ff442ca
NF
1560 }
1561 else if (t == UNION)
1562 {
a70083a3
AD
1563 parse_union_decl ();
1564 t = lex ();
1ff442ca
NF
1565 }
1566 else if (t == EXPECT)
1567 {
a70083a3
AD
1568 parse_expect_decl ();
1569 t = lex ();
1ff442ca
NF
1570 }
1571 else if (t == START)
1572 {
a70083a3
AD
1573 parse_start_decl ();
1574 t = lex ();
1ff442ca 1575 }
943819bf
RS
1576#endif
1577
1ff442ca 1578 else
943819bf 1579 {
a0f6b076 1580 complain (_("invalid input: %s"), token_buffer);
a70083a3 1581 t = lex ();
943819bf 1582 }
1ff442ca
NF
1583 }
1584
943819bf
RS
1585 /* grammar has been read. Do some checking */
1586
1ff442ca 1587 if (nsyms > MAXSHORT)
a0f6b076
AD
1588 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1589 MAXSHORT);
1ff442ca 1590 if (nrules == 0)
a0f6b076 1591 fatal (_("no rules in the input grammar"));
1ff442ca 1592
ff4a34be
AD
1593 /* JF put out same default YYSTYPE as YACC does */
1594 if (typed == 0
1ff442ca
NF
1595 && !value_components_used)
1596 {
1597 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
a70083a3
AD
1598 but it seems better to be consistent.
1599 Most programs should declare their own type anyway. */
dd60faec
AD
1600 obstack_grow_literal_string (&attrs_obstack,
1601 "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
896fe5c1
AD
1602 if (defines_flag)
1603 obstack_grow_literal_string (&defines_obstack, "\
1604#ifndef YYSTYPE\n\
1605# define YYSTYPE int\n\
1606#endif\n");
1ff442ca
NF
1607 }
1608
1609 /* Report any undefined symbols and consider them nonterminals. */
1610
1611 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1612 if (bp->class == unknown_sym)
1ff442ca 1613 {
a70083a3
AD
1614 complain (_
1615 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1616 bp->tag);
d7020c20 1617 bp->class = nterm_sym;
1ff442ca
NF
1618 bp->value = nvars++;
1619 }
1620
1621 ntokens = nsyms - nvars;
1622}
a70083a3
AD
1623\f
1624/*--------------------------------------------------------------.
1625| For named tokens, but not literal ones, define the name. The |
1626| value is the user token number. |
1627`--------------------------------------------------------------*/
1ff442ca 1628
4a120d45 1629static void
896fe5c1 1630output_token_defines (struct obstack *oout)
1ff442ca 1631{
a70083a3
AD
1632 bucket *bp;
1633 char *cp, *symbol;
1634 char c;
1ff442ca 1635
a70083a3 1636 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1637 {
a70083a3
AD
1638 symbol = bp->tag; /* get symbol */
1639
1640 if (bp->value >= ntokens)
1641 continue;
1642 if (bp->user_token_number == SALIAS)
1643 continue;
1644 if ('\'' == *symbol)
1645 continue; /* skip literal character */
1646 if (bp == errtoken)
1647 continue; /* skip error token */
1648 if ('\"' == *symbol)
1ff442ca 1649 {
a70083a3
AD
1650 /* use literal string only if given a symbol with an alias */
1651 if (bp->alias)
1652 symbol = bp->alias->tag;
1653 else
1654 continue;
1655 }
1ff442ca 1656
a70083a3
AD
1657 /* Don't #define nonliteral tokens whose names contain periods. */
1658 cp = symbol;
1659 while ((c = *cp++) && c != '.');
1660 if (c != '\0')
1661 continue;
1ff442ca 1662
896fe5c1
AD
1663 obstack_fgrow2 (oout, "#define\t%s\t%d\n",
1664 symbol,
1665 ((translations && !raw_flag)
1666 ? bp->user_token_number : bp->value));
a70083a3 1667 if (semantic_parser)
896fe5c1 1668 obstack_fgrow2 (oout, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca 1669 }
a70083a3 1670
896fe5c1 1671 obstack_1grow (oout, '\n');
1ff442ca 1672}
1ff442ca
NF
1673
1674
a70083a3
AD
1675/*------------------------------------------------------------------.
1676| Assign symbol numbers, and write definition of token names into |
b2ca4022 1677| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1678| of symbols. |
1679`------------------------------------------------------------------*/
1ff442ca 1680
4a120d45 1681static void
118fb205 1682packsymbols (void)
1ff442ca 1683{
a70083a3
AD
1684 bucket *bp;
1685 int tokno = 1;
1686 int i;
1687 int last_user_token_number;
4a120d45 1688 static char DOLLAR[] = "$";
1ff442ca
NF
1689
1690 /* int lossage = 0; JF set but not used */
1691
d7913476 1692 tags = XCALLOC (char *, nsyms + 1);
4a120d45 1693 tags[0] = DOLLAR;
d7913476 1694 user_toknums = XCALLOC (short, nsyms + 1);
943819bf 1695 user_toknums[0] = 0;
1ff442ca 1696
d7913476
AD
1697 sprec = XCALLOC (short, nsyms);
1698 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1699
1700 max_user_token_number = 256;
1701 last_user_token_number = 256;
1702
1703 for (bp = firstsymbol; bp; bp = bp->next)
1704 {
d7020c20 1705 if (bp->class == nterm_sym)
1ff442ca
NF
1706 {
1707 bp->value += ntokens;
1708 }
943819bf
RS
1709 else if (bp->alias)
1710 {
0a6384c4
AD
1711 /* this symbol and its alias are a single token defn.
1712 allocate a tokno, and assign to both check agreement of
1713 ->prec and ->assoc fields and make both the same */
1714 if (bp->value == 0)
1715 bp->value = bp->alias->value = tokno++;
943819bf 1716
0a6384c4
AD
1717 if (bp->prec != bp->alias->prec)
1718 {
1719 if (bp->prec != 0 && bp->alias->prec != 0
1720 && bp->user_token_number == SALIAS)
a0f6b076
AD
1721 complain (_("conflicting precedences for %s and %s"),
1722 bp->tag, bp->alias->tag);
0a6384c4
AD
1723 if (bp->prec != 0)
1724 bp->alias->prec = bp->prec;
1725 else
1726 bp->prec = bp->alias->prec;
1727 }
943819bf 1728
0a6384c4
AD
1729 if (bp->assoc != bp->alias->assoc)
1730 {
a0f6b076
AD
1731 if (bp->assoc != 0 && bp->alias->assoc != 0
1732 && bp->user_token_number == SALIAS)
1733 complain (_("conflicting assoc values for %s and %s"),
1734 bp->tag, bp->alias->tag);
1735 if (bp->assoc != 0)
1736 bp->alias->assoc = bp->assoc;
1737 else
1738 bp->assoc = bp->alias->assoc;
1739 }
0a6384c4
AD
1740
1741 if (bp->user_token_number == SALIAS)
a70083a3 1742 continue; /* do not do processing below for SALIASs */
943819bf 1743
a70083a3 1744 }
d7020c20 1745 else /* bp->class == token_sym */
943819bf
RS
1746 {
1747 bp->value = tokno++;
1748 }
1749
d7020c20 1750 if (bp->class == token_sym)
1ff442ca
NF
1751 {
1752 if (translations && !(bp->user_token_number))
1753 bp->user_token_number = ++last_user_token_number;
1754 if (bp->user_token_number > max_user_token_number)
1755 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1756 }
1757
1758 tags[bp->value] = bp->tag;
943819bf 1759 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1760 sprec[bp->value] = bp->prec;
1761 sassoc[bp->value] = bp->assoc;
1762
1763 }
1764
1765 if (translations)
1766 {
a70083a3 1767 int j;
1ff442ca 1768
d7913476 1769 token_translations = XCALLOC (short, max_user_token_number + 1);
1ff442ca 1770
0a6384c4 1771 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1772 token number for $undefined., which represents all invalid
1773 inputs. */
4a120d45 1774 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1775 token_translations[j] = 2;
1ff442ca 1776
943819bf 1777 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1778 {
1779 if (bp->value >= ntokens)
1780 continue; /* non-terminal */
1781 if (bp->user_token_number == SALIAS)
0a6384c4 1782 continue;
a70083a3 1783 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1784 complain (_("tokens %s and %s both assigned number %d"),
1785 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1786 bp->tag, bp->user_token_number);
1787 token_translations[bp->user_token_number] = bp->value;
1788 }
1ff442ca
NF
1789 }
1790
1791 error_token_number = errtoken->value;
1792
89cab50d 1793 if (!no_parser_flag)
896fe5c1 1794 output_token_defines (&table_obstack);
1ff442ca 1795
d7020c20 1796 if (startval->class == unknown_sym)
a0f6b076 1797 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1798 else if (startval->class == token_sym)
a0f6b076 1799 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1800
1801 start_symbol = startval->value;
1802
89cab50d 1803 if (defines_flag)
1ff442ca 1804 {
896fe5c1 1805 output_token_defines (&defines_obstack);
1ff442ca
NF
1806
1807 if (!pure_parser)
1808 {
1809 if (spec_name_prefix)
896fe5c1
AD
1810 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1811 spec_name_prefix);
1ff442ca 1812 else
896fe5c1
AD
1813 obstack_grow_literal_string (&defines_obstack,
1814 "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1815 }
1816
1817 if (semantic_parser)
1818 for (i = ntokens; i < nsyms; i++)
1819 {
1820 /* don't make these for dummy nonterminals made by gensym. */
1821 if (*tags[i] != '@')
896fe5c1
AD
1822 obstack_fgrow2 (&defines_obstack,
1823 "#define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1824 }
1825#if 0
1826 /* `fdefines' is now a temporary file, so we need to copy its
1827 contents in `done', so we can't close it here. */
a70083a3 1828 fclose (fdefines);
1ff442ca
NF
1829 fdefines = NULL;
1830#endif
1831 }
1832}
a083fbbf 1833
1ff442ca 1834
a70083a3
AD
1835/*---------------------------------------------------------------.
1836| Convert the rules into the representation using RRHS, RLHS and |
1837| RITEMS. |
1838`---------------------------------------------------------------*/
1ff442ca 1839
4a120d45 1840static void
118fb205 1841packgram (void)
1ff442ca 1842{
a70083a3
AD
1843 int itemno;
1844 int ruleno;
1845 symbol_list *p;
1ff442ca
NF
1846
1847 bucket *ruleprec;
1848
d7913476
AD
1849 ritem = XCALLOC (short, nitems + 1);
1850 rlhs = XCALLOC (short, nrules) - 1;
1851 rrhs = XCALLOC (short, nrules) - 1;
1852 rprec = XCALLOC (short, nrules) - 1;
1853 rprecsym = XCALLOC (short, nrules) - 1;
1854 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1855
1856 itemno = 0;
1857 ruleno = 1;
1858
1859 p = grammar;
1860 while (p)
1861 {
1862 rlhs[ruleno] = p->sym->value;
1863 rrhs[ruleno] = itemno;
1864 ruleprec = p->ruleprec;
1865
1866 p = p->next;
1867 while (p && p->sym)
1868 {
1869 ritem[itemno++] = p->sym->value;
1870 /* A rule gets by default the precedence and associativity
1871 of the last token in it. */
d7020c20 1872 if (p->sym->class == token_sym)
1ff442ca
NF
1873 {
1874 rprec[ruleno] = p->sym->prec;
1875 rassoc[ruleno] = p->sym->assoc;
1876 }
a70083a3
AD
1877 if (p)
1878 p = p->next;
1ff442ca
NF
1879 }
1880
1881 /* If this rule has a %prec,
a70083a3 1882 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1883 if (ruleprec)
1884 {
a70083a3
AD
1885 rprec[ruleno] = ruleprec->prec;
1886 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1887 rprecsym[ruleno] = ruleprec->value;
1888 }
1889
1890 ritem[itemno++] = -ruleno;
1891 ruleno++;
1892
a70083a3
AD
1893 if (p)
1894 p = p->next;
1ff442ca
NF
1895 }
1896
1897 ritem[itemno] = 0;
1898}
a70083a3
AD
1899\f
1900/*-------------------------------------------------------------------.
1901| Read in the grammar specification and record it in the format |
1902| described in gram.h. All guards are copied into the FGUARD file |
8c7ebe49
AD
1903| and all actions into ACTION_OBSTACK, in each case forming the body |
1904| of a C function (YYGUARD or YYACTION) which contains a switch |
1905| statement to decide which guard or action to execute. |
a70083a3
AD
1906`-------------------------------------------------------------------*/
1907
1908void
1909reader (void)
1910{
1911 start_flag = 0;
1912 startval = NULL; /* start symbol not specified yet. */
1913
1914#if 0
1915 /* initially assume token number translation not needed. */
1916 translations = 0;
1917#endif
1918 /* Nowadays translations is always set to 1, since we give `error' a
1919 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1920 */
1921 translations = 1;
1922
1923 nsyms = 1;
1924 nvars = 0;
1925 nrules = 0;
1926 nitems = 0;
1927 rline_allocated = 10;
d7913476 1928 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1929
1930 typed = 0;
1931 lastprec = 0;
1932
1933 gensym_count = 0;
1934
1935 semantic_parser = 0;
1936 pure_parser = 0;
a70083a3
AD
1937
1938 grammar = NULL;
1939
1940 init_lex ();
1941 lineno = 1;
1942
1943 /* Initialize the symbol table. */
1944 tabinit ();
1945 /* Construct the error token */
1946 errtoken = getsym ("error");
d7020c20 1947 errtoken->class = token_sym;
a70083a3
AD
1948 errtoken->user_token_number = 256; /* Value specified by POSIX. */
1949 /* Construct a token that represents all undefined literal tokens.
1950 It is always token number 2. */
1951 undeftoken = getsym ("$undefined.");
d7020c20 1952 undeftoken->class = token_sym;
a70083a3
AD
1953 undeftoken->user_token_number = 2;
1954
896fe5c1
AD
1955 /* Read the declaration section. Copy %{ ... %} groups to
1956 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1957 etc. found there. */
1958 obstack_1grow (&table_obstack, '\n');
1959 obstack_fgrow3 (&table_obstack, "\
a70083a3
AD
1960/* %s, made from %s\n\
1961 by GNU bison %s. */\n\
896fe5c1
AD
1962\n",
1963 no_parser_flag ? "Bison-generated parse tables" : "A Bison parser",
1964 infile, VERSION);
a70083a3 1965
896fe5c1
AD
1966 obstack_grow_literal_string (&table_obstack,
1967 "#define YYBISON 1 /* Identify Bison output. */\n\n");
a70083a3
AD
1968 read_declarations ();
1969 /* Start writing the guard and action files, if they are needed. */
1970 output_headers ();
1971 /* Read in the grammar, build grammar in list form. Write out
1972 guards and actions. */
1973 readgram ();
1974 /* Now we know whether we need the line-number stack. If we do,
1975 write its type into the .tab.h file. */
896fe5c1
AD
1976 if (defines_flag)
1977 reader_output_yylsp (&defines_obstack);
a70083a3
AD
1978 /* Write closing delimiters for actions and guards. */
1979 output_trailers ();
89cab50d 1980 if (locations_flag)
5af1f549 1981 obstack_grow_literal_string (&table_obstack, "#define YYLSP_NEEDED 1\n\n");
a70083a3
AD
1982 /* Assign the symbols their symbol numbers. Write #defines for the
1983 token symbols into FDEFINES if requested. */
1984 packsymbols ();
1985 /* Convert the grammar into the format described in gram.h. */
1986 packgram ();
1987 /* Free the symbol table data structure since symbols are now all
1988 referred to by symbol number. */
1989 free_symtab ();
1990}
1991
d7020c20 1992
3abcd459
AD
1993/*------------------------------------------------------------------.
1994| Define YYLTYPE. Cannot be in the skeleton since we might have to |
1995| output it in the headers if --defines is used. |
1996`------------------------------------------------------------------*/
1997
a70083a3 1998void
896fe5c1 1999reader_output_yylsp (struct obstack *oout)
a70083a3 2000{
89cab50d 2001 if (locations_flag)
896fe5c1 2002 obstack_grow_literal_string (oout, "\
d7020c20
AD
2003\n\
2004#ifndef YYLTYPE\n\
89cab50d
AD
2005typedef struct yyltype\n\
2006{\n\
89cab50d 2007 int first_line;\n\
3abcd459 2008 int first_column;\n\
d7020c20 2009\n\
89cab50d
AD
2010 int last_line;\n\
2011 int last_column;\n\
89cab50d 2012} yyltype;\n\
d7020c20 2013\n\
89cab50d 2014# define YYLTYPE yyltype\n\
d7020c20 2015#endif\n\
896fe5c1 2016\n");
a70083a3 2017}