]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/lex.c (read_typename): New function.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
a70083a3
AD
2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000
3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "quote.h"
1ff442ca 35
1ff442ca 36/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 37static int rline_allocated;
1ff442ca 38
a70083a3
AD
39typedef struct symbol_list
40{
41 struct symbol_list *next;
42 bucket *sym;
43 bucket *ruleprec;
44}
45symbol_list;
118fb205 46
1ff442ca 47int lineno;
1ff442ca 48char **tags;
d019d655 49short *user_toknums;
4a120d45
JT
50static symbol_list *grammar;
51static int start_flag;
52static bucket *startval;
1ff442ca
NF
53
54/* Nonzero if components of semantic values are used, implying
55 they must be unions. */
56static int value_components_used;
57
d7020c20
AD
58/* Nonzero if %union has been seen. */
59static int typed;
1ff442ca 60
d7020c20
AD
61/* Incremented for each %left, %right or %nonassoc seen */
62static int lastprec;
1ff442ca 63
d7020c20
AD
64/* Incremented for each generated symbol */
65static int gensym_count;
1ff442ca
NF
66
67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
0d533154 69\f
a70083a3 70
0d533154
AD
71/*===================\
72| Low level lexing. |
73\===================*/
943819bf
RS
74
75static void
118fb205 76skip_to_char (int target)
943819bf
RS
77{
78 int c;
79 if (target == '\n')
a0f6b076 80 complain (_(" Skipping to next \\n"));
943819bf 81 else
a0f6b076 82 complain (_(" Skipping to next %c"), target);
943819bf
RS
83
84 do
0d533154 85 c = skip_white_space ();
943819bf 86 while (c != target && c != EOF);
a083fbbf 87 if (c != EOF)
0d533154 88 ungetc (c, finput);
943819bf
RS
89}
90
91
0d533154
AD
/* Read a decimal integer, optionally preceded by a single `-', from
   STREAM and return its value.  The first character that is not part
   of the number is pushed back onto STREAM.

   If no digit follows (including a lone `-'), the result is 0.  A
   digit run too large for an `int' saturates at INT_MAX (or -INT_MAX
   when negated) instead of overflowing, which would be undefined
   behavior; the excess digits are still consumed.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit fits in an int iff n <= (INT_MAX - digit) / 10.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;
      c = getc (stream);
    }

  /* Nothing to push back at end of input.  */
  if (c != EOF)
    ungetc (c, stream);

  return sign * n;
}
119\f
79282c5a
AD
120/*--------------------------------------------------------------.
121| Get the data type (alternative in the union) of the value for |
122| symbol N in rule RULE. |
123`--------------------------------------------------------------*/
124
125static char *
126get_type_name (int n, symbol_list * rule)
127{
128 int i;
129 symbol_list *rp;
130
131 if (n < 0)
132 {
133 complain (_("invalid $ value"));
134 return NULL;
135 }
136
137 rp = rule;
138 i = 0;
139
140 while (i < n)
141 {
142 rp = rp->next;
143 if (rp == NULL || rp->sym == NULL)
144 {
145 complain (_("invalid $ value"));
146 return NULL;
147 }
148 i++;
149 }
150
151 return rp->sym->type_name;
152}
153\f
0d533154
AD
154/*-------------------------------------------------------------------.
155| Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of |
156| the string (either ' or "). |
157`-------------------------------------------------------------------*/
ae3c3164
AD
158
159static inline void
4a120d45 160copy_string (FILE *fin, FILE *fout, int match)
ae3c3164
AD
161{
162 int c;
163
4a120d45
JT
164 putc (match, fout);
165 c = getc (fin);
ae3c3164
AD
166
167 while (c != match)
168 {
169 if (c == EOF)
170 fatal (_("unterminated string at end of file"));
171 if (c == '\n')
172 {
a0f6b076 173 complain (_("unterminated string"));
4a120d45 174 ungetc (c, fin);
ae3c3164
AD
175 c = match; /* invent terminator */
176 continue;
177 }
178
a70083a3 179 putc (c, fout);
ae3c3164
AD
180
181 if (c == '\\')
182 {
4a120d45 183 c = getc (fin);
ae3c3164
AD
184 if (c == EOF)
185 fatal (_("unterminated string at end of file"));
4a120d45 186 putc (c, fout);
ae3c3164
AD
187 if (c == '\n')
188 lineno++;
189 }
190
a70083a3 191 c = getc (fin);
ae3c3164
AD
192 }
193
a70083a3 194 putc (c, fout);
ae3c3164
AD
195}
196
197
550a72a3
AD
198/*----------------------------------------------------------------.
199| Dump the wannabee comment from IN to OUT1 and OUT2. In fact we |
200| just saw a `/', which might or might not be a comment. In any |
201| case, copy what we saw. |
202| |
203| OUT2 might be NULL. |
204`----------------------------------------------------------------*/
ae3c3164
AD
205
206static inline void
550a72a3 207copy_comment2 (FILE *fin, FILE *out1, FILE *out2)
ae3c3164
AD
208{
209 int cplus_comment;
a70083a3 210 int ended;
550a72a3
AD
211 int c;
212
213 /* We read a `/', output it. */
214 putc ('/', out1);
215 if (out2)
216 putc ('/', out2);
217
218 switch ((c = getc (fin)))
219 {
220 case '/':
221 cplus_comment = 1;
222 break;
223 case '*':
224 cplus_comment = 0;
225 break;
226 default:
227 ungetc (c, fin);
228 return;
229 }
ae3c3164 230
27821bff
AD
231 putc (c, out1);
232 if (out2)
233 putc (c, out2);
550a72a3 234 c = getc (fin);
ae3c3164
AD
235
236 ended = 0;
237 while (!ended)
238 {
239 if (!cplus_comment && c == '*')
240 {
241 while (c == '*')
242 {
27821bff
AD
243 putc (c, out1);
244 if (out2)
245 putc (c, out2);
550a72a3 246 c = getc (fin);
ae3c3164
AD
247 }
248
249 if (c == '/')
250 {
a70083a3 251 putc (c, out1);
27821bff 252 if (out2)
a70083a3 253 putc (c, out2);
ae3c3164
AD
254 ended = 1;
255 }
256 }
257 else if (c == '\n')
258 {
259 lineno++;
27821bff
AD
260 putc (c, out1);
261 if (out2)
262 putc (c, out2);
ae3c3164
AD
263 if (cplus_comment)
264 ended = 1;
265 else
550a72a3 266 c = getc (fin);
ae3c3164
AD
267 }
268 else if (c == EOF)
269 fatal (_("unterminated comment"));
270 else
271 {
27821bff
AD
272 putc (c, out1);
273 if (out2)
274 putc (c, out2);
550a72a3 275 c = getc (fin);
ae3c3164
AD
276 }
277 }
278}
279
280
550a72a3
AD
/* Single-output form of copy_comment2: the caller has just read a
   `/' from FIN; copy it, and the comment it may start, to FOUT.  */

static inline void
copy_comment (FILE *fin, FILE *fout)
{
  FILE *no_second_output = NULL;

  copy_comment2 (fin, fout, no_second_output);
}
291
292
a70083a3
AD
293/*-----------------------------------------------------------------.
294| FIN is pointing to a location (i.e., a `@'). Output to FOUT a |
295| reference to this location. STACK_OFFSET is the number of values |
296| in the current rule so far, which says where to find `$0' with |
297| respect to the top of the stack. |
298`-----------------------------------------------------------------*/
1ff442ca 299
a70083a3
AD
300static inline void
301copy_at (FILE *fin, FILE *fout, int stack_offset)
1ff442ca 302{
a70083a3 303 int c;
1ff442ca 304
a70083a3
AD
305 c = getc (fin);
306 if (c == '$')
1ff442ca 307 {
a70083a3 308 fprintf (fout, "yyloc");
89cab50d 309 locations_flag = 1;
a70083a3
AD
310 }
311 else if (isdigit (c) || c == '-')
312 {
313 int n;
1ff442ca 314
a70083a3
AD
315 ungetc (c, fin);
316 n = read_signed_integer (fin);
943819bf 317
a70083a3 318 fprintf (fout, "yylsp[%d]", n - stack_offset);
89cab50d 319 locations_flag = 1;
1ff442ca 320 }
a70083a3 321 else
ff4a34be
AD
322 {
323 char buf[] = "@c";
324 buf[1] = c;
325 complain (_("%s is invalid"), quote (buf));
326 }
1ff442ca 327}
79282c5a
AD
328
329
330/*-------------------------------------------------------------------.
331| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
332| |
333| Possible inputs: $[<TYPENAME>]($|integer) |
334| |
335| Output to FOUT a reference to this semantic value. STACK_OFFSET is |
336| the number of values in the current rule so far, which says where |
337| to find `$0' with respect to the top of the stack. |
338`-------------------------------------------------------------------*/
339
340static inline void
341copy_dollar (FILE *fin, FILE *fout,
342 symbol_list *rule, int stack_offset)
343{
344 int c = getc (fin);
345 char *type_name = NULL;
346
f282676b 347 /* Get the type name if explicit. */
79282c5a
AD
348 if (c == '<')
349 {
f282676b 350 read_type_name (fin);
79282c5a
AD
351 type_name = token_buffer;
352 value_components_used = 1;
79282c5a
AD
353 c = getc (fin);
354 }
355
356 if (c == '$')
357 {
358 fprintf (fout, "yyval");
359 if (!type_name)
360 type_name = get_type_name (0, rule);
361 if (type_name)
362 fprintf (fout, ".%s", type_name);
363 if (!type_name && typed)
364 complain (_("$$ of `%s' has no declared type"),
365 rule->sym->tag);
366 }
367 else if (isdigit (c) || c == '-')
368 {
369 int n;
370 ungetc (c, fin);
371 n = read_signed_integer (fin);
372
373 if (!type_name && n > 0)
374 type_name = get_type_name (n, rule);
375
376 fprintf (fout, "yyvsp[%d]", n - stack_offset);
377 if (type_name)
378 fprintf (fout, ".%s", type_name);
379 if (!type_name && typed)
380 complain (_("$%d of `%s' has no declared type"),
381 n, rule->sym->tag);
382 }
383 else
384 {
385 char buf[] = "$c";
386 buf[1] = c;
387 complain (_("%s is invalid"), quote (buf));
388 }
389}
a70083a3
AD
390\f
391/*-------------------------------------------------------------------.
392| Copy the contents of a `%{ ... %}' into the definitions file. The |
393| `%{' has already been read. Return after reading the `%}'. |
394`-------------------------------------------------------------------*/
1ff442ca 395
4a120d45 396static void
118fb205 397copy_definition (void)
1ff442ca 398{
a70083a3 399 int c;
ae3c3164 400 /* -1 while reading a character if prev char was %. */
a70083a3 401 int after_percent;
1ff442ca 402
89cab50d 403 if (!no_lines_flag)
a70083a3 404 fprintf (fattrs, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
405
406 after_percent = 0;
407
ae3c3164 408 c = getc (finput);
1ff442ca
NF
409
410 for (;;)
411 {
412 switch (c)
413 {
414 case '\n':
a70083a3 415 putc (c, fattrs);
1ff442ca
NF
416 lineno++;
417 break;
418
419 case '%':
a70083a3 420 after_percent = -1;
1ff442ca 421 break;
a083fbbf 422
1ff442ca
NF
423 case '\'':
424 case '"':
ae3c3164 425 copy_string (finput, fattrs, c);
1ff442ca
NF
426 break;
427
428 case '/':
550a72a3 429 copy_comment (finput, fattrs);
1ff442ca
NF
430 break;
431
432 case EOF:
a70083a3 433 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
434
435 default:
a70083a3 436 putc (c, fattrs);
1ff442ca
NF
437 }
438
a70083a3 439 c = getc (finput);
1ff442ca
NF
440
441 if (after_percent)
442 {
443 if (c == '}')
444 return;
a70083a3 445 putc ('%', fattrs);
1ff442ca
NF
446 }
447 after_percent = 0;
448
449 }
450
451}
452
453
d7020c20
AD
454/*-------------------------------------------------------------------.
455| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
456| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
457| are reversed. |
458`-------------------------------------------------------------------*/
1ff442ca 459
4a120d45 460static void
d7020c20 461parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 462{
a70083a3
AD
463 int token = 0;
464 char *typename = 0;
465 struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca 466
1ff442ca
NF
467 for (;;)
468 {
e6011337
JT
469 int tmp_char = ungetc (skip_white_space (), finput);
470
471 if (tmp_char == '%')
1ff442ca 472 return;
e6011337 473 if (tmp_char == EOF)
a0f6b076 474 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 475
a70083a3 476 token = lex ();
1ff442ca 477 if (token == COMMA)
943819bf
RS
478 {
479 symbol = NULL;
480 continue;
481 }
1ff442ca
NF
482 if (token == TYPENAME)
483 {
95e36146 484 typename = xstrdup (token_buffer);
1ff442ca 485 value_components_used = 1;
943819bf
RS
486 symbol = NULL;
487 }
a70083a3 488 else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
943819bf 489 {
8e03724b
AD
490 if (symval->alias)
491 warn (_("symbol `%s' used more than once as a literal string"),
492 symval->tag);
493 else if (symbol->alias)
494 warn (_("symbol `%s' given more than one literal string"),
495 symbol->tag);
496 else
497 {
498 symval->class = token_sym;
499 symval->type_name = typename;
500 symval->user_token_number = symbol->user_token_number;
501 symbol->user_token_number = SALIAS;
502 symval->alias = symbol;
503 symbol->alias = symval;
504 /* symbol and symval combined are only one symbol */
505 nsyms--;
506 }
943819bf 507 translations = 1;
8e03724b 508 symbol = NULL;
1ff442ca
NF
509 }
510 else if (token == IDENTIFIER)
511 {
512 int oldclass = symval->class;
943819bf 513 symbol = symval;
1ff442ca 514
943819bf 515 if (symbol->class == what_is_not)
a0f6b076 516 complain (_("symbol %s redefined"), symbol->tag);
943819bf 517 symbol->class = what_is;
d7020c20 518 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 519 symbol->value = nvars++;
1ff442ca
NF
520
521 if (typename)
522 {
943819bf
RS
523 if (symbol->type_name == NULL)
524 symbol->type_name = typename;
a70083a3 525 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 526 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
527 }
528 }
943819bf 529 else if (symbol && token == NUMBER)
a70083a3 530 {
943819bf 531 symbol->user_token_number = numval;
1ff442ca 532 translations = 1;
a70083a3 533 }
1ff442ca 534 else
943819bf 535 {
a0f6b076 536 complain (_("`%s' is invalid in %s"),
d7020c20 537 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 538 skip_to_char ('%');
943819bf 539 }
1ff442ca
NF
540 }
541
542}
543
1ff442ca 544
d7020c20
AD
545/*------------------------------.
546| Parse what comes after %start |
547`------------------------------*/
1ff442ca 548
4a120d45 549static void
118fb205 550parse_start_decl (void)
1ff442ca
NF
551{
552 if (start_flag)
27821bff
AD
553 complain (_("multiple %s declarations"), "%start");
554 if (lex () != IDENTIFIER)
555 complain (_("invalid %s declaration"), "%start");
943819bf
RS
556 else
557 {
558 start_flag = 1;
559 startval = symval;
560 }
1ff442ca
NF
561}
562
a70083a3
AD
563/*-----------------------------------------------------------.
564| read in a %type declaration and record its information for |
565| get_type_name to access |
566`-----------------------------------------------------------*/
567
568static void
569parse_type_decl (void)
570{
a70083a3
AD
571 char *name;
572
573 if (lex () != TYPENAME)
574 {
575 complain ("%s", _("%type declaration has no <typename>"));
576 skip_to_char ('%');
577 return;
578 }
579
95e36146 580 name = xstrdup (token_buffer);
a70083a3
AD
581
582 for (;;)
583 {
584 int t;
585 int tmp_char = ungetc (skip_white_space (), finput);
586
587 if (tmp_char == '%')
588 return;
589 if (tmp_char == EOF)
590 fatal (_("Premature EOF after %s"), token_buffer);
591
592 t = lex ();
593
594 switch (t)
1ff442ca
NF
595 {
596
597 case COMMA:
598 case SEMICOLON:
599 break;
600
601 case IDENTIFIER:
602 if (symval->type_name == NULL)
603 symval->type_name = name;
a70083a3 604 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 605 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
606
607 break;
608
609 default:
a0f6b076
AD
610 complain (_("invalid %%type declaration due to item: %s"),
611 token_buffer);
a70083a3 612 skip_to_char ('%');
1ff442ca
NF
613 }
614 }
615}
616
617
618
d7020c20
AD
619/*----------------------------------------------------------------.
620| Read in a %left, %right or %nonassoc declaration and record its |
621| information. |
622`----------------------------------------------------------------*/
1ff442ca 623
4a120d45 624static void
d7020c20 625parse_assoc_decl (associativity assoc)
1ff442ca 626{
a70083a3
AD
627 char *name = NULL;
628 int prev = 0;
1ff442ca 629
a70083a3 630 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 631
1ff442ca
NF
632 for (;;)
633 {
a70083a3 634 int t;
e6011337 635 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 636
e6011337 637 if (tmp_char == '%')
1ff442ca 638 return;
e6011337 639 if (tmp_char == EOF)
a0f6b076 640 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 641
a70083a3 642 t = lex ();
1ff442ca
NF
643
644 switch (t)
645 {
1ff442ca 646 case TYPENAME:
95e36146 647 name = xstrdup (token_buffer);
1ff442ca
NF
648 break;
649
650 case COMMA:
651 break;
652
653 case IDENTIFIER:
654 if (symval->prec != 0)
a0f6b076 655 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
656 symval->prec = lastprec;
657 symval->assoc = assoc;
d7020c20 658 if (symval->class == nterm_sym)
a0f6b076 659 complain (_("symbol %s redefined"), symval->tag);
d7020c20 660 symval->class = token_sym;
1ff442ca 661 if (name)
a70083a3 662 { /* record the type, if one is specified */
1ff442ca
NF
663 if (symval->type_name == NULL)
664 symval->type_name = name;
a70083a3 665 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 666 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
667 }
668 break;
669
670 case NUMBER:
671 if (prev == IDENTIFIER)
a70083a3 672 {
1ff442ca
NF
673 symval->user_token_number = numval;
674 translations = 1;
a70083a3
AD
675 }
676 else
677 {
678 complain (_
679 ("invalid text (%s) - number should be after identifier"),
680token_buffer);
681 skip_to_char ('%');
682 }
1ff442ca
NF
683 break;
684
685 case SEMICOLON:
686 return;
687
688 default:
a0f6b076 689 complain (_("unexpected item: %s"), token_buffer);
a70083a3 690 skip_to_char ('%');
1ff442ca
NF
691 }
692
693 prev = t;
694
695 }
696}
697
698
699
d7020c20
AD
700/*-------------------------------------------------------------------.
701| Copy the union declaration into fattrs (and fdefines), where it is |
702| made into the definition of YYSTYPE, the type of elements of the |
703| parser value stack. |
704`-------------------------------------------------------------------*/
1ff442ca 705
4a120d45 706static void
118fb205 707parse_union_decl (void)
1ff442ca 708{
a70083a3
AD
709 int c;
710 int count = 0;
1ff442ca
NF
711
712 if (typed)
27821bff 713 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
714
715 typed = 1;
716
89cab50d 717 if (!no_lines_flag)
27821bff 718 fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
1ff442ca 719 else
27821bff 720 fprintf (fattrs, "\n");
1ff442ca 721
27821bff 722 fprintf (fattrs, "typedef union");
1ff442ca 723 if (fdefines)
27821bff 724 fprintf (fdefines, "typedef union");
1ff442ca 725
27821bff 726 c = getc (finput);
1ff442ca
NF
727
728 while (c != EOF)
729 {
27821bff 730 putc (c, fattrs);
1ff442ca 731 if (fdefines)
27821bff 732 putc (c, fdefines);
1ff442ca
NF
733
734 switch (c)
735 {
736 case '\n':
737 lineno++;
738 break;
739
740 case '/':
550a72a3 741 copy_comment2 (finput, fattrs, fdefines);
1ff442ca
NF
742 break;
743
1ff442ca
NF
744 case '{':
745 count++;
746 break;
747
748 case '}':
749 if (count == 0)
27821bff 750 complain (_("unmatched %s"), "`}'");
1ff442ca 751 count--;
943819bf 752 if (count <= 0)
1ff442ca 753 {
27821bff 754 fprintf (fattrs, " YYSTYPE;\n");
1ff442ca 755 if (fdefines)
27821bff 756 fprintf (fdefines, " YYSTYPE;\n");
1ff442ca 757 /* JF don't choke on trailing semi */
27821bff
AD
758 c = skip_white_space ();
759 if (c != ';')
a70083a3 760 ungetc (c, finput);
1ff442ca
NF
761 return;
762 }
763 }
764
27821bff 765 c = getc (finput);
1ff442ca
NF
766 }
767}
768
d7020c20
AD
769
770/*-------------------------------------------------------.
771| Parse the declaration %expect N which says to expect N |
772| shift-reduce conflicts. |
773`-------------------------------------------------------*/
1ff442ca 774
4a120d45 775static void
118fb205 776parse_expect_decl (void)
1ff442ca 777{
131e2fef 778 int c = skip_white_space ();
1ff442ca
NF
779 ungetc (c, finput);
780
131e2fef 781 if (!isdigit (c))
79282c5a 782 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
783 else
784 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
785}
786
a70083a3
AD
787
788/*-------------------------------------------------------------------.
789| Parse what comes after %thong. the full syntax is |
790| |
791| %thong <type> token number literal |
792| |
793| the <type> or number may be omitted. The number specifies the |
794| user_token_number. |
795| |
796| Two symbols are entered in the table, one for the token symbol and |
797| one for the literal. Both are given the <type>, if any, from the |
798| declaration. The ->user_token_number of the first is SALIAS and |
799| the ->user_token_number of the second is set to the number, if |
800| any, from the declaration. The two symbols are linked via |
801| pointers in their ->alias fields. |
802| |
803| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
804| only the literal string is retained it is the literal string that |
805| is output to yytname |
806`-------------------------------------------------------------------*/
807
808static void
809parse_thong_decl (void)
7b306f52 810{
a70083a3
AD
811 int token;
812 struct bucket *symbol;
813 char *typename = 0;
95e36146 814 int usrtoknum;
7b306f52 815
a70083a3
AD
816 translations = 1;
817 token = lex (); /* fetch typename or first token */
818 if (token == TYPENAME)
7b306f52 819 {
95e36146 820 typename = xstrdup (token_buffer);
a70083a3
AD
821 value_components_used = 1;
822 token = lex (); /* fetch first token */
7b306f52 823 }
7b306f52 824
a70083a3 825 /* process first token */
7b306f52 826
a70083a3
AD
827 if (token != IDENTIFIER)
828 {
829 complain (_("unrecognized item %s, expected an identifier"),
830 token_buffer);
831 skip_to_char ('%');
832 return;
7b306f52 833 }
d7020c20 834 symval->class = token_sym;
a70083a3
AD
835 symval->type_name = typename;
836 symval->user_token_number = SALIAS;
837 symbol = symval;
7b306f52 838
a70083a3 839 token = lex (); /* get number or literal string */
1ff442ca 840
a70083a3 841 if (token == NUMBER)
943819bf 842 {
a70083a3
AD
843 usrtoknum = numval;
844 token = lex (); /* okay, did number, now get literal */
943819bf 845 }
a70083a3
AD
846 else
847 usrtoknum = 0;
1ff442ca 848
a70083a3 849 /* process literal string token */
1ff442ca 850
a70083a3 851 if (token != IDENTIFIER || *symval->tag != '\"')
1ff442ca 852 {
a70083a3
AD
853 complain (_("expected string constant instead of %s"), token_buffer);
854 skip_to_char ('%');
855 return;
1ff442ca 856 }
d7020c20 857 symval->class = token_sym;
a70083a3
AD
858 symval->type_name = typename;
859 symval->user_token_number = usrtoknum;
1ff442ca 860
a70083a3
AD
861 symval->alias = symbol;
862 symbol->alias = symval;
1ff442ca 863
79282c5a
AD
864 /* symbol and symval combined are only one symbol. */
865 nsyms--;
a70083a3 866}
3cef001a 867
d7020c20 868
a70083a3
AD
869/*----------------------------------------------------------------.
870| Read from finput until `%%' is seen. Discard the `%%'. Handle |
871| any `%' declarations, and copy the contents of any `%{ ... %}' |
872| groups to fattrs. |
873`----------------------------------------------------------------*/
1ff442ca 874
4a120d45 875static void
a70083a3 876read_declarations (void)
1ff442ca 877{
a70083a3
AD
878 int c;
879 int tok;
1ff442ca 880
a70083a3 881 for (;;)
1ff442ca 882 {
a70083a3 883 c = skip_white_space ();
1ff442ca 884
a70083a3
AD
885 if (c == '%')
886 {
887 tok = parse_percent_token ();
1ff442ca 888
a70083a3 889 switch (tok)
943819bf 890 {
a70083a3
AD
891 case TWO_PERCENTS:
892 return;
1ff442ca 893
a70083a3
AD
894 case PERCENT_LEFT_CURLY:
895 copy_definition ();
896 break;
1ff442ca 897
a70083a3 898 case TOKEN:
d7020c20 899 parse_token_decl (token_sym, nterm_sym);
a70083a3 900 break;
1ff442ca 901
a70083a3 902 case NTERM:
d7020c20 903 parse_token_decl (nterm_sym, token_sym);
a70083a3 904 break;
1ff442ca 905
a70083a3
AD
906 case TYPE:
907 parse_type_decl ();
908 break;
1ff442ca 909
a70083a3
AD
910 case START:
911 parse_start_decl ();
912 break;
118fb205 913
a70083a3
AD
914 case UNION:
915 parse_union_decl ();
916 break;
1ff442ca 917
a70083a3
AD
918 case EXPECT:
919 parse_expect_decl ();
920 break;
921 case THONG:
922 parse_thong_decl ();
923 break;
d7020c20 924
a70083a3 925 case LEFT:
d7020c20 926 parse_assoc_decl (left_assoc);
a70083a3 927 break;
1ff442ca 928
a70083a3 929 case RIGHT:
d7020c20 930 parse_assoc_decl (right_assoc);
a70083a3 931 break;
1ff442ca 932
a70083a3 933 case NONASSOC:
d7020c20 934 parse_assoc_decl (non_assoc);
a70083a3 935 break;
1ff442ca 936
a70083a3
AD
937 case SEMANTIC_PARSER:
938 if (semantic_parser == 0)
939 {
940 semantic_parser = 1;
941 open_extra_files ();
942 }
943 break;
1ff442ca 944
a70083a3
AD
945 case PURE_PARSER:
946 pure_parser = 1;
947 break;
1ff442ca 948
a70083a3
AD
949 case NOOP:
950 break;
1ff442ca 951
a70083a3
AD
952 default:
953 complain (_("unrecognized: %s"), token_buffer);
954 skip_to_char ('%');
955 }
956 }
957 else if (c == EOF)
958 fatal (_("no input grammar"));
959 else
960 {
ff4a34be
AD
961 char buf[] = "c";
962 buf[0] = c;
963 complain (_("unknown character: %s"), quote (buf));
a70083a3 964 skip_to_char ('%');
1ff442ca 965 }
1ff442ca 966 }
1ff442ca 967}
a70083a3
AD
968\f
969/*-------------------------------------------------------------------.
970| Assuming that a `{' has just been seen, copy everything up to the |
971| matching `}' into the actions file. STACK_OFFSET is the number of |
972| values in the current rule so far, which says where to find `$0' |
973| with respect to the top of the stack. |
974`-------------------------------------------------------------------*/
1ff442ca 975
4a120d45 976static void
79282c5a 977copy_action (symbol_list *rule, int stack_offset)
1ff442ca 978{
a70083a3 979 int c;
a70083a3 980 int count;
1ff442ca
NF
981
982 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
983 if (semantic_parser)
984 stack_offset = 0;
1ff442ca 985
41aca2e0 986 fprintf (faction, "\ncase %d:\n", nrules);
89cab50d 987 if (!no_lines_flag)
41aca2e0
AD
988 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
989 putc ('{', faction);
1ff442ca
NF
990
991 count = 1;
a70083a3 992 c = getc (finput);
1ff442ca
NF
993
994 while (count > 0)
995 {
996 while (c != '}')
a70083a3
AD
997 {
998 switch (c)
1ff442ca
NF
999 {
1000 case '\n':
a70083a3 1001 putc (c, faction);
1ff442ca
NF
1002 lineno++;
1003 break;
1004
1005 case '{':
a70083a3 1006 putc (c, faction);
1ff442ca
NF
1007 count++;
1008 break;
1009
1010 case '\'':
1011 case '"':
ca36d2ef 1012 copy_string (finput, faction, c);
1ff442ca
NF
1013 break;
1014
1015 case '/':
550a72a3 1016 copy_comment (finput, faction);
1ff442ca
NF
1017 break;
1018
1019 case '$':
79282c5a 1020 copy_dollar (finput, faction, rule, stack_offset);
1ff442ca
NF
1021 break;
1022
1023 case '@':
7b306f52 1024 copy_at (finput, faction, stack_offset);
6666f98f 1025 break;
1ff442ca
NF
1026
1027 case EOF:
27821bff 1028 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1029
1030 default:
a70083a3
AD
1031 putc (c, faction);
1032 }
1033
1034 c = getc (finput);
1035 }
1036
1037 /* above loop exits when c is '}' */
1038
1039 if (--count)
1040 {
1041 putc (c, faction);
1042 c = getc (finput);
1043 }
1044 }
1045
1046 fprintf (faction, ";\n break;}");
1047}
1048\f
1049/*-------------------------------------------------------------------.
1050| After `%guard' is seen in the input file, copy the actual guard |
1051| into the guards file. If the guard is followed by an action, copy |
1052| that into the actions file. STACK_OFFSET is the number of values |
1053| in the current rule so far, which says where to find `$0' with |
1054| respect to the top of the stack, for the simple parser in which |
1055| the stack is not popped until after the guard is run. |
1056`-------------------------------------------------------------------*/
1057
1058static void
79282c5a 1059copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1060{
1061 int c;
a70083a3 1062 int count;
a70083a3
AD
1063 int brace_flag = 0;
1064
1065 /* offset is always 0 if parser has already popped the stack pointer */
1066 if (semantic_parser)
1067 stack_offset = 0;
1068
1069 fprintf (fguard, "\ncase %d:\n", nrules);
89cab50d 1070 if (!no_lines_flag)
a70083a3
AD
1071 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1072 putc ('{', fguard);
1073
1074 count = 0;
1075 c = getc (finput);
1076
1077 while (brace_flag ? (count > 0) : (c != ';'))
1078 {
1079 switch (c)
1080 {
1081 case '\n':
1082 putc (c, fguard);
1083 lineno++;
1084 break;
1085
1086 case '{':
1087 putc (c, fguard);
1088 brace_flag = 1;
1089 count++;
1090 break;
1091
1092 case '}':
1093 putc (c, fguard);
1094 if (count > 0)
1095 count--;
1096 else
1097 {
1098 complain (_("unmatched %s"), "`}'");
1099 c = getc (finput); /* skip it */
1100 }
1101 break;
1102
1103 case '\'':
1104 case '"':
1105 copy_string (finput, fguard, c);
1106 break;
1107
1108 case '/':
550a72a3 1109 copy_comment (finput, fguard);
a70083a3
AD
1110 break;
1111
1112 case '$':
79282c5a 1113 copy_dollar (finput, fguard, rule, stack_offset);
a70083a3 1114 break;
1ff442ca 1115
a70083a3
AD
1116 case '@':
1117 copy_at (finput, fguard, stack_offset);
1118 break;
1ff442ca 1119
a70083a3
AD
1120 case EOF:
1121 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1122
a70083a3
AD
1123 default:
1124 putc (c, fguard);
1ff442ca 1125 }
a70083a3
AD
1126
1127 if (c != '}' || count != 0)
1128 c = getc (finput);
1ff442ca
NF
1129 }
1130
a70083a3
AD
1131 c = skip_white_space ();
1132
1133 fprintf (fguard, ";\n break;}");
1134 if (c == '{')
1135 copy_action (rule, stack_offset);
1136 else if (c == '=')
1137 {
1138 c = getc (finput); /* why not skip_white_space -wjh */
1139 if (c == '{')
1140 copy_action (rule, stack_offset);
1141 }
1142 else
1143 ungetc (c, finput);
1ff442ca 1144}
a70083a3
AD
1145\f
1146
1147static void
1148record_rule_line (void)
1149{
1150 /* Record each rule's source line number in rline table. */
1ff442ca 1151
a70083a3
AD
1152 if (nrules >= rline_allocated)
1153 {
1154 rline_allocated = nrules * 2;
d7913476 1155 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1156 }
1157 rline[nrules] = lineno;
1158}
1ff442ca
NF
1159
1160
a70083a3
AD
1161/*-------------------------------------------------------------------.
1162| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1163| with the user's names. |
1164`-------------------------------------------------------------------*/
1ff442ca 1165
4a120d45 1166static bucket *
118fb205 1167gensym (void)
1ff442ca 1168{
a70083a3 1169 bucket *sym;
1ff442ca
NF
1170
1171 sprintf (token_buffer, "@%d", ++gensym_count);
a70083a3 1172 sym = getsym (token_buffer);
d7020c20 1173 sym->class = nterm_sym;
1ff442ca 1174 sym->value = nvars++;
36281465 1175 return sym;
1ff442ca
NF
1176}
1177
a70083a3
AD
#if 0
/*-------------------------------------------------------------------.
| Read a %type declaration: a type name followed by identifiers,     |
| optionally separated by commas.  Each identifier's symbol gets the |
| type name unless it already has one, in which case a differing     |
| name is reported.  Return the first token that does not belong to  |
| the declaration (the token after a terminating semicolon).         |
|                                                                    |
| Unused: the only call is in the `#if 0' part of readgram.          |
`-------------------------------------------------------------------*/

static int
get_type (void)
{
  char *type;
  int tok = lex ();

  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  /* One copy of the name is shared by every symbol listed.  */
  type = xstrdup (token_buffer);

  for (;;)
    {
      tok = lex ();

      if (tok == SEMICOLON)
        return lex ();

      if (tok == COMMA)
        continue;

      if (tok != IDENTIFIER)
        return tok;

      if (symval->type_name == NULL)
        symval->type_name = type;
      else if (strcmp (type, symval->type_name) != 0)
        complain (_("type redeclaration for %s"), symval->tag);
    }
}

#endif
1229\f
1230/*------------------------------------------------------------------.
1231| Parse the input grammar into a one symbol_list structure. Each |
1232| rule is represented by a sequence of symbols: the left hand side |
1233| followed by the contents of the right hand side, followed by a |
1234| null pointer instead of a symbol to terminate the rule. The next |
1235| symbol is the lhs of the following rule. |
1236| |
1237| All guards and actions are copied out to the appropriate files, |
1238| labelled by the rule number they apply to. |
1239`------------------------------------------------------------------*/
1ff442ca 1240
4a120d45 1241static void
118fb205 1242readgram (void)
1ff442ca 1243{
a70083a3
AD
1244 int t;
1245 bucket *lhs = NULL;
1246 symbol_list *p;
1247 symbol_list *p1;
1248 bucket *bp;
1ff442ca 1249
ff4a34be
AD
1250 /* Points to first symbol_list of current rule. its symbol is the
1251 lhs of the rule. */
1252 symbol_list *crule;
1253 /* Points to the symbol_list preceding crule. */
1254 symbol_list *crule1;
1ff442ca
NF
1255
1256 p1 = NULL;
1257
a70083a3 1258 t = lex ();
1ff442ca
NF
1259
1260 while (t != TWO_PERCENTS && t != ENDFILE)
1261 {
1262 if (t == IDENTIFIER || t == BAR)
1263 {
89cab50d 1264 int action_flag = 0;
ff4a34be
AD
1265 /* Number of symbols in rhs of this rule so far */
1266 int rulelength = 0;
1ff442ca
NF
1267 int xactions = 0; /* JF for error checking */
1268 bucket *first_rhs = 0;
1269
1270 if (t == IDENTIFIER)
1271 {
1272 lhs = symval;
943819bf
RS
1273
1274 if (!start_flag)
1275 {
1276 startval = lhs;
1277 start_flag = 1;
1278 }
a083fbbf 1279
a70083a3 1280 t = lex ();
1ff442ca 1281 if (t != COLON)
943819bf 1282 {
a0f6b076 1283 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1284 unlex (t);
943819bf 1285 }
1ff442ca
NF
1286 }
1287
943819bf 1288 if (nrules == 0 && t == BAR)
1ff442ca 1289 {
a0f6b076 1290 complain (_("grammar starts with vertical bar"));
943819bf 1291 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1292 }
1ff442ca
NF
1293 /* start a new rule and record its lhs. */
1294
1295 nrules++;
1296 nitems++;
1297
1298 record_rule_line ();
1299
d7913476 1300 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1301 p->sym = lhs;
1302
1303 crule1 = p1;
1304 if (p1)
1305 p1->next = p;
1306 else
1307 grammar = p;
1308
1309 p1 = p;
1310 crule = p;
1311
1312 /* mark the rule's lhs as a nonterminal if not already so. */
1313
d7020c20 1314 if (lhs->class == unknown_sym)
1ff442ca 1315 {
d7020c20 1316 lhs->class = nterm_sym;
1ff442ca
NF
1317 lhs->value = nvars;
1318 nvars++;
1319 }
d7020c20 1320 else if (lhs->class == token_sym)
a0f6b076 1321 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1322
1323 /* read the rhs of the rule. */
1324
1325 for (;;)
1326 {
a70083a3 1327 t = lex ();
943819bf
RS
1328 if (t == PREC)
1329 {
a70083a3 1330 t = lex ();
943819bf 1331 crule->ruleprec = symval;
a70083a3 1332 t = lex ();
943819bf 1333 }
1ff442ca 1334
a70083a3
AD
1335 if (!(t == IDENTIFIER || t == LEFT_CURLY))
1336 break;
1ff442ca
NF
1337
1338 /* If next token is an identifier, see if a colon follows it.
a70083a3 1339 If one does, exit this rule now. */
1ff442ca
NF
1340 if (t == IDENTIFIER)
1341 {
a70083a3
AD
1342 bucket *ssave;
1343 int t1;
1ff442ca
NF
1344
1345 ssave = symval;
a70083a3
AD
1346 t1 = lex ();
1347 unlex (t1);
1ff442ca 1348 symval = ssave;
a70083a3
AD
1349 if (t1 == COLON)
1350 break;
1ff442ca 1351
a70083a3 1352 if (!first_rhs) /* JF */
1ff442ca
NF
1353 first_rhs = symval;
1354 /* Not followed by colon =>
1355 process as part of this rule's rhs. */
1356 }
1357
1358 /* If we just passed an action, that action was in the middle
a70083a3
AD
1359 of a rule, so make a dummy rule to reduce it to a
1360 non-terminal. */
89cab50d 1361 if (action_flag)
1ff442ca 1362 {
a70083a3 1363 bucket *sdummy;
1ff442ca 1364
f282676b
AD
1365 /* Since the action was written out with this rule's
1366 number, we must give the new rule this number by
1367 inserting the new rule before it. */
1ff442ca
NF
1368
1369 /* Make a dummy nonterminal, a gensym. */
a70083a3 1370 sdummy = gensym ();
1ff442ca
NF
1371
1372 /* Make a new rule, whose body is empty,
1373 before the current one, so that the action
1374 just read can belong to it. */
1375 nrules++;
1376 nitems++;
1377 record_rule_line ();
d7913476 1378 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1379 if (crule1)
1380 crule1->next = p;
a70083a3
AD
1381 else
1382 grammar = p;
1ff442ca 1383 p->sym = sdummy;
d7913476 1384 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1385 p->next = crule1;
1386 crule1->next = crule;
1387
f282676b
AD
1388 /* Insert the dummy generated by that rule into this
1389 rule. */
1ff442ca 1390 nitems++;
d7913476 1391 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1392 p->sym = sdummy;
1393 p1->next = p;
1394 p1 = p;
1395
89cab50d 1396 action_flag = 0;
1ff442ca
NF
1397 }
1398
1399 if (t == IDENTIFIER)
1400 {
1401 nitems++;
d7913476 1402 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1403 p->sym = symval;
1404 p1->next = p;
1405 p1 = p;
1406 }
a70083a3 1407 else /* handle an action. */
1ff442ca 1408 {
a70083a3 1409 copy_action (crule, rulelength);
89cab50d 1410 action_flag = 1;
1ff442ca
NF
1411 xactions++; /* JF */
1412 }
1413 rulelength++;
a70083a3 1414 } /* end of read rhs of rule */
1ff442ca
NF
1415
1416 /* Put an empty link in the list to mark the end of this rule */
d7913476 1417 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1418 p1->next = p;
1419 p1 = p;
1420
1421 if (t == PREC)
1422 {
a0f6b076 1423 complain (_("two @prec's in a row"));
a70083a3 1424 t = lex ();
1ff442ca 1425 crule->ruleprec = symval;
a70083a3 1426 t = lex ();
1ff442ca
NF
1427 }
1428 if (t == GUARD)
1429 {
a70083a3 1430 if (!semantic_parser)
ff4a34be 1431 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1432
a70083a3
AD
1433 copy_guard (crule, rulelength);
1434 t = lex ();
1ff442ca
NF
1435 }
1436 else if (t == LEFT_CURLY)
1437 {
a70083a3 1438 /* This case never occurs -wjh */
89cab50d 1439 if (action_flag)
a0f6b076 1440 complain (_("two actions at end of one rule"));
a70083a3 1441 copy_action (crule, rulelength);
89cab50d 1442 action_flag = 1;
943819bf 1443 xactions++; /* -wjh */
a70083a3 1444 t = lex ();
1ff442ca 1445 }
a0f6b076 1446 /* If $$ is being set in default way, report if any type
6666f98f
AD
1447 mismatch. */
1448 else if (!xactions
a70083a3 1449 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1450 {
6666f98f
AD
1451 if (lhs->type_name == 0
1452 || first_rhs->type_name == 0
a70083a3 1453 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1454 complain (_("type clash (`%s' `%s') on default action"),
1455 lhs->type_name ? lhs->type_name : "",
a70083a3 1456 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1457 }
1458 /* Warn if there is no default for $$ but we need one. */
1459 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1460 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca 1461 if (t == SEMICOLON)
a70083a3 1462 t = lex ();
a083fbbf 1463 }
943819bf 1464#if 0
a70083a3 1465 /* these things can appear as alternatives to rules. */
943819bf
RS
1466/* NO, they cannot.
1467 a) none of the documentation allows them
1468 b) most of them scan forward until finding a next %
1469 thus they may swallow lots of intervening rules
1470*/
1ff442ca
NF
1471 else if (t == TOKEN)
1472 {
d7020c20 1473 parse_token_decl (token_sym, nterm_sym);
a70083a3 1474 t = lex ();
1ff442ca
NF
1475 }
1476 else if (t == NTERM)
1477 {
d7020c20 1478 parse_token_decl (nterm_sym, token_sym);
a70083a3 1479 t = lex ();
1ff442ca
NF
1480 }
1481 else if (t == TYPE)
1482 {
a70083a3 1483 t = get_type ();
1ff442ca
NF
1484 }
1485 else if (t == UNION)
1486 {
a70083a3
AD
1487 parse_union_decl ();
1488 t = lex ();
1ff442ca
NF
1489 }
1490 else if (t == EXPECT)
1491 {
a70083a3
AD
1492 parse_expect_decl ();
1493 t = lex ();
1ff442ca
NF
1494 }
1495 else if (t == START)
1496 {
a70083a3
AD
1497 parse_start_decl ();
1498 t = lex ();
1ff442ca 1499 }
943819bf
RS
1500#endif
1501
1ff442ca 1502 else
943819bf 1503 {
a0f6b076 1504 complain (_("invalid input: %s"), token_buffer);
a70083a3 1505 t = lex ();
943819bf 1506 }
1ff442ca
NF
1507 }
1508
943819bf
RS
1509 /* grammar has been read. Do some checking */
1510
1ff442ca 1511 if (nsyms > MAXSHORT)
a0f6b076
AD
1512 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1513 MAXSHORT);
1ff442ca 1514 if (nrules == 0)
a0f6b076 1515 fatal (_("no rules in the input grammar"));
1ff442ca 1516
ff4a34be
AD
1517 /* JF put out same default YYSTYPE as YACC does */
1518 if (typed == 0
1ff442ca
NF
1519 && !value_components_used)
1520 {
1521 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
a70083a3
AD
1522 but it seems better to be consistent.
1523 Most programs should declare their own type anyway. */
1524 fprintf (fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca 1525 if (fdefines)
a70083a3 1526 fprintf (fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca
NF
1527 }
1528
1529 /* Report any undefined symbols and consider them nonterminals. */
1530
1531 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1532 if (bp->class == unknown_sym)
1ff442ca 1533 {
a70083a3
AD
1534 complain (_
1535 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1536 bp->tag);
d7020c20 1537 bp->class = nterm_sym;
1ff442ca
NF
1538 bp->value = nvars++;
1539 }
1540
1541 ntokens = nsyms - nvars;
1542}
a70083a3
AD
1543\f
1544/*--------------------------------------------------------------.
1545| For named tokens, but not literal ones, define the name. The |
1546| value is the user token number. |
1547`--------------------------------------------------------------*/
1ff442ca 1548
4a120d45 1549static void
a70083a3 1550output_token_defines (FILE *file)
1ff442ca 1551{
a70083a3
AD
1552 bucket *bp;
1553 char *cp, *symbol;
1554 char c;
1ff442ca 1555
a70083a3 1556 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1557 {
a70083a3
AD
1558 symbol = bp->tag; /* get symbol */
1559
1560 if (bp->value >= ntokens)
1561 continue;
1562 if (bp->user_token_number == SALIAS)
1563 continue;
1564 if ('\'' == *symbol)
1565 continue; /* skip literal character */
1566 if (bp == errtoken)
1567 continue; /* skip error token */
1568 if ('\"' == *symbol)
1ff442ca 1569 {
a70083a3
AD
1570 /* use literal string only if given a symbol with an alias */
1571 if (bp->alias)
1572 symbol = bp->alias->tag;
1573 else
1574 continue;
1575 }
1ff442ca 1576
a70083a3
AD
1577 /* Don't #define nonliteral tokens whose names contain periods. */
1578 cp = symbol;
1579 while ((c = *cp++) && c != '.');
1580 if (c != '\0')
1581 continue;
1ff442ca 1582
a70083a3 1583 fprintf (file, "#define\t%s\t%d\n", symbol,
89cab50d 1584 ((translations && !raw_flag)
a70083a3
AD
1585 ? bp->user_token_number : bp->value));
1586 if (semantic_parser)
1587 fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca 1588 }
a70083a3
AD
1589
1590 putc ('\n', file);
1ff442ca 1591}
1ff442ca
NF
1592
1593
a70083a3
AD
1594/*------------------------------------------------------------------.
1595| Assign symbol numbers, and write definition of token names into |
b2ca4022 1596| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1597| of symbols. |
1598`------------------------------------------------------------------*/
1ff442ca 1599
4a120d45 1600static void
118fb205 1601packsymbols (void)
1ff442ca 1602{
a70083a3
AD
1603 bucket *bp;
1604 int tokno = 1;
1605 int i;
1606 int last_user_token_number;
4a120d45 1607 static char DOLLAR[] = "$";
1ff442ca
NF
1608
1609 /* int lossage = 0; JF set but not used */
1610
d7913476 1611 tags = XCALLOC (char *, nsyms + 1);
4a120d45 1612 tags[0] = DOLLAR;
d7913476 1613 user_toknums = XCALLOC (short, nsyms + 1);
943819bf 1614 user_toknums[0] = 0;
1ff442ca 1615
d7913476
AD
1616 sprec = XCALLOC (short, nsyms);
1617 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1618
1619 max_user_token_number = 256;
1620 last_user_token_number = 256;
1621
1622 for (bp = firstsymbol; bp; bp = bp->next)
1623 {
d7020c20 1624 if (bp->class == nterm_sym)
1ff442ca
NF
1625 {
1626 bp->value += ntokens;
1627 }
943819bf
RS
1628 else if (bp->alias)
1629 {
0a6384c4
AD
1630 /* this symbol and its alias are a single token defn.
1631 allocate a tokno, and assign to both check agreement of
1632 ->prec and ->assoc fields and make both the same */
1633 if (bp->value == 0)
1634 bp->value = bp->alias->value = tokno++;
943819bf 1635
0a6384c4
AD
1636 if (bp->prec != bp->alias->prec)
1637 {
1638 if (bp->prec != 0 && bp->alias->prec != 0
1639 && bp->user_token_number == SALIAS)
a0f6b076
AD
1640 complain (_("conflicting precedences for %s and %s"),
1641 bp->tag, bp->alias->tag);
0a6384c4
AD
1642 if (bp->prec != 0)
1643 bp->alias->prec = bp->prec;
1644 else
1645 bp->prec = bp->alias->prec;
1646 }
943819bf 1647
0a6384c4
AD
1648 if (bp->assoc != bp->alias->assoc)
1649 {
a0f6b076
AD
1650 if (bp->assoc != 0 && bp->alias->assoc != 0
1651 && bp->user_token_number == SALIAS)
1652 complain (_("conflicting assoc values for %s and %s"),
1653 bp->tag, bp->alias->tag);
1654 if (bp->assoc != 0)
1655 bp->alias->assoc = bp->assoc;
1656 else
1657 bp->assoc = bp->alias->assoc;
1658 }
0a6384c4
AD
1659
1660 if (bp->user_token_number == SALIAS)
a70083a3 1661 continue; /* do not do processing below for SALIASs */
943819bf 1662
a70083a3 1663 }
d7020c20 1664 else /* bp->class == token_sym */
943819bf
RS
1665 {
1666 bp->value = tokno++;
1667 }
1668
d7020c20 1669 if (bp->class == token_sym)
1ff442ca
NF
1670 {
1671 if (translations && !(bp->user_token_number))
1672 bp->user_token_number = ++last_user_token_number;
1673 if (bp->user_token_number > max_user_token_number)
1674 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1675 }
1676
1677 tags[bp->value] = bp->tag;
943819bf 1678 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1679 sprec[bp->value] = bp->prec;
1680 sassoc[bp->value] = bp->assoc;
1681
1682 }
1683
1684 if (translations)
1685 {
a70083a3 1686 int j;
1ff442ca 1687
d7913476 1688 token_translations = XCALLOC (short, max_user_token_number + 1);
1ff442ca 1689
0a6384c4 1690 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1691 token number for $undefined., which represents all invalid
1692 inputs. */
4a120d45 1693 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1694 token_translations[j] = 2;
1ff442ca 1695
943819bf 1696 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1697 {
1698 if (bp->value >= ntokens)
1699 continue; /* non-terminal */
1700 if (bp->user_token_number == SALIAS)
0a6384c4 1701 continue;
a70083a3 1702 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1703 complain (_("tokens %s and %s both assigned number %d"),
1704 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1705 bp->tag, bp->user_token_number);
1706 token_translations[bp->user_token_number] = bp->value;
1707 }
1ff442ca
NF
1708 }
1709
1710 error_token_number = errtoken->value;
1711
89cab50d 1712 if (!no_parser_flag)
a70083a3 1713 output_token_defines (ftable);
1ff442ca 1714
d7020c20 1715 if (startval->class == unknown_sym)
a0f6b076 1716 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1717 else if (startval->class == token_sym)
a0f6b076 1718 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1719
1720 start_symbol = startval->value;
1721
89cab50d 1722 if (defines_flag)
1ff442ca 1723 {
a70083a3 1724 output_token_defines (fdefines);
1ff442ca
NF
1725
1726 if (!pure_parser)
1727 {
1728 if (spec_name_prefix)
a70083a3
AD
1729 fprintf (fdefines, "\nextern YYSTYPE %slval;\n",
1730 spec_name_prefix);
1ff442ca 1731 else
a70083a3 1732 fprintf (fdefines, "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1733 }
1734
1735 if (semantic_parser)
1736 for (i = ntokens; i < nsyms; i++)
1737 {
1738 /* don't make these for dummy nonterminals made by gensym. */
1739 if (*tags[i] != '@')
a70083a3 1740 fprintf (fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1741 }
1742#if 0
1743 /* `fdefines' is now a temporary file, so we need to copy its
1744 contents in `done', so we can't close it here. */
a70083a3 1745 fclose (fdefines);
1ff442ca
NF
1746 fdefines = NULL;
1747#endif
1748 }
1749}
a083fbbf 1750
1ff442ca 1751
a70083a3
AD
1752/*---------------------------------------------------------------.
1753| Convert the rules into the representation using RRHS, RLHS and |
1754| RITEMS. |
1755`---------------------------------------------------------------*/
1ff442ca 1756
4a120d45 1757static void
118fb205 1758packgram (void)
1ff442ca 1759{
a70083a3
AD
1760 int itemno;
1761 int ruleno;
1762 symbol_list *p;
1ff442ca
NF
1763
1764 bucket *ruleprec;
1765
d7913476
AD
1766 ritem = XCALLOC (short, nitems + 1);
1767 rlhs = XCALLOC (short, nrules) - 1;
1768 rrhs = XCALLOC (short, nrules) - 1;
1769 rprec = XCALLOC (short, nrules) - 1;
1770 rprecsym = XCALLOC (short, nrules) - 1;
1771 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1772
1773 itemno = 0;
1774 ruleno = 1;
1775
1776 p = grammar;
1777 while (p)
1778 {
1779 rlhs[ruleno] = p->sym->value;
1780 rrhs[ruleno] = itemno;
1781 ruleprec = p->ruleprec;
1782
1783 p = p->next;
1784 while (p && p->sym)
1785 {
1786 ritem[itemno++] = p->sym->value;
1787 /* A rule gets by default the precedence and associativity
1788 of the last token in it. */
d7020c20 1789 if (p->sym->class == token_sym)
1ff442ca
NF
1790 {
1791 rprec[ruleno] = p->sym->prec;
1792 rassoc[ruleno] = p->sym->assoc;
1793 }
a70083a3
AD
1794 if (p)
1795 p = p->next;
1ff442ca
NF
1796 }
1797
1798 /* If this rule has a %prec,
a70083a3 1799 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1800 if (ruleprec)
1801 {
a70083a3
AD
1802 rprec[ruleno] = ruleprec->prec;
1803 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1804 rprecsym[ruleno] = ruleprec->value;
1805 }
1806
1807 ritem[itemno++] = -ruleno;
1808 ruleno++;
1809
a70083a3
AD
1810 if (p)
1811 p = p->next;
1ff442ca
NF
1812 }
1813
1814 ritem[itemno] = 0;
1815}
a70083a3
AD
1816\f
1817/*-------------------------------------------------------------------.
1818| Read in the grammar specification and record it in the format |
1819| described in gram.h. All guards are copied into the FGUARD file |
1820| and all actions into FACTION, in each case forming the body of a C |
1821| function (YYGUARD or YYACTION) which contains a switch statement |
1822| to decide which guard or action to execute. |
1823`-------------------------------------------------------------------*/
1824
1825void
1826reader (void)
1827{
1828 start_flag = 0;
1829 startval = NULL; /* start symbol not specified yet. */
1830
1831#if 0
1832 /* initially assume token number translation not needed. */
1833 translations = 0;
1834#endif
1835 /* Nowadays translations is always set to 1, since we give `error' a
1836 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1837 */
1838 translations = 1;
1839
1840 nsyms = 1;
1841 nvars = 0;
1842 nrules = 0;
1843 nitems = 0;
1844 rline_allocated = 10;
d7913476 1845 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1846
1847 typed = 0;
1848 lastprec = 0;
1849
1850 gensym_count = 0;
1851
1852 semantic_parser = 0;
1853 pure_parser = 0;
a70083a3
AD
1854
1855 grammar = NULL;
1856
1857 init_lex ();
1858 lineno = 1;
1859
1860 /* Initialize the symbol table. */
1861 tabinit ();
1862 /* Construct the error token */
1863 errtoken = getsym ("error");
d7020c20 1864 errtoken->class = token_sym;
a70083a3
AD
1865 errtoken->user_token_number = 256; /* Value specified by POSIX. */
1866 /* Construct a token that represents all undefined literal tokens.
1867 It is always token number 2. */
1868 undeftoken = getsym ("$undefined.");
d7020c20 1869 undeftoken->class = token_sym;
a70083a3
AD
1870 undeftoken->user_token_number = 2;
1871
1872 /* Read the declaration section. Copy %{ ... %} groups to FTABLE
1873 and FDEFINES file. Also notice any %token, %left, etc. found
1874 there. */
1875 putc ('\n', ftable);
1876 fprintf (ftable, "\
1877/* %s, made from %s\n\
1878 by GNU bison %s. */\n\
89cab50d 1879\n", no_parser_flag ? "Bison-generated parse tables" : "A Bison parser", infile, VERSION);
a70083a3
AD
1880
1881 fputs ("#define YYBISON 1 /* Identify Bison output. */\n\n", ftable);
1882 read_declarations ();
1883 /* Start writing the guard and action files, if they are needed. */
1884 output_headers ();
1885 /* Read in the grammar, build grammar in list form. Write out
1886 guards and actions. */
1887 readgram ();
1888 /* Now we know whether we need the line-number stack. If we do,
1889 write its type into the .tab.h file. */
1890 if (fdefines)
1891 reader_output_yylsp (fdefines);
1892 /* Write closing delimiters for actions and guards. */
1893 output_trailers ();
89cab50d 1894 if (locations_flag)
a70083a3
AD
1895 fputs ("#define YYLSP_NEEDED\n\n", ftable);
1896 /* Assign the symbols their symbol numbers. Write #defines for the
1897 token symbols into FDEFINES if requested. */
1898 packsymbols ();
1899 /* Convert the grammar into the format described in gram.h. */
1900 packgram ();
1901 /* Free the symbol table data structure since symbols are now all
1902 referred to by symbol number. */
1903 free_symtab ();
1904}
1905
d7020c20 1906
a70083a3
AD
1907void
1908reader_output_yylsp (FILE *f)
1909{
89cab50d 1910 if (locations_flag)
d7020c20
AD
1911 fputs ("\
1912\n\
1913#ifndef YYLTYPE\n\
89cab50d
AD
1914typedef struct yyltype\n\
1915{\n\
1916 int timestamp;\n\
1917 int first_line;\n\
1918 int first_column;\
d7020c20 1919\n\
89cab50d
AD
1920 int last_line;\n\
1921 int last_column;\n\
1922 char *text;\n\
1923} yyltype;\n\
d7020c20 1924\n\
89cab50d 1925# define YYLTYPE yyltype\n\
d7020c20
AD
1926#endif\n\
1927\n",
1928 f);
a70083a3 1929}