]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/state.h (nstates, final_state, first_state, first_shift)
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
a70083a3
AD
2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000
3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
ceed8467 24#include "getargs.h"
1ff442ca 25#include "files.h"
7612000c 26#include "alloc.h"
1ff442ca
NF
27#include "symtab.h"
28#include "lex.h"
29#include "gram.h"
a0f6b076 30#include "complain.h"
6c89f1c1 31#include "output.h"
b2ca4022
AD
32#include "reader.h"
33
34extern bucket *symval;
35extern int numval;
36extern int expected_conflicts;
37extern char *token_buffer;
38extern int maxtoken;
39
40extern void tabinit PARAMS ((void));
41extern void free_symtab PARAMS ((void));
42extern char *printable_version PARAMS ((int));
1ff442ca 43
6666f98f
AD
44#define LTYPESTR "\
45\n\
46#ifndef YYLTYPE\n\
47typedef\n\
48 struct yyltype\n\
49\
50 {\n\
51 int timestamp;\n\
52 int first_line;\n\
53 int first_column;\
54\n\
55 int last_line;\n\
56 int last_column;\n\
57 char *text;\n\
58 }\n\
59\
60 yyltype;\n\
61\n\
62#define YYLTYPE yyltype\n\
63#endif\n\
64\n"
1ff442ca
NF
65
66/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 67static int rline_allocated;
1ff442ca 68
a70083a3
AD
69typedef struct symbol_list
70{
71 struct symbol_list *next;
72 bucket *sym;
73 bucket *ruleprec;
74}
75symbol_list;
118fb205 76
1ff442ca 77int lineno;
1ff442ca 78char **tags;
d019d655 79short *user_toknums;
4a120d45
JT
80static symbol_list *grammar;
81static int start_flag;
82static bucket *startval;
1ff442ca
NF
83
84/* Nonzero if components of semantic values are used, implying
85 they must be unions. */
86static int value_components_used;
87
a70083a3 88static int typed; /* nonzero if %union has been seen. */
1ff442ca 89
a70083a3 90static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
1ff442ca 91
a70083a3 92static int gensym_count; /* incremented for each generated symbol */
1ff442ca
NF
93
94static bucket *errtoken;
5b2e3c89 95static bucket *undeftoken;
1ff442ca
NF
96
97/* Nonzero if any action or guard uses the @n construct. */
98static int yylsp_needed;
0d533154 99\f
a70083a3 100
0d533154
AD
101/*===================\
102| Low level lexing. |
103\===================*/
943819bf
RS
104
105static void
118fb205 106skip_to_char (int target)
943819bf
RS
107{
108 int c;
109 if (target == '\n')
a0f6b076 110 complain (_(" Skipping to next \\n"));
943819bf 111 else
a0f6b076 112 complain (_(" Skipping to next %c"), target);
943819bf
RS
113
114 do
0d533154 115 c = skip_white_space ();
943819bf 116 while (c != target && c != EOF);
a083fbbf 117 if (c != EOF)
0d533154 118 ungetc (c, finput);
943819bf
RS
119}
120
121
0d533154
AD
/*-------------------------------------------------------------------.
| Read an optional `-' followed by decimal digits from STREAM and    |
| return the value.  The first character that is not part of the     |
| number is pushed back onto STREAM.  When no digit is present the   |
| result is 0.                                                       |
`-------------------------------------------------------------------*/

static inline int
read_signed_integer (FILE *stream)
{
  int ch = getc (stream);
  int negative = (ch == '-');
  int magnitude = 0;

  /* A leading minus sign is consumed even when no digit follows.  */
  if (negative)
    ch = getc (stream);

  for (; isdigit (ch); ch = getc (stream))
    magnitude = magnitude * 10 + (ch - '0');

  /* CH is the first character past the number (possibly EOF).  */
  ungetc (ch, stream);

  return negative ? -magnitude : magnitude;
}
149\f
150/*-------------------------------------------------------------------.
151| Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of |
152| the string (either ' or "). |
153`-------------------------------------------------------------------*/
ae3c3164
AD
154
155static inline void
4a120d45 156copy_string (FILE *fin, FILE *fout, int match)
ae3c3164
AD
157{
158 int c;
159
4a120d45
JT
160 putc (match, fout);
161 c = getc (fin);
ae3c3164
AD
162
163 while (c != match)
164 {
165 if (c == EOF)
166 fatal (_("unterminated string at end of file"));
167 if (c == '\n')
168 {
a0f6b076 169 complain (_("unterminated string"));
4a120d45 170 ungetc (c, fin);
ae3c3164
AD
171 c = match; /* invent terminator */
172 continue;
173 }
174
a70083a3 175 putc (c, fout);
ae3c3164
AD
176
177 if (c == '\\')
178 {
4a120d45 179 c = getc (fin);
ae3c3164
AD
180 if (c == EOF)
181 fatal (_("unterminated string at end of file"));
4a120d45 182 putc (c, fout);
ae3c3164
AD
183 if (c == '\n')
184 lineno++;
185 }
186
a70083a3 187 c = getc (fin);
ae3c3164
AD
188 }
189
a70083a3 190 putc (c, fout);
ae3c3164
AD
191}
192
193
6c89f1c1
AD
194/*---------------------------------------------------------------.
195| Dump the comment from IN to OUT1 and OUT2. C is either `*' or |
196| `/', depending upon the type of comments used. OUT2 might be |
197| NULL. |
198`---------------------------------------------------------------*/
ae3c3164
AD
199
200static inline void
a70083a3 201copy_comment2 (FILE *in, FILE *out1, FILE *out2, int c)
ae3c3164
AD
202{
203 int cplus_comment;
a70083a3 204 int ended;
ae3c3164
AD
205
206 cplus_comment = (c == '/');
27821bff
AD
207 putc (c, out1);
208 if (out2)
209 putc (c, out2);
210 c = getc (in);
ae3c3164
AD
211
212 ended = 0;
213 while (!ended)
214 {
215 if (!cplus_comment && c == '*')
216 {
217 while (c == '*')
218 {
27821bff
AD
219 putc (c, out1);
220 if (out2)
221 putc (c, out2);
222 c = getc (in);
ae3c3164
AD
223 }
224
225 if (c == '/')
226 {
a70083a3 227 putc (c, out1);
27821bff 228 if (out2)
a70083a3 229 putc (c, out2);
ae3c3164
AD
230 ended = 1;
231 }
232 }
233 else if (c == '\n')
234 {
235 lineno++;
27821bff
AD
236 putc (c, out1);
237 if (out2)
238 putc (c, out2);
ae3c3164
AD
239 if (cplus_comment)
240 ended = 1;
241 else
27821bff 242 c = getc (in);
ae3c3164
AD
243 }
244 else if (c == EOF)
245 fatal (_("unterminated comment"));
246 else
247 {
27821bff
AD
248 putc (c, out1);
249 if (out2)
250 putc (c, out2);
251 c = getc (in);
ae3c3164
AD
252 }
253 }
254}
255
256
d019d655
AD
/*-------------------------------------------------------------------.
| Copy a comment from FIN to the single stream FOUT.  C is `*' or    |
| `/', as for copy_comment2, to which the work is delegated with no  |
| second output stream.                                              |
`-------------------------------------------------------------------*/

static inline void
copy_comment (FILE *fin, FILE *fout, int c)
{
  FILE *no_second_output = NULL;

  copy_comment2 (fin, fout, no_second_output, c);
}
267
268
a70083a3
AD
269/*-----------------------------------------------------------------.
270| FIN is pointing to a location (i.e., a `@'). Output to FOUT a |
271| reference to this location. STACK_OFFSET is the number of values |
272| in the current rule so far, which says where to find `$0' with |
273| respect to the top of the stack. |
274`-----------------------------------------------------------------*/
1ff442ca 275
a70083a3
AD
276static inline void
277copy_at (FILE *fin, FILE *fout, int stack_offset)
1ff442ca 278{
a70083a3 279 int c;
1ff442ca 280
a70083a3
AD
281 c = getc (fin);
282 if (c == '$')
1ff442ca 283 {
a70083a3
AD
284 fprintf (fout, "yyloc");
285 yylsp_needed = 1;
286 }
287 else if (isdigit (c) || c == '-')
288 {
289 int n;
1ff442ca 290
a70083a3
AD
291 ungetc (c, fin);
292 n = read_signed_integer (fin);
943819bf 293
a70083a3
AD
294 fprintf (fout, "yylsp[%d]", n - stack_offset);
295 yylsp_needed = 1;
1ff442ca 296 }
a70083a3
AD
297 else
298 complain (_("@%s is invalid"), printable_version (c));
1ff442ca 299}
a70083a3
AD
300\f
301/*-------------------------------------------------------------------.
302| Copy the contents of a `%{ ... %}' into the definitions file. The |
303| `%{' has already been read. Return after reading the `%}'. |
304`-------------------------------------------------------------------*/
1ff442ca 305
4a120d45 306static void
118fb205 307copy_definition (void)
1ff442ca 308{
a70083a3 309 int c;
ae3c3164 310 /* -1 while reading a character if prev char was %. */
a70083a3 311 int after_percent;
1ff442ca
NF
312
313 if (!nolinesflag)
a70083a3 314 fprintf (fattrs, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
315
316 after_percent = 0;
317
ae3c3164 318 c = getc (finput);
1ff442ca
NF
319
320 for (;;)
321 {
322 switch (c)
323 {
324 case '\n':
a70083a3 325 putc (c, fattrs);
1ff442ca
NF
326 lineno++;
327 break;
328
329 case '%':
a70083a3 330 after_percent = -1;
1ff442ca 331 break;
a083fbbf 332
1ff442ca
NF
333 case '\'':
334 case '"':
ae3c3164 335 copy_string (finput, fattrs, c);
1ff442ca
NF
336 break;
337
338 case '/':
ae3c3164
AD
339 putc (c, fattrs);
340 c = getc (finput);
1ff442ca
NF
341 if (c != '*' && c != '/')
342 continue;
ae3c3164 343 copy_comment (finput, fattrs, c);
1ff442ca
NF
344 break;
345
346 case EOF:
a70083a3 347 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
348
349 default:
a70083a3 350 putc (c, fattrs);
1ff442ca
NF
351 }
352
a70083a3 353 c = getc (finput);
1ff442ca
NF
354
355 if (after_percent)
356 {
357 if (c == '}')
358 return;
a70083a3 359 putc ('%', fattrs);
1ff442ca
NF
360 }
361 after_percent = 0;
362
363 }
364
365}
366
367
a70083a3
AD
368/*-----------------------------------------------------------------.
369| Parse what comes after %token or %nterm. For %token, what_is is |
370| STOKEN and what_is_not is SNTERM. For %nterm, the arguments are |
371| reversed. |
372`-----------------------------------------------------------------*/
1ff442ca 373
4a120d45 374static void
118fb205 375parse_token_decl (int what_is, int what_is_not)
1ff442ca 376{
a70083a3
AD
377 int token = 0;
378 char *typename = 0;
379 struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
380 int k;
381
1ff442ca
NF
382 for (;;)
383 {
e6011337
JT
384 int tmp_char = ungetc (skip_white_space (), finput);
385
386 if (tmp_char == '%')
1ff442ca 387 return;
e6011337 388 if (tmp_char == EOF)
a0f6b076 389 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 390
a70083a3 391 token = lex ();
1ff442ca 392 if (token == COMMA)
943819bf
RS
393 {
394 symbol = NULL;
395 continue;
396 }
1ff442ca
NF
397 if (token == TYPENAME)
398 {
a70083a3
AD
399 k = strlen (token_buffer);
400 typename = NEW2 (k + 1, char);
401 strcpy (typename, token_buffer);
1ff442ca 402 value_components_used = 1;
943819bf
RS
403 symbol = NULL;
404 }
a70083a3 405 else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
943819bf
RS
406 {
407 translations = 1;
408 symval->class = STOKEN;
409 symval->type_name = typename;
410 symval->user_token_number = symbol->user_token_number;
411 symbol->user_token_number = SALIAS;
412
a083fbbf
RS
413 symval->alias = symbol;
414 symbol->alias = symval;
943819bf
RS
415 symbol = NULL;
416
a70083a3 417 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
418 }
419 else if (token == IDENTIFIER)
420 {
421 int oldclass = symval->class;
943819bf 422 symbol = symval;
1ff442ca 423
943819bf 424 if (symbol->class == what_is_not)
a0f6b076 425 complain (_("symbol %s redefined"), symbol->tag);
943819bf 426 symbol->class = what_is;
1ff442ca 427 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 428 symbol->value = nvars++;
1ff442ca
NF
429
430 if (typename)
431 {
943819bf
RS
432 if (symbol->type_name == NULL)
433 symbol->type_name = typename;
a70083a3 434 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 435 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
436 }
437 }
943819bf 438 else if (symbol && token == NUMBER)
a70083a3 439 {
943819bf 440 symbol->user_token_number = numval;
1ff442ca 441 translations = 1;
a70083a3 442 }
1ff442ca 443 else
943819bf 444 {
a0f6b076 445 complain (_("`%s' is invalid in %s"),
a70083a3
AD
446 token_buffer, (what_is == STOKEN) ? "%token" : "%nterm");
447 skip_to_char ('%');
943819bf 448 }
1ff442ca
NF
449 }
450
451}
452
1ff442ca 453
a0f6b076 454/* Parse what comes after %start */
1ff442ca 455
4a120d45 456static void
118fb205 457parse_start_decl (void)
1ff442ca
NF
458{
459 if (start_flag)
27821bff
AD
460 complain (_("multiple %s declarations"), "%start");
461 if (lex () != IDENTIFIER)
462 complain (_("invalid %s declaration"), "%start");
943819bf
RS
463 else
464 {
465 start_flag = 1;
466 startval = symval;
467 }
1ff442ca
NF
468}
469
470
471
a70083a3
AD
472/*--------------------------------------------------------------.
473| Get the data type (alternative in the union) of the value for |
474| symbol n in rule rule. |
475`--------------------------------------------------------------*/
1ff442ca 476
a70083a3
AD
477static char *
478get_type_name (int n, symbol_list * rule)
1ff442ca 479{
a70083a3
AD
480 int i;
481 symbol_list *rp;
1ff442ca 482
a70083a3 483 if (n < 0)
943819bf 484 {
a70083a3
AD
485 complain (_("invalid $ value"));
486 return NULL;
943819bf 487 }
1ff442ca 488
a70083a3
AD
489 rp = rule;
490 i = 0;
1ff442ca 491
a70083a3 492 while (i < n)
1ff442ca 493 {
a70083a3
AD
494 rp = rp->next;
495 if (rp == NULL || rp->sym == NULL)
496 {
497 complain (_("invalid $ value"));
498 return NULL;
499 }
500 i++;
501 }
502
503 return rp->sym->type_name;
504}
505
506
507/*-----------------------------------------------------------.
508| read in a %type declaration and record its information for |
509| get_type_name to access |
510`-----------------------------------------------------------*/
511
512static void
513parse_type_decl (void)
514{
515 int k;
516 char *name;
517
518 if (lex () != TYPENAME)
519 {
520 complain ("%s", _("%type declaration has no <typename>"));
521 skip_to_char ('%');
522 return;
523 }
524
525 k = strlen (token_buffer);
526 name = NEW2 (k + 1, char);
527 strcpy (name, token_buffer);
528
529 for (;;)
530 {
531 int t;
532 int tmp_char = ungetc (skip_white_space (), finput);
533
534 if (tmp_char == '%')
535 return;
536 if (tmp_char == EOF)
537 fatal (_("Premature EOF after %s"), token_buffer);
538
539 t = lex ();
540
541 switch (t)
1ff442ca
NF
542 {
543
544 case COMMA:
545 case SEMICOLON:
546 break;
547
548 case IDENTIFIER:
549 if (symval->type_name == NULL)
550 symval->type_name = name;
a70083a3 551 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 552 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
553
554 break;
555
556 default:
a0f6b076
AD
557 complain (_("invalid %%type declaration due to item: %s"),
558 token_buffer);
a70083a3 559 skip_to_char ('%');
1ff442ca
NF
560 }
561 }
562}
563
564
565
566/* read in a %left, %right or %nonassoc declaration and record its information. */
567/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
568
4a120d45 569static void
118fb205 570parse_assoc_decl (int assoc)
1ff442ca 571{
a70083a3
AD
572 int k;
573 char *name = NULL;
574 int prev = 0;
1ff442ca 575
a70083a3 576 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 577
1ff442ca
NF
578 for (;;)
579 {
a70083a3 580 int t;
e6011337 581 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 582
e6011337 583 if (tmp_char == '%')
1ff442ca 584 return;
e6011337 585 if (tmp_char == EOF)
a0f6b076 586 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 587
a70083a3 588 t = lex ();
1ff442ca
NF
589
590 switch (t)
591 {
592
593 case TYPENAME:
a70083a3
AD
594 k = strlen (token_buffer);
595 name = NEW2 (k + 1, char);
596 strcpy (name, token_buffer);
1ff442ca
NF
597 break;
598
599 case COMMA:
600 break;
601
602 case IDENTIFIER:
603 if (symval->prec != 0)
a0f6b076 604 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
605 symval->prec = lastprec;
606 symval->assoc = assoc;
607 if (symval->class == SNTERM)
a0f6b076 608 complain (_("symbol %s redefined"), symval->tag);
1ff442ca
NF
609 symval->class = STOKEN;
610 if (name)
a70083a3 611 { /* record the type, if one is specified */
1ff442ca
NF
612 if (symval->type_name == NULL)
613 symval->type_name = name;
a70083a3 614 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 615 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
616 }
617 break;
618
619 case NUMBER:
620 if (prev == IDENTIFIER)
a70083a3 621 {
1ff442ca
NF
622 symval->user_token_number = numval;
623 translations = 1;
a70083a3
AD
624 }
625 else
626 {
627 complain (_
628 ("invalid text (%s) - number should be after identifier"),
629token_buffer);
630 skip_to_char ('%');
631 }
1ff442ca
NF
632 break;
633
634 case SEMICOLON:
635 return;
636
637 default:
a0f6b076 638 complain (_("unexpected item: %s"), token_buffer);
a70083a3 639 skip_to_char ('%');
1ff442ca
NF
640 }
641
642 prev = t;
643
644 }
645}
646
647
648
649/* copy the union declaration into fattrs (and fdefines),
650 where it is made into the
651 definition of YYSTYPE, the type of elements of the parser value stack. */
652
4a120d45 653static void
118fb205 654parse_union_decl (void)
1ff442ca 655{
a70083a3
AD
656 int c;
657 int count = 0;
1ff442ca
NF
658
659 if (typed)
27821bff 660 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
661
662 typed = 1;
663
664 if (!nolinesflag)
27821bff 665 fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
1ff442ca 666 else
27821bff 667 fprintf (fattrs, "\n");
1ff442ca 668
27821bff 669 fprintf (fattrs, "typedef union");
1ff442ca 670 if (fdefines)
27821bff 671 fprintf (fdefines, "typedef union");
1ff442ca 672
27821bff 673 c = getc (finput);
1ff442ca
NF
674
675 while (c != EOF)
676 {
27821bff 677 putc (c, fattrs);
1ff442ca 678 if (fdefines)
27821bff 679 putc (c, fdefines);
1ff442ca
NF
680
681 switch (c)
682 {
683 case '\n':
684 lineno++;
685 break;
686
687 case '/':
27821bff 688 c = getc (finput);
1ff442ca 689 if (c != '*' && c != '/')
27821bff
AD
690 continue;
691 copy_comment2 (finput, fattrs, fdefines, c);
1ff442ca
NF
692 break;
693
694
695 case '{':
696 count++;
697 break;
698
699 case '}':
700 if (count == 0)
27821bff 701 complain (_("unmatched %s"), "`}'");
1ff442ca 702 count--;
943819bf 703 if (count <= 0)
1ff442ca 704 {
27821bff 705 fprintf (fattrs, " YYSTYPE;\n");
1ff442ca 706 if (fdefines)
27821bff 707 fprintf (fdefines, " YYSTYPE;\n");
1ff442ca 708 /* JF don't choke on trailing semi */
27821bff
AD
709 c = skip_white_space ();
710 if (c != ';')
a70083a3 711 ungetc (c, finput);
1ff442ca
NF
712 return;
713 }
714 }
715
27821bff 716 c = getc (finput);
1ff442ca
NF
717 }
718}
719
720/* parse the declaration %expect N which says to expect N
721 shift-reduce conflicts. */
722
4a120d45 723static void
118fb205 724parse_expect_decl (void)
1ff442ca 725{
a70083a3
AD
726 int c;
727 int count;
1ff442ca
NF
728 char buffer[20];
729
a70083a3 730 c = getc (finput);
1ff442ca 731 while (c == ' ' || c == '\t')
a70083a3 732 c = getc (finput);
1ff442ca
NF
733
734 count = 0;
735 while (c >= '0' && c <= '9')
736 {
737 if (count < 20)
738 buffer[count++] = c;
a70083a3 739 c = getc (finput);
1ff442ca
NF
740 }
741 buffer[count] = 0;
742
743 ungetc (c, finput);
744
943819bf 745 if (count <= 0 || count > 10)
a0f6b076 746 complain ("%s", _("argument of %expect is not an integer"));
1ff442ca
NF
747 expected_conflicts = atoi (buffer);
748}
749
a70083a3
AD
750
751/*-------------------------------------------------------------------.
752| Parse what comes after %thong. the full syntax is |
753| |
754| %thong <type> token number literal |
755| |
756| the <type> or number may be omitted. The number specifies the |
757| user_token_number. |
758| |
759| Two symbols are entered in the table, one for the token symbol and |
760| one for the literal. Both are given the <type>, if any, from the |
761| declaration. The ->user_token_number of the first is SALIAS and |
762| the ->user_token_number of the second is set to the number, if |
763| any, from the declaration. The two symbols are linked via |
764| pointers in their ->alias fields. |
765| |
766| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
767| only the literal string is retained it is the literal string that |
768| is output to yytname |
769`-------------------------------------------------------------------*/
770
771static void
772parse_thong_decl (void)
7b306f52 773{
a70083a3
AD
774 int token;
775 struct bucket *symbol;
776 char *typename = 0;
777 int k, usrtoknum;
7b306f52 778
a70083a3
AD
779 translations = 1;
780 token = lex (); /* fetch typename or first token */
781 if (token == TYPENAME)
7b306f52 782 {
a70083a3
AD
783 k = strlen (token_buffer);
784 typename = NEW2 (k + 1, char);
785 strcpy (typename, token_buffer);
786 value_components_used = 1;
787 token = lex (); /* fetch first token */
7b306f52 788 }
7b306f52 789
a70083a3 790 /* process first token */
7b306f52 791
a70083a3
AD
792 if (token != IDENTIFIER)
793 {
794 complain (_("unrecognized item %s, expected an identifier"),
795 token_buffer);
796 skip_to_char ('%');
797 return;
7b306f52 798 }
a70083a3
AD
799 symval->class = STOKEN;
800 symval->type_name = typename;
801 symval->user_token_number = SALIAS;
802 symbol = symval;
7b306f52 803
a70083a3 804 token = lex (); /* get number or literal string */
1ff442ca 805
a70083a3 806 if (token == NUMBER)
943819bf 807 {
a70083a3
AD
808 usrtoknum = numval;
809 token = lex (); /* okay, did number, now get literal */
943819bf 810 }
a70083a3
AD
811 else
812 usrtoknum = 0;
1ff442ca 813
a70083a3 814 /* process literal string token */
1ff442ca 815
a70083a3 816 if (token != IDENTIFIER || *symval->tag != '\"')
1ff442ca 817 {
a70083a3
AD
818 complain (_("expected string constant instead of %s"), token_buffer);
819 skip_to_char ('%');
820 return;
1ff442ca 821 }
a70083a3
AD
822 symval->class = STOKEN;
823 symval->type_name = typename;
824 symval->user_token_number = usrtoknum;
1ff442ca 825
a70083a3
AD
826 symval->alias = symbol;
827 symbol->alias = symval;
1ff442ca 828
a70083a3
AD
829 nsyms--; /* symbol and symval combined are only one symbol */
830}
3cef001a 831
a70083a3
AD
832/*----------------------------------------------------------------.
833| Read from finput until `%%' is seen. Discard the `%%'. Handle |
834| any `%' declarations, and copy the contents of any `%{ ... %}' |
835| groups to fattrs. |
836`----------------------------------------------------------------*/
1ff442ca 837
4a120d45 838static void
a70083a3 839read_declarations (void)
1ff442ca 840{
a70083a3
AD
841 int c;
842 int tok;
1ff442ca 843
a70083a3 844 for (;;)
1ff442ca 845 {
a70083a3 846 c = skip_white_space ();
1ff442ca 847
a70083a3
AD
848 if (c == '%')
849 {
850 tok = parse_percent_token ();
1ff442ca 851
a70083a3 852 switch (tok)
943819bf 853 {
a70083a3
AD
854 case TWO_PERCENTS:
855 return;
1ff442ca 856
a70083a3
AD
857 case PERCENT_LEFT_CURLY:
858 copy_definition ();
859 break;
1ff442ca 860
a70083a3
AD
861 case TOKEN:
862 parse_token_decl (STOKEN, SNTERM);
863 break;
1ff442ca 864
a70083a3
AD
865 case NTERM:
866 parse_token_decl (SNTERM, STOKEN);
867 break;
1ff442ca 868
a70083a3
AD
869 case TYPE:
870 parse_type_decl ();
871 break;
1ff442ca 872
a70083a3
AD
873 case START:
874 parse_start_decl ();
875 break;
118fb205 876
a70083a3
AD
877 case UNION:
878 parse_union_decl ();
879 break;
1ff442ca 880
a70083a3
AD
881 case EXPECT:
882 parse_expect_decl ();
883 break;
884 case THONG:
885 parse_thong_decl ();
886 break;
887 case LEFT:
888 parse_assoc_decl (LEFT_ASSOC);
889 break;
1ff442ca 890
a70083a3
AD
891 case RIGHT:
892 parse_assoc_decl (RIGHT_ASSOC);
893 break;
1ff442ca 894
a70083a3
AD
895 case NONASSOC:
896 parse_assoc_decl (NON_ASSOC);
897 break;
1ff442ca 898
a70083a3
AD
899 case SEMANTIC_PARSER:
900 if (semantic_parser == 0)
901 {
902 semantic_parser = 1;
903 open_extra_files ();
904 }
905 break;
1ff442ca 906
a70083a3
AD
907 case PURE_PARSER:
908 pure_parser = 1;
909 break;
1ff442ca 910
a70083a3
AD
911 case NOOP:
912 break;
1ff442ca 913
a70083a3
AD
914 default:
915 complain (_("unrecognized: %s"), token_buffer);
916 skip_to_char ('%');
917 }
918 }
919 else if (c == EOF)
920 fatal (_("no input grammar"));
921 else
922 {
923 complain (_("unknown character: %s"), printable_version (c));
924 skip_to_char ('%');
1ff442ca 925 }
1ff442ca 926 }
1ff442ca 927}
a70083a3
AD
928\f
929/*-------------------------------------------------------------------.
930| Assuming that a `{' has just been seen, copy everything up to the |
931| matching `}' into the actions file. STACK_OFFSET is the number of |
932| values in the current rule so far, which says where to find `$0' |
933| with respect to the top of the stack. |
934`-------------------------------------------------------------------*/
1ff442ca 935
4a120d45 936static void
a70083a3 937copy_action (symbol_list * rule, int stack_offset)
1ff442ca 938{
a70083a3
AD
939 int c;
940 int n;
941 int count;
942 char *type_name;
1ff442ca
NF
943
944 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
945 if (semantic_parser)
946 stack_offset = 0;
1ff442ca 947
41aca2e0 948 fprintf (faction, "\ncase %d:\n", nrules);
1ff442ca 949 if (!nolinesflag)
41aca2e0
AD
950 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
951 putc ('{', faction);
1ff442ca
NF
952
953 count = 1;
a70083a3 954 c = getc (finput);
1ff442ca
NF
955
956 while (count > 0)
957 {
958 while (c != '}')
a70083a3
AD
959 {
960 switch (c)
1ff442ca
NF
961 {
962 case '\n':
a70083a3 963 putc (c, faction);
1ff442ca
NF
964 lineno++;
965 break;
966
967 case '{':
a70083a3 968 putc (c, faction);
1ff442ca
NF
969 count++;
970 break;
971
972 case '\'':
973 case '"':
ca36d2ef 974 copy_string (finput, faction, c);
1ff442ca
NF
975 break;
976
977 case '/':
27821bff
AD
978 putc (c, faction);
979 c = getc (finput);
1ff442ca
NF
980 if (c != '*' && c != '/')
981 continue;
3cef001a 982 copy_comment (finput, faction, c);
1ff442ca
NF
983 break;
984
985 case '$':
a70083a3 986 c = getc (finput);
1ff442ca
NF
987 type_name = NULL;
988
989 if (c == '<')
990 {
a70083a3 991 char *cp = token_buffer;
1ff442ca 992
a70083a3 993 while ((c = getc (finput)) != '>' && c > 0)
118fb205
JT
994 {
995 if (cp == token_buffer + maxtoken)
a70083a3 996 cp = grow_token_buffer (cp);
118fb205
JT
997
998 *cp++ = c;
999 }
1ff442ca
NF
1000 *cp = 0;
1001 type_name = token_buffer;
1002 value_components_used = 1;
1003
a70083a3 1004 c = getc (finput);
1ff442ca
NF
1005 }
1006 if (c == '$')
1007 {
a70083a3 1008 fprintf (faction, "yyval");
41aca2e0 1009 if (!type_name)
a70083a3 1010 type_name = get_type_name (0, rule);
1ff442ca 1011 if (type_name)
a70083a3
AD
1012 fprintf (faction, ".%s", type_name);
1013 if (!type_name && typed)
a0f6b076
AD
1014 complain (_("$$ of `%s' has no declared type"),
1015 rule->sym->tag);
1ff442ca 1016 }
a70083a3 1017 else if (isdigit (c) || c == '-')
1ff442ca
NF
1018 {
1019 ungetc (c, finput);
a70083a3
AD
1020 n = read_signed_integer (finput);
1021 c = getc (finput);
1ff442ca
NF
1022
1023 if (!type_name && n > 0)
a70083a3 1024 type_name = get_type_name (n, rule);
1ff442ca 1025
a70083a3 1026 fprintf (faction, "yyvsp[%d]", n - stack_offset);
1ff442ca 1027 if (type_name)
a70083a3
AD
1028 fprintf (faction, ".%s", type_name);
1029 if (!type_name && typed)
a0f6b076
AD
1030 complain (_("$%d of `%s' has no declared type"),
1031 n, rule->sym->tag);
1ff442ca
NF
1032 continue;
1033 }
1034 else
a0f6b076 1035 complain (_("$%s is invalid"), printable_version (c));
1ff442ca
NF
1036
1037 break;
1038
1039 case '@':
7b306f52 1040 copy_at (finput, faction, stack_offset);
6666f98f 1041 break;
1ff442ca
NF
1042
1043 case EOF:
27821bff 1044 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1045
1046 default:
a70083a3
AD
1047 putc (c, faction);
1048 }
1049
1050 c = getc (finput);
1051 }
1052
1053 /* above loop exits when c is '}' */
1054
1055 if (--count)
1056 {
1057 putc (c, faction);
1058 c = getc (finput);
1059 }
1060 }
1061
1062 fprintf (faction, ";\n break;}");
1063}
1064\f
1065/*-------------------------------------------------------------------.
1066| After `%guard' is seen in the input file, copy the actual guard |
1067| into the guards file. If the guard is followed by an action, copy |
1068| that into the actions file. STACK_OFFSET is the number of values |
1069| in the current rule so far, which says where to find `$0' with |
1070| respect to the top of the stack, for the simple parser in which |
1071| the stack is not popped until after the guard is run. |
1072`-------------------------------------------------------------------*/
1073
1074static void
1075copy_guard (symbol_list * rule, int stack_offset)
1076{
1077 int c;
1078 int n;
1079 int count;
1080 char *type_name;
1081 int brace_flag = 0;
1082
1083 /* offset is always 0 if parser has already popped the stack pointer */
1084 if (semantic_parser)
1085 stack_offset = 0;
1086
1087 fprintf (fguard, "\ncase %d:\n", nrules);
1088 if (!nolinesflag)
1089 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1090 putc ('{', fguard);
1091
1092 count = 0;
1093 c = getc (finput);
1094
1095 while (brace_flag ? (count > 0) : (c != ';'))
1096 {
1097 switch (c)
1098 {
1099 case '\n':
1100 putc (c, fguard);
1101 lineno++;
1102 break;
1103
1104 case '{':
1105 putc (c, fguard);
1106 brace_flag = 1;
1107 count++;
1108 break;
1109
1110 case '}':
1111 putc (c, fguard);
1112 if (count > 0)
1113 count--;
1114 else
1115 {
1116 complain (_("unmatched %s"), "`}'");
1117 c = getc (finput); /* skip it */
1118 }
1119 break;
1120
1121 case '\'':
1122 case '"':
1123 copy_string (finput, fguard, c);
1124 break;
1125
1126 case '/':
1127 putc (c, fguard);
1128 c = getc (finput);
1129 if (c != '*' && c != '/')
1130 continue;
1131 copy_comment (finput, fguard, c);
1132 break;
1133
1134 case '$':
1135 c = getc (finput);
1136 type_name = NULL;
1137
1138 if (c == '<')
1139 {
1140 char *cp = token_buffer;
1141
1142 while ((c = getc (finput)) != '>' && c > 0)
1143 {
1144 if (cp == token_buffer + maxtoken)
1145 cp = grow_token_buffer (cp);
1146
1147 *cp++ = c;
1148 }
1149 *cp = 0;
1150 type_name = token_buffer;
1151
1152 c = getc (finput);
1153 }
1154
1155 if (c == '$')
1156 {
1157 fprintf (fguard, "yyval");
1158 if (!type_name)
1159 type_name = rule->sym->type_name;
1160 if (type_name)
1161 fprintf (fguard, ".%s", type_name);
1162 if (!type_name && typed)
1163 complain (_("$$ of `%s' has no declared type"),
1164 rule->sym->tag);
1165 }
1166 else if (isdigit (c) || c == '-')
1167 {
1168 ungetc (c, finput);
1169 n = read_signed_integer (finput);
1170 c = getc (finput);
1171
1172 if (!type_name && n > 0)
1173 type_name = get_type_name (n, rule);
1174
1175 fprintf (fguard, "yyvsp[%d]", n - stack_offset);
1176 if (type_name)
1177 fprintf (fguard, ".%s", type_name);
1178 if (!type_name && typed)
1179 complain (_("$%d of `%s' has no declared type"),
1180 n, rule->sym->tag);
1181 continue;
1ff442ca 1182 }
a70083a3
AD
1183 else
1184 complain (_("$%s is invalid"), printable_version (c));
1185 break;
1ff442ca 1186
a70083a3
AD
1187 case '@':
1188 copy_at (finput, fguard, stack_offset);
1189 break;
1ff442ca 1190
a70083a3
AD
1191 case EOF:
1192 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1193
a70083a3
AD
1194 default:
1195 putc (c, fguard);
1ff442ca 1196 }
a70083a3
AD
1197
1198 if (c != '}' || count != 0)
1199 c = getc (finput);
1ff442ca
NF
1200 }
1201
a70083a3
AD
1202 c = skip_white_space ();
1203
1204 fprintf (fguard, ";\n break;}");
1205 if (c == '{')
1206 copy_action (rule, stack_offset);
1207 else if (c == '=')
1208 {
1209 c = getc (finput); /* why not skip_white_space -wjh */
1210 if (c == '{')
1211 copy_action (rule, stack_offset);
1212 }
1213 else
1214 ungetc (c, finput);
1ff442ca 1215}
a70083a3
AD
1216\f
1217
1218static void
1219record_rule_line (void)
1220{
1221 /* Record each rule's source line number in rline table. */
1ff442ca 1222
a70083a3
AD
1223 if (nrules >= rline_allocated)
1224 {
1225 rline_allocated = nrules * 2;
1226 rline = (short *) xrealloc ((char *) rline,
1227 rline_allocated * sizeof (short));
1228 }
1229 rline[nrules] = lineno;
1230}
1ff442ca
NF
1231
1232
a70083a3
AD
1233/*-------------------------------------------------------------------.
1234| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1235| with the user's names. |
1236`-------------------------------------------------------------------*/
1ff442ca 1237
4a120d45 1238static bucket *
118fb205 1239gensym (void)
1ff442ca 1240{
a70083a3 1241 bucket *sym;
1ff442ca
NF
1242
1243 sprintf (token_buffer, "@%d", ++gensym_count);
a70083a3 1244 sym = getsym (token_buffer);
1ff442ca
NF
1245 sym->class = SNTERM;
1246 sym->value = nvars++;
36281465 1247 return sym;
1ff442ca
NF
1248}
1249
a70083a3
AD
1250#if 0
1251/*------------------------------------------------------------------.
1252| read in a %type declaration and record its information for |
1253| get_type_name to access. This is unused. It is only called from |
1254| the #if 0 part of readgram |
1255`------------------------------------------------------------------*/
1256
1257static int
1258get_type (void)
1259{
1260 int k;
1261 int t;
1262 char *name;
1263
1264 t = lex ();
1265
1266 if (t != TYPENAME)
1267 {
1268 complain (_("invalid %s declaration"), "%type");
1269 return t;
1270 }
1271
1272 k = strlen (token_buffer);
1273 name = NEW2 (k + 1, char);
1274 strcpy (name, token_buffer);
1275
1276 for (;;)
1277 {
1278 t = lex ();
1279
1280 switch (t)
1281 {
1282 case SEMICOLON:
1283 return lex ();
1284
1285 case COMMA:
1286 break;
1287
1288 case IDENTIFIER:
1289 if (symval->type_name == NULL)
1290 symval->type_name = name;
1291 else if (strcmp (name, symval->type_name) != 0)
1292 complain (_("type redeclaration for %s"), symval->tag);
1293
1294 break;
1295
1296 default:
1297 return t;
1298 }
1299 }
1300}
1ff442ca 1301
a70083a3
AD
1302#endif
1303\f
1304/*------------------------------------------------------------------.
1305| Parse the input grammar into a one symbol_list structure. Each |
1306| rule is represented by a sequence of symbols: the left hand side |
1307| followed by the contents of the right hand side, followed by a |
1308| null pointer instead of a symbol to terminate the rule. The next |
1309| symbol is the lhs of the following rule. |
1310| |
1311| All guards and actions are copied out to the appropriate files, |
1312| labelled by the rule number they apply to. |
1313`------------------------------------------------------------------*/
1ff442ca 1314
4a120d45 1315static void
118fb205 1316readgram (void)
1ff442ca 1317{
a70083a3
AD
1318 int t;
1319 bucket *lhs = NULL;
1320 symbol_list *p;
1321 symbol_list *p1;
1322 bucket *bp;
1ff442ca 1323
a70083a3
AD
1324 symbol_list *crule; /* points to first symbol_list of current rule. */
1325 /* its symbol is the lhs of the rule. */
1326 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1ff442ca
NF
1327
1328 p1 = NULL;
1329
a70083a3 1330 t = lex ();
1ff442ca
NF
1331
1332 while (t != TWO_PERCENTS && t != ENDFILE)
1333 {
1334 if (t == IDENTIFIER || t == BAR)
1335 {
a70083a3
AD
1336 int actionflag = 0;
1337 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1ff442ca
NF
1338 int xactions = 0; /* JF for error checking */
1339 bucket *first_rhs = 0;
1340
1341 if (t == IDENTIFIER)
1342 {
1343 lhs = symval;
943819bf
RS
1344
1345 if (!start_flag)
1346 {
1347 startval = lhs;
1348 start_flag = 1;
1349 }
a083fbbf 1350
a70083a3 1351 t = lex ();
1ff442ca 1352 if (t != COLON)
943819bf 1353 {
a0f6b076 1354 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1355 unlex (t);
943819bf 1356 }
1ff442ca
NF
1357 }
1358
943819bf 1359 if (nrules == 0 && t == BAR)
1ff442ca 1360 {
a0f6b076 1361 complain (_("grammar starts with vertical bar"));
943819bf 1362 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1363 }
1ff442ca
NF
1364 /* start a new rule and record its lhs. */
1365
1366 nrules++;
1367 nitems++;
1368
1369 record_rule_line ();
1370
a70083a3 1371 p = NEW (symbol_list);
1ff442ca
NF
1372 p->sym = lhs;
1373
1374 crule1 = p1;
1375 if (p1)
1376 p1->next = p;
1377 else
1378 grammar = p;
1379
1380 p1 = p;
1381 crule = p;
1382
1383 /* mark the rule's lhs as a nonterminal if not already so. */
1384
1385 if (lhs->class == SUNKNOWN)
1386 {
1387 lhs->class = SNTERM;
1388 lhs->value = nvars;
1389 nvars++;
1390 }
1391 else if (lhs->class == STOKEN)
a0f6b076 1392 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1393
1394 /* read the rhs of the rule. */
1395
1396 for (;;)
1397 {
a70083a3 1398 t = lex ();
943819bf
RS
1399 if (t == PREC)
1400 {
a70083a3 1401 t = lex ();
943819bf 1402 crule->ruleprec = symval;
a70083a3 1403 t = lex ();
943819bf 1404 }
1ff442ca 1405
a70083a3
AD
1406 if (!(t == IDENTIFIER || t == LEFT_CURLY))
1407 break;
1ff442ca
NF
1408
1409 /* If next token is an identifier, see if a colon follows it.
a70083a3 1410 If one does, exit this rule now. */
1ff442ca
NF
1411 if (t == IDENTIFIER)
1412 {
a70083a3
AD
1413 bucket *ssave;
1414 int t1;
1ff442ca
NF
1415
1416 ssave = symval;
a70083a3
AD
1417 t1 = lex ();
1418 unlex (t1);
1ff442ca 1419 symval = ssave;
a70083a3
AD
1420 if (t1 == COLON)
1421 break;
1ff442ca 1422
a70083a3 1423 if (!first_rhs) /* JF */
1ff442ca
NF
1424 first_rhs = symval;
1425 /* Not followed by colon =>
1426 process as part of this rule's rhs. */
1427 }
1428
1429 /* If we just passed an action, that action was in the middle
a70083a3
AD
1430 of a rule, so make a dummy rule to reduce it to a
1431 non-terminal. */
1ff442ca
NF
1432 if (actionflag)
1433 {
a70083a3 1434 bucket *sdummy;
1ff442ca
NF
1435
1436 /* Since the action was written out with this rule's */
943819bf 1437 /* number, we must give the new rule this number */
1ff442ca
NF
1438 /* by inserting the new rule before it. */
1439
1440 /* Make a dummy nonterminal, a gensym. */
a70083a3 1441 sdummy = gensym ();
1ff442ca
NF
1442
1443 /* Make a new rule, whose body is empty,
1444 before the current one, so that the action
1445 just read can belong to it. */
1446 nrules++;
1447 nitems++;
1448 record_rule_line ();
a70083a3 1449 p = NEW (symbol_list);
1ff442ca
NF
1450 if (crule1)
1451 crule1->next = p;
a70083a3
AD
1452 else
1453 grammar = p;
1ff442ca 1454 p->sym = sdummy;
a70083a3 1455 crule1 = NEW (symbol_list);
1ff442ca
NF
1456 p->next = crule1;
1457 crule1->next = crule;
1458
1459 /* insert the dummy generated by that rule into this rule. */
1460 nitems++;
a70083a3 1461 p = NEW (symbol_list);
1ff442ca
NF
1462 p->sym = sdummy;
1463 p1->next = p;
1464 p1 = p;
1465
1466 actionflag = 0;
1467 }
1468
1469 if (t == IDENTIFIER)
1470 {
1471 nitems++;
a70083a3 1472 p = NEW (symbol_list);
1ff442ca
NF
1473 p->sym = symval;
1474 p1->next = p;
1475 p1 = p;
1476 }
a70083a3 1477 else /* handle an action. */
1ff442ca 1478 {
a70083a3 1479 copy_action (crule, rulelength);
1ff442ca
NF
1480 actionflag = 1;
1481 xactions++; /* JF */
1482 }
1483 rulelength++;
a70083a3 1484 } /* end of read rhs of rule */
1ff442ca
NF
1485
1486 /* Put an empty link in the list to mark the end of this rule */
a70083a3 1487 p = NEW (symbol_list);
1ff442ca
NF
1488 p1->next = p;
1489 p1 = p;
1490
1491 if (t == PREC)
1492 {
a0f6b076 1493 complain (_("two @prec's in a row"));
a70083a3 1494 t = lex ();
1ff442ca 1495 crule->ruleprec = symval;
a70083a3 1496 t = lex ();
1ff442ca
NF
1497 }
1498 if (t == GUARD)
1499 {
a70083a3 1500 if (!semantic_parser)
a0f6b076 1501 complain ("%s",
a70083a3
AD
1502 _
1503 ("%guard present but %semantic_parser not specified"));
1ff442ca 1504
a70083a3
AD
1505 copy_guard (crule, rulelength);
1506 t = lex ();
1ff442ca
NF
1507 }
1508 else if (t == LEFT_CURLY)
1509 {
a70083a3 1510 /* This case never occurs -wjh */
6666f98f 1511 if (actionflag)
a0f6b076 1512 complain (_("two actions at end of one rule"));
a70083a3 1513 copy_action (crule, rulelength);
943819bf
RS
1514 actionflag = 1;
1515 xactions++; /* -wjh */
a70083a3 1516 t = lex ();
1ff442ca 1517 }
a0f6b076 1518 /* If $$ is being set in default way, report if any type
6666f98f
AD
1519 mismatch. */
1520 else if (!xactions
a70083a3 1521 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1522 {
6666f98f
AD
1523 if (lhs->type_name == 0
1524 || first_rhs->type_name == 0
a70083a3 1525 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1526 complain (_("type clash (`%s' `%s') on default action"),
1527 lhs->type_name ? lhs->type_name : "",
a70083a3 1528 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1529 }
1530 /* Warn if there is no default for $$ but we need one. */
1531 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1532 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca 1533 if (t == SEMICOLON)
a70083a3 1534 t = lex ();
a083fbbf 1535 }
943819bf 1536#if 0
a70083a3 1537 /* these things can appear as alternatives to rules. */
943819bf
RS
1538/* NO, they cannot.
1539 a) none of the documentation allows them
1540 b) most of them scan forward until finding a next %
1541 thus they may swallow lots of intervening rules
1542*/
1ff442ca
NF
1543 else if (t == TOKEN)
1544 {
a70083a3
AD
1545 parse_token_decl (STOKEN, SNTERM);
1546 t = lex ();
1ff442ca
NF
1547 }
1548 else if (t == NTERM)
1549 {
a70083a3
AD
1550 parse_token_decl (SNTERM, STOKEN);
1551 t = lex ();
1ff442ca
NF
1552 }
1553 else if (t == TYPE)
1554 {
a70083a3 1555 t = get_type ();
1ff442ca
NF
1556 }
1557 else if (t == UNION)
1558 {
a70083a3
AD
1559 parse_union_decl ();
1560 t = lex ();
1ff442ca
NF
1561 }
1562 else if (t == EXPECT)
1563 {
a70083a3
AD
1564 parse_expect_decl ();
1565 t = lex ();
1ff442ca
NF
1566 }
1567 else if (t == START)
1568 {
a70083a3
AD
1569 parse_start_decl ();
1570 t = lex ();
1ff442ca 1571 }
943819bf
RS
1572#endif
1573
1ff442ca 1574 else
943819bf 1575 {
a0f6b076 1576 complain (_("invalid input: %s"), token_buffer);
a70083a3 1577 t = lex ();
943819bf 1578 }
1ff442ca
NF
1579 }
1580
943819bf
RS
1581 /* grammar has been read. Do some checking */
1582
1ff442ca 1583 if (nsyms > MAXSHORT)
a0f6b076
AD
1584 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1585 MAXSHORT);
1ff442ca 1586 if (nrules == 0)
a0f6b076 1587 fatal (_("no rules in the input grammar"));
1ff442ca 1588
a70083a3 1589 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1ff442ca
NF
1590 && !value_components_used)
1591 {
1592 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
a70083a3
AD
1593 but it seems better to be consistent.
1594 Most programs should declare their own type anyway. */
1595 fprintf (fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca 1596 if (fdefines)
a70083a3 1597 fprintf (fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca
NF
1598 }
1599
1600 /* Report any undefined symbols and consider them nonterminals. */
1601
1602 for (bp = firstsymbol; bp; bp = bp->next)
1603 if (bp->class == SUNKNOWN)
1604 {
a70083a3
AD
1605 complain (_
1606 ("symbol %s is used, but is not defined as a token and has no rules"),
1607bp->tag);
1ff442ca
NF
1608 bp->class = SNTERM;
1609 bp->value = nvars++;
1610 }
1611
1612 ntokens = nsyms - nvars;
1613}
a70083a3
AD
1614\f
1615/*--------------------------------------------------------------.
1616| For named tokens, but not literal ones, define the name. The |
1617| value is the user token number. |
1618`--------------------------------------------------------------*/
1ff442ca 1619
4a120d45 1620static void
a70083a3 1621output_token_defines (FILE *file)
1ff442ca 1622{
a70083a3
AD
1623 bucket *bp;
1624 char *cp, *symbol;
1625 char c;
1ff442ca 1626
a70083a3 1627 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1628 {
a70083a3
AD
1629 symbol = bp->tag; /* get symbol */
1630
1631 if (bp->value >= ntokens)
1632 continue;
1633 if (bp->user_token_number == SALIAS)
1634 continue;
1635 if ('\'' == *symbol)
1636 continue; /* skip literal character */
1637 if (bp == errtoken)
1638 continue; /* skip error token */
1639 if ('\"' == *symbol)
1ff442ca 1640 {
a70083a3
AD
1641 /* use literal string only if given a symbol with an alias */
1642 if (bp->alias)
1643 symbol = bp->alias->tag;
1644 else
1645 continue;
1646 }
1ff442ca 1647
a70083a3
AD
1648 /* Don't #define nonliteral tokens whose names contain periods. */
1649 cp = symbol;
1650 while ((c = *cp++) && c != '.');
1651 if (c != '\0')
1652 continue;
1ff442ca 1653
a70083a3
AD
1654 fprintf (file, "#define\t%s\t%d\n", symbol,
1655 ((translations && !rawtoknumflag)
1656 ? bp->user_token_number : bp->value));
1657 if (semantic_parser)
1658 fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca 1659 }
a70083a3
AD
1660
1661 putc ('\n', file);
1ff442ca 1662}
1ff442ca
NF
1663
1664
a70083a3
AD
1665/*------------------------------------------------------------------.
1666| Assign symbol numbers, and write definition of token names into |
b2ca4022 1667| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1668| of symbols. |
1669`------------------------------------------------------------------*/
1ff442ca 1670
4a120d45 1671static void
118fb205 1672packsymbols (void)
1ff442ca 1673{
a70083a3
AD
1674 bucket *bp;
1675 int tokno = 1;
1676 int i;
1677 int last_user_token_number;
4a120d45 1678 static char DOLLAR[] = "$";
1ff442ca
NF
1679
1680 /* int lossage = 0; JF set but not used */
1681
a70083a3 1682 tags = NEW2 (nsyms + 1, char *);
4a120d45 1683 tags[0] = DOLLAR;
a70083a3 1684 user_toknums = NEW2 (nsyms + 1, short);
943819bf 1685 user_toknums[0] = 0;
1ff442ca 1686
a70083a3
AD
1687 sprec = NEW2 (nsyms, short);
1688 sassoc = NEW2 (nsyms, short);
1ff442ca
NF
1689
1690 max_user_token_number = 256;
1691 last_user_token_number = 256;
1692
1693 for (bp = firstsymbol; bp; bp = bp->next)
1694 {
1695 if (bp->class == SNTERM)
1696 {
1697 bp->value += ntokens;
1698 }
943819bf
RS
1699 else if (bp->alias)
1700 {
0a6384c4
AD
1701 /* this symbol and its alias are a single token defn.
1702 allocate a tokno, and assign to both check agreement of
1703 ->prec and ->assoc fields and make both the same */
1704 if (bp->value == 0)
1705 bp->value = bp->alias->value = tokno++;
943819bf 1706
0a6384c4
AD
1707 if (bp->prec != bp->alias->prec)
1708 {
1709 if (bp->prec != 0 && bp->alias->prec != 0
1710 && bp->user_token_number == SALIAS)
a0f6b076
AD
1711 complain (_("conflicting precedences for %s and %s"),
1712 bp->tag, bp->alias->tag);
0a6384c4
AD
1713 if (bp->prec != 0)
1714 bp->alias->prec = bp->prec;
1715 else
1716 bp->prec = bp->alias->prec;
1717 }
943819bf 1718
0a6384c4
AD
1719 if (bp->assoc != bp->alias->assoc)
1720 {
a0f6b076
AD
1721 if (bp->assoc != 0 && bp->alias->assoc != 0
1722 && bp->user_token_number == SALIAS)
1723 complain (_("conflicting assoc values for %s and %s"),
1724 bp->tag, bp->alias->tag);
1725 if (bp->assoc != 0)
1726 bp->alias->assoc = bp->assoc;
1727 else
1728 bp->assoc = bp->alias->assoc;
1729 }
0a6384c4
AD
1730
1731 if (bp->user_token_number == SALIAS)
a70083a3 1732 continue; /* do not do processing below for SALIASs */
943819bf 1733
a70083a3
AD
1734 }
1735 else /* bp->class == STOKEN */
943819bf
RS
1736 {
1737 bp->value = tokno++;
1738 }
1739
1740 if (bp->class == STOKEN)
1ff442ca
NF
1741 {
1742 if (translations && !(bp->user_token_number))
1743 bp->user_token_number = ++last_user_token_number;
1744 if (bp->user_token_number > max_user_token_number)
1745 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1746 }
1747
1748 tags[bp->value] = bp->tag;
943819bf 1749 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1750 sprec[bp->value] = bp->prec;
1751 sassoc[bp->value] = bp->assoc;
1752
1753 }
1754
1755 if (translations)
1756 {
a70083a3 1757 int j;
1ff442ca 1758
a70083a3 1759 token_translations = NEW2 (max_user_token_number + 1, short);
1ff442ca 1760
0a6384c4 1761 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1762 token number for $undefined., which represents all invalid
1763 inputs. */
4a120d45 1764 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1765 token_translations[j] = 2;
1ff442ca 1766
943819bf 1767 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1768 {
1769 if (bp->value >= ntokens)
1770 continue; /* non-terminal */
1771 if (bp->user_token_number == SALIAS)
0a6384c4 1772 continue;
a70083a3 1773 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1774 complain (_("tokens %s and %s both assigned number %d"),
1775 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1776 bp->tag, bp->user_token_number);
1777 token_translations[bp->user_token_number] = bp->value;
1778 }
1ff442ca
NF
1779 }
1780
1781 error_token_number = errtoken->value;
1782
a70083a3
AD
1783 if (!noparserflag)
1784 output_token_defines (ftable);
1ff442ca
NF
1785
1786 if (startval->class == SUNKNOWN)
a0f6b076 1787 fatal (_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1788 else if (startval->class == STOKEN)
a0f6b076 1789 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1790
1791 start_symbol = startval->value;
1792
1793 if (definesflag)
1794 {
a70083a3 1795 output_token_defines (fdefines);
1ff442ca
NF
1796
1797 if (!pure_parser)
1798 {
1799 if (spec_name_prefix)
a70083a3
AD
1800 fprintf (fdefines, "\nextern YYSTYPE %slval;\n",
1801 spec_name_prefix);
1ff442ca 1802 else
a70083a3 1803 fprintf (fdefines, "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1804 }
1805
1806 if (semantic_parser)
1807 for (i = ntokens; i < nsyms; i++)
1808 {
1809 /* don't make these for dummy nonterminals made by gensym. */
1810 if (*tags[i] != '@')
a70083a3 1811 fprintf (fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1812 }
1813#if 0
1814 /* `fdefines' is now a temporary file, so we need to copy its
1815 contents in `done', so we can't close it here. */
a70083a3 1816 fclose (fdefines);
1ff442ca
NF
1817 fdefines = NULL;
1818#endif
1819 }
1820}
a083fbbf 1821
1ff442ca 1822
a70083a3
AD
1823/*---------------------------------------------------------------.
1824| Convert the rules into the representation using RRHS, RLHS and |
1825| RITEMS. |
1826`---------------------------------------------------------------*/
1ff442ca 1827
4a120d45 1828static void
118fb205 1829packgram (void)
1ff442ca 1830{
a70083a3
AD
1831 int itemno;
1832 int ruleno;
1833 symbol_list *p;
1ff442ca
NF
1834
1835 bucket *ruleprec;
1836
a70083a3
AD
1837 ritem = NEW2 (nitems + 1, short);
1838 rlhs = NEW2 (nrules, short) - 1;
1839 rrhs = NEW2 (nrules, short) - 1;
1840 rprec = NEW2 (nrules, short) - 1;
1841 rprecsym = NEW2 (nrules, short) - 1;
1842 rassoc = NEW2 (nrules, short) - 1;
1ff442ca
NF
1843
1844 itemno = 0;
1845 ruleno = 1;
1846
1847 p = grammar;
1848 while (p)
1849 {
1850 rlhs[ruleno] = p->sym->value;
1851 rrhs[ruleno] = itemno;
1852 ruleprec = p->ruleprec;
1853
1854 p = p->next;
1855 while (p && p->sym)
1856 {
1857 ritem[itemno++] = p->sym->value;
1858 /* A rule gets by default the precedence and associativity
1859 of the last token in it. */
a70083a3 1860 if (p->sym->class == STOKEN)
1ff442ca
NF
1861 {
1862 rprec[ruleno] = p->sym->prec;
1863 rassoc[ruleno] = p->sym->assoc;
1864 }
a70083a3
AD
1865 if (p)
1866 p = p->next;
1ff442ca
NF
1867 }
1868
1869 /* If this rule has a %prec,
a70083a3 1870 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1871 if (ruleprec)
1872 {
a70083a3
AD
1873 rprec[ruleno] = ruleprec->prec;
1874 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1875 rprecsym[ruleno] = ruleprec->value;
1876 }
1877
1878 ritem[itemno++] = -ruleno;
1879 ruleno++;
1880
a70083a3
AD
1881 if (p)
1882 p = p->next;
1ff442ca
NF
1883 }
1884
1885 ritem[itemno] = 0;
1886}
a70083a3
AD
1887\f
1888/*-------------------------------------------------------------------.
1889| Read in the grammar specification and record it in the format |
1890| described in gram.h. All guards are copied into the FGUARD file |
1891| and all actions into FACTION, in each case forming the body of a C |
1892| function (YYGUARD or YYACTION) which contains a switch statement |
1893| to decide which guard or action to execute. |
1894`-------------------------------------------------------------------*/
1895
1896void
1897reader (void)
1898{
1899 start_flag = 0;
1900 startval = NULL; /* start symbol not specified yet. */
1901
1902#if 0
1903 /* initially assume token number translation not needed. */
1904 translations = 0;
1905#endif
1906 /* Nowadays translations is always set to 1, since we give `error' a
1907 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1908 */
1909 translations = 1;
1910
1911 nsyms = 1;
1912 nvars = 0;
1913 nrules = 0;
1914 nitems = 0;
1915 rline_allocated = 10;
1916 rline = NEW2 (rline_allocated, short);
1917
1918 typed = 0;
1919 lastprec = 0;
1920
1921 gensym_count = 0;
1922
1923 semantic_parser = 0;
1924 pure_parser = 0;
1925 yylsp_needed = 0;
1926
1927 grammar = NULL;
1928
1929 init_lex ();
1930 lineno = 1;
1931
1932 /* Initialize the symbol table. */
1933 tabinit ();
1934 /* Construct the error token */
1935 errtoken = getsym ("error");
1936 errtoken->class = STOKEN;
1937 errtoken->user_token_number = 256; /* Value specified by POSIX. */
1938 /* Construct a token that represents all undefined literal tokens.
1939 It is always token number 2. */
1940 undeftoken = getsym ("$undefined.");
1941 undeftoken->class = STOKEN;
1942 undeftoken->user_token_number = 2;
1943
1944 /* Read the declaration section. Copy %{ ... %} groups to FTABLE
1945 and FDEFINES file. Also notice any %token, %left, etc. found
1946 there. */
1947 putc ('\n', ftable);
1948 fprintf (ftable, "\
1949/* %s, made from %s\n\
1950 by GNU bison %s. */\n\
1951\n", noparserflag ? "Bison-generated parse tables" : "A Bison parser", infile, VERSION);
1952
1953 fputs ("#define YYBISON 1 /* Identify Bison output. */\n\n", ftable);
1954 read_declarations ();
1955 /* Start writing the guard and action files, if they are needed. */
1956 output_headers ();
1957 /* Read in the grammar, build grammar in list form. Write out
1958 guards and actions. */
1959 readgram ();
1960 /* Now we know whether we need the line-number stack. If we do,
1961 write its type into the .tab.h file. */
1962 if (fdefines)
1963 reader_output_yylsp (fdefines);
1964 /* Write closing delimiters for actions and guards. */
1965 output_trailers ();
1966 if (yylsp_needed)
1967 fputs ("#define YYLSP_NEEDED\n\n", ftable);
1968 /* Assign the symbols their symbol numbers. Write #defines for the
1969 token symbols into FDEFINES if requested. */
1970 packsymbols ();
1971 /* Convert the grammar into the format described in gram.h. */
1972 packgram ();
1973 /* Free the symbol table data structure since symbols are now all
1974 referred to by symbol number. */
1975 free_symtab ();
1976}
1977
1978void
1979reader_output_yylsp (FILE *f)
1980{
1981 if (yylsp_needed)
1982 fprintf (f, LTYPESTR);
1983}