]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/LR0.h: New file.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
a70083a3
AD
2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000
3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "getargs.h"
#include "files.h"
#include "alloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
1ff442ca 32
6666f98f
AD
33#define LTYPESTR "\
34\n\
35#ifndef YYLTYPE\n\
36typedef\n\
37 struct yyltype\n\
38\
39 {\n\
40 int timestamp;\n\
41 int first_line;\n\
42 int first_column;\
43\n\
44 int last_line;\n\
45 int last_column;\n\
46 char *text;\n\
47 }\n\
48\
49 yyltype;\n\
50\n\
51#define YYLTYPE yyltype\n\
52#endif\n\
53\n"
1ff442ca
NF
54
55/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 56static int rline_allocated;
1ff442ca 57
1ff442ca
NF
58extern bucket *symval;
59extern int numval;
1ff442ca
NF
60extern int expected_conflicts;
61extern char *token_buffer;
118fb205
JT
62extern int maxtoken;
63
a70083a3
AD
64extern void tabinit PARAMS ((void));
65extern void free_symtab PARAMS ((void));
a70083a3 66extern char *printable_version PARAMS ((int));
1ff442ca 67
a70083a3
AD
68typedef struct symbol_list
69{
70 struct symbol_list *next;
71 bucket *sym;
72 bucket *ruleprec;
73}
74symbol_list;
118fb205 75
a70083a3
AD
76
77extern void reader PARAMS ((void));
78extern void reader_output_yylsp PARAMS ((FILE *));
1ff442ca
NF
79
80int lineno;
1ff442ca 81char **tags;
d019d655 82short *user_toknums;
4a120d45
JT
83static symbol_list *grammar;
84static int start_flag;
85static bucket *startval;
1ff442ca
NF
86
87/* Nonzero if components of semantic values are used, implying
88 they must be unions. */
89static int value_components_used;
90
a70083a3 91static int typed; /* nonzero if %union has been seen. */
1ff442ca 92
a70083a3 93static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
1ff442ca 94
a70083a3 95static int gensym_count; /* incremented for each generated symbol */
1ff442ca
NF
96
97static bucket *errtoken;
5b2e3c89 98static bucket *undeftoken;
1ff442ca
NF
99
100/* Nonzero if any action or guard uses the @n construct. */
101static int yylsp_needed;
0d533154 102\f
a70083a3 103
0d533154
AD
104/*===================\
105| Low level lexing. |
106\===================*/
943819bf
RS
107
108static void
118fb205 109skip_to_char (int target)
943819bf
RS
110{
111 int c;
112 if (target == '\n')
a0f6b076 113 complain (_(" Skipping to next \\n"));
943819bf 114 else
a0f6b076 115 complain (_(" Skipping to next %c"), target);
943819bf
RS
116
117 do
0d533154 118 c = skip_white_space ();
943819bf 119 while (c != target && c != EOF);
a083fbbf 120 if (c != EOF)
0d533154 121 ungetc (c, finput);
943819bf
RS
122}
123
124
0d533154
AD
/*---------------------------------------------------------------------.
| Read a signed decimal integer from STREAM and return its value.      |
|                                                                      |
| An optional leading `-' is accepted, followed by zero or more        |
| digits.  The first character that is not part of the number is       |
| pushed back onto STREAM.  If no digit is present the result is 0.    |
|                                                                      |
| The magnitude saturates at INT_MAX instead of overflowing, so an     |
| absurdly long run of digits in the grammar cannot trigger undefined  |
| behavior; the excess digits are still consumed.                      |
`---------------------------------------------------------------------*/

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit fits in an int exactly when
         n <= (INT_MAX - digit) / 10; otherwise clamp.  */
      if (n > (INT_MAX - digit) / 10)
	n = INT_MAX;
      else
	n = 10 * n + digit;
      c = getc (stream);
    }

  /* Give back the terminator; at EOF there is nothing to give back.  */
  if (c != EOF)
    ungetc (c, stream);

  return sign * n;
}
152\f
153/*-------------------------------------------------------------------.
154| Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of |
155| the string (either ' or "). |
156`-------------------------------------------------------------------*/
ae3c3164
AD
157
158static inline void
4a120d45 159copy_string (FILE *fin, FILE *fout, int match)
ae3c3164
AD
160{
161 int c;
162
4a120d45
JT
163 putc (match, fout);
164 c = getc (fin);
ae3c3164
AD
165
166 while (c != match)
167 {
168 if (c == EOF)
169 fatal (_("unterminated string at end of file"));
170 if (c == '\n')
171 {
a0f6b076 172 complain (_("unterminated string"));
4a120d45 173 ungetc (c, fin);
ae3c3164
AD
174 c = match; /* invent terminator */
175 continue;
176 }
177
a70083a3 178 putc (c, fout);
ae3c3164
AD
179
180 if (c == '\\')
181 {
4a120d45 182 c = getc (fin);
ae3c3164
AD
183 if (c == EOF)
184 fatal (_("unterminated string at end of file"));
4a120d45 185 putc (c, fout);
ae3c3164
AD
186 if (c == '\n')
187 lineno++;
188 }
189
a70083a3 190 c = getc (fin);
ae3c3164
AD
191 }
192
a70083a3 193 putc (c, fout);
ae3c3164
AD
194}
195
196
6c89f1c1
AD
197/*---------------------------------------------------------------.
198| Dump the comment from IN to OUT1 and OUT2. C is either `*' or |
199| `/', depending upon the type of comments used. OUT2 might be |
200| NULL. |
201`---------------------------------------------------------------*/
ae3c3164
AD
202
203static inline void
a70083a3 204copy_comment2 (FILE *in, FILE *out1, FILE *out2, int c)
ae3c3164
AD
205{
206 int cplus_comment;
a70083a3 207 int ended;
ae3c3164
AD
208
209 cplus_comment = (c == '/');
27821bff
AD
210 putc (c, out1);
211 if (out2)
212 putc (c, out2);
213 c = getc (in);
ae3c3164
AD
214
215 ended = 0;
216 while (!ended)
217 {
218 if (!cplus_comment && c == '*')
219 {
220 while (c == '*')
221 {
27821bff
AD
222 putc (c, out1);
223 if (out2)
224 putc (c, out2);
225 c = getc (in);
ae3c3164
AD
226 }
227
228 if (c == '/')
229 {
a70083a3 230 putc (c, out1);
27821bff 231 if (out2)
a70083a3 232 putc (c, out2);
ae3c3164
AD
233 ended = 1;
234 }
235 }
236 else if (c == '\n')
237 {
238 lineno++;
27821bff
AD
239 putc (c, out1);
240 if (out2)
241 putc (c, out2);
ae3c3164
AD
242 if (cplus_comment)
243 ended = 1;
244 else
27821bff 245 c = getc (in);
ae3c3164
AD
246 }
247 else if (c == EOF)
248 fatal (_("unterminated comment"));
249 else
250 {
27821bff
AD
251 putc (c, out1);
252 if (out2)
253 putc (c, out2);
254 c = getc (in);
ae3c3164
AD
255 }
256 }
257}
258
259
d019d655
AD
/*--------------------------------------------------------------.
| Single-destination form of copy_comment2: copy the comment    |
| from FIN to FOUT only.  C is `*' or `/', telling which kind   |
| of comment is being read.                                     |
`--------------------------------------------------------------*/

static inline void
copy_comment (FILE *fin, FILE *fout, int c)
{
  FILE *const no_second_output = NULL;

  copy_comment2 (fin, fout, no_second_output, c);
}
270
271
a70083a3
AD
272/*-----------------------------------------------------------------.
273| FIN is pointing to a location (i.e., a `@'). Output to FOUT a |
274| reference to this location. STACK_OFFSET is the number of values |
275| in the current rule so far, which says where to find `$0' with |
276| respect to the top of the stack. |
277`-----------------------------------------------------------------*/
1ff442ca 278
a70083a3
AD
279static inline void
280copy_at (FILE *fin, FILE *fout, int stack_offset)
1ff442ca 281{
a70083a3 282 int c;
1ff442ca 283
a70083a3
AD
284 c = getc (fin);
285 if (c == '$')
1ff442ca 286 {
a70083a3
AD
287 fprintf (fout, "yyloc");
288 yylsp_needed = 1;
289 }
290 else if (isdigit (c) || c == '-')
291 {
292 int n;
1ff442ca 293
a70083a3
AD
294 ungetc (c, fin);
295 n = read_signed_integer (fin);
943819bf 296
a70083a3
AD
297 fprintf (fout, "yylsp[%d]", n - stack_offset);
298 yylsp_needed = 1;
1ff442ca 299 }
a70083a3
AD
300 else
301 complain (_("@%s is invalid"), printable_version (c));
1ff442ca 302}
a70083a3
AD
303\f
304/*-------------------------------------------------------------------.
305| Copy the contents of a `%{ ... %}' into the definitions file. The |
306| `%{' has already been read. Return after reading the `%}'. |
307`-------------------------------------------------------------------*/
1ff442ca 308
4a120d45 309static void
118fb205 310copy_definition (void)
1ff442ca 311{
a70083a3 312 int c;
ae3c3164 313 /* -1 while reading a character if prev char was %. */
a70083a3 314 int after_percent;
1ff442ca
NF
315
316 if (!nolinesflag)
a70083a3 317 fprintf (fattrs, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
318
319 after_percent = 0;
320
ae3c3164 321 c = getc (finput);
1ff442ca
NF
322
323 for (;;)
324 {
325 switch (c)
326 {
327 case '\n':
a70083a3 328 putc (c, fattrs);
1ff442ca
NF
329 lineno++;
330 break;
331
332 case '%':
a70083a3 333 after_percent = -1;
1ff442ca 334 break;
a083fbbf 335
1ff442ca
NF
336 case '\'':
337 case '"':
ae3c3164 338 copy_string (finput, fattrs, c);
1ff442ca
NF
339 break;
340
341 case '/':
ae3c3164
AD
342 putc (c, fattrs);
343 c = getc (finput);
1ff442ca
NF
344 if (c != '*' && c != '/')
345 continue;
ae3c3164 346 copy_comment (finput, fattrs, c);
1ff442ca
NF
347 break;
348
349 case EOF:
a70083a3 350 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
351
352 default:
a70083a3 353 putc (c, fattrs);
1ff442ca
NF
354 }
355
a70083a3 356 c = getc (finput);
1ff442ca
NF
357
358 if (after_percent)
359 {
360 if (c == '}')
361 return;
a70083a3 362 putc ('%', fattrs);
1ff442ca
NF
363 }
364 after_percent = 0;
365
366 }
367
368}
369
370
a70083a3
AD
371/*-----------------------------------------------------------------.
372| Parse what comes after %token or %nterm. For %token, what_is is |
373| STOKEN and what_is_not is SNTERM. For %nterm, the arguments are |
374| reversed. |
375`-----------------------------------------------------------------*/
1ff442ca 376
4a120d45 377static void
118fb205 378parse_token_decl (int what_is, int what_is_not)
1ff442ca 379{
a70083a3
AD
380 int token = 0;
381 char *typename = 0;
382 struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
383 int k;
384
1ff442ca
NF
385 for (;;)
386 {
e6011337
JT
387 int tmp_char = ungetc (skip_white_space (), finput);
388
389 if (tmp_char == '%')
1ff442ca 390 return;
e6011337 391 if (tmp_char == EOF)
a0f6b076 392 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 393
a70083a3 394 token = lex ();
1ff442ca 395 if (token == COMMA)
943819bf
RS
396 {
397 symbol = NULL;
398 continue;
399 }
1ff442ca
NF
400 if (token == TYPENAME)
401 {
a70083a3
AD
402 k = strlen (token_buffer);
403 typename = NEW2 (k + 1, char);
404 strcpy (typename, token_buffer);
1ff442ca 405 value_components_used = 1;
943819bf
RS
406 symbol = NULL;
407 }
a70083a3 408 else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
943819bf
RS
409 {
410 translations = 1;
411 symval->class = STOKEN;
412 symval->type_name = typename;
413 symval->user_token_number = symbol->user_token_number;
414 symbol->user_token_number = SALIAS;
415
a083fbbf
RS
416 symval->alias = symbol;
417 symbol->alias = symval;
943819bf
RS
418 symbol = NULL;
419
a70083a3 420 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
421 }
422 else if (token == IDENTIFIER)
423 {
424 int oldclass = symval->class;
943819bf 425 symbol = symval;
1ff442ca 426
943819bf 427 if (symbol->class == what_is_not)
a0f6b076 428 complain (_("symbol %s redefined"), symbol->tag);
943819bf 429 symbol->class = what_is;
1ff442ca 430 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 431 symbol->value = nvars++;
1ff442ca
NF
432
433 if (typename)
434 {
943819bf
RS
435 if (symbol->type_name == NULL)
436 symbol->type_name = typename;
a70083a3 437 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 438 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
439 }
440 }
943819bf 441 else if (symbol && token == NUMBER)
a70083a3 442 {
943819bf 443 symbol->user_token_number = numval;
1ff442ca 444 translations = 1;
a70083a3 445 }
1ff442ca 446 else
943819bf 447 {
a0f6b076 448 complain (_("`%s' is invalid in %s"),
a70083a3
AD
449 token_buffer, (what_is == STOKEN) ? "%token" : "%nterm");
450 skip_to_char ('%');
943819bf 451 }
1ff442ca
NF
452 }
453
454}
455
1ff442ca 456
a0f6b076 457/* Parse what comes after %start */
1ff442ca 458
4a120d45 459static void
118fb205 460parse_start_decl (void)
1ff442ca
NF
461{
462 if (start_flag)
27821bff
AD
463 complain (_("multiple %s declarations"), "%start");
464 if (lex () != IDENTIFIER)
465 complain (_("invalid %s declaration"), "%start");
943819bf
RS
466 else
467 {
468 start_flag = 1;
469 startval = symval;
470 }
1ff442ca
NF
471}
472
473
474
a70083a3
AD
475/*--------------------------------------------------------------.
476| Get the data type (alternative in the union) of the value for |
477| symbol n in rule rule. |
478`--------------------------------------------------------------*/
1ff442ca 479
a70083a3
AD
480static char *
481get_type_name (int n, symbol_list * rule)
1ff442ca 482{
a70083a3
AD
483 int i;
484 symbol_list *rp;
1ff442ca 485
a70083a3 486 if (n < 0)
943819bf 487 {
a70083a3
AD
488 complain (_("invalid $ value"));
489 return NULL;
943819bf 490 }
1ff442ca 491
a70083a3
AD
492 rp = rule;
493 i = 0;
1ff442ca 494
a70083a3 495 while (i < n)
1ff442ca 496 {
a70083a3
AD
497 rp = rp->next;
498 if (rp == NULL || rp->sym == NULL)
499 {
500 complain (_("invalid $ value"));
501 return NULL;
502 }
503 i++;
504 }
505
506 return rp->sym->type_name;
507}
508
509
510/*-----------------------------------------------------------.
511| read in a %type declaration and record its information for |
512| get_type_name to access |
513`-----------------------------------------------------------*/
514
515static void
516parse_type_decl (void)
517{
518 int k;
519 char *name;
520
521 if (lex () != TYPENAME)
522 {
523 complain ("%s", _("%type declaration has no <typename>"));
524 skip_to_char ('%');
525 return;
526 }
527
528 k = strlen (token_buffer);
529 name = NEW2 (k + 1, char);
530 strcpy (name, token_buffer);
531
532 for (;;)
533 {
534 int t;
535 int tmp_char = ungetc (skip_white_space (), finput);
536
537 if (tmp_char == '%')
538 return;
539 if (tmp_char == EOF)
540 fatal (_("Premature EOF after %s"), token_buffer);
541
542 t = lex ();
543
544 switch (t)
1ff442ca
NF
545 {
546
547 case COMMA:
548 case SEMICOLON:
549 break;
550
551 case IDENTIFIER:
552 if (symval->type_name == NULL)
553 symval->type_name = name;
a70083a3 554 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 555 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
556
557 break;
558
559 default:
a0f6b076
AD
560 complain (_("invalid %%type declaration due to item: %s"),
561 token_buffer);
a70083a3 562 skip_to_char ('%');
1ff442ca
NF
563 }
564 }
565}
566
567
568
569/* read in a %left, %right or %nonassoc declaration and record its information. */
570/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
571
4a120d45 572static void
118fb205 573parse_assoc_decl (int assoc)
1ff442ca 574{
a70083a3
AD
575 int k;
576 char *name = NULL;
577 int prev = 0;
1ff442ca 578
a70083a3 579 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 580
1ff442ca
NF
581 for (;;)
582 {
a70083a3 583 int t;
e6011337 584 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 585
e6011337 586 if (tmp_char == '%')
1ff442ca 587 return;
e6011337 588 if (tmp_char == EOF)
a0f6b076 589 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 590
a70083a3 591 t = lex ();
1ff442ca
NF
592
593 switch (t)
594 {
595
596 case TYPENAME:
a70083a3
AD
597 k = strlen (token_buffer);
598 name = NEW2 (k + 1, char);
599 strcpy (name, token_buffer);
1ff442ca
NF
600 break;
601
602 case COMMA:
603 break;
604
605 case IDENTIFIER:
606 if (symval->prec != 0)
a0f6b076 607 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
608 symval->prec = lastprec;
609 symval->assoc = assoc;
610 if (symval->class == SNTERM)
a0f6b076 611 complain (_("symbol %s redefined"), symval->tag);
1ff442ca
NF
612 symval->class = STOKEN;
613 if (name)
a70083a3 614 { /* record the type, if one is specified */
1ff442ca
NF
615 if (symval->type_name == NULL)
616 symval->type_name = name;
a70083a3 617 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 618 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
619 }
620 break;
621
622 case NUMBER:
623 if (prev == IDENTIFIER)
a70083a3 624 {
1ff442ca
NF
625 symval->user_token_number = numval;
626 translations = 1;
a70083a3
AD
627 }
628 else
629 {
630 complain (_
631 ("invalid text (%s) - number should be after identifier"),
632token_buffer);
633 skip_to_char ('%');
634 }
1ff442ca
NF
635 break;
636
637 case SEMICOLON:
638 return;
639
640 default:
a0f6b076 641 complain (_("unexpected item: %s"), token_buffer);
a70083a3 642 skip_to_char ('%');
1ff442ca
NF
643 }
644
645 prev = t;
646
647 }
648}
649
650
651
652/* copy the union declaration into fattrs (and fdefines),
653 where it is made into the
654 definition of YYSTYPE, the type of elements of the parser value stack. */
655
4a120d45 656static void
118fb205 657parse_union_decl (void)
1ff442ca 658{
a70083a3
AD
659 int c;
660 int count = 0;
1ff442ca
NF
661
662 if (typed)
27821bff 663 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
664
665 typed = 1;
666
667 if (!nolinesflag)
27821bff 668 fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
1ff442ca 669 else
27821bff 670 fprintf (fattrs, "\n");
1ff442ca 671
27821bff 672 fprintf (fattrs, "typedef union");
1ff442ca 673 if (fdefines)
27821bff 674 fprintf (fdefines, "typedef union");
1ff442ca 675
27821bff 676 c = getc (finput);
1ff442ca
NF
677
678 while (c != EOF)
679 {
27821bff 680 putc (c, fattrs);
1ff442ca 681 if (fdefines)
27821bff 682 putc (c, fdefines);
1ff442ca
NF
683
684 switch (c)
685 {
686 case '\n':
687 lineno++;
688 break;
689
690 case '/':
27821bff 691 c = getc (finput);
1ff442ca 692 if (c != '*' && c != '/')
27821bff
AD
693 continue;
694 copy_comment2 (finput, fattrs, fdefines, c);
1ff442ca
NF
695 break;
696
697
698 case '{':
699 count++;
700 break;
701
702 case '}':
703 if (count == 0)
27821bff 704 complain (_("unmatched %s"), "`}'");
1ff442ca 705 count--;
943819bf 706 if (count <= 0)
1ff442ca 707 {
27821bff 708 fprintf (fattrs, " YYSTYPE;\n");
1ff442ca 709 if (fdefines)
27821bff 710 fprintf (fdefines, " YYSTYPE;\n");
1ff442ca 711 /* JF don't choke on trailing semi */
27821bff
AD
712 c = skip_white_space ();
713 if (c != ';')
a70083a3 714 ungetc (c, finput);
1ff442ca
NF
715 return;
716 }
717 }
718
27821bff 719 c = getc (finput);
1ff442ca
NF
720 }
721}
722
723/* parse the declaration %expect N which says to expect N
724 shift-reduce conflicts. */
725
4a120d45 726static void
118fb205 727parse_expect_decl (void)
1ff442ca 728{
a70083a3
AD
729 int c;
730 int count;
1ff442ca
NF
731 char buffer[20];
732
a70083a3 733 c = getc (finput);
1ff442ca 734 while (c == ' ' || c == '\t')
a70083a3 735 c = getc (finput);
1ff442ca
NF
736
737 count = 0;
738 while (c >= '0' && c <= '9')
739 {
740 if (count < 20)
741 buffer[count++] = c;
a70083a3 742 c = getc (finput);
1ff442ca
NF
743 }
744 buffer[count] = 0;
745
746 ungetc (c, finput);
747
943819bf 748 if (count <= 0 || count > 10)
a0f6b076 749 complain ("%s", _("argument of %expect is not an integer"));
1ff442ca
NF
750 expected_conflicts = atoi (buffer);
751}
752
a70083a3
AD
753
754/*-------------------------------------------------------------------.
755| Parse what comes after %thong. the full syntax is |
756| |
757| %thong <type> token number literal |
758| |
759| the <type> or number may be omitted. The number specifies the |
760| user_token_number. |
761| |
762| Two symbols are entered in the table, one for the token symbol and |
763| one for the literal. Both are given the <type>, if any, from the |
764| declaration. The ->user_token_number of the first is SALIAS and |
765| the ->user_token_number of the second is set to the number, if |
766| any, from the declaration. The two symbols are linked via |
767| pointers in their ->alias fields. |
768| |
769| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
770| only the literal string is retained it is the literal string that |
771| is output to yytname |
772`-------------------------------------------------------------------*/
773
774static void
775parse_thong_decl (void)
7b306f52 776{
a70083a3
AD
777 int token;
778 struct bucket *symbol;
779 char *typename = 0;
780 int k, usrtoknum;
7b306f52 781
a70083a3
AD
782 translations = 1;
783 token = lex (); /* fetch typename or first token */
784 if (token == TYPENAME)
7b306f52 785 {
a70083a3
AD
786 k = strlen (token_buffer);
787 typename = NEW2 (k + 1, char);
788 strcpy (typename, token_buffer);
789 value_components_used = 1;
790 token = lex (); /* fetch first token */
7b306f52 791 }
7b306f52 792
a70083a3 793 /* process first token */
7b306f52 794
a70083a3
AD
795 if (token != IDENTIFIER)
796 {
797 complain (_("unrecognized item %s, expected an identifier"),
798 token_buffer);
799 skip_to_char ('%');
800 return;
7b306f52 801 }
a70083a3
AD
802 symval->class = STOKEN;
803 symval->type_name = typename;
804 symval->user_token_number = SALIAS;
805 symbol = symval;
7b306f52 806
a70083a3 807 token = lex (); /* get number or literal string */
1ff442ca 808
a70083a3 809 if (token == NUMBER)
943819bf 810 {
a70083a3
AD
811 usrtoknum = numval;
812 token = lex (); /* okay, did number, now get literal */
943819bf 813 }
a70083a3
AD
814 else
815 usrtoknum = 0;
1ff442ca 816
a70083a3 817 /* process literal string token */
1ff442ca 818
a70083a3 819 if (token != IDENTIFIER || *symval->tag != '\"')
1ff442ca 820 {
a70083a3
AD
821 complain (_("expected string constant instead of %s"), token_buffer);
822 skip_to_char ('%');
823 return;
1ff442ca 824 }
a70083a3
AD
825 symval->class = STOKEN;
826 symval->type_name = typename;
827 symval->user_token_number = usrtoknum;
1ff442ca 828
a70083a3
AD
829 symval->alias = symbol;
830 symbol->alias = symval;
1ff442ca 831
a70083a3
AD
832 nsyms--; /* symbol and symval combined are only one symbol */
833}
3cef001a 834
a70083a3
AD
835/*----------------------------------------------------------------.
836| Read from finput until `%%' is seen. Discard the `%%'. Handle |
837| any `%' declarations, and copy the contents of any `%{ ... %}' |
838| groups to fattrs. |
839`----------------------------------------------------------------*/
1ff442ca 840
4a120d45 841static void
a70083a3 842read_declarations (void)
1ff442ca 843{
a70083a3
AD
844 int c;
845 int tok;
1ff442ca 846
a70083a3 847 for (;;)
1ff442ca 848 {
a70083a3 849 c = skip_white_space ();
1ff442ca 850
a70083a3
AD
851 if (c == '%')
852 {
853 tok = parse_percent_token ();
1ff442ca 854
a70083a3 855 switch (tok)
943819bf 856 {
a70083a3
AD
857 case TWO_PERCENTS:
858 return;
1ff442ca 859
a70083a3
AD
860 case PERCENT_LEFT_CURLY:
861 copy_definition ();
862 break;
1ff442ca 863
a70083a3
AD
864 case TOKEN:
865 parse_token_decl (STOKEN, SNTERM);
866 break;
1ff442ca 867
a70083a3
AD
868 case NTERM:
869 parse_token_decl (SNTERM, STOKEN);
870 break;
1ff442ca 871
a70083a3
AD
872 case TYPE:
873 parse_type_decl ();
874 break;
1ff442ca 875
a70083a3
AD
876 case START:
877 parse_start_decl ();
878 break;
118fb205 879
a70083a3
AD
880 case UNION:
881 parse_union_decl ();
882 break;
1ff442ca 883
a70083a3
AD
884 case EXPECT:
885 parse_expect_decl ();
886 break;
887 case THONG:
888 parse_thong_decl ();
889 break;
890 case LEFT:
891 parse_assoc_decl (LEFT_ASSOC);
892 break;
1ff442ca 893
a70083a3
AD
894 case RIGHT:
895 parse_assoc_decl (RIGHT_ASSOC);
896 break;
1ff442ca 897
a70083a3
AD
898 case NONASSOC:
899 parse_assoc_decl (NON_ASSOC);
900 break;
1ff442ca 901
a70083a3
AD
902 case SEMANTIC_PARSER:
903 if (semantic_parser == 0)
904 {
905 semantic_parser = 1;
906 open_extra_files ();
907 }
908 break;
1ff442ca 909
a70083a3
AD
910 case PURE_PARSER:
911 pure_parser = 1;
912 break;
1ff442ca 913
a70083a3
AD
914 case NOOP:
915 break;
1ff442ca 916
a70083a3
AD
917 default:
918 complain (_("unrecognized: %s"), token_buffer);
919 skip_to_char ('%');
920 }
921 }
922 else if (c == EOF)
923 fatal (_("no input grammar"));
924 else
925 {
926 complain (_("unknown character: %s"), printable_version (c));
927 skip_to_char ('%');
1ff442ca 928 }
1ff442ca 929 }
1ff442ca 930}
a70083a3
AD
931\f
932/*-------------------------------------------------------------------.
933| Assuming that a `{' has just been seen, copy everything up to the |
934| matching `}' into the actions file. STACK_OFFSET is the number of |
935| values in the current rule so far, which says where to find `$0' |
936| with respect to the top of the stack. |
937`-------------------------------------------------------------------*/
1ff442ca 938
4a120d45 939static void
a70083a3 940copy_action (symbol_list * rule, int stack_offset)
1ff442ca 941{
a70083a3
AD
942 int c;
943 int n;
944 int count;
945 char *type_name;
1ff442ca
NF
946
947 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
948 if (semantic_parser)
949 stack_offset = 0;
1ff442ca 950
41aca2e0 951 fprintf (faction, "\ncase %d:\n", nrules);
1ff442ca 952 if (!nolinesflag)
41aca2e0
AD
953 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
954 putc ('{', faction);
1ff442ca
NF
955
956 count = 1;
a70083a3 957 c = getc (finput);
1ff442ca
NF
958
959 while (count > 0)
960 {
961 while (c != '}')
a70083a3
AD
962 {
963 switch (c)
1ff442ca
NF
964 {
965 case '\n':
a70083a3 966 putc (c, faction);
1ff442ca
NF
967 lineno++;
968 break;
969
970 case '{':
a70083a3 971 putc (c, faction);
1ff442ca
NF
972 count++;
973 break;
974
975 case '\'':
976 case '"':
ca36d2ef 977 copy_string (finput, faction, c);
1ff442ca
NF
978 break;
979
980 case '/':
27821bff
AD
981 putc (c, faction);
982 c = getc (finput);
1ff442ca
NF
983 if (c != '*' && c != '/')
984 continue;
3cef001a 985 copy_comment (finput, faction, c);
1ff442ca
NF
986 break;
987
988 case '$':
a70083a3 989 c = getc (finput);
1ff442ca
NF
990 type_name = NULL;
991
992 if (c == '<')
993 {
a70083a3 994 char *cp = token_buffer;
1ff442ca 995
a70083a3 996 while ((c = getc (finput)) != '>' && c > 0)
118fb205
JT
997 {
998 if (cp == token_buffer + maxtoken)
a70083a3 999 cp = grow_token_buffer (cp);
118fb205
JT
1000
1001 *cp++ = c;
1002 }
1ff442ca
NF
1003 *cp = 0;
1004 type_name = token_buffer;
1005 value_components_used = 1;
1006
a70083a3 1007 c = getc (finput);
1ff442ca
NF
1008 }
1009 if (c == '$')
1010 {
a70083a3 1011 fprintf (faction, "yyval");
41aca2e0 1012 if (!type_name)
a70083a3 1013 type_name = get_type_name (0, rule);
1ff442ca 1014 if (type_name)
a70083a3
AD
1015 fprintf (faction, ".%s", type_name);
1016 if (!type_name && typed)
a0f6b076
AD
1017 complain (_("$$ of `%s' has no declared type"),
1018 rule->sym->tag);
1ff442ca 1019 }
a70083a3 1020 else if (isdigit (c) || c == '-')
1ff442ca
NF
1021 {
1022 ungetc (c, finput);
a70083a3
AD
1023 n = read_signed_integer (finput);
1024 c = getc (finput);
1ff442ca
NF
1025
1026 if (!type_name && n > 0)
a70083a3 1027 type_name = get_type_name (n, rule);
1ff442ca 1028
a70083a3 1029 fprintf (faction, "yyvsp[%d]", n - stack_offset);
1ff442ca 1030 if (type_name)
a70083a3
AD
1031 fprintf (faction, ".%s", type_name);
1032 if (!type_name && typed)
a0f6b076
AD
1033 complain (_("$%d of `%s' has no declared type"),
1034 n, rule->sym->tag);
1ff442ca
NF
1035 continue;
1036 }
1037 else
a0f6b076 1038 complain (_("$%s is invalid"), printable_version (c));
1ff442ca
NF
1039
1040 break;
1041
1042 case '@':
7b306f52 1043 copy_at (finput, faction, stack_offset);
6666f98f 1044 break;
1ff442ca
NF
1045
1046 case EOF:
27821bff 1047 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1048
1049 default:
a70083a3
AD
1050 putc (c, faction);
1051 }
1052
1053 c = getc (finput);
1054 }
1055
1056 /* above loop exits when c is '}' */
1057
1058 if (--count)
1059 {
1060 putc (c, faction);
1061 c = getc (finput);
1062 }
1063 }
1064
1065 fprintf (faction, ";\n break;}");
1066}
1067\f
1068/*-------------------------------------------------------------------.
1069| After `%guard' is seen in the input file, copy the actual guard |
1070| into the guards file. If the guard is followed by an action, copy |
1071| that into the actions file. STACK_OFFSET is the number of values |
1072| in the current rule so far, which says where to find `$0' with |
1073| respect to the top of the stack, for the simple parser in which |
1074| the stack is not popped until after the guard is run. |
1075`-------------------------------------------------------------------*/
1076
1077static void
1078copy_guard (symbol_list * rule, int stack_offset)
1079{
1080 int c;
1081 int n;
1082 int count;
1083 char *type_name;
1084 int brace_flag = 0;
1085
1086 /* offset is always 0 if parser has already popped the stack pointer */
1087 if (semantic_parser)
1088 stack_offset = 0;
1089
1090 fprintf (fguard, "\ncase %d:\n", nrules);
1091 if (!nolinesflag)
1092 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1093 putc ('{', fguard);
1094
1095 count = 0;
1096 c = getc (finput);
1097
1098 while (brace_flag ? (count > 0) : (c != ';'))
1099 {
1100 switch (c)
1101 {
1102 case '\n':
1103 putc (c, fguard);
1104 lineno++;
1105 break;
1106
1107 case '{':
1108 putc (c, fguard);
1109 brace_flag = 1;
1110 count++;
1111 break;
1112
1113 case '}':
1114 putc (c, fguard);
1115 if (count > 0)
1116 count--;
1117 else
1118 {
1119 complain (_("unmatched %s"), "`}'");
1120 c = getc (finput); /* skip it */
1121 }
1122 break;
1123
1124 case '\'':
1125 case '"':
1126 copy_string (finput, fguard, c);
1127 break;
1128
1129 case '/':
1130 putc (c, fguard);
1131 c = getc (finput);
1132 if (c != '*' && c != '/')
1133 continue;
1134 copy_comment (finput, fguard, c);
1135 break;
1136
1137 case '$':
1138 c = getc (finput);
1139 type_name = NULL;
1140
1141 if (c == '<')
1142 {
1143 char *cp = token_buffer;
1144
1145 while ((c = getc (finput)) != '>' && c > 0)
1146 {
1147 if (cp == token_buffer + maxtoken)
1148 cp = grow_token_buffer (cp);
1149
1150 *cp++ = c;
1151 }
1152 *cp = 0;
1153 type_name = token_buffer;
1154
1155 c = getc (finput);
1156 }
1157
1158 if (c == '$')
1159 {
1160 fprintf (fguard, "yyval");
1161 if (!type_name)
1162 type_name = rule->sym->type_name;
1163 if (type_name)
1164 fprintf (fguard, ".%s", type_name);
1165 if (!type_name && typed)
1166 complain (_("$$ of `%s' has no declared type"),
1167 rule->sym->tag);
1168 }
1169 else if (isdigit (c) || c == '-')
1170 {
1171 ungetc (c, finput);
1172 n = read_signed_integer (finput);
1173 c = getc (finput);
1174
1175 if (!type_name && n > 0)
1176 type_name = get_type_name (n, rule);
1177
1178 fprintf (fguard, "yyvsp[%d]", n - stack_offset);
1179 if (type_name)
1180 fprintf (fguard, ".%s", type_name);
1181 if (!type_name && typed)
1182 complain (_("$%d of `%s' has no declared type"),
1183 n, rule->sym->tag);
1184 continue;
1ff442ca 1185 }
a70083a3
AD
1186 else
1187 complain (_("$%s is invalid"), printable_version (c));
1188 break;
1ff442ca 1189
a70083a3
AD
1190 case '@':
1191 copy_at (finput, fguard, stack_offset);
1192 break;
1ff442ca 1193
a70083a3
AD
1194 case EOF:
1195 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1196
a70083a3
AD
1197 default:
1198 putc (c, fguard);
1ff442ca 1199 }
a70083a3
AD
1200
1201 if (c != '}' || count != 0)
1202 c = getc (finput);
1ff442ca
NF
1203 }
1204
a70083a3
AD
1205 c = skip_white_space ();
1206
1207 fprintf (fguard, ";\n break;}");
1208 if (c == '{')
1209 copy_action (rule, stack_offset);
1210 else if (c == '=')
1211 {
1212 c = getc (finput); /* why not skip_white_space -wjh */
1213 if (c == '{')
1214 copy_action (rule, stack_offset);
1215 }
1216 else
1217 ungetc (c, finput);
1ff442ca 1218}
a70083a3
AD
1219\f
1220
1221static void
1222record_rule_line (void)
1223{
1224 /* Record each rule's source line number in rline table. */
1ff442ca 1225
a70083a3
AD
1226 if (nrules >= rline_allocated)
1227 {
1228 rline_allocated = nrules * 2;
1229 rline = (short *) xrealloc ((char *) rline,
1230 rline_allocated * sizeof (short));
1231 }
1232 rline[nrules] = lineno;
1233}
1ff442ca
NF
1234
1235
a70083a3
AD
1236/*-------------------------------------------------------------------.
1237| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1238| with the user's names. |
1239`-------------------------------------------------------------------*/
1ff442ca 1240
4a120d45 1241static bucket *
118fb205 1242gensym (void)
1ff442ca 1243{
a70083a3 1244 bucket *sym;
1ff442ca
NF
1245
1246 sprintf (token_buffer, "@%d", ++gensym_count);
a70083a3 1247 sym = getsym (token_buffer);
1ff442ca
NF
1248 sym->class = SNTERM;
1249 sym->value = nvars++;
36281465 1250 return sym;
1ff442ca
NF
1251}
1252
a70083a3
AD
#if 0
/*------------------------------------------------------------------.
| Read a %type declaration: a type name followed by identifiers,    |
| optionally separated by commas.  Each identifier gets the type    |
| name recorded in its bucket, for get_type_name to look up later.  |
| Return the first token that does not belong to the declaration.   |
|                                                                   |
| This is unused: its only caller is the #if 0 part of readgram.    |
`------------------------------------------------------------------*/

static int
get_type (void)
{
  char *name;
  int len;
  int tok = lex ();

  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  /* Keep a private copy of the type name: TOKEN_BUFFER is about to
     be overwritten by the following tokens.  */
  len = strlen (token_buffer);
  name = NEW2 (len + 1, char);
  strcpy (name, token_buffer);

  while (1)
    {
      tok = lex ();

      /* A semicolon closes the declaration; hand back what follows.  */
      if (tok == SEMICOLON)
	return lex ();

      /* Commas between symbols are just separators.  */
      if (tok == COMMA)
	continue;

      /* Anything other than a symbol ends the declaration.  */
      if (tok != IDENTIFIER)
	return tok;

      if (!symval->type_name)
	symval->type_name = name;
      else if (strcmp (name, symval->type_name) != 0)
	complain (_("type redeclaration for %s"), symval->tag);
    }
}

#endif
1306\f
1307/*------------------------------------------------------------------.
1308| Parse the input grammar into a one symbol_list structure. Each |
1309| rule is represented by a sequence of symbols: the left hand side |
1310| followed by the contents of the right hand side, followed by a |
1311| null pointer instead of a symbol to terminate the rule. The next |
1312| symbol is the lhs of the following rule. |
1313| |
1314| All guards and actions are copied out to the appropriate files, |
1315| labelled by the rule number they apply to. |
1316`------------------------------------------------------------------*/
1ff442ca 1317
4a120d45 1318static void
118fb205 1319readgram (void)
1ff442ca 1320{
a70083a3
AD
1321 int t;
1322 bucket *lhs = NULL;
1323 symbol_list *p;
1324 symbol_list *p1;
1325 bucket *bp;
1ff442ca 1326
a70083a3
AD
1327 symbol_list *crule; /* points to first symbol_list of current rule. */
1328 /* its symbol is the lhs of the rule. */
1329 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1ff442ca
NF
1330
1331 p1 = NULL;
1332
a70083a3 1333 t = lex ();
1ff442ca
NF
1334
1335 while (t != TWO_PERCENTS && t != ENDFILE)
1336 {
1337 if (t == IDENTIFIER || t == BAR)
1338 {
a70083a3
AD
1339 int actionflag = 0;
1340 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1ff442ca
NF
1341 int xactions = 0; /* JF for error checking */
1342 bucket *first_rhs = 0;
1343
1344 if (t == IDENTIFIER)
1345 {
1346 lhs = symval;
943819bf
RS
1347
1348 if (!start_flag)
1349 {
1350 startval = lhs;
1351 start_flag = 1;
1352 }
a083fbbf 1353
a70083a3 1354 t = lex ();
1ff442ca 1355 if (t != COLON)
943819bf 1356 {
a0f6b076 1357 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1358 unlex (t);
943819bf 1359 }
1ff442ca
NF
1360 }
1361
943819bf 1362 if (nrules == 0 && t == BAR)
1ff442ca 1363 {
a0f6b076 1364 complain (_("grammar starts with vertical bar"));
943819bf 1365 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1366 }
1ff442ca
NF
1367 /* start a new rule and record its lhs. */
1368
1369 nrules++;
1370 nitems++;
1371
1372 record_rule_line ();
1373
a70083a3 1374 p = NEW (symbol_list);
1ff442ca
NF
1375 p->sym = lhs;
1376
1377 crule1 = p1;
1378 if (p1)
1379 p1->next = p;
1380 else
1381 grammar = p;
1382
1383 p1 = p;
1384 crule = p;
1385
1386 /* mark the rule's lhs as a nonterminal if not already so. */
1387
1388 if (lhs->class == SUNKNOWN)
1389 {
1390 lhs->class = SNTERM;
1391 lhs->value = nvars;
1392 nvars++;
1393 }
1394 else if (lhs->class == STOKEN)
a0f6b076 1395 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1396
1397 /* read the rhs of the rule. */
1398
1399 for (;;)
1400 {
a70083a3 1401 t = lex ();
943819bf
RS
1402 if (t == PREC)
1403 {
a70083a3 1404 t = lex ();
943819bf 1405 crule->ruleprec = symval;
a70083a3 1406 t = lex ();
943819bf 1407 }
1ff442ca 1408
a70083a3
AD
1409 if (!(t == IDENTIFIER || t == LEFT_CURLY))
1410 break;
1ff442ca
NF
1411
1412 /* If next token is an identifier, see if a colon follows it.
a70083a3 1413 If one does, exit this rule now. */
1ff442ca
NF
1414 if (t == IDENTIFIER)
1415 {
a70083a3
AD
1416 bucket *ssave;
1417 int t1;
1ff442ca
NF
1418
1419 ssave = symval;
a70083a3
AD
1420 t1 = lex ();
1421 unlex (t1);
1ff442ca 1422 symval = ssave;
a70083a3
AD
1423 if (t1 == COLON)
1424 break;
1ff442ca 1425
a70083a3 1426 if (!first_rhs) /* JF */
1ff442ca
NF
1427 first_rhs = symval;
1428 /* Not followed by colon =>
1429 process as part of this rule's rhs. */
1430 }
1431
1432 /* If we just passed an action, that action was in the middle
a70083a3
AD
1433 of a rule, so make a dummy rule to reduce it to a
1434 non-terminal. */
1ff442ca
NF
1435 if (actionflag)
1436 {
a70083a3 1437 bucket *sdummy;
1ff442ca
NF
1438
1439 /* Since the action was written out with this rule's */
943819bf 1440 /* number, we must give the new rule this number */
1ff442ca
NF
1441 /* by inserting the new rule before it. */
1442
1443 /* Make a dummy nonterminal, a gensym. */
a70083a3 1444 sdummy = gensym ();
1ff442ca
NF
1445
1446 /* Make a new rule, whose body is empty,
1447 before the current one, so that the action
1448 just read can belong to it. */
1449 nrules++;
1450 nitems++;
1451 record_rule_line ();
a70083a3 1452 p = NEW (symbol_list);
1ff442ca
NF
1453 if (crule1)
1454 crule1->next = p;
a70083a3
AD
1455 else
1456 grammar = p;
1ff442ca 1457 p->sym = sdummy;
a70083a3 1458 crule1 = NEW (symbol_list);
1ff442ca
NF
1459 p->next = crule1;
1460 crule1->next = crule;
1461
1462 /* insert the dummy generated by that rule into this rule. */
1463 nitems++;
a70083a3 1464 p = NEW (symbol_list);
1ff442ca
NF
1465 p->sym = sdummy;
1466 p1->next = p;
1467 p1 = p;
1468
1469 actionflag = 0;
1470 }
1471
1472 if (t == IDENTIFIER)
1473 {
1474 nitems++;
a70083a3 1475 p = NEW (symbol_list);
1ff442ca
NF
1476 p->sym = symval;
1477 p1->next = p;
1478 p1 = p;
1479 }
a70083a3 1480 else /* handle an action. */
1ff442ca 1481 {
a70083a3 1482 copy_action (crule, rulelength);
1ff442ca
NF
1483 actionflag = 1;
1484 xactions++; /* JF */
1485 }
1486 rulelength++;
a70083a3 1487 } /* end of read rhs of rule */
1ff442ca
NF
1488
1489 /* Put an empty link in the list to mark the end of this rule */
a70083a3 1490 p = NEW (symbol_list);
1ff442ca
NF
1491 p1->next = p;
1492 p1 = p;
1493
1494 if (t == PREC)
1495 {
a0f6b076 1496 complain (_("two @prec's in a row"));
a70083a3 1497 t = lex ();
1ff442ca 1498 crule->ruleprec = symval;
a70083a3 1499 t = lex ();
1ff442ca
NF
1500 }
1501 if (t == GUARD)
1502 {
a70083a3 1503 if (!semantic_parser)
a0f6b076 1504 complain ("%s",
a70083a3
AD
1505 _
1506 ("%guard present but %semantic_parser not specified"));
1ff442ca 1507
a70083a3
AD
1508 copy_guard (crule, rulelength);
1509 t = lex ();
1ff442ca
NF
1510 }
1511 else if (t == LEFT_CURLY)
1512 {
a70083a3 1513 /* This case never occurs -wjh */
6666f98f 1514 if (actionflag)
a0f6b076 1515 complain (_("two actions at end of one rule"));
a70083a3 1516 copy_action (crule, rulelength);
943819bf
RS
1517 actionflag = 1;
1518 xactions++; /* -wjh */
a70083a3 1519 t = lex ();
1ff442ca 1520 }
a0f6b076 1521 /* If $$ is being set in default way, report if any type
6666f98f
AD
1522 mismatch. */
1523 else if (!xactions
a70083a3 1524 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1525 {
6666f98f
AD
1526 if (lhs->type_name == 0
1527 || first_rhs->type_name == 0
a70083a3 1528 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1529 complain (_("type clash (`%s' `%s') on default action"),
1530 lhs->type_name ? lhs->type_name : "",
a70083a3 1531 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1532 }
1533 /* Warn if there is no default for $$ but we need one. */
1534 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1535 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca 1536 if (t == SEMICOLON)
a70083a3 1537 t = lex ();
a083fbbf 1538 }
943819bf 1539#if 0
a70083a3 1540 /* these things can appear as alternatives to rules. */
943819bf
RS
1541/* NO, they cannot.
1542 a) none of the documentation allows them
1543 b) most of them scan forward until finding a next %
1544 thus they may swallow lots of intervening rules
1545*/
1ff442ca
NF
1546 else if (t == TOKEN)
1547 {
a70083a3
AD
1548 parse_token_decl (STOKEN, SNTERM);
1549 t = lex ();
1ff442ca
NF
1550 }
1551 else if (t == NTERM)
1552 {
a70083a3
AD
1553 parse_token_decl (SNTERM, STOKEN);
1554 t = lex ();
1ff442ca
NF
1555 }
1556 else if (t == TYPE)
1557 {
a70083a3 1558 t = get_type ();
1ff442ca
NF
1559 }
1560 else if (t == UNION)
1561 {
a70083a3
AD
1562 parse_union_decl ();
1563 t = lex ();
1ff442ca
NF
1564 }
1565 else if (t == EXPECT)
1566 {
a70083a3
AD
1567 parse_expect_decl ();
1568 t = lex ();
1ff442ca
NF
1569 }
1570 else if (t == START)
1571 {
a70083a3
AD
1572 parse_start_decl ();
1573 t = lex ();
1ff442ca 1574 }
943819bf
RS
1575#endif
1576
1ff442ca 1577 else
943819bf 1578 {
a0f6b076 1579 complain (_("invalid input: %s"), token_buffer);
a70083a3 1580 t = lex ();
943819bf 1581 }
1ff442ca
NF
1582 }
1583
943819bf
RS
1584 /* grammar has been read. Do some checking */
1585
1ff442ca 1586 if (nsyms > MAXSHORT)
a0f6b076
AD
1587 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1588 MAXSHORT);
1ff442ca 1589 if (nrules == 0)
a0f6b076 1590 fatal (_("no rules in the input grammar"));
1ff442ca 1591
a70083a3 1592 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1ff442ca
NF
1593 && !value_components_used)
1594 {
1595 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
a70083a3
AD
1596 but it seems better to be consistent.
1597 Most programs should declare their own type anyway. */
1598 fprintf (fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca 1599 if (fdefines)
a70083a3 1600 fprintf (fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca
NF
1601 }
1602
1603 /* Report any undefined symbols and consider them nonterminals. */
1604
1605 for (bp = firstsymbol; bp; bp = bp->next)
1606 if (bp->class == SUNKNOWN)
1607 {
a70083a3
AD
1608 complain (_
1609 ("symbol %s is used, but is not defined as a token and has no rules"),
1610bp->tag);
1ff442ca
NF
1611 bp->class = SNTERM;
1612 bp->value = nvars++;
1613 }
1614
1615 ntokens = nsyms - nvars;
1616}
a70083a3
AD
1617\f
1618/*--------------------------------------------------------------.
1619| For named tokens, but not literal ones, define the name. The |
1620| value is the user token number. |
1621`--------------------------------------------------------------*/
1ff442ca 1622
4a120d45 1623static void
a70083a3 1624output_token_defines (FILE *file)
1ff442ca 1625{
a70083a3
AD
1626 bucket *bp;
1627 char *cp, *symbol;
1628 char c;
1ff442ca 1629
a70083a3 1630 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1631 {
a70083a3
AD
1632 symbol = bp->tag; /* get symbol */
1633
1634 if (bp->value >= ntokens)
1635 continue;
1636 if (bp->user_token_number == SALIAS)
1637 continue;
1638 if ('\'' == *symbol)
1639 continue; /* skip literal character */
1640 if (bp == errtoken)
1641 continue; /* skip error token */
1642 if ('\"' == *symbol)
1ff442ca 1643 {
a70083a3
AD
1644 /* use literal string only if given a symbol with an alias */
1645 if (bp->alias)
1646 symbol = bp->alias->tag;
1647 else
1648 continue;
1649 }
1ff442ca 1650
a70083a3
AD
1651 /* Don't #define nonliteral tokens whose names contain periods. */
1652 cp = symbol;
1653 while ((c = *cp++) && c != '.');
1654 if (c != '\0')
1655 continue;
1ff442ca 1656
a70083a3
AD
1657 fprintf (file, "#define\t%s\t%d\n", symbol,
1658 ((translations && !rawtoknumflag)
1659 ? bp->user_token_number : bp->value));
1660 if (semantic_parser)
1661 fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca 1662 }
a70083a3
AD
1663
1664 putc ('\n', file);
1ff442ca 1665}
1ff442ca
NF
1666
1667
a70083a3
AD
1668/*------------------------------------------------------------------.
1669| Assign symbol numbers, and write definition of token names into |
1670| fdefines. Set up vectors tags and sprec of names and precedences |
1671| of symbols. |
1672`------------------------------------------------------------------*/
1ff442ca 1673
4a120d45 1674static void
118fb205 1675packsymbols (void)
1ff442ca 1676{
a70083a3
AD
1677 bucket *bp;
1678 int tokno = 1;
1679 int i;
1680 int last_user_token_number;
4a120d45 1681 static char DOLLAR[] = "$";
1ff442ca
NF
1682
1683 /* int lossage = 0; JF set but not used */
1684
a70083a3 1685 tags = NEW2 (nsyms + 1, char *);
4a120d45 1686 tags[0] = DOLLAR;
a70083a3 1687 user_toknums = NEW2 (nsyms + 1, short);
943819bf 1688 user_toknums[0] = 0;
1ff442ca 1689
a70083a3
AD
1690 sprec = NEW2 (nsyms, short);
1691 sassoc = NEW2 (nsyms, short);
1ff442ca
NF
1692
1693 max_user_token_number = 256;
1694 last_user_token_number = 256;
1695
1696 for (bp = firstsymbol; bp; bp = bp->next)
1697 {
1698 if (bp->class == SNTERM)
1699 {
1700 bp->value += ntokens;
1701 }
943819bf
RS
1702 else if (bp->alias)
1703 {
0a6384c4
AD
1704 /* this symbol and its alias are a single token defn.
1705 allocate a tokno, and assign to both check agreement of
1706 ->prec and ->assoc fields and make both the same */
1707 if (bp->value == 0)
1708 bp->value = bp->alias->value = tokno++;
943819bf 1709
0a6384c4
AD
1710 if (bp->prec != bp->alias->prec)
1711 {
1712 if (bp->prec != 0 && bp->alias->prec != 0
1713 && bp->user_token_number == SALIAS)
a0f6b076
AD
1714 complain (_("conflicting precedences for %s and %s"),
1715 bp->tag, bp->alias->tag);
0a6384c4
AD
1716 if (bp->prec != 0)
1717 bp->alias->prec = bp->prec;
1718 else
1719 bp->prec = bp->alias->prec;
1720 }
943819bf 1721
0a6384c4
AD
1722 if (bp->assoc != bp->alias->assoc)
1723 {
a0f6b076
AD
1724 if (bp->assoc != 0 && bp->alias->assoc != 0
1725 && bp->user_token_number == SALIAS)
1726 complain (_("conflicting assoc values for %s and %s"),
1727 bp->tag, bp->alias->tag);
1728 if (bp->assoc != 0)
1729 bp->alias->assoc = bp->assoc;
1730 else
1731 bp->assoc = bp->alias->assoc;
1732 }
0a6384c4
AD
1733
1734 if (bp->user_token_number == SALIAS)
a70083a3 1735 continue; /* do not do processing below for SALIASs */
943819bf 1736
a70083a3
AD
1737 }
1738 else /* bp->class == STOKEN */
943819bf
RS
1739 {
1740 bp->value = tokno++;
1741 }
1742
1743 if (bp->class == STOKEN)
1ff442ca
NF
1744 {
1745 if (translations && !(bp->user_token_number))
1746 bp->user_token_number = ++last_user_token_number;
1747 if (bp->user_token_number > max_user_token_number)
1748 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1749 }
1750
1751 tags[bp->value] = bp->tag;
943819bf 1752 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1753 sprec[bp->value] = bp->prec;
1754 sassoc[bp->value] = bp->assoc;
1755
1756 }
1757
1758 if (translations)
1759 {
a70083a3 1760 int j;
1ff442ca 1761
a70083a3 1762 token_translations = NEW2 (max_user_token_number + 1, short);
1ff442ca 1763
0a6384c4 1764 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1765 token number for $undefined., which represents all invalid
1766 inputs. */
4a120d45 1767 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1768 token_translations[j] = 2;
1ff442ca 1769
943819bf 1770 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1771 {
1772 if (bp->value >= ntokens)
1773 continue; /* non-terminal */
1774 if (bp->user_token_number == SALIAS)
0a6384c4 1775 continue;
a70083a3 1776 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1777 complain (_("tokens %s and %s both assigned number %d"),
1778 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1779 bp->tag, bp->user_token_number);
1780 token_translations[bp->user_token_number] = bp->value;
1781 }
1ff442ca
NF
1782 }
1783
1784 error_token_number = errtoken->value;
1785
a70083a3
AD
1786 if (!noparserflag)
1787 output_token_defines (ftable);
1ff442ca
NF
1788
1789 if (startval->class == SUNKNOWN)
a0f6b076 1790 fatal (_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1791 else if (startval->class == STOKEN)
a0f6b076 1792 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1793
1794 start_symbol = startval->value;
1795
1796 if (definesflag)
1797 {
a70083a3 1798 output_token_defines (fdefines);
1ff442ca
NF
1799
1800 if (!pure_parser)
1801 {
1802 if (spec_name_prefix)
a70083a3
AD
1803 fprintf (fdefines, "\nextern YYSTYPE %slval;\n",
1804 spec_name_prefix);
1ff442ca 1805 else
a70083a3 1806 fprintf (fdefines, "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1807 }
1808
1809 if (semantic_parser)
1810 for (i = ntokens; i < nsyms; i++)
1811 {
1812 /* don't make these for dummy nonterminals made by gensym. */
1813 if (*tags[i] != '@')
a70083a3 1814 fprintf (fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1815 }
1816#if 0
1817 /* `fdefines' is now a temporary file, so we need to copy its
1818 contents in `done', so we can't close it here. */
a70083a3 1819 fclose (fdefines);
1ff442ca
NF
1820 fdefines = NULL;
1821#endif
1822 }
1823}
a083fbbf 1824
1ff442ca 1825
a70083a3
AD
1826/*---------------------------------------------------------------.
1827| Convert the rules into the representation using RRHS, RLHS and |
1828| RITEMS. |
1829`---------------------------------------------------------------*/
1ff442ca 1830
4a120d45 1831static void
118fb205 1832packgram (void)
1ff442ca 1833{
a70083a3
AD
1834 int itemno;
1835 int ruleno;
1836 symbol_list *p;
1ff442ca
NF
1837
1838 bucket *ruleprec;
1839
a70083a3
AD
1840 ritem = NEW2 (nitems + 1, short);
1841 rlhs = NEW2 (nrules, short) - 1;
1842 rrhs = NEW2 (nrules, short) - 1;
1843 rprec = NEW2 (nrules, short) - 1;
1844 rprecsym = NEW2 (nrules, short) - 1;
1845 rassoc = NEW2 (nrules, short) - 1;
1ff442ca
NF
1846
1847 itemno = 0;
1848 ruleno = 1;
1849
1850 p = grammar;
1851 while (p)
1852 {
1853 rlhs[ruleno] = p->sym->value;
1854 rrhs[ruleno] = itemno;
1855 ruleprec = p->ruleprec;
1856
1857 p = p->next;
1858 while (p && p->sym)
1859 {
1860 ritem[itemno++] = p->sym->value;
1861 /* A rule gets by default the precedence and associativity
1862 of the last token in it. */
a70083a3 1863 if (p->sym->class == STOKEN)
1ff442ca
NF
1864 {
1865 rprec[ruleno] = p->sym->prec;
1866 rassoc[ruleno] = p->sym->assoc;
1867 }
a70083a3
AD
1868 if (p)
1869 p = p->next;
1ff442ca
NF
1870 }
1871
1872 /* If this rule has a %prec,
a70083a3 1873 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1874 if (ruleprec)
1875 {
a70083a3
AD
1876 rprec[ruleno] = ruleprec->prec;
1877 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1878 rprecsym[ruleno] = ruleprec->value;
1879 }
1880
1881 ritem[itemno++] = -ruleno;
1882 ruleno++;
1883
a70083a3
AD
1884 if (p)
1885 p = p->next;
1ff442ca
NF
1886 }
1887
1888 ritem[itemno] = 0;
1889}
a70083a3
AD
1890\f
1891/*-------------------------------------------------------------------.
1892| Read in the grammar specification and record it in the format |
1893| described in gram.h. All guards are copied into the FGUARD file |
1894| and all actions into FACTION, in each case forming the body of a C |
1895| function (YYGUARD or YYACTION) which contains a switch statement |
1896| to decide which guard or action to execute. |
1897`-------------------------------------------------------------------*/
1898
1899void
1900reader (void)
1901{
1902 start_flag = 0;
1903 startval = NULL; /* start symbol not specified yet. */
1904
1905#if 0
1906 /* initially assume token number translation not needed. */
1907 translations = 0;
1908#endif
1909 /* Nowadays translations is always set to 1, since we give `error' a
1910 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1911 */
1912 translations = 1;
1913
1914 nsyms = 1;
1915 nvars = 0;
1916 nrules = 0;
1917 nitems = 0;
1918 rline_allocated = 10;
1919 rline = NEW2 (rline_allocated, short);
1920
1921 typed = 0;
1922 lastprec = 0;
1923
1924 gensym_count = 0;
1925
1926 semantic_parser = 0;
1927 pure_parser = 0;
1928 yylsp_needed = 0;
1929
1930 grammar = NULL;
1931
1932 init_lex ();
1933 lineno = 1;
1934
1935 /* Initialize the symbol table. */
1936 tabinit ();
1937 /* Construct the error token */
1938 errtoken = getsym ("error");
1939 errtoken->class = STOKEN;
1940 errtoken->user_token_number = 256; /* Value specified by POSIX. */
1941 /* Construct a token that represents all undefined literal tokens.
1942 It is always token number 2. */
1943 undeftoken = getsym ("$undefined.");
1944 undeftoken->class = STOKEN;
1945 undeftoken->user_token_number = 2;
1946
1947 /* Read the declaration section. Copy %{ ... %} groups to FTABLE
1948 and FDEFINES file. Also notice any %token, %left, etc. found
1949 there. */
1950 putc ('\n', ftable);
1951 fprintf (ftable, "\
1952/* %s, made from %s\n\
1953 by GNU bison %s. */\n\
1954\n", noparserflag ? "Bison-generated parse tables" : "A Bison parser", infile, VERSION);
1955
1956 fputs ("#define YYBISON 1 /* Identify Bison output. */\n\n", ftable);
1957 read_declarations ();
1958 /* Start writing the guard and action files, if they are needed. */
1959 output_headers ();
1960 /* Read in the grammar, build grammar in list form. Write out
1961 guards and actions. */
1962 readgram ();
1963 /* Now we know whether we need the line-number stack. If we do,
1964 write its type into the .tab.h file. */
1965 if (fdefines)
1966 reader_output_yylsp (fdefines);
1967 /* Write closing delimiters for actions and guards. */
1968 output_trailers ();
1969 if (yylsp_needed)
1970 fputs ("#define YYLSP_NEEDED\n\n", ftable);
1971 /* Assign the symbols their symbol numbers. Write #defines for the
1972 token symbols into FDEFINES if requested. */
1973 packsymbols ();
1974 /* Convert the grammar into the format described in gram.h. */
1975 packgram ();
1976 /* Free the symbol table data structure since symbols are now all
1977 referred to by symbol number. */
1978 free_symtab ();
1979}
1980
1981void
1982reader_output_yylsp (FILE *f)
1983{
1984 if (yylsp_needed)
1985 fprintf (f, LTYPESTR);
1986}