]> git.saurik.com Git - bison.git/blame - src/reader.c
Various anti-`extern in *.c' changes.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
a70083a3
AD
2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000
3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
ceed8467 24#include "getargs.h"
1ff442ca 25#include "files.h"
7612000c 26#include "alloc.h"
1ff442ca
NF
27#include "symtab.h"
28#include "lex.h"
29#include "gram.h"
a0f6b076 30#include "complain.h"
6c89f1c1 31#include "output.h"
b2ca4022 32#include "reader.h"
340ef489 33#include "conflicts.h"
b2ca4022 34
b2ca4022 35extern char *printable_version PARAMS ((int));
1ff442ca 36
6666f98f
AD
/* LTYPESTR is a string literal holding C source text: guarded by
   `#ifndef YYLTYPE', it declares `struct yyltype' (timestamp,
   first/last line and column, text), typedefs it as `yyltype' and
   defines YYLTYPE as that type.
   NOTE(review): no use of LTYPESTR is visible in this part of the
   file; presumably it is written into the generated output -- confirm.  */
37#define LTYPESTR "\
38\n\
39#ifndef YYLTYPE\n\
40typedef\n\
41 struct yyltype\n\
42\
43 {\n\
44 int timestamp;\n\
45 int first_line;\n\
46 int first_column;\
47\n\
48 int last_line;\n\
49 int last_column;\n\
50 char *text;\n\
51 }\n\
52\
53 yyltype;\n\
54\n\
55#define YYLTYPE yyltype\n\
56#endif\n\
57\n"
1ff442ca
NF
58
59/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 60static int rline_allocated;
1ff442ca 61
a70083a3
AD
/* Node of a singly linked list of symbols.  NEXT links to the following
   node (NULL at the end) and SYM is the symbol held by the node.
   NOTE(review): RULEPREC is not used in this part of the file;
   presumably it records a rule's %prec symbol -- confirm.  */
62typedef struct symbol_list
63{
64 struct symbol_list *next;
65 bucket *sym;
66 bucket *ruleprec;
67}
68symbol_list;
118fb205 69
/* Current line number in the input file: bumped by the copy routines
   each time they consume a newline, and used in `#line' directives.  */
1ff442ca 70int lineno;
1ff442ca 71char **tags;
d019d655 72short *user_toknums;
4a120d45
JT
/* NOTE(review): GRAMMAR is presumably the head of the symbol list built
   while reading the rules; it is not touched in this part of the file.  */
73static symbol_list *grammar;
/* START_FLAG is set to 1 once a valid %start declaration has been read;
   STARTVAL is the symbol named by that declaration.  */
74static int start_flag;
75static bucket *startval;
1ff442ca
NF
76
77/* Nonzero if components of semantic values are used, implying
78 they must be unions. */
79static int value_components_used;
80
a70083a3 81static int typed; /* nonzero if %union has been seen. */
1ff442ca 82
a70083a3 83static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
1ff442ca 84
a70083a3 85static int gensym_count; /* incremented for each generated symbol */
1ff442ca
NF
86
87static bucket *errtoken;
5b2e3c89 88static bucket *undeftoken;
1ff442ca
NF
89
90/* Nonzero if any action or guard uses the @n construct. */
91static int yylsp_needed;
0d533154 92\f
a70083a3 93
0d533154
AD
94/*===================\
95| Low level lexing. |
96\===================*/
943819bf
RS
97
98static void
118fb205 99skip_to_char (int target)
943819bf
RS
100{
101 int c;
102 if (target == '\n')
a0f6b076 103 complain (_(" Skipping to next \\n"));
943819bf 104 else
a0f6b076 105 complain (_(" Skipping to next %c"), target);
943819bf
RS
106
107 do
0d533154 108 c = skip_white_space ();
943819bf 109 while (c != target && c != EOF);
a083fbbf 110 if (c != EOF)
0d533154 111 ungetc (c, finput);
943819bf
RS
112}
113
114
0d533154
AD
/* Read an optionally `-'-prefixed run of decimal digits from STREAM
   and return its value.  The first character that is not part of the
   number is pushed back onto STREAM.  When no digit is present the
   result is 0 (a leading `-' is still consumed).  */

static inline int
read_signed_integer (FILE *stream)
{
  int negative = 0;
  int value = 0;
  int ch = getc (stream);

  if (ch == '-')
    {
      negative = 1;
      ch = getc (stream);
    }

  /* Accumulate the magnitude digit by digit.  */
  for (; isdigit (ch); ch = getc (stream))
    value = 10 * value + (ch - '0');

  /* Give back the character that ended the number.  */
  ungetc (ch, stream);

  return negative ? -value : value;
}
142\f
143/*-------------------------------------------------------------------.
144| Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of |
145| the string (either ' or "). |
146`-------------------------------------------------------------------*/
ae3c3164
AD
147
148static inline void
4a120d45 149copy_string (FILE *fin, FILE *fout, int match)
ae3c3164
AD
150{
151 int c;
152
4a120d45
JT
153 putc (match, fout);
154 c = getc (fin);
ae3c3164
AD
155
156 while (c != match)
157 {
158 if (c == EOF)
159 fatal (_("unterminated string at end of file"));
160 if (c == '\n')
161 {
a0f6b076 162 complain (_("unterminated string"));
4a120d45 163 ungetc (c, fin);
ae3c3164
AD
164 c = match; /* invent terminator */
165 continue;
166 }
167
a70083a3 168 putc (c, fout);
ae3c3164
AD
169
170 if (c == '\\')
171 {
4a120d45 172 c = getc (fin);
ae3c3164
AD
173 if (c == EOF)
174 fatal (_("unterminated string at end of file"));
4a120d45 175 putc (c, fout);
ae3c3164
AD
176 if (c == '\n')
177 lineno++;
178 }
179
a70083a3 180 c = getc (fin);
ae3c3164
AD
181 }
182
a70083a3 183 putc (c, fout);
ae3c3164
AD
184}
185
186
6c89f1c1
AD
187/*---------------------------------------------------------------.
188| Dump the comment from IN to OUT1 and OUT2. C is either `*' or |
189| `/', depending upon the type of comments used. OUT2 might be |
190| NULL. |
191`---------------------------------------------------------------*/
ae3c3164
AD
192
193static inline void
a70083a3 194copy_comment2 (FILE *in, FILE *out1, FILE *out2, int c)
ae3c3164
AD
195{
196 int cplus_comment;
a70083a3 197 int ended;
ae3c3164
AD
198
199 cplus_comment = (c == '/');
27821bff
AD
200 putc (c, out1);
201 if (out2)
202 putc (c, out2);
203 c = getc (in);
ae3c3164
AD
204
205 ended = 0;
206 while (!ended)
207 {
208 if (!cplus_comment && c == '*')
209 {
210 while (c == '*')
211 {
27821bff
AD
212 putc (c, out1);
213 if (out2)
214 putc (c, out2);
215 c = getc (in);
ae3c3164
AD
216 }
217
218 if (c == '/')
219 {
a70083a3 220 putc (c, out1);
27821bff 221 if (out2)
a70083a3 222 putc (c, out2);
ae3c3164
AD
223 ended = 1;
224 }
225 }
226 else if (c == '\n')
227 {
228 lineno++;
27821bff
AD
229 putc (c, out1);
230 if (out2)
231 putc (c, out2);
ae3c3164
AD
232 if (cplus_comment)
233 ended = 1;
234 else
27821bff 235 c = getc (in);
ae3c3164
AD
236 }
237 else if (c == EOF)
238 fatal (_("unterminated comment"));
239 else
240 {
27821bff
AD
241 putc (c, out1);
242 if (out2)
243 putc (c, out2);
244 c = getc (in);
ae3c3164
AD
245 }
246 }
247}
248
249
d019d655
AD
/* Copy a comment from FIN to FOUT only.  C is either `*' or `/',
   depending upon the type of comment; see copy_comment2, which is
   called here without a second output stream.  */

static inline void
copy_comment (FILE *fin, FILE *fout, int c)
{
  FILE *no_second_output = NULL;

  copy_comment2 (fin, fout, no_second_output, c);
}
260
261
a70083a3
AD
262/*-----------------------------------------------------------------.
263| FIN is pointing to a location (i.e., a `@'). Output to FOUT a |
264| reference to this location. STACK_OFFSET is the number of values |
265| in the current rule so far, which says where to find `$0' with |
266| respect to the top of the stack. |
267`-----------------------------------------------------------------*/
1ff442ca 268
a70083a3
AD
269static inline void
270copy_at (FILE *fin, FILE *fout, int stack_offset)
1ff442ca 271{
a70083a3 272 int c;
1ff442ca 273
a70083a3
AD
274 c = getc (fin);
275 if (c == '$')
1ff442ca 276 {
a70083a3
AD
277 fprintf (fout, "yyloc");
278 yylsp_needed = 1;
279 }
280 else if (isdigit (c) || c == '-')
281 {
282 int n;
1ff442ca 283
a70083a3
AD
284 ungetc (c, fin);
285 n = read_signed_integer (fin);
943819bf 286
a70083a3
AD
287 fprintf (fout, "yylsp[%d]", n - stack_offset);
288 yylsp_needed = 1;
1ff442ca 289 }
a70083a3
AD
290 else
291 complain (_("@%s is invalid"), printable_version (c));
1ff442ca 292}
a70083a3
AD
293\f
294/*-------------------------------------------------------------------.
295| Copy the contents of a `%{ ... %}' into the definitions file. The |
296| `%{' has already been read. Return after reading the `%}'. |
297`-------------------------------------------------------------------*/
1ff442ca 298
4a120d45 299static void
118fb205 300copy_definition (void)
1ff442ca 301{
a70083a3 302 int c;
ae3c3164 303 /* -1 while reading a character if prev char was %. */
a70083a3 304 int after_percent;
1ff442ca
NF
305
306 if (!nolinesflag)
a70083a3 307 fprintf (fattrs, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
308
309 after_percent = 0;
310
ae3c3164 311 c = getc (finput);
1ff442ca
NF
312
313 for (;;)
314 {
315 switch (c)
316 {
317 case '\n':
a70083a3 318 putc (c, fattrs);
1ff442ca
NF
319 lineno++;
320 break;
321
322 case '%':
a70083a3 323 after_percent = -1;
1ff442ca 324 break;
a083fbbf 325
1ff442ca
NF
326 case '\'':
327 case '"':
ae3c3164 328 copy_string (finput, fattrs, c);
1ff442ca
NF
329 break;
330
331 case '/':
ae3c3164
AD
332 putc (c, fattrs);
333 c = getc (finput);
1ff442ca
NF
334 if (c != '*' && c != '/')
335 continue;
ae3c3164 336 copy_comment (finput, fattrs, c);
1ff442ca
NF
337 break;
338
339 case EOF:
a70083a3 340 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
341
342 default:
a70083a3 343 putc (c, fattrs);
1ff442ca
NF
344 }
345
a70083a3 346 c = getc (finput);
1ff442ca
NF
347
348 if (after_percent)
349 {
350 if (c == '}')
351 return;
a70083a3 352 putc ('%', fattrs);
1ff442ca
NF
353 }
354 after_percent = 0;
355
356 }
357
358}
359
360
a70083a3
AD
361/*-----------------------------------------------------------------.
362| Parse what comes after %token or %nterm. For %token, what_is is |
363| STOKEN and what_is_not is SNTERM. For %nterm, the arguments are |
364| reversed. |
365`-----------------------------------------------------------------*/
1ff442ca 366
4a120d45 367static void
118fb205 368parse_token_decl (int what_is, int what_is_not)
1ff442ca 369{
a70083a3
AD
370 int token = 0;
371 char *typename = 0;
372 struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
373 int k;
374
1ff442ca
NF
375 for (;;)
376 {
e6011337
JT
377 int tmp_char = ungetc (skip_white_space (), finput);
378
379 if (tmp_char == '%')
1ff442ca 380 return;
e6011337 381 if (tmp_char == EOF)
a0f6b076 382 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 383
a70083a3 384 token = lex ();
1ff442ca 385 if (token == COMMA)
943819bf
RS
386 {
387 symbol = NULL;
388 continue;
389 }
1ff442ca
NF
390 if (token == TYPENAME)
391 {
a70083a3
AD
392 k = strlen (token_buffer);
393 typename = NEW2 (k + 1, char);
394 strcpy (typename, token_buffer);
1ff442ca 395 value_components_used = 1;
943819bf
RS
396 symbol = NULL;
397 }
a70083a3 398 else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
943819bf
RS
399 {
400 translations = 1;
401 symval->class = STOKEN;
402 symval->type_name = typename;
403 symval->user_token_number = symbol->user_token_number;
404 symbol->user_token_number = SALIAS;
405
a083fbbf
RS
406 symval->alias = symbol;
407 symbol->alias = symval;
943819bf
RS
408 symbol = NULL;
409
a70083a3 410 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
411 }
412 else if (token == IDENTIFIER)
413 {
414 int oldclass = symval->class;
943819bf 415 symbol = symval;
1ff442ca 416
943819bf 417 if (symbol->class == what_is_not)
a0f6b076 418 complain (_("symbol %s redefined"), symbol->tag);
943819bf 419 symbol->class = what_is;
1ff442ca 420 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 421 symbol->value = nvars++;
1ff442ca
NF
422
423 if (typename)
424 {
943819bf
RS
425 if (symbol->type_name == NULL)
426 symbol->type_name = typename;
a70083a3 427 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 428 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
429 }
430 }
943819bf 431 else if (symbol && token == NUMBER)
a70083a3 432 {
943819bf 433 symbol->user_token_number = numval;
1ff442ca 434 translations = 1;
a70083a3 435 }
1ff442ca 436 else
943819bf 437 {
a0f6b076 438 complain (_("`%s' is invalid in %s"),
a70083a3
AD
439 token_buffer, (what_is == STOKEN) ? "%token" : "%nterm");
440 skip_to_char ('%');
943819bf 441 }
1ff442ca
NF
442 }
443
444}
445
1ff442ca 446
a0f6b076 447/* Parse what comes after %start */
1ff442ca 448
4a120d45 449static void
118fb205 450parse_start_decl (void)
1ff442ca
NF
451{
452 if (start_flag)
27821bff
AD
453 complain (_("multiple %s declarations"), "%start");
454 if (lex () != IDENTIFIER)
455 complain (_("invalid %s declaration"), "%start");
943819bf
RS
456 else
457 {
458 start_flag = 1;
459 startval = symval;
460 }
1ff442ca
NF
461}
462
463
464
a70083a3
AD
465/*--------------------------------------------------------------.
466| Get the data type (alternative in the union) of the value for |
467| symbol n in rule rule. |
468`--------------------------------------------------------------*/
1ff442ca 469
a70083a3
AD
470static char *
471get_type_name (int n, symbol_list * rule)
1ff442ca 472{
a70083a3
AD
473 int i;
474 symbol_list *rp;
1ff442ca 475
a70083a3 476 if (n < 0)
943819bf 477 {
a70083a3
AD
478 complain (_("invalid $ value"));
479 return NULL;
943819bf 480 }
1ff442ca 481
a70083a3
AD
482 rp = rule;
483 i = 0;
1ff442ca 484
a70083a3 485 while (i < n)
1ff442ca 486 {
a70083a3
AD
487 rp = rp->next;
488 if (rp == NULL || rp->sym == NULL)
489 {
490 complain (_("invalid $ value"));
491 return NULL;
492 }
493 i++;
494 }
495
496 return rp->sym->type_name;
497}
498
499
500/*-----------------------------------------------------------.
501| read in a %type declaration and record its information for |
502| get_type_name to access |
503`-----------------------------------------------------------*/
504
505static void
506parse_type_decl (void)
507{
508 int k;
509 char *name;
510
511 if (lex () != TYPENAME)
512 {
513 complain ("%s", _("%type declaration has no <typename>"));
514 skip_to_char ('%');
515 return;
516 }
517
518 k = strlen (token_buffer);
519 name = NEW2 (k + 1, char);
520 strcpy (name, token_buffer);
521
522 for (;;)
523 {
524 int t;
525 int tmp_char = ungetc (skip_white_space (), finput);
526
527 if (tmp_char == '%')
528 return;
529 if (tmp_char == EOF)
530 fatal (_("Premature EOF after %s"), token_buffer);
531
532 t = lex ();
533
534 switch (t)
1ff442ca
NF
535 {
536
537 case COMMA:
538 case SEMICOLON:
539 break;
540
541 case IDENTIFIER:
542 if (symval->type_name == NULL)
543 symval->type_name = name;
a70083a3 544 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 545 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
546
547 break;
548
549 default:
a0f6b076
AD
550 complain (_("invalid %%type declaration due to item: %s"),
551 token_buffer);
a70083a3 552 skip_to_char ('%');
1ff442ca
NF
553 }
554 }
555}
556
557
558
559/* read in a %left, %right or %nonassoc declaration and record its information. */
560/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
561
4a120d45 562static void
118fb205 563parse_assoc_decl (int assoc)
1ff442ca 564{
a70083a3
AD
565 int k;
566 char *name = NULL;
567 int prev = 0;
1ff442ca 568
a70083a3 569 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 570
1ff442ca
NF
571 for (;;)
572 {
a70083a3 573 int t;
e6011337 574 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 575
e6011337 576 if (tmp_char == '%')
1ff442ca 577 return;
e6011337 578 if (tmp_char == EOF)
a0f6b076 579 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 580
a70083a3 581 t = lex ();
1ff442ca
NF
582
583 switch (t)
584 {
585
586 case TYPENAME:
a70083a3
AD
587 k = strlen (token_buffer);
588 name = NEW2 (k + 1, char);
589 strcpy (name, token_buffer);
1ff442ca
NF
590 break;
591
592 case COMMA:
593 break;
594
595 case IDENTIFIER:
596 if (symval->prec != 0)
a0f6b076 597 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
598 symval->prec = lastprec;
599 symval->assoc = assoc;
600 if (symval->class == SNTERM)
a0f6b076 601 complain (_("symbol %s redefined"), symval->tag);
1ff442ca
NF
602 symval->class = STOKEN;
603 if (name)
a70083a3 604 { /* record the type, if one is specified */
1ff442ca
NF
605 if (symval->type_name == NULL)
606 symval->type_name = name;
a70083a3 607 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 608 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
609 }
610 break;
611
612 case NUMBER:
613 if (prev == IDENTIFIER)
a70083a3 614 {
1ff442ca
NF
615 symval->user_token_number = numval;
616 translations = 1;
a70083a3
AD
617 }
618 else
619 {
620 complain (_
621 ("invalid text (%s) - number should be after identifier"),
622token_buffer);
623 skip_to_char ('%');
624 }
1ff442ca
NF
625 break;
626
627 case SEMICOLON:
628 return;
629
630 default:
a0f6b076 631 complain (_("unexpected item: %s"), token_buffer);
a70083a3 632 skip_to_char ('%');
1ff442ca
NF
633 }
634
635 prev = t;
636
637 }
638}
639
640
641
642/* copy the union declaration into fattrs (and fdefines),
643 where it is made into the
644 definition of YYSTYPE, the type of elements of the parser value stack. */
645
4a120d45 646static void
118fb205 647parse_union_decl (void)
1ff442ca 648{
a70083a3
AD
649 int c;
650 int count = 0;
1ff442ca
NF
651
652 if (typed)
27821bff 653 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
654
655 typed = 1;
656
657 if (!nolinesflag)
27821bff 658 fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
1ff442ca 659 else
27821bff 660 fprintf (fattrs, "\n");
1ff442ca 661
27821bff 662 fprintf (fattrs, "typedef union");
1ff442ca 663 if (fdefines)
27821bff 664 fprintf (fdefines, "typedef union");
1ff442ca 665
27821bff 666 c = getc (finput);
1ff442ca
NF
667
668 while (c != EOF)
669 {
27821bff 670 putc (c, fattrs);
1ff442ca 671 if (fdefines)
27821bff 672 putc (c, fdefines);
1ff442ca
NF
673
674 switch (c)
675 {
676 case '\n':
677 lineno++;
678 break;
679
680 case '/':
27821bff 681 c = getc (finput);
1ff442ca 682 if (c != '*' && c != '/')
27821bff
AD
683 continue;
684 copy_comment2 (finput, fattrs, fdefines, c);
1ff442ca
NF
685 break;
686
687
688 case '{':
689 count++;
690 break;
691
692 case '}':
693 if (count == 0)
27821bff 694 complain (_("unmatched %s"), "`}'");
1ff442ca 695 count--;
943819bf 696 if (count <= 0)
1ff442ca 697 {
27821bff 698 fprintf (fattrs, " YYSTYPE;\n");
1ff442ca 699 if (fdefines)
27821bff 700 fprintf (fdefines, " YYSTYPE;\n");
1ff442ca 701 /* JF don't choke on trailing semi */
27821bff
AD
702 c = skip_white_space ();
703 if (c != ';')
a70083a3 704 ungetc (c, finput);
1ff442ca
NF
705 return;
706 }
707 }
708
27821bff 709 c = getc (finput);
1ff442ca
NF
710 }
711}
712
713/* parse the declaration %expect N which says to expect N
714 shift-reduce conflicts. */
715
4a120d45 716static void
118fb205 717parse_expect_decl (void)
1ff442ca 718{
a70083a3
AD
719 int c;
720 int count;
1ff442ca
NF
721 char buffer[20];
722
a70083a3 723 c = getc (finput);
1ff442ca 724 while (c == ' ' || c == '\t')
a70083a3 725 c = getc (finput);
1ff442ca
NF
726
727 count = 0;
728 while (c >= '0' && c <= '9')
729 {
730 if (count < 20)
731 buffer[count++] = c;
a70083a3 732 c = getc (finput);
1ff442ca
NF
733 }
734 buffer[count] = 0;
735
736 ungetc (c, finput);
737
943819bf 738 if (count <= 0 || count > 10)
a0f6b076 739 complain ("%s", _("argument of %expect is not an integer"));
1ff442ca
NF
740 expected_conflicts = atoi (buffer);
741}
742
a70083a3
AD
743
744/*-------------------------------------------------------------------.
745| Parse what comes after %thong. the full syntax is |
746| |
747| %thong <type> token number literal |
748| |
749| the <type> or number may be omitted. The number specifies the |
750| user_token_number. |
751| |
752| Two symbols are entered in the table, one for the token symbol and |
753| one for the literal. Both are given the <type>, if any, from the |
754| declaration. The ->user_token_number of the first is SALIAS and |
755| the ->user_token_number of the second is set to the number, if |
756| any, from the declaration. The two symbols are linked via |
757| pointers in their ->alias fields. |
758| |
759| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
760| only the literal string is retained it is the literal string that |
761| is output to yytname |
762`-------------------------------------------------------------------*/
763
764static void
765parse_thong_decl (void)
7b306f52 766{
a70083a3
AD
767 int token;
768 struct bucket *symbol;
769 char *typename = 0;
770 int k, usrtoknum;
7b306f52 771
a70083a3
AD
772 translations = 1;
773 token = lex (); /* fetch typename or first token */
774 if (token == TYPENAME)
7b306f52 775 {
a70083a3
AD
776 k = strlen (token_buffer);
777 typename = NEW2 (k + 1, char);
778 strcpy (typename, token_buffer);
779 value_components_used = 1;
780 token = lex (); /* fetch first token */
7b306f52 781 }
7b306f52 782
a70083a3 783 /* process first token */
7b306f52 784
a70083a3
AD
785 if (token != IDENTIFIER)
786 {
787 complain (_("unrecognized item %s, expected an identifier"),
788 token_buffer);
789 skip_to_char ('%');
790 return;
7b306f52 791 }
a70083a3
AD
792 symval->class = STOKEN;
793 symval->type_name = typename;
794 symval->user_token_number = SALIAS;
795 symbol = symval;
7b306f52 796
a70083a3 797 token = lex (); /* get number or literal string */
1ff442ca 798
a70083a3 799 if (token == NUMBER)
943819bf 800 {
a70083a3
AD
801 usrtoknum = numval;
802 token = lex (); /* okay, did number, now get literal */
943819bf 803 }
a70083a3
AD
804 else
805 usrtoknum = 0;
1ff442ca 806
a70083a3 807 /* process literal string token */
1ff442ca 808
a70083a3 809 if (token != IDENTIFIER || *symval->tag != '\"')
1ff442ca 810 {
a70083a3
AD
811 complain (_("expected string constant instead of %s"), token_buffer);
812 skip_to_char ('%');
813 return;
1ff442ca 814 }
a70083a3
AD
815 symval->class = STOKEN;
816 symval->type_name = typename;
817 symval->user_token_number = usrtoknum;
1ff442ca 818
a70083a3
AD
819 symval->alias = symbol;
820 symbol->alias = symval;
1ff442ca 821
a70083a3
AD
822 nsyms--; /* symbol and symval combined are only one symbol */
823}
3cef001a 824
a70083a3
AD
825/*----------------------------------------------------------------.
826| Read from finput until `%%' is seen. Discard the `%%'. Handle |
827| any `%' declarations, and copy the contents of any `%{ ... %}' |
828| groups to fattrs. |
829`----------------------------------------------------------------*/
1ff442ca 830
4a120d45 831static void
a70083a3 832read_declarations (void)
1ff442ca 833{
a70083a3
AD
834 int c;
835 int tok;
1ff442ca 836
a70083a3 837 for (;;)
1ff442ca 838 {
a70083a3 839 c = skip_white_space ();
1ff442ca 840
a70083a3
AD
841 if (c == '%')
842 {
843 tok = parse_percent_token ();
1ff442ca 844
a70083a3 845 switch (tok)
943819bf 846 {
a70083a3
AD
847 case TWO_PERCENTS:
848 return;
1ff442ca 849
a70083a3
AD
850 case PERCENT_LEFT_CURLY:
851 copy_definition ();
852 break;
1ff442ca 853
a70083a3
AD
854 case TOKEN:
855 parse_token_decl (STOKEN, SNTERM);
856 break;
1ff442ca 857
a70083a3
AD
858 case NTERM:
859 parse_token_decl (SNTERM, STOKEN);
860 break;
1ff442ca 861
a70083a3
AD
862 case TYPE:
863 parse_type_decl ();
864 break;
1ff442ca 865
a70083a3
AD
866 case START:
867 parse_start_decl ();
868 break;
118fb205 869
a70083a3
AD
870 case UNION:
871 parse_union_decl ();
872 break;
1ff442ca 873
a70083a3
AD
874 case EXPECT:
875 parse_expect_decl ();
876 break;
877 case THONG:
878 parse_thong_decl ();
879 break;
880 case LEFT:
881 parse_assoc_decl (LEFT_ASSOC);
882 break;
1ff442ca 883
a70083a3
AD
884 case RIGHT:
885 parse_assoc_decl (RIGHT_ASSOC);
886 break;
1ff442ca 887
a70083a3
AD
888 case NONASSOC:
889 parse_assoc_decl (NON_ASSOC);
890 break;
1ff442ca 891
a70083a3
AD
892 case SEMANTIC_PARSER:
893 if (semantic_parser == 0)
894 {
895 semantic_parser = 1;
896 open_extra_files ();
897 }
898 break;
1ff442ca 899
a70083a3
AD
900 case PURE_PARSER:
901 pure_parser = 1;
902 break;
1ff442ca 903
a70083a3
AD
904 case NOOP:
905 break;
1ff442ca 906
a70083a3
AD
907 default:
908 complain (_("unrecognized: %s"), token_buffer);
909 skip_to_char ('%');
910 }
911 }
912 else if (c == EOF)
913 fatal (_("no input grammar"));
914 else
915 {
916 complain (_("unknown character: %s"), printable_version (c));
917 skip_to_char ('%');
1ff442ca 918 }
1ff442ca 919 }
1ff442ca 920}
a70083a3
AD
921\f
922/*-------------------------------------------------------------------.
923| Assuming that a `{' has just been seen, copy everything up to the |
924| matching `}' into the actions file. STACK_OFFSET is the number of |
925| values in the current rule so far, which says where to find `$0' |
926| with respect to the top of the stack. |
927`-------------------------------------------------------------------*/
1ff442ca 928
4a120d45 929static void
a70083a3 930copy_action (symbol_list * rule, int stack_offset)
1ff442ca 931{
a70083a3
AD
932 int c;
933 int n;
934 int count;
935 char *type_name;
1ff442ca
NF
936
937 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
938 if (semantic_parser)
939 stack_offset = 0;
1ff442ca 940
41aca2e0 941 fprintf (faction, "\ncase %d:\n", nrules);
1ff442ca 942 if (!nolinesflag)
41aca2e0
AD
943 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
944 putc ('{', faction);
1ff442ca
NF
945
946 count = 1;
a70083a3 947 c = getc (finput);
1ff442ca
NF
948
949 while (count > 0)
950 {
951 while (c != '}')
a70083a3
AD
952 {
953 switch (c)
1ff442ca
NF
954 {
955 case '\n':
a70083a3 956 putc (c, faction);
1ff442ca
NF
957 lineno++;
958 break;
959
960 case '{':
a70083a3 961 putc (c, faction);
1ff442ca
NF
962 count++;
963 break;
964
965 case '\'':
966 case '"':
ca36d2ef 967 copy_string (finput, faction, c);
1ff442ca
NF
968 break;
969
970 case '/':
27821bff
AD
971 putc (c, faction);
972 c = getc (finput);
1ff442ca
NF
973 if (c != '*' && c != '/')
974 continue;
3cef001a 975 copy_comment (finput, faction, c);
1ff442ca
NF
976 break;
977
978 case '$':
a70083a3 979 c = getc (finput);
1ff442ca
NF
980 type_name = NULL;
981
982 if (c == '<')
983 {
a70083a3 984 char *cp = token_buffer;
1ff442ca 985
a70083a3 986 while ((c = getc (finput)) != '>' && c > 0)
118fb205
JT
987 {
988 if (cp == token_buffer + maxtoken)
a70083a3 989 cp = grow_token_buffer (cp);
118fb205
JT
990
991 *cp++ = c;
992 }
1ff442ca
NF
993 *cp = 0;
994 type_name = token_buffer;
995 value_components_used = 1;
996
a70083a3 997 c = getc (finput);
1ff442ca
NF
998 }
999 if (c == '$')
1000 {
a70083a3 1001 fprintf (faction, "yyval");
41aca2e0 1002 if (!type_name)
a70083a3 1003 type_name = get_type_name (0, rule);
1ff442ca 1004 if (type_name)
a70083a3
AD
1005 fprintf (faction, ".%s", type_name);
1006 if (!type_name && typed)
a0f6b076
AD
1007 complain (_("$$ of `%s' has no declared type"),
1008 rule->sym->tag);
1ff442ca 1009 }
a70083a3 1010 else if (isdigit (c) || c == '-')
1ff442ca
NF
1011 {
1012 ungetc (c, finput);
a70083a3
AD
1013 n = read_signed_integer (finput);
1014 c = getc (finput);
1ff442ca
NF
1015
1016 if (!type_name && n > 0)
a70083a3 1017 type_name = get_type_name (n, rule);
1ff442ca 1018
a70083a3 1019 fprintf (faction, "yyvsp[%d]", n - stack_offset);
1ff442ca 1020 if (type_name)
a70083a3
AD
1021 fprintf (faction, ".%s", type_name);
1022 if (!type_name && typed)
a0f6b076
AD
1023 complain (_("$%d of `%s' has no declared type"),
1024 n, rule->sym->tag);
1ff442ca
NF
1025 continue;
1026 }
1027 else
a0f6b076 1028 complain (_("$%s is invalid"), printable_version (c));
1ff442ca
NF
1029
1030 break;
1031
1032 case '@':
7b306f52 1033 copy_at (finput, faction, stack_offset);
6666f98f 1034 break;
1ff442ca
NF
1035
1036 case EOF:
27821bff 1037 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1038
1039 default:
a70083a3
AD
1040 putc (c, faction);
1041 }
1042
1043 c = getc (finput);
1044 }
1045
1046 /* above loop exits when c is '}' */
1047
1048 if (--count)
1049 {
1050 putc (c, faction);
1051 c = getc (finput);
1052 }
1053 }
1054
1055 fprintf (faction, ";\n break;}");
1056}
1057\f
1058/*-------------------------------------------------------------------.
1059| After `%guard' is seen in the input file, copy the actual guard |
1060| into the guards file. If the guard is followed by an action, copy |
1061| that into the actions file. STACK_OFFSET is the number of values |
1062| in the current rule so far, which says where to find `$0' with |
1063| respect to the top of the stack, for the simple parser in which |
1064| the stack is not popped until after the guard is run. |
1065`-------------------------------------------------------------------*/
1066
1067static void
1068copy_guard (symbol_list * rule, int stack_offset)
1069{
1070 int c;
1071 int n;
1072 int count;
1073 char *type_name;
1074 int brace_flag = 0;
1075
1076 /* offset is always 0 if parser has already popped the stack pointer */
1077 if (semantic_parser)
1078 stack_offset = 0;
1079
1080 fprintf (fguard, "\ncase %d:\n", nrules);
1081 if (!nolinesflag)
1082 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1083 putc ('{', fguard);
1084
1085 count = 0;
1086 c = getc (finput);
1087
1088 while (brace_flag ? (count > 0) : (c != ';'))
1089 {
1090 switch (c)
1091 {
1092 case '\n':
1093 putc (c, fguard);
1094 lineno++;
1095 break;
1096
1097 case '{':
1098 putc (c, fguard);
1099 brace_flag = 1;
1100 count++;
1101 break;
1102
1103 case '}':
1104 putc (c, fguard);
1105 if (count > 0)
1106 count--;
1107 else
1108 {
1109 complain (_("unmatched %s"), "`}'");
1110 c = getc (finput); /* skip it */
1111 }
1112 break;
1113
1114 case '\'':
1115 case '"':
1116 copy_string (finput, fguard, c);
1117 break;
1118
1119 case '/':
1120 putc (c, fguard);
1121 c = getc (finput);
1122 if (c != '*' && c != '/')
1123 continue;
1124 copy_comment (finput, fguard, c);
1125 break;
1126
1127 case '$':
1128 c = getc (finput);
1129 type_name = NULL;
1130
1131 if (c == '<')
1132 {
1133 char *cp = token_buffer;
1134
1135 while ((c = getc (finput)) != '>' && c > 0)
1136 {
1137 if (cp == token_buffer + maxtoken)
1138 cp = grow_token_buffer (cp);
1139
1140 *cp++ = c;
1141 }
1142 *cp = 0;
1143 type_name = token_buffer;
1144
1145 c = getc (finput);
1146 }
1147
1148 if (c == '$')
1149 {
1150 fprintf (fguard, "yyval");
1151 if (!type_name)
1152 type_name = rule->sym->type_name;
1153 if (type_name)
1154 fprintf (fguard, ".%s", type_name);
1155 if (!type_name && typed)
1156 complain (_("$$ of `%s' has no declared type"),
1157 rule->sym->tag);
1158 }
1159 else if (isdigit (c) || c == '-')
1160 {
1161 ungetc (c, finput);
1162 n = read_signed_integer (finput);
1163 c = getc (finput);
1164
1165 if (!type_name && n > 0)
1166 type_name = get_type_name (n, rule);
1167
1168 fprintf (fguard, "yyvsp[%d]", n - stack_offset);
1169 if (type_name)
1170 fprintf (fguard, ".%s", type_name);
1171 if (!type_name && typed)
1172 complain (_("$%d of `%s' has no declared type"),
1173 n, rule->sym->tag);
1174 continue;
1ff442ca 1175 }
a70083a3
AD
1176 else
1177 complain (_("$%s is invalid"), printable_version (c));
1178 break;
1ff442ca 1179
a70083a3
AD
1180 case '@':
1181 copy_at (finput, fguard, stack_offset);
1182 break;
1ff442ca 1183
a70083a3
AD
1184 case EOF:
1185 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1186
a70083a3
AD
1187 default:
1188 putc (c, fguard);
1ff442ca 1189 }
a70083a3
AD
1190
1191 if (c != '}' || count != 0)
1192 c = getc (finput);
1ff442ca
NF
1193 }
1194
a70083a3
AD
1195 c = skip_white_space ();
1196
1197 fprintf (fguard, ";\n break;}");
1198 if (c == '{')
1199 copy_action (rule, stack_offset);
1200 else if (c == '=')
1201 {
1202 c = getc (finput); /* why not skip_white_space -wjh */
1203 if (c == '{')
1204 copy_action (rule, stack_offset);
1205 }
1206 else
1207 ungetc (c, finput);
1ff442ca 1208}
a70083a3
AD
1209\f
1210
1211static void
1212record_rule_line (void)
1213{
1214 /* Record each rule's source line number in rline table. */
1ff442ca 1215
a70083a3
AD
1216 if (nrules >= rline_allocated)
1217 {
1218 rline_allocated = nrules * 2;
1219 rline = (short *) xrealloc ((char *) rline,
1220 rline_allocated * sizeof (short));
1221 }
1222 rline[nrules] = lineno;
1223}
1ff442ca
NF
1224
1225
a70083a3
AD
1226/*-------------------------------------------------------------------.
1227| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1228| with the user's names. |
1229`-------------------------------------------------------------------*/
1ff442ca 1230
4a120d45 1231static bucket *
118fb205 1232gensym (void)
1ff442ca 1233{
a70083a3 1234 bucket *sym;
1ff442ca
NF
1235
1236 sprintf (token_buffer, "@%d", ++gensym_count);
a70083a3 1237 sym = getsym (token_buffer);
1ff442ca
NF
1238 sym->class = SNTERM;
1239 sym->value = nvars++;
36281465 1240 return sym;
1ff442ca
NF
1241}
1242
a70083a3
AD
#if 0
/*-------------------------------------------------------------------.
| Read a %type declaration and record its information for            |
| get_type_name to access.  Return the first token that is not part  |
| of the declaration.  This is unused: it is only called from the    |
| #if 0 part of readgram.                                            |
`-------------------------------------------------------------------*/

static int
get_type (void)
{
  int tok;
  int len;
  char *type;

  tok = lex ();
  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  /* Keep a private copy of the type name found in TOKEN_BUFFER; it
     is shared by every symbol listed in this declaration.  */
  len = strlen (token_buffer);
  type = NEW2 (len + 1, char);
  strcpy (type, token_buffer);

  for (;;)
    {
      tok = lex ();

      /* A semicolon ends the declaration: hand back what follows.  */
      if (tok == SEMICOLON)
        return lex ();

      /* Commas between symbols are simply skipped.  */
      if (tok == COMMA)
        continue;

      /* Anything else that is not a symbol ends the declaration.  */
      if (tok != IDENTIFIER)
        return tok;

      if (!symval->type_name)
        symval->type_name = type;
      else if (strcmp (type, symval->type_name) != 0)
        complain (_("type redeclaration for %s"), symval->tag);
    }
}

#endif
1296\f
1297/*------------------------------------------------------------------.
1298| Parse the input grammar into a one symbol_list structure. Each |
1299| rule is represented by a sequence of symbols: the left hand side |
1300| followed by the contents of the right hand side, followed by a |
1301| null pointer instead of a symbol to terminate the rule. The next |
1302| symbol is the lhs of the following rule. |
1303| |
1304| All guards and actions are copied out to the appropriate files, |
1305| labelled by the rule number they apply to. |
1306`------------------------------------------------------------------*/
1ff442ca 1307
4a120d45 1308static void
118fb205 1309readgram (void)
1ff442ca 1310{
a70083a3
AD
1311 int t;
1312 bucket *lhs = NULL;
1313 symbol_list *p;
1314 symbol_list *p1;
1315 bucket *bp;
1ff442ca 1316
a70083a3
AD
1317 symbol_list *crule; /* points to first symbol_list of current rule. */
1318 /* its symbol is the lhs of the rule. */
1319 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1ff442ca
NF
1320
1321 p1 = NULL;
1322
a70083a3 1323 t = lex ();
1ff442ca
NF
1324
1325 while (t != TWO_PERCENTS && t != ENDFILE)
1326 {
1327 if (t == IDENTIFIER || t == BAR)
1328 {
a70083a3
AD
1329 int actionflag = 0;
1330 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1ff442ca
NF
1331 int xactions = 0; /* JF for error checking */
1332 bucket *first_rhs = 0;
1333
1334 if (t == IDENTIFIER)
1335 {
1336 lhs = symval;
943819bf
RS
1337
1338 if (!start_flag)
1339 {
1340 startval = lhs;
1341 start_flag = 1;
1342 }
a083fbbf 1343
a70083a3 1344 t = lex ();
1ff442ca 1345 if (t != COLON)
943819bf 1346 {
a0f6b076 1347 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1348 unlex (t);
943819bf 1349 }
1ff442ca
NF
1350 }
1351
943819bf 1352 if (nrules == 0 && t == BAR)
1ff442ca 1353 {
a0f6b076 1354 complain (_("grammar starts with vertical bar"));
943819bf 1355 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1356 }
1ff442ca
NF
1357 /* start a new rule and record its lhs. */
1358
1359 nrules++;
1360 nitems++;
1361
1362 record_rule_line ();
1363
a70083a3 1364 p = NEW (symbol_list);
1ff442ca
NF
1365 p->sym = lhs;
1366
1367 crule1 = p1;
1368 if (p1)
1369 p1->next = p;
1370 else
1371 grammar = p;
1372
1373 p1 = p;
1374 crule = p;
1375
1376 /* mark the rule's lhs as a nonterminal if not already so. */
1377
1378 if (lhs->class == SUNKNOWN)
1379 {
1380 lhs->class = SNTERM;
1381 lhs->value = nvars;
1382 nvars++;
1383 }
1384 else if (lhs->class == STOKEN)
a0f6b076 1385 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1386
1387 /* read the rhs of the rule. */
1388
1389 for (;;)
1390 {
a70083a3 1391 t = lex ();
943819bf
RS
1392 if (t == PREC)
1393 {
a70083a3 1394 t = lex ();
943819bf 1395 crule->ruleprec = symval;
a70083a3 1396 t = lex ();
943819bf 1397 }
1ff442ca 1398
a70083a3
AD
1399 if (!(t == IDENTIFIER || t == LEFT_CURLY))
1400 break;
1ff442ca
NF
1401
1402 /* If next token is an identifier, see if a colon follows it.
a70083a3 1403 If one does, exit this rule now. */
1ff442ca
NF
1404 if (t == IDENTIFIER)
1405 {
a70083a3
AD
1406 bucket *ssave;
1407 int t1;
1ff442ca
NF
1408
1409 ssave = symval;
a70083a3
AD
1410 t1 = lex ();
1411 unlex (t1);
1ff442ca 1412 symval = ssave;
a70083a3
AD
1413 if (t1 == COLON)
1414 break;
1ff442ca 1415
a70083a3 1416 if (!first_rhs) /* JF */
1ff442ca
NF
1417 first_rhs = symval;
1418 /* Not followed by colon =>
1419 process as part of this rule's rhs. */
1420 }
1421
1422 /* If we just passed an action, that action was in the middle
a70083a3
AD
1423 of a rule, so make a dummy rule to reduce it to a
1424 non-terminal. */
1ff442ca
NF
1425 if (actionflag)
1426 {
a70083a3 1427 bucket *sdummy;
1ff442ca
NF
1428
1429 /* Since the action was written out with this rule's */
943819bf 1430 /* number, we must give the new rule this number */
1ff442ca
NF
1431 /* by inserting the new rule before it. */
1432
1433 /* Make a dummy nonterminal, a gensym. */
a70083a3 1434 sdummy = gensym ();
1ff442ca
NF
1435
1436 /* Make a new rule, whose body is empty,
1437 before the current one, so that the action
1438 just read can belong to it. */
1439 nrules++;
1440 nitems++;
1441 record_rule_line ();
a70083a3 1442 p = NEW (symbol_list);
1ff442ca
NF
1443 if (crule1)
1444 crule1->next = p;
a70083a3
AD
1445 else
1446 grammar = p;
1ff442ca 1447 p->sym = sdummy;
a70083a3 1448 crule1 = NEW (symbol_list);
1ff442ca
NF
1449 p->next = crule1;
1450 crule1->next = crule;
1451
1452 /* insert the dummy generated by that rule into this rule. */
1453 nitems++;
a70083a3 1454 p = NEW (symbol_list);
1ff442ca
NF
1455 p->sym = sdummy;
1456 p1->next = p;
1457 p1 = p;
1458
1459 actionflag = 0;
1460 }
1461
1462 if (t == IDENTIFIER)
1463 {
1464 nitems++;
a70083a3 1465 p = NEW (symbol_list);
1ff442ca
NF
1466 p->sym = symval;
1467 p1->next = p;
1468 p1 = p;
1469 }
a70083a3 1470 else /* handle an action. */
1ff442ca 1471 {
a70083a3 1472 copy_action (crule, rulelength);
1ff442ca
NF
1473 actionflag = 1;
1474 xactions++; /* JF */
1475 }
1476 rulelength++;
a70083a3 1477 } /* end of read rhs of rule */
1ff442ca
NF
1478
1479 /* Put an empty link in the list to mark the end of this rule */
a70083a3 1480 p = NEW (symbol_list);
1ff442ca
NF
1481 p1->next = p;
1482 p1 = p;
1483
1484 if (t == PREC)
1485 {
a0f6b076 1486 complain (_("two @prec's in a row"));
a70083a3 1487 t = lex ();
1ff442ca 1488 crule->ruleprec = symval;
a70083a3 1489 t = lex ();
1ff442ca
NF
1490 }
1491 if (t == GUARD)
1492 {
a70083a3 1493 if (!semantic_parser)
a0f6b076 1494 complain ("%s",
a70083a3
AD
1495 _
1496 ("%guard present but %semantic_parser not specified"));
1ff442ca 1497
a70083a3
AD
1498 copy_guard (crule, rulelength);
1499 t = lex ();
1ff442ca
NF
1500 }
1501 else if (t == LEFT_CURLY)
1502 {
a70083a3 1503 /* This case never occurs -wjh */
6666f98f 1504 if (actionflag)
a0f6b076 1505 complain (_("two actions at end of one rule"));
a70083a3 1506 copy_action (crule, rulelength);
943819bf
RS
1507 actionflag = 1;
1508 xactions++; /* -wjh */
a70083a3 1509 t = lex ();
1ff442ca 1510 }
a0f6b076 1511 /* If $$ is being set in default way, report if any type
6666f98f
AD
1512 mismatch. */
1513 else if (!xactions
a70083a3 1514 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1515 {
6666f98f
AD
1516 if (lhs->type_name == 0
1517 || first_rhs->type_name == 0
a70083a3 1518 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1519 complain (_("type clash (`%s' `%s') on default action"),
1520 lhs->type_name ? lhs->type_name : "",
a70083a3 1521 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1522 }
1523 /* Warn if there is no default for $$ but we need one. */
1524 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1525 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca 1526 if (t == SEMICOLON)
a70083a3 1527 t = lex ();
a083fbbf 1528 }
943819bf 1529#if 0
a70083a3 1530 /* these things can appear as alternatives to rules. */
943819bf
RS
1531/* NO, they cannot.
1532 a) none of the documentation allows them
1533 b) most of them scan forward until finding a next %
1534 thus they may swallow lots of intervening rules
1535*/
1ff442ca
NF
1536 else if (t == TOKEN)
1537 {
a70083a3
AD
1538 parse_token_decl (STOKEN, SNTERM);
1539 t = lex ();
1ff442ca
NF
1540 }
1541 else if (t == NTERM)
1542 {
a70083a3
AD
1543 parse_token_decl (SNTERM, STOKEN);
1544 t = lex ();
1ff442ca
NF
1545 }
1546 else if (t == TYPE)
1547 {
a70083a3 1548 t = get_type ();
1ff442ca
NF
1549 }
1550 else if (t == UNION)
1551 {
a70083a3
AD
1552 parse_union_decl ();
1553 t = lex ();
1ff442ca
NF
1554 }
1555 else if (t == EXPECT)
1556 {
a70083a3
AD
1557 parse_expect_decl ();
1558 t = lex ();
1ff442ca
NF
1559 }
1560 else if (t == START)
1561 {
a70083a3
AD
1562 parse_start_decl ();
1563 t = lex ();
1ff442ca 1564 }
943819bf
RS
1565#endif
1566
1ff442ca 1567 else
943819bf 1568 {
a0f6b076 1569 complain (_("invalid input: %s"), token_buffer);
a70083a3 1570 t = lex ();
943819bf 1571 }
1ff442ca
NF
1572 }
1573
943819bf
RS
1574 /* grammar has been read. Do some checking */
1575
1ff442ca 1576 if (nsyms > MAXSHORT)
a0f6b076
AD
1577 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1578 MAXSHORT);
1ff442ca 1579 if (nrules == 0)
a0f6b076 1580 fatal (_("no rules in the input grammar"));
1ff442ca 1581
a70083a3 1582 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1ff442ca
NF
1583 && !value_components_used)
1584 {
1585 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
a70083a3
AD
1586 but it seems better to be consistent.
1587 Most programs should declare their own type anyway. */
1588 fprintf (fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca 1589 if (fdefines)
a70083a3 1590 fprintf (fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca
NF
1591 }
1592
1593 /* Report any undefined symbols and consider them nonterminals. */
1594
1595 for (bp = firstsymbol; bp; bp = bp->next)
1596 if (bp->class == SUNKNOWN)
1597 {
a70083a3
AD
1598 complain (_
1599 ("symbol %s is used, but is not defined as a token and has no rules"),
1600bp->tag);
1ff442ca
NF
1601 bp->class = SNTERM;
1602 bp->value = nvars++;
1603 }
1604
1605 ntokens = nsyms - nvars;
1606}
a70083a3
AD
1607\f
1608/*--------------------------------------------------------------.
1609| For named tokens, but not literal ones, define the name. The |
1610| value is the user token number. |
1611`--------------------------------------------------------------*/
1ff442ca 1612
4a120d45 1613static void
a70083a3 1614output_token_defines (FILE *file)
1ff442ca 1615{
a70083a3
AD
1616 bucket *bp;
1617 char *cp, *symbol;
1618 char c;
1ff442ca 1619
a70083a3 1620 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1621 {
a70083a3
AD
1622 symbol = bp->tag; /* get symbol */
1623
1624 if (bp->value >= ntokens)
1625 continue;
1626 if (bp->user_token_number == SALIAS)
1627 continue;
1628 if ('\'' == *symbol)
1629 continue; /* skip literal character */
1630 if (bp == errtoken)
1631 continue; /* skip error token */
1632 if ('\"' == *symbol)
1ff442ca 1633 {
a70083a3
AD
1634 /* use literal string only if given a symbol with an alias */
1635 if (bp->alias)
1636 symbol = bp->alias->tag;
1637 else
1638 continue;
1639 }
1ff442ca 1640
a70083a3
AD
1641 /* Don't #define nonliteral tokens whose names contain periods. */
1642 cp = symbol;
1643 while ((c = *cp++) && c != '.');
1644 if (c != '\0')
1645 continue;
1ff442ca 1646
a70083a3
AD
1647 fprintf (file, "#define\t%s\t%d\n", symbol,
1648 ((translations && !rawtoknumflag)
1649 ? bp->user_token_number : bp->value));
1650 if (semantic_parser)
1651 fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca 1652 }
a70083a3
AD
1653
1654 putc ('\n', file);
1ff442ca 1655}
1ff442ca
NF
1656
1657
a70083a3
AD
1658/*------------------------------------------------------------------.
1659| Assign symbol numbers, and write definition of token names into |
b2ca4022 1660| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1661| of symbols. |
1662`------------------------------------------------------------------*/
1ff442ca 1663
4a120d45 1664static void
118fb205 1665packsymbols (void)
1ff442ca 1666{
a70083a3
AD
1667 bucket *bp;
1668 int tokno = 1;
1669 int i;
1670 int last_user_token_number;
4a120d45 1671 static char DOLLAR[] = "$";
1ff442ca
NF
1672
1673 /* int lossage = 0; JF set but not used */
1674
a70083a3 1675 tags = NEW2 (nsyms + 1, char *);
4a120d45 1676 tags[0] = DOLLAR;
a70083a3 1677 user_toknums = NEW2 (nsyms + 1, short);
943819bf 1678 user_toknums[0] = 0;
1ff442ca 1679
a70083a3
AD
1680 sprec = NEW2 (nsyms, short);
1681 sassoc = NEW2 (nsyms, short);
1ff442ca
NF
1682
1683 max_user_token_number = 256;
1684 last_user_token_number = 256;
1685
1686 for (bp = firstsymbol; bp; bp = bp->next)
1687 {
1688 if (bp->class == SNTERM)
1689 {
1690 bp->value += ntokens;
1691 }
943819bf
RS
1692 else if (bp->alias)
1693 {
0a6384c4
AD
1694 /* this symbol and its alias are a single token defn.
1695 allocate a tokno, and assign to both check agreement of
1696 ->prec and ->assoc fields and make both the same */
1697 if (bp->value == 0)
1698 bp->value = bp->alias->value = tokno++;
943819bf 1699
0a6384c4
AD
1700 if (bp->prec != bp->alias->prec)
1701 {
1702 if (bp->prec != 0 && bp->alias->prec != 0
1703 && bp->user_token_number == SALIAS)
a0f6b076
AD
1704 complain (_("conflicting precedences for %s and %s"),
1705 bp->tag, bp->alias->tag);
0a6384c4
AD
1706 if (bp->prec != 0)
1707 bp->alias->prec = bp->prec;
1708 else
1709 bp->prec = bp->alias->prec;
1710 }
943819bf 1711
0a6384c4
AD
1712 if (bp->assoc != bp->alias->assoc)
1713 {
a0f6b076
AD
1714 if (bp->assoc != 0 && bp->alias->assoc != 0
1715 && bp->user_token_number == SALIAS)
1716 complain (_("conflicting assoc values for %s and %s"),
1717 bp->tag, bp->alias->tag);
1718 if (bp->assoc != 0)
1719 bp->alias->assoc = bp->assoc;
1720 else
1721 bp->assoc = bp->alias->assoc;
1722 }
0a6384c4
AD
1723
1724 if (bp->user_token_number == SALIAS)
a70083a3 1725 continue; /* do not do processing below for SALIASs */
943819bf 1726
a70083a3
AD
1727 }
1728 else /* bp->class == STOKEN */
943819bf
RS
1729 {
1730 bp->value = tokno++;
1731 }
1732
1733 if (bp->class == STOKEN)
1ff442ca
NF
1734 {
1735 if (translations && !(bp->user_token_number))
1736 bp->user_token_number = ++last_user_token_number;
1737 if (bp->user_token_number > max_user_token_number)
1738 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1739 }
1740
1741 tags[bp->value] = bp->tag;
943819bf 1742 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1743 sprec[bp->value] = bp->prec;
1744 sassoc[bp->value] = bp->assoc;
1745
1746 }
1747
1748 if (translations)
1749 {
a70083a3 1750 int j;
1ff442ca 1751
a70083a3 1752 token_translations = NEW2 (max_user_token_number + 1, short);
1ff442ca 1753
0a6384c4 1754 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1755 token number for $undefined., which represents all invalid
1756 inputs. */
4a120d45 1757 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1758 token_translations[j] = 2;
1ff442ca 1759
943819bf 1760 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1761 {
1762 if (bp->value >= ntokens)
1763 continue; /* non-terminal */
1764 if (bp->user_token_number == SALIAS)
0a6384c4 1765 continue;
a70083a3 1766 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1767 complain (_("tokens %s and %s both assigned number %d"),
1768 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1769 bp->tag, bp->user_token_number);
1770 token_translations[bp->user_token_number] = bp->value;
1771 }
1ff442ca
NF
1772 }
1773
1774 error_token_number = errtoken->value;
1775
a70083a3
AD
1776 if (!noparserflag)
1777 output_token_defines (ftable);
1ff442ca
NF
1778
1779 if (startval->class == SUNKNOWN)
a0f6b076 1780 fatal (_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1781 else if (startval->class == STOKEN)
a0f6b076 1782 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1783
1784 start_symbol = startval->value;
1785
1786 if (definesflag)
1787 {
a70083a3 1788 output_token_defines (fdefines);
1ff442ca
NF
1789
1790 if (!pure_parser)
1791 {
1792 if (spec_name_prefix)
a70083a3
AD
1793 fprintf (fdefines, "\nextern YYSTYPE %slval;\n",
1794 spec_name_prefix);
1ff442ca 1795 else
a70083a3 1796 fprintf (fdefines, "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1797 }
1798
1799 if (semantic_parser)
1800 for (i = ntokens; i < nsyms; i++)
1801 {
1802 /* don't make these for dummy nonterminals made by gensym. */
1803 if (*tags[i] != '@')
a70083a3 1804 fprintf (fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1805 }
1806#if 0
1807 /* `fdefines' is now a temporary file, so we need to copy its
1808 contents in `done', so we can't close it here. */
a70083a3 1809 fclose (fdefines);
1ff442ca
NF
1810 fdefines = NULL;
1811#endif
1812 }
1813}
a083fbbf 1814
1ff442ca 1815
a70083a3
AD
1816/*---------------------------------------------------------------.
1817| Convert the rules into the representation using RRHS, RLHS and |
1818| RITEMS. |
1819`---------------------------------------------------------------*/
1ff442ca 1820
4a120d45 1821static void
118fb205 1822packgram (void)
1ff442ca 1823{
a70083a3
AD
1824 int itemno;
1825 int ruleno;
1826 symbol_list *p;
1ff442ca
NF
1827
1828 bucket *ruleprec;
1829
a70083a3
AD
1830 ritem = NEW2 (nitems + 1, short);
1831 rlhs = NEW2 (nrules, short) - 1;
1832 rrhs = NEW2 (nrules, short) - 1;
1833 rprec = NEW2 (nrules, short) - 1;
1834 rprecsym = NEW2 (nrules, short) - 1;
1835 rassoc = NEW2 (nrules, short) - 1;
1ff442ca
NF
1836
1837 itemno = 0;
1838 ruleno = 1;
1839
1840 p = grammar;
1841 while (p)
1842 {
1843 rlhs[ruleno] = p->sym->value;
1844 rrhs[ruleno] = itemno;
1845 ruleprec = p->ruleprec;
1846
1847 p = p->next;
1848 while (p && p->sym)
1849 {
1850 ritem[itemno++] = p->sym->value;
1851 /* A rule gets by default the precedence and associativity
1852 of the last token in it. */
a70083a3 1853 if (p->sym->class == STOKEN)
1ff442ca
NF
1854 {
1855 rprec[ruleno] = p->sym->prec;
1856 rassoc[ruleno] = p->sym->assoc;
1857 }
a70083a3
AD
1858 if (p)
1859 p = p->next;
1ff442ca
NF
1860 }
1861
1862 /* If this rule has a %prec,
a70083a3 1863 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1864 if (ruleprec)
1865 {
a70083a3
AD
1866 rprec[ruleno] = ruleprec->prec;
1867 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1868 rprecsym[ruleno] = ruleprec->value;
1869 }
1870
1871 ritem[itemno++] = -ruleno;
1872 ruleno++;
1873
a70083a3
AD
1874 if (p)
1875 p = p->next;
1ff442ca
NF
1876 }
1877
1878 ritem[itemno] = 0;
1879}
a70083a3
AD
1880\f
1881/*-------------------------------------------------------------------.
1882| Read in the grammar specification and record it in the format |
1883| described in gram.h. All guards are copied into the FGUARD file |
1884| and all actions into FACTION, in each case forming the body of a C |
1885| function (YYGUARD or YYACTION) which contains a switch statement |
1886| to decide which guard or action to execute. |
1887`-------------------------------------------------------------------*/
1888
1889void
1890reader (void)
1891{
1892 start_flag = 0;
1893 startval = NULL; /* start symbol not specified yet. */
1894
1895#if 0
1896 /* initially assume token number translation not needed. */
1897 translations = 0;
1898#endif
1899 /* Nowadays translations is always set to 1, since we give `error' a
1900 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1901 */
1902 translations = 1;
1903
1904 nsyms = 1;
1905 nvars = 0;
1906 nrules = 0;
1907 nitems = 0;
1908 rline_allocated = 10;
1909 rline = NEW2 (rline_allocated, short);
1910
1911 typed = 0;
1912 lastprec = 0;
1913
1914 gensym_count = 0;
1915
1916 semantic_parser = 0;
1917 pure_parser = 0;
1918 yylsp_needed = 0;
1919
1920 grammar = NULL;
1921
1922 init_lex ();
1923 lineno = 1;
1924
1925 /* Initialize the symbol table. */
1926 tabinit ();
1927 /* Construct the error token */
1928 errtoken = getsym ("error");
1929 errtoken->class = STOKEN;
1930 errtoken->user_token_number = 256; /* Value specified by POSIX. */
1931 /* Construct a token that represents all undefined literal tokens.
1932 It is always token number 2. */
1933 undeftoken = getsym ("$undefined.");
1934 undeftoken->class = STOKEN;
1935 undeftoken->user_token_number = 2;
1936
1937 /* Read the declaration section. Copy %{ ... %} groups to FTABLE
1938 and FDEFINES file. Also notice any %token, %left, etc. found
1939 there. */
1940 putc ('\n', ftable);
1941 fprintf (ftable, "\
1942/* %s, made from %s\n\
1943 by GNU bison %s. */\n\
1944\n", noparserflag ? "Bison-generated parse tables" : "A Bison parser", infile, VERSION);
1945
1946 fputs ("#define YYBISON 1 /* Identify Bison output. */\n\n", ftable);
1947 read_declarations ();
1948 /* Start writing the guard and action files, if they are needed. */
1949 output_headers ();
1950 /* Read in the grammar, build grammar in list form. Write out
1951 guards and actions. */
1952 readgram ();
1953 /* Now we know whether we need the line-number stack. If we do,
1954 write its type into the .tab.h file. */
1955 if (fdefines)
1956 reader_output_yylsp (fdefines);
1957 /* Write closing delimiters for actions and guards. */
1958 output_trailers ();
1959 if (yylsp_needed)
1960 fputs ("#define YYLSP_NEEDED\n\n", ftable);
1961 /* Assign the symbols their symbol numbers. Write #defines for the
1962 token symbols into FDEFINES if requested. */
1963 packsymbols ();
1964 /* Convert the grammar into the format described in gram.h. */
1965 packgram ();
1966 /* Free the symbol table data structure since symbols are now all
1967 referred to by symbol number. */
1968 free_symtab ();
1969}
1970
1971void
1972reader_output_yylsp (FILE *f)
1973{
1974 if (yylsp_needed)
1975 fprintf (f, LTYPESTR);
1976}