]> git.saurik.com Git - bison.git/blame - src/reader.c
Gratuitous change to NEWS to test mailout
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
a70083a3
AD
2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000
3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
ceed8467 24#include "getargs.h"
1ff442ca 25#include "files.h"
d7913476 26#include "xalloc.h"
1ff442ca
NF
27#include "symtab.h"
28#include "lex.h"
29#include "gram.h"
a0f6b076 30#include "complain.h"
6c89f1c1 31#include "output.h"
b2ca4022 32#include "reader.h"
340ef489 33#include "conflicts.h"
b2ca4022 34
b2ca4022 35extern char *printable_version PARAMS ((int));
1ff442ca 36
1ff442ca 37/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 38static int rline_allocated;
1ff442ca 39
a70083a3
AD
40typedef struct symbol_list
41{
42 struct symbol_list *next;
43 bucket *sym;
44 bucket *ruleprec;
45}
46symbol_list;
118fb205 47
1ff442ca 48int lineno;
1ff442ca 49char **tags;
d019d655 50short *user_toknums;
4a120d45
JT
51static symbol_list *grammar;
52static int start_flag;
53static bucket *startval;
1ff442ca
NF
54
55/* Nonzero if components of semantic values are used, implying
56 they must be unions. */
57static int value_components_used;
58
d7020c20
AD
59/* Nonzero if %union has been seen. */
60static int typed;
1ff442ca 61
d7020c20
AD
62/* Incremented for each %left, %right or %nonassoc seen */
63static int lastprec;
1ff442ca 64
d7020c20
AD
65/* Incremented for each generated symbol */
66static int gensym_count;
1ff442ca
NF
67
68static bucket *errtoken;
5b2e3c89 69static bucket *undeftoken;
0d533154 70\f
a70083a3 71
0d533154
AD
72/*===================\
73| Low level lexing. |
74\===================*/
943819bf
RS
75
76static void
118fb205 77skip_to_char (int target)
943819bf
RS
78{
79 int c;
80 if (target == '\n')
a0f6b076 81 complain (_(" Skipping to next \\n"));
943819bf 82 else
a0f6b076 83 complain (_(" Skipping to next %c"), target);
943819bf
RS
84
85 do
0d533154 86 c = skip_white_space ();
943819bf 87 while (c != target && c != EOF);
a083fbbf 88 if (c != EOF)
0d533154 89 ungetc (c, finput);
943819bf
RS
90}
91
92
0d533154
AD
/* Read a decimal integer, optionally preceded by `-', from STREAM
   and return its value.  The first character that is not part of
   the number is pushed back onto STREAM.  When no digit is present
   the result is 0.  */

static inline int
read_signed_integer (FILE *stream)
{
  int negative = 0;
  int magnitude = 0;
  int ch = getc (stream);

  if (ch == '-')
    {
      negative = 1;
      ch = getc (stream);
    }

  for (; isdigit (ch); ch = getc (stream))
    magnitude = magnitude * 10 + (ch - '0');

  /* Give back the character that terminated the number.  */
  ungetc (ch, stream);

  return negative ? -magnitude : magnitude;
}
120\f
121/*-------------------------------------------------------------------.
122| Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of |
123| the string (either ' or "). |
124`-------------------------------------------------------------------*/
ae3c3164
AD
125
126static inline void
4a120d45 127copy_string (FILE *fin, FILE *fout, int match)
ae3c3164
AD
128{
129 int c;
130
4a120d45
JT
131 putc (match, fout);
132 c = getc (fin);
ae3c3164
AD
133
134 while (c != match)
135 {
136 if (c == EOF)
137 fatal (_("unterminated string at end of file"));
138 if (c == '\n')
139 {
a0f6b076 140 complain (_("unterminated string"));
4a120d45 141 ungetc (c, fin);
ae3c3164
AD
142 c = match; /* invent terminator */
143 continue;
144 }
145
a70083a3 146 putc (c, fout);
ae3c3164
AD
147
148 if (c == '\\')
149 {
4a120d45 150 c = getc (fin);
ae3c3164
AD
151 if (c == EOF)
152 fatal (_("unterminated string at end of file"));
4a120d45 153 putc (c, fout);
ae3c3164
AD
154 if (c == '\n')
155 lineno++;
156 }
157
a70083a3 158 c = getc (fin);
ae3c3164
AD
159 }
160
a70083a3 161 putc (c, fout);
ae3c3164
AD
162}
163
164
6c89f1c1
AD
165/*---------------------------------------------------------------.
166| Dump the comment from IN to OUT1 and OUT2. C is either `*' or |
167| `/', depending upon the type of comments used. OUT2 might be |
168| NULL. |
169`---------------------------------------------------------------*/
ae3c3164
AD
170
171static inline void
a70083a3 172copy_comment2 (FILE *in, FILE *out1, FILE *out2, int c)
ae3c3164
AD
173{
174 int cplus_comment;
a70083a3 175 int ended;
ae3c3164
AD
176
177 cplus_comment = (c == '/');
27821bff
AD
178 putc (c, out1);
179 if (out2)
180 putc (c, out2);
181 c = getc (in);
ae3c3164
AD
182
183 ended = 0;
184 while (!ended)
185 {
186 if (!cplus_comment && c == '*')
187 {
188 while (c == '*')
189 {
27821bff
AD
190 putc (c, out1);
191 if (out2)
192 putc (c, out2);
193 c = getc (in);
ae3c3164
AD
194 }
195
196 if (c == '/')
197 {
a70083a3 198 putc (c, out1);
27821bff 199 if (out2)
a70083a3 200 putc (c, out2);
ae3c3164
AD
201 ended = 1;
202 }
203 }
204 else if (c == '\n')
205 {
206 lineno++;
27821bff
AD
207 putc (c, out1);
208 if (out2)
209 putc (c, out2);
ae3c3164
AD
210 if (cplus_comment)
211 ended = 1;
212 else
27821bff 213 c = getc (in);
ae3c3164
AD
214 }
215 else if (c == EOF)
216 fatal (_("unterminated comment"));
217 else
218 {
27821bff
AD
219 putc (c, out1);
220 if (out2)
221 putc (c, out2);
222 c = getc (in);
ae3c3164
AD
223 }
224 }
225}
226
227
d019d655
AD
/* Copy a comment from FIN to FOUT.  C is `*' or `/' according to the
   kind of comment.  This is copy_comment2 with no second output
   stream.  */

static inline void
copy_comment (FILE *fin, FILE *fout, int c)
{
  FILE *const no_second_output = NULL;

  copy_comment2 (fin, fout, no_second_output, c);
}
238
239
a70083a3
AD
240/*-----------------------------------------------------------------.
241| FIN is pointing to a location (i.e., a `@'). Output to FOUT a |
242| reference to this location. STACK_OFFSET is the number of values |
243| in the current rule so far, which says where to find `$0' with |
244| respect to the top of the stack. |
245`-----------------------------------------------------------------*/
1ff442ca 246
a70083a3
AD
247static inline void
248copy_at (FILE *fin, FILE *fout, int stack_offset)
1ff442ca 249{
a70083a3 250 int c;
1ff442ca 251
a70083a3
AD
252 c = getc (fin);
253 if (c == '$')
1ff442ca 254 {
a70083a3 255 fprintf (fout, "yyloc");
89cab50d 256 locations_flag = 1;
a70083a3
AD
257 }
258 else if (isdigit (c) || c == '-')
259 {
260 int n;
1ff442ca 261
a70083a3
AD
262 ungetc (c, fin);
263 n = read_signed_integer (fin);
943819bf 264
a70083a3 265 fprintf (fout, "yylsp[%d]", n - stack_offset);
89cab50d 266 locations_flag = 1;
1ff442ca 267 }
a70083a3
AD
268 else
269 complain (_("@%s is invalid"), printable_version (c));
1ff442ca 270}
a70083a3
AD
271\f
272/*-------------------------------------------------------------------.
273| Copy the contents of a `%{ ... %}' into the definitions file. The |
274| `%{' has already been read. Return after reading the `%}'. |
275`-------------------------------------------------------------------*/
1ff442ca 276
4a120d45 277static void
118fb205 278copy_definition (void)
1ff442ca 279{
a70083a3 280 int c;
ae3c3164 281 /* -1 while reading a character if prev char was %. */
a70083a3 282 int after_percent;
1ff442ca 283
89cab50d 284 if (!no_lines_flag)
a70083a3 285 fprintf (fattrs, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
286
287 after_percent = 0;
288
ae3c3164 289 c = getc (finput);
1ff442ca
NF
290
291 for (;;)
292 {
293 switch (c)
294 {
295 case '\n':
a70083a3 296 putc (c, fattrs);
1ff442ca
NF
297 lineno++;
298 break;
299
300 case '%':
a70083a3 301 after_percent = -1;
1ff442ca 302 break;
a083fbbf 303
1ff442ca
NF
304 case '\'':
305 case '"':
ae3c3164 306 copy_string (finput, fattrs, c);
1ff442ca
NF
307 break;
308
309 case '/':
ae3c3164
AD
310 putc (c, fattrs);
311 c = getc (finput);
1ff442ca
NF
312 if (c != '*' && c != '/')
313 continue;
ae3c3164 314 copy_comment (finput, fattrs, c);
1ff442ca
NF
315 break;
316
317 case EOF:
a70083a3 318 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
319
320 default:
a70083a3 321 putc (c, fattrs);
1ff442ca
NF
322 }
323
a70083a3 324 c = getc (finput);
1ff442ca
NF
325
326 if (after_percent)
327 {
328 if (c == '}')
329 return;
a70083a3 330 putc ('%', fattrs);
1ff442ca
NF
331 }
332 after_percent = 0;
333
334 }
335
336}
337
338
d7020c20
AD
339/*-------------------------------------------------------------------.
340| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
341| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
342| are reversed. |
343`-------------------------------------------------------------------*/
1ff442ca 344
4a120d45 345static void
d7020c20 346parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 347{
a70083a3
AD
348 int token = 0;
349 char *typename = 0;
350 struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca 351
1ff442ca
NF
352 for (;;)
353 {
e6011337
JT
354 int tmp_char = ungetc (skip_white_space (), finput);
355
356 if (tmp_char == '%')
1ff442ca 357 return;
e6011337 358 if (tmp_char == EOF)
a0f6b076 359 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 360
a70083a3 361 token = lex ();
1ff442ca 362 if (token == COMMA)
943819bf
RS
363 {
364 symbol = NULL;
365 continue;
366 }
1ff442ca
NF
367 if (token == TYPENAME)
368 {
95e36146 369 typename = xstrdup (token_buffer);
1ff442ca 370 value_components_used = 1;
943819bf
RS
371 symbol = NULL;
372 }
a70083a3 373 else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
943819bf
RS
374 {
375 translations = 1;
d7020c20 376 symval->class = token_sym;
943819bf
RS
377 symval->type_name = typename;
378 symval->user_token_number = symbol->user_token_number;
379 symbol->user_token_number = SALIAS;
380
a083fbbf
RS
381 symval->alias = symbol;
382 symbol->alias = symval;
943819bf
RS
383 symbol = NULL;
384
a70083a3 385 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
386 }
387 else if (token == IDENTIFIER)
388 {
389 int oldclass = symval->class;
943819bf 390 symbol = symval;
1ff442ca 391
943819bf 392 if (symbol->class == what_is_not)
a0f6b076 393 complain (_("symbol %s redefined"), symbol->tag);
943819bf 394 symbol->class = what_is;
d7020c20 395 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 396 symbol->value = nvars++;
1ff442ca
NF
397
398 if (typename)
399 {
943819bf
RS
400 if (symbol->type_name == NULL)
401 symbol->type_name = typename;
a70083a3 402 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 403 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
404 }
405 }
943819bf 406 else if (symbol && token == NUMBER)
a70083a3 407 {
943819bf 408 symbol->user_token_number = numval;
1ff442ca 409 translations = 1;
a70083a3 410 }
1ff442ca 411 else
943819bf 412 {
a0f6b076 413 complain (_("`%s' is invalid in %s"),
d7020c20 414 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 415 skip_to_char ('%');
943819bf 416 }
1ff442ca
NF
417 }
418
419}
420
1ff442ca 421
d7020c20
AD
422/*------------------------------.
423| Parse what comes after %start |
424`------------------------------*/
1ff442ca 425
4a120d45 426static void
118fb205 427parse_start_decl (void)
1ff442ca
NF
428{
429 if (start_flag)
27821bff
AD
430 complain (_("multiple %s declarations"), "%start");
431 if (lex () != IDENTIFIER)
432 complain (_("invalid %s declaration"), "%start");
943819bf
RS
433 else
434 {
435 start_flag = 1;
436 startval = symval;
437 }
1ff442ca
NF
438}
439
440
441
a70083a3
AD
442/*--------------------------------------------------------------.
443| Get the data type (alternative in the union) of the value for |
444| symbol n in rule rule. |
445`--------------------------------------------------------------*/
1ff442ca 446
a70083a3
AD
447static char *
448get_type_name (int n, symbol_list * rule)
1ff442ca 449{
a70083a3
AD
450 int i;
451 symbol_list *rp;
1ff442ca 452
a70083a3 453 if (n < 0)
943819bf 454 {
a70083a3
AD
455 complain (_("invalid $ value"));
456 return NULL;
943819bf 457 }
1ff442ca 458
a70083a3
AD
459 rp = rule;
460 i = 0;
1ff442ca 461
a70083a3 462 while (i < n)
1ff442ca 463 {
a70083a3
AD
464 rp = rp->next;
465 if (rp == NULL || rp->sym == NULL)
466 {
467 complain (_("invalid $ value"));
468 return NULL;
469 }
470 i++;
471 }
472
473 return rp->sym->type_name;
474}
475
476
477/*-----------------------------------------------------------.
478| read in a %type declaration and record its information for |
479| get_type_name to access |
480`-----------------------------------------------------------*/
481
482static void
483parse_type_decl (void)
484{
a70083a3
AD
485 char *name;
486
487 if (lex () != TYPENAME)
488 {
489 complain ("%s", _("%type declaration has no <typename>"));
490 skip_to_char ('%');
491 return;
492 }
493
95e36146 494 name = xstrdup (token_buffer);
a70083a3
AD
495
496 for (;;)
497 {
498 int t;
499 int tmp_char = ungetc (skip_white_space (), finput);
500
501 if (tmp_char == '%')
502 return;
503 if (tmp_char == EOF)
504 fatal (_("Premature EOF after %s"), token_buffer);
505
506 t = lex ();
507
508 switch (t)
1ff442ca
NF
509 {
510
511 case COMMA:
512 case SEMICOLON:
513 break;
514
515 case IDENTIFIER:
516 if (symval->type_name == NULL)
517 symval->type_name = name;
a70083a3 518 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 519 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
520
521 break;
522
523 default:
a0f6b076
AD
524 complain (_("invalid %%type declaration due to item: %s"),
525 token_buffer);
a70083a3 526 skip_to_char ('%');
1ff442ca
NF
527 }
528 }
529}
530
531
532
d7020c20
AD
533/*----------------------------------------------------------------.
534| Read in a %left, %right or %nonassoc declaration and record its |
535| information. |
536`----------------------------------------------------------------*/
1ff442ca 537
4a120d45 538static void
d7020c20 539parse_assoc_decl (associativity assoc)
1ff442ca 540{
a70083a3
AD
541 char *name = NULL;
542 int prev = 0;
1ff442ca 543
a70083a3 544 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 545
1ff442ca
NF
546 for (;;)
547 {
a70083a3 548 int t;
e6011337 549 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 550
e6011337 551 if (tmp_char == '%')
1ff442ca 552 return;
e6011337 553 if (tmp_char == EOF)
a0f6b076 554 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 555
a70083a3 556 t = lex ();
1ff442ca
NF
557
558 switch (t)
559 {
1ff442ca 560 case TYPENAME:
95e36146 561 name = xstrdup (token_buffer);
1ff442ca
NF
562 break;
563
564 case COMMA:
565 break;
566
567 case IDENTIFIER:
568 if (symval->prec != 0)
a0f6b076 569 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
570 symval->prec = lastprec;
571 symval->assoc = assoc;
d7020c20 572 if (symval->class == nterm_sym)
a0f6b076 573 complain (_("symbol %s redefined"), symval->tag);
d7020c20 574 symval->class = token_sym;
1ff442ca 575 if (name)
a70083a3 576 { /* record the type, if one is specified */
1ff442ca
NF
577 if (symval->type_name == NULL)
578 symval->type_name = name;
a70083a3 579 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 580 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
581 }
582 break;
583
584 case NUMBER:
585 if (prev == IDENTIFIER)
a70083a3 586 {
1ff442ca
NF
587 symval->user_token_number = numval;
588 translations = 1;
a70083a3
AD
589 }
590 else
591 {
592 complain (_
593 ("invalid text (%s) - number should be after identifier"),
594token_buffer);
595 skip_to_char ('%');
596 }
1ff442ca
NF
597 break;
598
599 case SEMICOLON:
600 return;
601
602 default:
a0f6b076 603 complain (_("unexpected item: %s"), token_buffer);
a70083a3 604 skip_to_char ('%');
1ff442ca
NF
605 }
606
607 prev = t;
608
609 }
610}
611
612
613
d7020c20
AD
614/*-------------------------------------------------------------------.
615| Copy the union declaration into fattrs (and fdefines), where it is |
616| made into the definition of YYSTYPE, the type of elements of the |
617| parser value stack. |
618`-------------------------------------------------------------------*/
1ff442ca 619
4a120d45 620static void
118fb205 621parse_union_decl (void)
1ff442ca 622{
a70083a3
AD
623 int c;
624 int count = 0;
1ff442ca
NF
625
626 if (typed)
27821bff 627 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
628
629 typed = 1;
630
89cab50d 631 if (!no_lines_flag)
27821bff 632 fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
1ff442ca 633 else
27821bff 634 fprintf (fattrs, "\n");
1ff442ca 635
27821bff 636 fprintf (fattrs, "typedef union");
1ff442ca 637 if (fdefines)
27821bff 638 fprintf (fdefines, "typedef union");
1ff442ca 639
27821bff 640 c = getc (finput);
1ff442ca
NF
641
642 while (c != EOF)
643 {
27821bff 644 putc (c, fattrs);
1ff442ca 645 if (fdefines)
27821bff 646 putc (c, fdefines);
1ff442ca
NF
647
648 switch (c)
649 {
650 case '\n':
651 lineno++;
652 break;
653
654 case '/':
27821bff 655 c = getc (finput);
1ff442ca 656 if (c != '*' && c != '/')
27821bff
AD
657 continue;
658 copy_comment2 (finput, fattrs, fdefines, c);
1ff442ca
NF
659 break;
660
661
662 case '{':
663 count++;
664 break;
665
666 case '}':
667 if (count == 0)
27821bff 668 complain (_("unmatched %s"), "`}'");
1ff442ca 669 count--;
943819bf 670 if (count <= 0)
1ff442ca 671 {
27821bff 672 fprintf (fattrs, " YYSTYPE;\n");
1ff442ca 673 if (fdefines)
27821bff 674 fprintf (fdefines, " YYSTYPE;\n");
1ff442ca 675 /* JF don't choke on trailing semi */
27821bff
AD
676 c = skip_white_space ();
677 if (c != ';')
a70083a3 678 ungetc (c, finput);
1ff442ca
NF
679 return;
680 }
681 }
682
27821bff 683 c = getc (finput);
1ff442ca
NF
684 }
685}
686
d7020c20
AD
687
688/*-------------------------------------------------------.
689| Parse the declaration %expect N which says to expect N |
690| shift-reduce conflicts. |
691`-------------------------------------------------------*/
1ff442ca 692
4a120d45 693static void
118fb205 694parse_expect_decl (void)
1ff442ca 695{
a70083a3
AD
696 int c;
697 int count;
1ff442ca
NF
698 char buffer[20];
699
a70083a3 700 c = getc (finput);
1ff442ca 701 while (c == ' ' || c == '\t')
a70083a3 702 c = getc (finput);
1ff442ca
NF
703
704 count = 0;
705 while (c >= '0' && c <= '9')
706 {
707 if (count < 20)
708 buffer[count++] = c;
a70083a3 709 c = getc (finput);
1ff442ca
NF
710 }
711 buffer[count] = 0;
712
713 ungetc (c, finput);
714
943819bf 715 if (count <= 0 || count > 10)
a0f6b076 716 complain ("%s", _("argument of %expect is not an integer"));
1ff442ca
NF
717 expected_conflicts = atoi (buffer);
718}
719
a70083a3
AD
720
721/*-------------------------------------------------------------------.
722| Parse what comes after %thong. the full syntax is |
723| |
724| %thong <type> token number literal |
725| |
726| the <type> or number may be omitted. The number specifies the |
727| user_token_number. |
728| |
729| Two symbols are entered in the table, one for the token symbol and |
730| one for the literal. Both are given the <type>, if any, from the |
731| declaration. The ->user_token_number of the first is SALIAS and |
732| the ->user_token_number of the second is set to the number, if |
733| any, from the declaration. The two symbols are linked via |
734| pointers in their ->alias fields. |
735| |
736| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
737| only the literal string is retained it is the literal string that |
738| is output to yytname |
739`-------------------------------------------------------------------*/
740
741static void
742parse_thong_decl (void)
7b306f52 743{
a70083a3
AD
744 int token;
745 struct bucket *symbol;
746 char *typename = 0;
95e36146 747 int usrtoknum;
7b306f52 748
a70083a3
AD
749 translations = 1;
750 token = lex (); /* fetch typename or first token */
751 if (token == TYPENAME)
7b306f52 752 {
95e36146 753 typename = xstrdup (token_buffer);
a70083a3
AD
754 value_components_used = 1;
755 token = lex (); /* fetch first token */
7b306f52 756 }
7b306f52 757
a70083a3 758 /* process first token */
7b306f52 759
a70083a3
AD
760 if (token != IDENTIFIER)
761 {
762 complain (_("unrecognized item %s, expected an identifier"),
763 token_buffer);
764 skip_to_char ('%');
765 return;
7b306f52 766 }
d7020c20 767 symval->class = token_sym;
a70083a3
AD
768 symval->type_name = typename;
769 symval->user_token_number = SALIAS;
770 symbol = symval;
7b306f52 771
a70083a3 772 token = lex (); /* get number or literal string */
1ff442ca 773
a70083a3 774 if (token == NUMBER)
943819bf 775 {
a70083a3
AD
776 usrtoknum = numval;
777 token = lex (); /* okay, did number, now get literal */
943819bf 778 }
a70083a3
AD
779 else
780 usrtoknum = 0;
1ff442ca 781
a70083a3 782 /* process literal string token */
1ff442ca 783
a70083a3 784 if (token != IDENTIFIER || *symval->tag != '\"')
1ff442ca 785 {
a70083a3
AD
786 complain (_("expected string constant instead of %s"), token_buffer);
787 skip_to_char ('%');
788 return;
1ff442ca 789 }
d7020c20 790 symval->class = token_sym;
a70083a3
AD
791 symval->type_name = typename;
792 symval->user_token_number = usrtoknum;
1ff442ca 793
a70083a3
AD
794 symval->alias = symbol;
795 symbol->alias = symval;
1ff442ca 796
a70083a3
AD
797 nsyms--; /* symbol and symval combined are only one symbol */
798}
3cef001a 799
d7020c20 800
a70083a3
AD
801/*----------------------------------------------------------------.
802| Read from finput until `%%' is seen. Discard the `%%'. Handle |
803| any `%' declarations, and copy the contents of any `%{ ... %}' |
804| groups to fattrs. |
805`----------------------------------------------------------------*/
1ff442ca 806
4a120d45 807static void
a70083a3 808read_declarations (void)
1ff442ca 809{
a70083a3
AD
810 int c;
811 int tok;
1ff442ca 812
a70083a3 813 for (;;)
1ff442ca 814 {
a70083a3 815 c = skip_white_space ();
1ff442ca 816
a70083a3
AD
817 if (c == '%')
818 {
819 tok = parse_percent_token ();
1ff442ca 820
a70083a3 821 switch (tok)
943819bf 822 {
a70083a3
AD
823 case TWO_PERCENTS:
824 return;
1ff442ca 825
a70083a3
AD
826 case PERCENT_LEFT_CURLY:
827 copy_definition ();
828 break;
1ff442ca 829
a70083a3 830 case TOKEN:
d7020c20 831 parse_token_decl (token_sym, nterm_sym);
a70083a3 832 break;
1ff442ca 833
a70083a3 834 case NTERM:
d7020c20 835 parse_token_decl (nterm_sym, token_sym);
a70083a3 836 break;
1ff442ca 837
a70083a3
AD
838 case TYPE:
839 parse_type_decl ();
840 break;
1ff442ca 841
a70083a3
AD
842 case START:
843 parse_start_decl ();
844 break;
118fb205 845
a70083a3
AD
846 case UNION:
847 parse_union_decl ();
848 break;
1ff442ca 849
a70083a3
AD
850 case EXPECT:
851 parse_expect_decl ();
852 break;
853 case THONG:
854 parse_thong_decl ();
855 break;
d7020c20 856
a70083a3 857 case LEFT:
d7020c20 858 parse_assoc_decl (left_assoc);
a70083a3 859 break;
1ff442ca 860
a70083a3 861 case RIGHT:
d7020c20 862 parse_assoc_decl (right_assoc);
a70083a3 863 break;
1ff442ca 864
a70083a3 865 case NONASSOC:
d7020c20 866 parse_assoc_decl (non_assoc);
a70083a3 867 break;
1ff442ca 868
a70083a3
AD
869 case SEMANTIC_PARSER:
870 if (semantic_parser == 0)
871 {
872 semantic_parser = 1;
873 open_extra_files ();
874 }
875 break;
1ff442ca 876
a70083a3
AD
877 case PURE_PARSER:
878 pure_parser = 1;
879 break;
1ff442ca 880
a70083a3
AD
881 case NOOP:
882 break;
1ff442ca 883
a70083a3
AD
884 default:
885 complain (_("unrecognized: %s"), token_buffer);
886 skip_to_char ('%');
887 }
888 }
889 else if (c == EOF)
890 fatal (_("no input grammar"));
891 else
892 {
893 complain (_("unknown character: %s"), printable_version (c));
894 skip_to_char ('%');
1ff442ca 895 }
1ff442ca 896 }
1ff442ca 897}
a70083a3
AD
898\f
899/*-------------------------------------------------------------------.
900| Assuming that a `{' has just been seen, copy everything up to the |
901| matching `}' into the actions file. STACK_OFFSET is the number of |
902| values in the current rule so far, which says where to find `$0' |
903| with respect to the top of the stack. |
904`-------------------------------------------------------------------*/
1ff442ca 905
4a120d45 906static void
a70083a3 907copy_action (symbol_list * rule, int stack_offset)
1ff442ca 908{
a70083a3
AD
909 int c;
910 int n;
911 int count;
912 char *type_name;
1ff442ca
NF
913
914 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
915 if (semantic_parser)
916 stack_offset = 0;
1ff442ca 917
41aca2e0 918 fprintf (faction, "\ncase %d:\n", nrules);
89cab50d 919 if (!no_lines_flag)
41aca2e0
AD
920 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
921 putc ('{', faction);
1ff442ca
NF
922
923 count = 1;
a70083a3 924 c = getc (finput);
1ff442ca
NF
925
926 while (count > 0)
927 {
928 while (c != '}')
a70083a3
AD
929 {
930 switch (c)
1ff442ca
NF
931 {
932 case '\n':
a70083a3 933 putc (c, faction);
1ff442ca
NF
934 lineno++;
935 break;
936
937 case '{':
a70083a3 938 putc (c, faction);
1ff442ca
NF
939 count++;
940 break;
941
942 case '\'':
943 case '"':
ca36d2ef 944 copy_string (finput, faction, c);
1ff442ca
NF
945 break;
946
947 case '/':
27821bff
AD
948 putc (c, faction);
949 c = getc (finput);
1ff442ca
NF
950 if (c != '*' && c != '/')
951 continue;
3cef001a 952 copy_comment (finput, faction, c);
1ff442ca
NF
953 break;
954
955 case '$':
a70083a3 956 c = getc (finput);
1ff442ca
NF
957 type_name = NULL;
958
959 if (c == '<')
960 {
a70083a3 961 char *cp = token_buffer;
1ff442ca 962
a70083a3 963 while ((c = getc (finput)) != '>' && c > 0)
118fb205
JT
964 {
965 if (cp == token_buffer + maxtoken)
a70083a3 966 cp = grow_token_buffer (cp);
118fb205
JT
967
968 *cp++ = c;
969 }
1ff442ca
NF
970 *cp = 0;
971 type_name = token_buffer;
972 value_components_used = 1;
973
a70083a3 974 c = getc (finput);
1ff442ca
NF
975 }
976 if (c == '$')
977 {
a70083a3 978 fprintf (faction, "yyval");
41aca2e0 979 if (!type_name)
a70083a3 980 type_name = get_type_name (0, rule);
1ff442ca 981 if (type_name)
a70083a3
AD
982 fprintf (faction, ".%s", type_name);
983 if (!type_name && typed)
a0f6b076
AD
984 complain (_("$$ of `%s' has no declared type"),
985 rule->sym->tag);
1ff442ca 986 }
a70083a3 987 else if (isdigit (c) || c == '-')
1ff442ca
NF
988 {
989 ungetc (c, finput);
a70083a3
AD
990 n = read_signed_integer (finput);
991 c = getc (finput);
1ff442ca
NF
992
993 if (!type_name && n > 0)
a70083a3 994 type_name = get_type_name (n, rule);
1ff442ca 995
a70083a3 996 fprintf (faction, "yyvsp[%d]", n - stack_offset);
1ff442ca 997 if (type_name)
a70083a3
AD
998 fprintf (faction, ".%s", type_name);
999 if (!type_name && typed)
a0f6b076
AD
1000 complain (_("$%d of `%s' has no declared type"),
1001 n, rule->sym->tag);
1ff442ca
NF
1002 continue;
1003 }
1004 else
a0f6b076 1005 complain (_("$%s is invalid"), printable_version (c));
1ff442ca
NF
1006
1007 break;
1008
1009 case '@':
7b306f52 1010 copy_at (finput, faction, stack_offset);
6666f98f 1011 break;
1ff442ca
NF
1012
1013 case EOF:
27821bff 1014 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1015
1016 default:
a70083a3
AD
1017 putc (c, faction);
1018 }
1019
1020 c = getc (finput);
1021 }
1022
1023 /* above loop exits when c is '}' */
1024
1025 if (--count)
1026 {
1027 putc (c, faction);
1028 c = getc (finput);
1029 }
1030 }
1031
1032 fprintf (faction, ";\n break;}");
1033}
1034\f
1035/*-------------------------------------------------------------------.
1036| After `%guard' is seen in the input file, copy the actual guard |
1037| into the guards file. If the guard is followed by an action, copy |
1038| that into the actions file. STACK_OFFSET is the number of values |
1039| in the current rule so far, which says where to find `$0' with |
1040| respect to the top of the stack, for the simple parser in which |
1041| the stack is not popped until after the guard is run. |
1042`-------------------------------------------------------------------*/
1043
1044static void
1045copy_guard (symbol_list * rule, int stack_offset)
1046{
1047 int c;
1048 int n;
1049 int count;
1050 char *type_name;
1051 int brace_flag = 0;
1052
1053 /* offset is always 0 if parser has already popped the stack pointer */
1054 if (semantic_parser)
1055 stack_offset = 0;
1056
1057 fprintf (fguard, "\ncase %d:\n", nrules);
89cab50d 1058 if (!no_lines_flag)
a70083a3
AD
1059 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1060 putc ('{', fguard);
1061
1062 count = 0;
1063 c = getc (finput);
1064
1065 while (brace_flag ? (count > 0) : (c != ';'))
1066 {
1067 switch (c)
1068 {
1069 case '\n':
1070 putc (c, fguard);
1071 lineno++;
1072 break;
1073
1074 case '{':
1075 putc (c, fguard);
1076 brace_flag = 1;
1077 count++;
1078 break;
1079
1080 case '}':
1081 putc (c, fguard);
1082 if (count > 0)
1083 count--;
1084 else
1085 {
1086 complain (_("unmatched %s"), "`}'");
1087 c = getc (finput); /* skip it */
1088 }
1089 break;
1090
1091 case '\'':
1092 case '"':
1093 copy_string (finput, fguard, c);
1094 break;
1095
1096 case '/':
1097 putc (c, fguard);
1098 c = getc (finput);
1099 if (c != '*' && c != '/')
1100 continue;
1101 copy_comment (finput, fguard, c);
1102 break;
1103
1104 case '$':
1105 c = getc (finput);
1106 type_name = NULL;
1107
1108 if (c == '<')
1109 {
1110 char *cp = token_buffer;
1111
1112 while ((c = getc (finput)) != '>' && c > 0)
1113 {
1114 if (cp == token_buffer + maxtoken)
1115 cp = grow_token_buffer (cp);
1116
1117 *cp++ = c;
1118 }
1119 *cp = 0;
1120 type_name = token_buffer;
1121
1122 c = getc (finput);
1123 }
1124
1125 if (c == '$')
1126 {
1127 fprintf (fguard, "yyval");
1128 if (!type_name)
1129 type_name = rule->sym->type_name;
1130 if (type_name)
1131 fprintf (fguard, ".%s", type_name);
1132 if (!type_name && typed)
1133 complain (_("$$ of `%s' has no declared type"),
1134 rule->sym->tag);
1135 }
1136 else if (isdigit (c) || c == '-')
1137 {
1138 ungetc (c, finput);
1139 n = read_signed_integer (finput);
1140 c = getc (finput);
1141
1142 if (!type_name && n > 0)
1143 type_name = get_type_name (n, rule);
1144
1145 fprintf (fguard, "yyvsp[%d]", n - stack_offset);
1146 if (type_name)
1147 fprintf (fguard, ".%s", type_name);
1148 if (!type_name && typed)
1149 complain (_("$%d of `%s' has no declared type"),
1150 n, rule->sym->tag);
1151 continue;
1ff442ca 1152 }
a70083a3
AD
1153 else
1154 complain (_("$%s is invalid"), printable_version (c));
1155 break;
1ff442ca 1156
a70083a3
AD
1157 case '@':
1158 copy_at (finput, fguard, stack_offset);
1159 break;
1ff442ca 1160
a70083a3
AD
1161 case EOF:
1162 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1163
a70083a3
AD
1164 default:
1165 putc (c, fguard);
1ff442ca 1166 }
a70083a3
AD
1167
1168 if (c != '}' || count != 0)
1169 c = getc (finput);
1ff442ca
NF
1170 }
1171
a70083a3
AD
1172 c = skip_white_space ();
1173
1174 fprintf (fguard, ";\n break;}");
1175 if (c == '{')
1176 copy_action (rule, stack_offset);
1177 else if (c == '=')
1178 {
1179 c = getc (finput); /* why not skip_white_space -wjh */
1180 if (c == '{')
1181 copy_action (rule, stack_offset);
1182 }
1183 else
1184 ungetc (c, finput);
1ff442ca 1185}
a70083a3
AD
1186\f
1187
1188static void
1189record_rule_line (void)
1190{
1191 /* Record each rule's source line number in rline table. */
1ff442ca 1192
a70083a3
AD
1193 if (nrules >= rline_allocated)
1194 {
1195 rline_allocated = nrules * 2;
d7913476 1196 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1197 }
1198 rline[nrules] = lineno;
1199}
1ff442ca
NF
1200
1201
a70083a3
AD
1202/*-------------------------------------------------------------------.
1203| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1204| with the user's names. |
1205`-------------------------------------------------------------------*/
1ff442ca 1206
4a120d45 1207static bucket *
118fb205 1208gensym (void)
1ff442ca 1209{
a70083a3 1210 bucket *sym;
1ff442ca
NF
1211
1212 sprintf (token_buffer, "@%d", ++gensym_count);
a70083a3 1213 sym = getsym (token_buffer);
d7020c20 1214 sym->class = nterm_sym;
1ff442ca 1215 sym->value = nvars++;
36281465 1216 return sym;
1ff442ca
NF
1217}
1218
#if 0
/*------------------------------------------------------------------.
| Read a %type declaration and store its type name in each symbol   |
| it lists, where get_type_name can find it.  Return the token that |
| ended the declaration.  This is unused: it is only called from    |
| the #if 0 part of readgram.                                       |
`------------------------------------------------------------------*/

static int
get_type (void)
{
  char *type;
  int tok = lex ();

  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  type = xstrdup (token_buffer);

  while (1)
    {
      tok = lex ();

      /* A semicolon ends the declaration; hand back what follows it.  */
      if (tok == SEMICOLON)
	return lex ();

      /* Commas between symbols are simply skipped.  */
      if (tok == COMMA)
	continue;

      /* Anything that is not a symbol name ends the declaration.  */
      if (tok != IDENTIFIER)
	return tok;

      if (!symval->type_name)
	symval->type_name = type;
      else if (strcmp (type, symval->type_name))
	complain (_("type redeclaration for %s"), symval->tag);
    }
}

#endif
1270\f
1271/*------------------------------------------------------------------.
1272| Parse the input grammar into a one symbol_list structure. Each |
1273| rule is represented by a sequence of symbols: the left hand side |
1274| followed by the contents of the right hand side, followed by a |
1275| null pointer instead of a symbol to terminate the rule. The next |
1276| symbol is the lhs of the following rule. |
1277| |
1278| All guards and actions are copied out to the appropriate files, |
1279| labelled by the rule number they apply to. |
1280`------------------------------------------------------------------*/
1ff442ca 1281
4a120d45 1282static void
118fb205 1283readgram (void)
1ff442ca 1284{
a70083a3
AD
1285 int t;
1286 bucket *lhs = NULL;
1287 symbol_list *p;
1288 symbol_list *p1;
1289 bucket *bp;
1ff442ca 1290
a70083a3
AD
1291 symbol_list *crule; /* points to first symbol_list of current rule. */
1292 /* its symbol is the lhs of the rule. */
1293 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1ff442ca
NF
1294
1295 p1 = NULL;
1296
a70083a3 1297 t = lex ();
1ff442ca
NF
1298
1299 while (t != TWO_PERCENTS && t != ENDFILE)
1300 {
1301 if (t == IDENTIFIER || t == BAR)
1302 {
89cab50d 1303 int action_flag = 0;
a70083a3 1304 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1ff442ca
NF
1305 int xactions = 0; /* JF for error checking */
1306 bucket *first_rhs = 0;
1307
1308 if (t == IDENTIFIER)
1309 {
1310 lhs = symval;
943819bf
RS
1311
1312 if (!start_flag)
1313 {
1314 startval = lhs;
1315 start_flag = 1;
1316 }
a083fbbf 1317
a70083a3 1318 t = lex ();
1ff442ca 1319 if (t != COLON)
943819bf 1320 {
a0f6b076 1321 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1322 unlex (t);
943819bf 1323 }
1ff442ca
NF
1324 }
1325
943819bf 1326 if (nrules == 0 && t == BAR)
1ff442ca 1327 {
a0f6b076 1328 complain (_("grammar starts with vertical bar"));
943819bf 1329 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1330 }
1ff442ca
NF
1331 /* start a new rule and record its lhs. */
1332
1333 nrules++;
1334 nitems++;
1335
1336 record_rule_line ();
1337
d7913476 1338 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1339 p->sym = lhs;
1340
1341 crule1 = p1;
1342 if (p1)
1343 p1->next = p;
1344 else
1345 grammar = p;
1346
1347 p1 = p;
1348 crule = p;
1349
1350 /* mark the rule's lhs as a nonterminal if not already so. */
1351
d7020c20 1352 if (lhs->class == unknown_sym)
1ff442ca 1353 {
d7020c20 1354 lhs->class = nterm_sym;
1ff442ca
NF
1355 lhs->value = nvars;
1356 nvars++;
1357 }
d7020c20 1358 else if (lhs->class == token_sym)
a0f6b076 1359 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1360
1361 /* read the rhs of the rule. */
1362
1363 for (;;)
1364 {
a70083a3 1365 t = lex ();
943819bf
RS
1366 if (t == PREC)
1367 {
a70083a3 1368 t = lex ();
943819bf 1369 crule->ruleprec = symval;
a70083a3 1370 t = lex ();
943819bf 1371 }
1ff442ca 1372
a70083a3
AD
1373 if (!(t == IDENTIFIER || t == LEFT_CURLY))
1374 break;
1ff442ca
NF
1375
1376 /* If next token is an identifier, see if a colon follows it.
a70083a3 1377 If one does, exit this rule now. */
1ff442ca
NF
1378 if (t == IDENTIFIER)
1379 {
a70083a3
AD
1380 bucket *ssave;
1381 int t1;
1ff442ca
NF
1382
1383 ssave = symval;
a70083a3
AD
1384 t1 = lex ();
1385 unlex (t1);
1ff442ca 1386 symval = ssave;
a70083a3
AD
1387 if (t1 == COLON)
1388 break;
1ff442ca 1389
a70083a3 1390 if (!first_rhs) /* JF */
1ff442ca
NF
1391 first_rhs = symval;
1392 /* Not followed by colon =>
1393 process as part of this rule's rhs. */
1394 }
1395
1396 /* If we just passed an action, that action was in the middle
a70083a3
AD
1397 of a rule, so make a dummy rule to reduce it to a
1398 non-terminal. */
89cab50d 1399 if (action_flag)
1ff442ca 1400 {
a70083a3 1401 bucket *sdummy;
1ff442ca
NF
1402
1403 /* Since the action was written out with this rule's */
943819bf 1404 /* number, we must give the new rule this number */
1ff442ca
NF
1405 /* by inserting the new rule before it. */
1406
1407 /* Make a dummy nonterminal, a gensym. */
a70083a3 1408 sdummy = gensym ();
1ff442ca
NF
1409
1410 /* Make a new rule, whose body is empty,
1411 before the current one, so that the action
1412 just read can belong to it. */
1413 nrules++;
1414 nitems++;
1415 record_rule_line ();
d7913476 1416 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1417 if (crule1)
1418 crule1->next = p;
a70083a3
AD
1419 else
1420 grammar = p;
1ff442ca 1421 p->sym = sdummy;
d7913476 1422 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1423 p->next = crule1;
1424 crule1->next = crule;
1425
1426 /* insert the dummy generated by that rule into this rule. */
1427 nitems++;
d7913476 1428 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1429 p->sym = sdummy;
1430 p1->next = p;
1431 p1 = p;
1432
89cab50d 1433 action_flag = 0;
1ff442ca
NF
1434 }
1435
1436 if (t == IDENTIFIER)
1437 {
1438 nitems++;
d7913476 1439 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1440 p->sym = symval;
1441 p1->next = p;
1442 p1 = p;
1443 }
a70083a3 1444 else /* handle an action. */
1ff442ca 1445 {
a70083a3 1446 copy_action (crule, rulelength);
89cab50d 1447 action_flag = 1;
1ff442ca
NF
1448 xactions++; /* JF */
1449 }
1450 rulelength++;
a70083a3 1451 } /* end of read rhs of rule */
1ff442ca
NF
1452
1453 /* Put an empty link in the list to mark the end of this rule */
d7913476 1454 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1455 p1->next = p;
1456 p1 = p;
1457
1458 if (t == PREC)
1459 {
a0f6b076 1460 complain (_("two @prec's in a row"));
a70083a3 1461 t = lex ();
1ff442ca 1462 crule->ruleprec = symval;
a70083a3 1463 t = lex ();
1ff442ca
NF
1464 }
1465 if (t == GUARD)
1466 {
a70083a3 1467 if (!semantic_parser)
a0f6b076 1468 complain ("%s",
a70083a3
AD
1469 _
1470 ("%guard present but %semantic_parser not specified"));
1ff442ca 1471
a70083a3
AD
1472 copy_guard (crule, rulelength);
1473 t = lex ();
1ff442ca
NF
1474 }
1475 else if (t == LEFT_CURLY)
1476 {
a70083a3 1477 /* This case never occurs -wjh */
89cab50d 1478 if (action_flag)
a0f6b076 1479 complain (_("two actions at end of one rule"));
a70083a3 1480 copy_action (crule, rulelength);
89cab50d 1481 action_flag = 1;
943819bf 1482 xactions++; /* -wjh */
a70083a3 1483 t = lex ();
1ff442ca 1484 }
a0f6b076 1485 /* If $$ is being set in default way, report if any type
6666f98f
AD
1486 mismatch. */
1487 else if (!xactions
a70083a3 1488 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1489 {
6666f98f
AD
1490 if (lhs->type_name == 0
1491 || first_rhs->type_name == 0
a70083a3 1492 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1493 complain (_("type clash (`%s' `%s') on default action"),
1494 lhs->type_name ? lhs->type_name : "",
a70083a3 1495 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1496 }
1497 /* Warn if there is no default for $$ but we need one. */
1498 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1499 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca 1500 if (t == SEMICOLON)
a70083a3 1501 t = lex ();
a083fbbf 1502 }
943819bf 1503#if 0
a70083a3 1504 /* these things can appear as alternatives to rules. */
943819bf
RS
1505/* NO, they cannot.
1506 a) none of the documentation allows them
1507 b) most of them scan forward until finding a next %
1508 thus they may swallow lots of intervening rules
1509*/
1ff442ca
NF
1510 else if (t == TOKEN)
1511 {
d7020c20 1512 parse_token_decl (token_sym, nterm_sym);
a70083a3 1513 t = lex ();
1ff442ca
NF
1514 }
1515 else if (t == NTERM)
1516 {
d7020c20 1517 parse_token_decl (nterm_sym, token_sym);
a70083a3 1518 t = lex ();
1ff442ca
NF
1519 }
1520 else if (t == TYPE)
1521 {
a70083a3 1522 t = get_type ();
1ff442ca
NF
1523 }
1524 else if (t == UNION)
1525 {
a70083a3
AD
1526 parse_union_decl ();
1527 t = lex ();
1ff442ca
NF
1528 }
1529 else if (t == EXPECT)
1530 {
a70083a3
AD
1531 parse_expect_decl ();
1532 t = lex ();
1ff442ca
NF
1533 }
1534 else if (t == START)
1535 {
a70083a3
AD
1536 parse_start_decl ();
1537 t = lex ();
1ff442ca 1538 }
943819bf
RS
1539#endif
1540
1ff442ca 1541 else
943819bf 1542 {
a0f6b076 1543 complain (_("invalid input: %s"), token_buffer);
a70083a3 1544 t = lex ();
943819bf 1545 }
1ff442ca
NF
1546 }
1547
943819bf
RS
1548 /* grammar has been read. Do some checking */
1549
1ff442ca 1550 if (nsyms > MAXSHORT)
a0f6b076
AD
1551 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1552 MAXSHORT);
1ff442ca 1553 if (nrules == 0)
a0f6b076 1554 fatal (_("no rules in the input grammar"));
1ff442ca 1555
a70083a3 1556 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1ff442ca
NF
1557 && !value_components_used)
1558 {
1559 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
a70083a3
AD
1560 but it seems better to be consistent.
1561 Most programs should declare their own type anyway. */
1562 fprintf (fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca 1563 if (fdefines)
a70083a3 1564 fprintf (fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca
NF
1565 }
1566
1567 /* Report any undefined symbols and consider them nonterminals. */
1568
1569 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1570 if (bp->class == unknown_sym)
1ff442ca 1571 {
a70083a3
AD
1572 complain (_
1573 ("symbol %s is used, but is not defined as a token and has no rules"),
1574bp->tag);
d7020c20 1575 bp->class = nterm_sym;
1ff442ca
NF
1576 bp->value = nvars++;
1577 }
1578
1579 ntokens = nsyms - nvars;
1580}
a70083a3
AD
1581\f
1582/*--------------------------------------------------------------.
1583| For named tokens, but not literal ones, define the name. The |
1584| value is the user token number. |
1585`--------------------------------------------------------------*/
1ff442ca 1586
4a120d45 1587static void
a70083a3 1588output_token_defines (FILE *file)
1ff442ca 1589{
a70083a3
AD
1590 bucket *bp;
1591 char *cp, *symbol;
1592 char c;
1ff442ca 1593
a70083a3 1594 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1595 {
a70083a3
AD
1596 symbol = bp->tag; /* get symbol */
1597
1598 if (bp->value >= ntokens)
1599 continue;
1600 if (bp->user_token_number == SALIAS)
1601 continue;
1602 if ('\'' == *symbol)
1603 continue; /* skip literal character */
1604 if (bp == errtoken)
1605 continue; /* skip error token */
1606 if ('\"' == *symbol)
1ff442ca 1607 {
a70083a3
AD
1608 /* use literal string only if given a symbol with an alias */
1609 if (bp->alias)
1610 symbol = bp->alias->tag;
1611 else
1612 continue;
1613 }
1ff442ca 1614
a70083a3
AD
1615 /* Don't #define nonliteral tokens whose names contain periods. */
1616 cp = symbol;
1617 while ((c = *cp++) && c != '.');
1618 if (c != '\0')
1619 continue;
1ff442ca 1620
a70083a3 1621 fprintf (file, "#define\t%s\t%d\n", symbol,
89cab50d 1622 ((translations && !raw_flag)
a70083a3
AD
1623 ? bp->user_token_number : bp->value));
1624 if (semantic_parser)
1625 fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca 1626 }
a70083a3
AD
1627
1628 putc ('\n', file);
1ff442ca 1629}
1ff442ca
NF
1630
1631
a70083a3
AD
1632/*------------------------------------------------------------------.
1633| Assign symbol numbers, and write definition of token names into |
b2ca4022 1634| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1635| of symbols. |
1636`------------------------------------------------------------------*/
1ff442ca 1637
4a120d45 1638static void
118fb205 1639packsymbols (void)
1ff442ca 1640{
a70083a3
AD
1641 bucket *bp;
1642 int tokno = 1;
1643 int i;
1644 int last_user_token_number;
4a120d45 1645 static char DOLLAR[] = "$";
1ff442ca
NF
1646
1647 /* int lossage = 0; JF set but not used */
1648
d7913476 1649 tags = XCALLOC (char *, nsyms + 1);
4a120d45 1650 tags[0] = DOLLAR;
d7913476 1651 user_toknums = XCALLOC (short, nsyms + 1);
943819bf 1652 user_toknums[0] = 0;
1ff442ca 1653
d7913476
AD
1654 sprec = XCALLOC (short, nsyms);
1655 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1656
1657 max_user_token_number = 256;
1658 last_user_token_number = 256;
1659
1660 for (bp = firstsymbol; bp; bp = bp->next)
1661 {
d7020c20 1662 if (bp->class == nterm_sym)
1ff442ca
NF
1663 {
1664 bp->value += ntokens;
1665 }
943819bf
RS
1666 else if (bp->alias)
1667 {
0a6384c4
AD
1668 /* this symbol and its alias are a single token defn.
1669 allocate a tokno, and assign to both check agreement of
1670 ->prec and ->assoc fields and make both the same */
1671 if (bp->value == 0)
1672 bp->value = bp->alias->value = tokno++;
943819bf 1673
0a6384c4
AD
1674 if (bp->prec != bp->alias->prec)
1675 {
1676 if (bp->prec != 0 && bp->alias->prec != 0
1677 && bp->user_token_number == SALIAS)
a0f6b076
AD
1678 complain (_("conflicting precedences for %s and %s"),
1679 bp->tag, bp->alias->tag);
0a6384c4
AD
1680 if (bp->prec != 0)
1681 bp->alias->prec = bp->prec;
1682 else
1683 bp->prec = bp->alias->prec;
1684 }
943819bf 1685
0a6384c4
AD
1686 if (bp->assoc != bp->alias->assoc)
1687 {
a0f6b076
AD
1688 if (bp->assoc != 0 && bp->alias->assoc != 0
1689 && bp->user_token_number == SALIAS)
1690 complain (_("conflicting assoc values for %s and %s"),
1691 bp->tag, bp->alias->tag);
1692 if (bp->assoc != 0)
1693 bp->alias->assoc = bp->assoc;
1694 else
1695 bp->assoc = bp->alias->assoc;
1696 }
0a6384c4
AD
1697
1698 if (bp->user_token_number == SALIAS)
a70083a3 1699 continue; /* do not do processing below for SALIASs */
943819bf 1700
a70083a3 1701 }
d7020c20 1702 else /* bp->class == token_sym */
943819bf
RS
1703 {
1704 bp->value = tokno++;
1705 }
1706
d7020c20 1707 if (bp->class == token_sym)
1ff442ca
NF
1708 {
1709 if (translations && !(bp->user_token_number))
1710 bp->user_token_number = ++last_user_token_number;
1711 if (bp->user_token_number > max_user_token_number)
1712 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1713 }
1714
1715 tags[bp->value] = bp->tag;
943819bf 1716 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1717 sprec[bp->value] = bp->prec;
1718 sassoc[bp->value] = bp->assoc;
1719
1720 }
1721
1722 if (translations)
1723 {
a70083a3 1724 int j;
1ff442ca 1725
d7913476 1726 token_translations = XCALLOC (short, max_user_token_number + 1);
1ff442ca 1727
0a6384c4 1728 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1729 token number for $undefined., which represents all invalid
1730 inputs. */
4a120d45 1731 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1732 token_translations[j] = 2;
1ff442ca 1733
943819bf 1734 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1735 {
1736 if (bp->value >= ntokens)
1737 continue; /* non-terminal */
1738 if (bp->user_token_number == SALIAS)
0a6384c4 1739 continue;
a70083a3 1740 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1741 complain (_("tokens %s and %s both assigned number %d"),
1742 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1743 bp->tag, bp->user_token_number);
1744 token_translations[bp->user_token_number] = bp->value;
1745 }
1ff442ca
NF
1746 }
1747
1748 error_token_number = errtoken->value;
1749
89cab50d 1750 if (!no_parser_flag)
a70083a3 1751 output_token_defines (ftable);
1ff442ca 1752
d7020c20 1753 if (startval->class == unknown_sym)
a0f6b076 1754 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1755 else if (startval->class == token_sym)
a0f6b076 1756 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1757
1758 start_symbol = startval->value;
1759
89cab50d 1760 if (defines_flag)
1ff442ca 1761 {
a70083a3 1762 output_token_defines (fdefines);
1ff442ca
NF
1763
1764 if (!pure_parser)
1765 {
1766 if (spec_name_prefix)
a70083a3
AD
1767 fprintf (fdefines, "\nextern YYSTYPE %slval;\n",
1768 spec_name_prefix);
1ff442ca 1769 else
a70083a3 1770 fprintf (fdefines, "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1771 }
1772
1773 if (semantic_parser)
1774 for (i = ntokens; i < nsyms; i++)
1775 {
1776 /* don't make these for dummy nonterminals made by gensym. */
1777 if (*tags[i] != '@')
a70083a3 1778 fprintf (fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1779 }
1780#if 0
1781 /* `fdefines' is now a temporary file, so we need to copy its
1782 contents in `done', so we can't close it here. */
a70083a3 1783 fclose (fdefines);
1ff442ca
NF
1784 fdefines = NULL;
1785#endif
1786 }
1787}
a083fbbf 1788
1ff442ca 1789
a70083a3
AD
1790/*---------------------------------------------------------------.
1791| Convert the rules into the representation using RRHS, RLHS and |
1792| RITEMS. |
1793`---------------------------------------------------------------*/
1ff442ca 1794
4a120d45 1795static void
118fb205 1796packgram (void)
1ff442ca 1797{
a70083a3
AD
1798 int itemno;
1799 int ruleno;
1800 symbol_list *p;
1ff442ca
NF
1801
1802 bucket *ruleprec;
1803
d7913476
AD
1804 ritem = XCALLOC (short, nitems + 1);
1805 rlhs = XCALLOC (short, nrules) - 1;
1806 rrhs = XCALLOC (short, nrules) - 1;
1807 rprec = XCALLOC (short, nrules) - 1;
1808 rprecsym = XCALLOC (short, nrules) - 1;
1809 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1810
1811 itemno = 0;
1812 ruleno = 1;
1813
1814 p = grammar;
1815 while (p)
1816 {
1817 rlhs[ruleno] = p->sym->value;
1818 rrhs[ruleno] = itemno;
1819 ruleprec = p->ruleprec;
1820
1821 p = p->next;
1822 while (p && p->sym)
1823 {
1824 ritem[itemno++] = p->sym->value;
1825 /* A rule gets by default the precedence and associativity
1826 of the last token in it. */
d7020c20 1827 if (p->sym->class == token_sym)
1ff442ca
NF
1828 {
1829 rprec[ruleno] = p->sym->prec;
1830 rassoc[ruleno] = p->sym->assoc;
1831 }
a70083a3
AD
1832 if (p)
1833 p = p->next;
1ff442ca
NF
1834 }
1835
1836 /* If this rule has a %prec,
a70083a3 1837 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1838 if (ruleprec)
1839 {
a70083a3
AD
1840 rprec[ruleno] = ruleprec->prec;
1841 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1842 rprecsym[ruleno] = ruleprec->value;
1843 }
1844
1845 ritem[itemno++] = -ruleno;
1846 ruleno++;
1847
a70083a3
AD
1848 if (p)
1849 p = p->next;
1ff442ca
NF
1850 }
1851
1852 ritem[itemno] = 0;
1853}
a70083a3
AD
1854\f
1855/*-------------------------------------------------------------------.
1856| Read in the grammar specification and record it in the format |
1857| described in gram.h. All guards are copied into the FGUARD file |
1858| and all actions into FACTION, in each case forming the body of a C |
1859| function (YYGUARD or YYACTION) which contains a switch statement |
1860| to decide which guard or action to execute. |
1861`-------------------------------------------------------------------*/
1862
1863void
1864reader (void)
1865{
1866 start_flag = 0;
1867 startval = NULL; /* start symbol not specified yet. */
1868
1869#if 0
1870 /* initially assume token number translation not needed. */
1871 translations = 0;
1872#endif
1873 /* Nowadays translations is always set to 1, since we give `error' a
1874 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1875 */
1876 translations = 1;
1877
1878 nsyms = 1;
1879 nvars = 0;
1880 nrules = 0;
1881 nitems = 0;
1882 rline_allocated = 10;
d7913476 1883 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1884
1885 typed = 0;
1886 lastprec = 0;
1887
1888 gensym_count = 0;
1889
1890 semantic_parser = 0;
1891 pure_parser = 0;
a70083a3
AD
1892
1893 grammar = NULL;
1894
1895 init_lex ();
1896 lineno = 1;
1897
1898 /* Initialize the symbol table. */
1899 tabinit ();
1900 /* Construct the error token */
1901 errtoken = getsym ("error");
d7020c20 1902 errtoken->class = token_sym;
a70083a3
AD
1903 errtoken->user_token_number = 256; /* Value specified by POSIX. */
1904 /* Construct a token that represents all undefined literal tokens.
1905 It is always token number 2. */
1906 undeftoken = getsym ("$undefined.");
d7020c20 1907 undeftoken->class = token_sym;
a70083a3
AD
1908 undeftoken->user_token_number = 2;
1909
1910 /* Read the declaration section. Copy %{ ... %} groups to FTABLE
1911 and FDEFINES file. Also notice any %token, %left, etc. found
1912 there. */
1913 putc ('\n', ftable);
1914 fprintf (ftable, "\
1915/* %s, made from %s\n\
1916 by GNU bison %s. */\n\
89cab50d 1917\n", no_parser_flag ? "Bison-generated parse tables" : "A Bison parser", infile, VERSION);
a70083a3
AD
1918
1919 fputs ("#define YYBISON 1 /* Identify Bison output. */\n\n", ftable);
1920 read_declarations ();
1921 /* Start writing the guard and action files, if they are needed. */
1922 output_headers ();
1923 /* Read in the grammar, build grammar in list form. Write out
1924 guards and actions. */
1925 readgram ();
1926 /* Now we know whether we need the line-number stack. If we do,
1927 write its type into the .tab.h file. */
1928 if (fdefines)
1929 reader_output_yylsp (fdefines);
1930 /* Write closing delimiters for actions and guards. */
1931 output_trailers ();
89cab50d 1932 if (locations_flag)
a70083a3
AD
1933 fputs ("#define YYLSP_NEEDED\n\n", ftable);
1934 /* Assign the symbols their symbol numbers. Write #defines for the
1935 token symbols into FDEFINES if requested. */
1936 packsymbols ();
1937 /* Convert the grammar into the format described in gram.h. */
1938 packgram ();
1939 /* Free the symbol table data structure since symbols are now all
1940 referred to by symbol number. */
1941 free_symtab ();
1942}
1943
d7020c20 1944
a70083a3
AD
1945void
1946reader_output_yylsp (FILE *f)
1947{
89cab50d 1948 if (locations_flag)
d7020c20
AD
1949 fputs ("\
1950\n\
1951#ifndef YYLTYPE\n\
89cab50d
AD
1952typedef struct yyltype\n\
1953{\n\
1954 int timestamp;\n\
1955 int first_line;\n\
1956 int first_column;\
d7020c20 1957\n\
89cab50d
AD
1958 int last_line;\n\
1959 int last_column;\n\
1960 char *text;\n\
1961} yyltype;\n\
d7020c20 1962\n\
89cab50d 1963# define YYLTYPE yyltype\n\
d7020c20
AD
1964#endif\n\
1965\n",
1966 f);
a70083a3 1967}