]> git.saurik.com Git - bison.git/blame - src/reader.c
* lib/quote.h, lib/quote.c, lib/quotearg.h, lib/quotearg.c:
[bison.git] / src / reader.c
CommitLineData
/* Input parser for bison
   Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000
   Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */


#include "system.h"
#include <limits.h>
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "quote.h"

1ff442ca 36/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 37static int rline_allocated;
1ff442ca 38
a70083a3
AD
39typedef struct symbol_list
40{
41 struct symbol_list *next;
42 bucket *sym;
43 bucket *ruleprec;
44}
45symbol_list;
118fb205 46
1ff442ca 47int lineno;
1ff442ca 48char **tags;
d019d655 49short *user_toknums;
4a120d45
JT
50static symbol_list *grammar;
51static int start_flag;
52static bucket *startval;
1ff442ca
NF
53
54/* Nonzero if components of semantic values are used, implying
55 they must be unions. */
56static int value_components_used;
57
d7020c20
AD
58/* Nonzero if %union has been seen. */
59static int typed;
1ff442ca 60
d7020c20
AD
61/* Incremented for each %left, %right or %nonassoc seen */
62static int lastprec;
1ff442ca 63
d7020c20
AD
64/* Incremented for each generated symbol */
65static int gensym_count;
1ff442ca
NF
66
67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
0d533154 69\f
a70083a3 70
0d533154
AD
71/*===================\
72| Low level lexing. |
73\===================*/
943819bf
RS
74
75static void
118fb205 76skip_to_char (int target)
943819bf
RS
77{
78 int c;
79 if (target == '\n')
a0f6b076 80 complain (_(" Skipping to next \\n"));
943819bf 81 else
a0f6b076 82 complain (_(" Skipping to next %c"), target);
943819bf
RS
83
84 do
0d533154 85 c = skip_white_space ();
943819bf 86 while (c != target && c != EOF);
a083fbbf 87 if (c != EOF)
0d533154 88 ungetc (c, finput);
943819bf
RS
89}
90
91
0d533154
AD
/* Read a signed decimal integer from STREAM and return its value.

   An optional leading `-' is accepted.  Reading stops at the first
   character that is not a digit; that character is pushed back onto
   STREAM (pushing back EOF is a harmless no-op).  If no digit is
   present the result is 0.

   The magnitude saturates at INT_MAX instead of overflowing, so an
   absurdly long digit sequence in the grammar cannot trigger signed
   overflow (undefined behavior).  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  if (c == '-')
    {
      negative = 1;
      c = getc (stream);
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* `10 * n + digit' would exceed INT_MAX: clamp.  Once clamped,
         this test stays true for every further digit.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  ungetc (c, stream);

  return negative ? -n : n;
}

120/*-------------------------------------------------------------------.
121| Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of |
122| the string (either ' or "). |
123`-------------------------------------------------------------------*/
ae3c3164
AD
124
125static inline void
4a120d45 126copy_string (FILE *fin, FILE *fout, int match)
ae3c3164
AD
127{
128 int c;
129
4a120d45
JT
130 putc (match, fout);
131 c = getc (fin);
ae3c3164
AD
132
133 while (c != match)
134 {
135 if (c == EOF)
136 fatal (_("unterminated string at end of file"));
137 if (c == '\n')
138 {
a0f6b076 139 complain (_("unterminated string"));
4a120d45 140 ungetc (c, fin);
ae3c3164
AD
141 c = match; /* invent terminator */
142 continue;
143 }
144
a70083a3 145 putc (c, fout);
ae3c3164
AD
146
147 if (c == '\\')
148 {
4a120d45 149 c = getc (fin);
ae3c3164
AD
150 if (c == EOF)
151 fatal (_("unterminated string at end of file"));
4a120d45 152 putc (c, fout);
ae3c3164
AD
153 if (c == '\n')
154 lineno++;
155 }
156
a70083a3 157 c = getc (fin);
ae3c3164
AD
158 }
159
a70083a3 160 putc (c, fout);
ae3c3164
AD
161}
162
163
6c89f1c1
AD
164/*---------------------------------------------------------------.
165| Dump the comment from IN to OUT1 and OUT2. C is either `*' or |
166| `/', depending upon the type of comments used. OUT2 might be |
167| NULL. |
168`---------------------------------------------------------------*/
ae3c3164
AD
169
170static inline void
a70083a3 171copy_comment2 (FILE *in, FILE *out1, FILE *out2, int c)
ae3c3164
AD
172{
173 int cplus_comment;
a70083a3 174 int ended;
ae3c3164
AD
175
176 cplus_comment = (c == '/');
27821bff
AD
177 putc (c, out1);
178 if (out2)
179 putc (c, out2);
180 c = getc (in);
ae3c3164
AD
181
182 ended = 0;
183 while (!ended)
184 {
185 if (!cplus_comment && c == '*')
186 {
187 while (c == '*')
188 {
27821bff
AD
189 putc (c, out1);
190 if (out2)
191 putc (c, out2);
192 c = getc (in);
ae3c3164
AD
193 }
194
195 if (c == '/')
196 {
a70083a3 197 putc (c, out1);
27821bff 198 if (out2)
a70083a3 199 putc (c, out2);
ae3c3164
AD
200 ended = 1;
201 }
202 }
203 else if (c == '\n')
204 {
205 lineno++;
27821bff
AD
206 putc (c, out1);
207 if (out2)
208 putc (c, out2);
ae3c3164
AD
209 if (cplus_comment)
210 ended = 1;
211 else
27821bff 212 c = getc (in);
ae3c3164
AD
213 }
214 else if (c == EOF)
215 fatal (_("unterminated comment"));
216 else
217 {
27821bff
AD
218 putc (c, out1);
219 if (out2)
220 putc (c, out2);
221 c = getc (in);
ae3c3164
AD
222 }
223 }
224}
225
226
d019d655
AD
/* Copy a comment from FIN to FOUT only.  C is either `*' or `/',
   depending upon the type of comment; see copy_comment2, which does
   the work with no second output stream.  */

static inline void
copy_comment (FILE *fin, FILE *fout, int c)
{
  FILE *const no_second_output = NULL;

  copy_comment2 (fin, fout, no_second_output, c);
}


239/*-----------------------------------------------------------------.
240| FIN is pointing to a location (i.e., a `@'). Output to FOUT a |
241| reference to this location. STACK_OFFSET is the number of values |
242| in the current rule so far, which says where to find `$0' with |
243| respect to the top of the stack. |
244`-----------------------------------------------------------------*/
1ff442ca 245
a70083a3
AD
246static inline void
247copy_at (FILE *fin, FILE *fout, int stack_offset)
1ff442ca 248{
a70083a3 249 int c;
1ff442ca 250
a70083a3
AD
251 c = getc (fin);
252 if (c == '$')
1ff442ca 253 {
a70083a3 254 fprintf (fout, "yyloc");
89cab50d 255 locations_flag = 1;
a70083a3
AD
256 }
257 else if (isdigit (c) || c == '-')
258 {
259 int n;
1ff442ca 260
a70083a3
AD
261 ungetc (c, fin);
262 n = read_signed_integer (fin);
943819bf 263
a70083a3 264 fprintf (fout, "yylsp[%d]", n - stack_offset);
89cab50d 265 locations_flag = 1;
1ff442ca 266 }
a70083a3 267 else
ff4a34be
AD
268 {
269 char buf[] = "@c";
270 buf[1] = c;
271 complain (_("%s is invalid"), quote (buf));
272 }
1ff442ca 273}
a70083a3
AD
274\f
275/*-------------------------------------------------------------------.
276| Copy the contents of a `%{ ... %}' into the definitions file. The |
277| `%{' has already been read. Return after reading the `%}'. |
278`-------------------------------------------------------------------*/
1ff442ca 279
4a120d45 280static void
118fb205 281copy_definition (void)
1ff442ca 282{
a70083a3 283 int c;
ae3c3164 284 /* -1 while reading a character if prev char was %. */
a70083a3 285 int after_percent;
1ff442ca 286
89cab50d 287 if (!no_lines_flag)
a70083a3 288 fprintf (fattrs, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
289
290 after_percent = 0;
291
ae3c3164 292 c = getc (finput);
1ff442ca
NF
293
294 for (;;)
295 {
296 switch (c)
297 {
298 case '\n':
a70083a3 299 putc (c, fattrs);
1ff442ca
NF
300 lineno++;
301 break;
302
303 case '%':
a70083a3 304 after_percent = -1;
1ff442ca 305 break;
a083fbbf 306
1ff442ca
NF
307 case '\'':
308 case '"':
ae3c3164 309 copy_string (finput, fattrs, c);
1ff442ca
NF
310 break;
311
312 case '/':
ae3c3164
AD
313 putc (c, fattrs);
314 c = getc (finput);
1ff442ca
NF
315 if (c != '*' && c != '/')
316 continue;
ae3c3164 317 copy_comment (finput, fattrs, c);
1ff442ca
NF
318 break;
319
320 case EOF:
a70083a3 321 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
322
323 default:
a70083a3 324 putc (c, fattrs);
1ff442ca
NF
325 }
326
a70083a3 327 c = getc (finput);
1ff442ca
NF
328
329 if (after_percent)
330 {
331 if (c == '}')
332 return;
a70083a3 333 putc ('%', fattrs);
1ff442ca
NF
334 }
335 after_percent = 0;
336
337 }
338
339}
340
341
d7020c20
AD
342/*-------------------------------------------------------------------.
343| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
344| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
345| are reversed. |
346`-------------------------------------------------------------------*/
1ff442ca 347
4a120d45 348static void
d7020c20 349parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 350{
a70083a3
AD
351 int token = 0;
352 char *typename = 0;
353 struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca 354
1ff442ca
NF
355 for (;;)
356 {
e6011337
JT
357 int tmp_char = ungetc (skip_white_space (), finput);
358
359 if (tmp_char == '%')
1ff442ca 360 return;
e6011337 361 if (tmp_char == EOF)
a0f6b076 362 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 363
a70083a3 364 token = lex ();
1ff442ca 365 if (token == COMMA)
943819bf
RS
366 {
367 symbol = NULL;
368 continue;
369 }
1ff442ca
NF
370 if (token == TYPENAME)
371 {
95e36146 372 typename = xstrdup (token_buffer);
1ff442ca 373 value_components_used = 1;
943819bf
RS
374 symbol = NULL;
375 }
a70083a3 376 else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
943819bf 377 {
8e03724b
AD
378 if (symval->alias)
379 warn (_("symbol `%s' used more than once as a literal string"),
380 symval->tag);
381 else if (symbol->alias)
382 warn (_("symbol `%s' given more than one literal string"),
383 symbol->tag);
384 else
385 {
386 symval->class = token_sym;
387 symval->type_name = typename;
388 symval->user_token_number = symbol->user_token_number;
389 symbol->user_token_number = SALIAS;
390 symval->alias = symbol;
391 symbol->alias = symval;
392 /* symbol and symval combined are only one symbol */
393 nsyms--;
394 }
943819bf 395 translations = 1;
8e03724b 396 symbol = NULL;
1ff442ca
NF
397 }
398 else if (token == IDENTIFIER)
399 {
400 int oldclass = symval->class;
943819bf 401 symbol = symval;
1ff442ca 402
943819bf 403 if (symbol->class == what_is_not)
a0f6b076 404 complain (_("symbol %s redefined"), symbol->tag);
943819bf 405 symbol->class = what_is;
d7020c20 406 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 407 symbol->value = nvars++;
1ff442ca
NF
408
409 if (typename)
410 {
943819bf
RS
411 if (symbol->type_name == NULL)
412 symbol->type_name = typename;
a70083a3 413 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 414 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
415 }
416 }
943819bf 417 else if (symbol && token == NUMBER)
a70083a3 418 {
943819bf 419 symbol->user_token_number = numval;
1ff442ca 420 translations = 1;
a70083a3 421 }
1ff442ca 422 else
943819bf 423 {
a0f6b076 424 complain (_("`%s' is invalid in %s"),
d7020c20 425 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 426 skip_to_char ('%');
943819bf 427 }
1ff442ca
NF
428 }
429
430}
431
1ff442ca 432
d7020c20
AD
433/*------------------------------.
434| Parse what comes after %start |
435`------------------------------*/
1ff442ca 436
4a120d45 437static void
118fb205 438parse_start_decl (void)
1ff442ca
NF
439{
440 if (start_flag)
27821bff
AD
441 complain (_("multiple %s declarations"), "%start");
442 if (lex () != IDENTIFIER)
443 complain (_("invalid %s declaration"), "%start");
943819bf
RS
444 else
445 {
446 start_flag = 1;
447 startval = symval;
448 }
1ff442ca
NF
449}
450
451
452
a70083a3
AD
453/*--------------------------------------------------------------.
454| Get the data type (alternative in the union) of the value for |
455| symbol n in rule rule. |
456`--------------------------------------------------------------*/
1ff442ca 457
a70083a3
AD
458static char *
459get_type_name (int n, symbol_list * rule)
1ff442ca 460{
a70083a3
AD
461 int i;
462 symbol_list *rp;
1ff442ca 463
a70083a3 464 if (n < 0)
943819bf 465 {
a70083a3
AD
466 complain (_("invalid $ value"));
467 return NULL;
943819bf 468 }
1ff442ca 469
a70083a3
AD
470 rp = rule;
471 i = 0;
1ff442ca 472
a70083a3 473 while (i < n)
1ff442ca 474 {
a70083a3
AD
475 rp = rp->next;
476 if (rp == NULL || rp->sym == NULL)
477 {
478 complain (_("invalid $ value"));
479 return NULL;
480 }
481 i++;
482 }
483
484 return rp->sym->type_name;
485}
486
487
488/*-----------------------------------------------------------.
489| read in a %type declaration and record its information for |
490| get_type_name to access |
491`-----------------------------------------------------------*/
492
493static void
494parse_type_decl (void)
495{
a70083a3
AD
496 char *name;
497
498 if (lex () != TYPENAME)
499 {
500 complain ("%s", _("%type declaration has no <typename>"));
501 skip_to_char ('%');
502 return;
503 }
504
95e36146 505 name = xstrdup (token_buffer);
a70083a3
AD
506
507 for (;;)
508 {
509 int t;
510 int tmp_char = ungetc (skip_white_space (), finput);
511
512 if (tmp_char == '%')
513 return;
514 if (tmp_char == EOF)
515 fatal (_("Premature EOF after %s"), token_buffer);
516
517 t = lex ();
518
519 switch (t)
1ff442ca
NF
520 {
521
522 case COMMA:
523 case SEMICOLON:
524 break;
525
526 case IDENTIFIER:
527 if (symval->type_name == NULL)
528 symval->type_name = name;
a70083a3 529 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 530 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
531
532 break;
533
534 default:
a0f6b076
AD
535 complain (_("invalid %%type declaration due to item: %s"),
536 token_buffer);
a70083a3 537 skip_to_char ('%');
1ff442ca
NF
538 }
539 }
540}
541
542
543
d7020c20
AD
544/*----------------------------------------------------------------.
545| Read in a %left, %right or %nonassoc declaration and record its |
546| information. |
547`----------------------------------------------------------------*/
1ff442ca 548
4a120d45 549static void
d7020c20 550parse_assoc_decl (associativity assoc)
1ff442ca 551{
a70083a3
AD
552 char *name = NULL;
553 int prev = 0;
1ff442ca 554
a70083a3 555 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 556
1ff442ca
NF
557 for (;;)
558 {
a70083a3 559 int t;
e6011337 560 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 561
e6011337 562 if (tmp_char == '%')
1ff442ca 563 return;
e6011337 564 if (tmp_char == EOF)
a0f6b076 565 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 566
a70083a3 567 t = lex ();
1ff442ca
NF
568
569 switch (t)
570 {
1ff442ca 571 case TYPENAME:
95e36146 572 name = xstrdup (token_buffer);
1ff442ca
NF
573 break;
574
575 case COMMA:
576 break;
577
578 case IDENTIFIER:
579 if (symval->prec != 0)
a0f6b076 580 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
581 symval->prec = lastprec;
582 symval->assoc = assoc;
d7020c20 583 if (symval->class == nterm_sym)
a0f6b076 584 complain (_("symbol %s redefined"), symval->tag);
d7020c20 585 symval->class = token_sym;
1ff442ca 586 if (name)
a70083a3 587 { /* record the type, if one is specified */
1ff442ca
NF
588 if (symval->type_name == NULL)
589 symval->type_name = name;
a70083a3 590 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 591 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
592 }
593 break;
594
595 case NUMBER:
596 if (prev == IDENTIFIER)
a70083a3 597 {
1ff442ca
NF
598 symval->user_token_number = numval;
599 translations = 1;
a70083a3
AD
600 }
601 else
602 {
603 complain (_
604 ("invalid text (%s) - number should be after identifier"),
605token_buffer);
606 skip_to_char ('%');
607 }
1ff442ca
NF
608 break;
609
610 case SEMICOLON:
611 return;
612
613 default:
a0f6b076 614 complain (_("unexpected item: %s"), token_buffer);
a70083a3 615 skip_to_char ('%');
1ff442ca
NF
616 }
617
618 prev = t;
619
620 }
621}
622
623
624
d7020c20
AD
625/*-------------------------------------------------------------------.
626| Copy the union declaration into fattrs (and fdefines), where it is |
627| made into the definition of YYSTYPE, the type of elements of the |
628| parser value stack. |
629`-------------------------------------------------------------------*/
1ff442ca 630
4a120d45 631static void
118fb205 632parse_union_decl (void)
1ff442ca 633{
a70083a3
AD
634 int c;
635 int count = 0;
1ff442ca
NF
636
637 if (typed)
27821bff 638 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
639
640 typed = 1;
641
89cab50d 642 if (!no_lines_flag)
27821bff 643 fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
1ff442ca 644 else
27821bff 645 fprintf (fattrs, "\n");
1ff442ca 646
27821bff 647 fprintf (fattrs, "typedef union");
1ff442ca 648 if (fdefines)
27821bff 649 fprintf (fdefines, "typedef union");
1ff442ca 650
27821bff 651 c = getc (finput);
1ff442ca
NF
652
653 while (c != EOF)
654 {
27821bff 655 putc (c, fattrs);
1ff442ca 656 if (fdefines)
27821bff 657 putc (c, fdefines);
1ff442ca
NF
658
659 switch (c)
660 {
661 case '\n':
662 lineno++;
663 break;
664
665 case '/':
27821bff 666 c = getc (finput);
1ff442ca 667 if (c != '*' && c != '/')
27821bff
AD
668 continue;
669 copy_comment2 (finput, fattrs, fdefines, c);
1ff442ca
NF
670 break;
671
672
673 case '{':
674 count++;
675 break;
676
677 case '}':
678 if (count == 0)
27821bff 679 complain (_("unmatched %s"), "`}'");
1ff442ca 680 count--;
943819bf 681 if (count <= 0)
1ff442ca 682 {
27821bff 683 fprintf (fattrs, " YYSTYPE;\n");
1ff442ca 684 if (fdefines)
27821bff 685 fprintf (fdefines, " YYSTYPE;\n");
1ff442ca 686 /* JF don't choke on trailing semi */
27821bff
AD
687 c = skip_white_space ();
688 if (c != ';')
a70083a3 689 ungetc (c, finput);
1ff442ca
NF
690 return;
691 }
692 }
693
27821bff 694 c = getc (finput);
1ff442ca
NF
695 }
696}
697
d7020c20
AD
698
699/*-------------------------------------------------------.
700| Parse the declaration %expect N which says to expect N |
701| shift-reduce conflicts. |
702`-------------------------------------------------------*/
1ff442ca 703
4a120d45 704static void
118fb205 705parse_expect_decl (void)
1ff442ca 706{
a70083a3 707 int c;
ff4a34be 708 size_t count;
1ff442ca
NF
709 char buffer[20];
710
a70083a3 711 c = getc (finput);
1ff442ca 712 while (c == ' ' || c == '\t')
a70083a3 713 c = getc (finput);
1ff442ca
NF
714
715 count = 0;
716 while (c >= '0' && c <= '9')
717 {
a9e64249 718 if (count < sizeof(buffer) - 1)
1ff442ca 719 buffer[count++] = c;
a70083a3 720 c = getc (finput);
1ff442ca
NF
721 }
722 buffer[count] = 0;
723
724 ungetc (c, finput);
725
943819bf 726 if (count <= 0 || count > 10)
a0f6b076 727 complain ("%s", _("argument of %expect is not an integer"));
1ff442ca
NF
728 expected_conflicts = atoi (buffer);
729}
730
a70083a3
AD
731
732/*-------------------------------------------------------------------.
733| Parse what comes after %thong. the full syntax is |
734| |
735| %thong <type> token number literal |
736| |
737| the <type> or number may be omitted. The number specifies the |
738| user_token_number. |
739| |
740| Two symbols are entered in the table, one for the token symbol and |
741| one for the literal. Both are given the <type>, if any, from the |
742| declaration. The ->user_token_number of the first is SALIAS and |
743| the ->user_token_number of the second is set to the number, if |
744| any, from the declaration. The two symbols are linked via |
745| pointers in their ->alias fields. |
746| |
747| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
748| only the literal string is retained it is the literal string that |
749| is output to yytname |
750`-------------------------------------------------------------------*/
751
752static void
753parse_thong_decl (void)
7b306f52 754{
a70083a3
AD
755 int token;
756 struct bucket *symbol;
757 char *typename = 0;
95e36146 758 int usrtoknum;
7b306f52 759
a70083a3
AD
760 translations = 1;
761 token = lex (); /* fetch typename or first token */
762 if (token == TYPENAME)
7b306f52 763 {
95e36146 764 typename = xstrdup (token_buffer);
a70083a3
AD
765 value_components_used = 1;
766 token = lex (); /* fetch first token */
7b306f52 767 }
7b306f52 768
a70083a3 769 /* process first token */
7b306f52 770
a70083a3
AD
771 if (token != IDENTIFIER)
772 {
773 complain (_("unrecognized item %s, expected an identifier"),
774 token_buffer);
775 skip_to_char ('%');
776 return;
7b306f52 777 }
d7020c20 778 symval->class = token_sym;
a70083a3
AD
779 symval->type_name = typename;
780 symval->user_token_number = SALIAS;
781 symbol = symval;
7b306f52 782
a70083a3 783 token = lex (); /* get number or literal string */
1ff442ca 784
a70083a3 785 if (token == NUMBER)
943819bf 786 {
a70083a3
AD
787 usrtoknum = numval;
788 token = lex (); /* okay, did number, now get literal */
943819bf 789 }
a70083a3
AD
790 else
791 usrtoknum = 0;
1ff442ca 792
a70083a3 793 /* process literal string token */
1ff442ca 794
a70083a3 795 if (token != IDENTIFIER || *symval->tag != '\"')
1ff442ca 796 {
a70083a3
AD
797 complain (_("expected string constant instead of %s"), token_buffer);
798 skip_to_char ('%');
799 return;
1ff442ca 800 }
d7020c20 801 symval->class = token_sym;
a70083a3
AD
802 symval->type_name = typename;
803 symval->user_token_number = usrtoknum;
1ff442ca 804
a70083a3
AD
805 symval->alias = symbol;
806 symbol->alias = symval;
1ff442ca 807
a70083a3
AD
808 nsyms--; /* symbol and symval combined are only one symbol */
809}
3cef001a 810
d7020c20 811
a70083a3
AD
812/*----------------------------------------------------------------.
813| Read from finput until `%%' is seen. Discard the `%%'. Handle |
814| any `%' declarations, and copy the contents of any `%{ ... %}' |
815| groups to fattrs. |
816`----------------------------------------------------------------*/
1ff442ca 817
4a120d45 818static void
a70083a3 819read_declarations (void)
1ff442ca 820{
a70083a3
AD
821 int c;
822 int tok;
1ff442ca 823
a70083a3 824 for (;;)
1ff442ca 825 {
a70083a3 826 c = skip_white_space ();
1ff442ca 827
a70083a3
AD
828 if (c == '%')
829 {
830 tok = parse_percent_token ();
1ff442ca 831
a70083a3 832 switch (tok)
943819bf 833 {
a70083a3
AD
834 case TWO_PERCENTS:
835 return;
1ff442ca 836
a70083a3
AD
837 case PERCENT_LEFT_CURLY:
838 copy_definition ();
839 break;
1ff442ca 840
a70083a3 841 case TOKEN:
d7020c20 842 parse_token_decl (token_sym, nterm_sym);
a70083a3 843 break;
1ff442ca 844
a70083a3 845 case NTERM:
d7020c20 846 parse_token_decl (nterm_sym, token_sym);
a70083a3 847 break;
1ff442ca 848
a70083a3
AD
849 case TYPE:
850 parse_type_decl ();
851 break;
1ff442ca 852
a70083a3
AD
853 case START:
854 parse_start_decl ();
855 break;
118fb205 856
a70083a3
AD
857 case UNION:
858 parse_union_decl ();
859 break;
1ff442ca 860
a70083a3
AD
861 case EXPECT:
862 parse_expect_decl ();
863 break;
864 case THONG:
865 parse_thong_decl ();
866 break;
d7020c20 867
a70083a3 868 case LEFT:
d7020c20 869 parse_assoc_decl (left_assoc);
a70083a3 870 break;
1ff442ca 871
a70083a3 872 case RIGHT:
d7020c20 873 parse_assoc_decl (right_assoc);
a70083a3 874 break;
1ff442ca 875
a70083a3 876 case NONASSOC:
d7020c20 877 parse_assoc_decl (non_assoc);
a70083a3 878 break;
1ff442ca 879
a70083a3
AD
880 case SEMANTIC_PARSER:
881 if (semantic_parser == 0)
882 {
883 semantic_parser = 1;
884 open_extra_files ();
885 }
886 break;
1ff442ca 887
a70083a3
AD
888 case PURE_PARSER:
889 pure_parser = 1;
890 break;
1ff442ca 891
a70083a3
AD
892 case NOOP:
893 break;
1ff442ca 894
a70083a3
AD
895 default:
896 complain (_("unrecognized: %s"), token_buffer);
897 skip_to_char ('%');
898 }
899 }
900 else if (c == EOF)
901 fatal (_("no input grammar"));
902 else
903 {
ff4a34be
AD
904 char buf[] = "c";
905 buf[0] = c;
906 complain (_("unknown character: %s"), quote (buf));
a70083a3 907 skip_to_char ('%');
1ff442ca 908 }
1ff442ca 909 }
1ff442ca 910}
a70083a3
AD
911\f
912/*-------------------------------------------------------------------.
913| Assuming that a `{' has just been seen, copy everything up to the |
914| matching `}' into the actions file. STACK_OFFSET is the number of |
915| values in the current rule so far, which says where to find `$0' |
916| with respect to the top of the stack. |
917`-------------------------------------------------------------------*/
1ff442ca 918
4a120d45 919static void
a70083a3 920copy_action (symbol_list * rule, int stack_offset)
1ff442ca 921{
a70083a3
AD
922 int c;
923 int n;
924 int count;
925 char *type_name;
1ff442ca
NF
926
927 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
928 if (semantic_parser)
929 stack_offset = 0;
1ff442ca 930
41aca2e0 931 fprintf (faction, "\ncase %d:\n", nrules);
89cab50d 932 if (!no_lines_flag)
41aca2e0
AD
933 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
934 putc ('{', faction);
1ff442ca
NF
935
936 count = 1;
a70083a3 937 c = getc (finput);
1ff442ca
NF
938
939 while (count > 0)
940 {
941 while (c != '}')
a70083a3
AD
942 {
943 switch (c)
1ff442ca
NF
944 {
945 case '\n':
a70083a3 946 putc (c, faction);
1ff442ca
NF
947 lineno++;
948 break;
949
950 case '{':
a70083a3 951 putc (c, faction);
1ff442ca
NF
952 count++;
953 break;
954
955 case '\'':
956 case '"':
ca36d2ef 957 copy_string (finput, faction, c);
1ff442ca
NF
958 break;
959
960 case '/':
27821bff
AD
961 putc (c, faction);
962 c = getc (finput);
1ff442ca
NF
963 if (c != '*' && c != '/')
964 continue;
3cef001a 965 copy_comment (finput, faction, c);
1ff442ca
NF
966 break;
967
968 case '$':
a70083a3 969 c = getc (finput);
1ff442ca
NF
970 type_name = NULL;
971
972 if (c == '<')
973 {
a70083a3 974 char *cp = token_buffer;
1ff442ca 975
a70083a3 976 while ((c = getc (finput)) != '>' && c > 0)
118fb205
JT
977 {
978 if (cp == token_buffer + maxtoken)
a70083a3 979 cp = grow_token_buffer (cp);
118fb205
JT
980
981 *cp++ = c;
982 }
1ff442ca
NF
983 *cp = 0;
984 type_name = token_buffer;
985 value_components_used = 1;
986
a70083a3 987 c = getc (finput);
1ff442ca
NF
988 }
989 if (c == '$')
990 {
a70083a3 991 fprintf (faction, "yyval");
41aca2e0 992 if (!type_name)
a70083a3 993 type_name = get_type_name (0, rule);
1ff442ca 994 if (type_name)
a70083a3
AD
995 fprintf (faction, ".%s", type_name);
996 if (!type_name && typed)
a0f6b076
AD
997 complain (_("$$ of `%s' has no declared type"),
998 rule->sym->tag);
1ff442ca 999 }
a70083a3 1000 else if (isdigit (c) || c == '-')
1ff442ca
NF
1001 {
1002 ungetc (c, finput);
a70083a3
AD
1003 n = read_signed_integer (finput);
1004 c = getc (finput);
1ff442ca
NF
1005
1006 if (!type_name && n > 0)
a70083a3 1007 type_name = get_type_name (n, rule);
1ff442ca 1008
a70083a3 1009 fprintf (faction, "yyvsp[%d]", n - stack_offset);
1ff442ca 1010 if (type_name)
a70083a3
AD
1011 fprintf (faction, ".%s", type_name);
1012 if (!type_name && typed)
a0f6b076
AD
1013 complain (_("$%d of `%s' has no declared type"),
1014 n, rule->sym->tag);
1ff442ca
NF
1015 continue;
1016 }
1017 else
ff4a34be
AD
1018 {
1019 char buf[] = "$c";
1020 buf[1] = c;
1021 complain (_("%s is invalid"), quote (buf));
1022 }
1ff442ca
NF
1023
1024 break;
1025
1026 case '@':
7b306f52 1027 copy_at (finput, faction, stack_offset);
6666f98f 1028 break;
1ff442ca
NF
1029
1030 case EOF:
27821bff 1031 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1032
1033 default:
a70083a3
AD
1034 putc (c, faction);
1035 }
1036
1037 c = getc (finput);
1038 }
1039
1040 /* above loop exits when c is '}' */
1041
1042 if (--count)
1043 {
1044 putc (c, faction);
1045 c = getc (finput);
1046 }
1047 }
1048
1049 fprintf (faction, ";\n break;}");
1050}
1051\f
1052/*-------------------------------------------------------------------.
1053| After `%guard' is seen in the input file, copy the actual guard |
1054| into the guards file. If the guard is followed by an action, copy |
1055| that into the actions file. STACK_OFFSET is the number of values |
1056| in the current rule so far, which says where to find `$0' with |
1057| respect to the top of the stack, for the simple parser in which |
1058| the stack is not popped until after the guard is run. |
1059`-------------------------------------------------------------------*/
1060
1061static void
1062copy_guard (symbol_list * rule, int stack_offset)
1063{
1064 int c;
1065 int n;
1066 int count;
1067 char *type_name;
1068 int brace_flag = 0;
1069
1070 /* offset is always 0 if parser has already popped the stack pointer */
1071 if (semantic_parser)
1072 stack_offset = 0;
1073
1074 fprintf (fguard, "\ncase %d:\n", nrules);
89cab50d 1075 if (!no_lines_flag)
a70083a3
AD
1076 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1077 putc ('{', fguard);
1078
1079 count = 0;
1080 c = getc (finput);
1081
1082 while (brace_flag ? (count > 0) : (c != ';'))
1083 {
1084 switch (c)
1085 {
1086 case '\n':
1087 putc (c, fguard);
1088 lineno++;
1089 break;
1090
1091 case '{':
1092 putc (c, fguard);
1093 brace_flag = 1;
1094 count++;
1095 break;
1096
1097 case '}':
1098 putc (c, fguard);
1099 if (count > 0)
1100 count--;
1101 else
1102 {
1103 complain (_("unmatched %s"), "`}'");
1104 c = getc (finput); /* skip it */
1105 }
1106 break;
1107
1108 case '\'':
1109 case '"':
1110 copy_string (finput, fguard, c);
1111 break;
1112
1113 case '/':
1114 putc (c, fguard);
1115 c = getc (finput);
1116 if (c != '*' && c != '/')
1117 continue;
1118 copy_comment (finput, fguard, c);
1119 break;
1120
1121 case '$':
1122 c = getc (finput);
1123 type_name = NULL;
1124
1125 if (c == '<')
1126 {
1127 char *cp = token_buffer;
1128
1129 while ((c = getc (finput)) != '>' && c > 0)
1130 {
1131 if (cp == token_buffer + maxtoken)
1132 cp = grow_token_buffer (cp);
1133
1134 *cp++ = c;
1135 }
1136 *cp = 0;
1137 type_name = token_buffer;
1138
1139 c = getc (finput);
1140 }
1141
1142 if (c == '$')
1143 {
1144 fprintf (fguard, "yyval");
1145 if (!type_name)
1146 type_name = rule->sym->type_name;
1147 if (type_name)
1148 fprintf (fguard, ".%s", type_name);
1149 if (!type_name && typed)
1150 complain (_("$$ of `%s' has no declared type"),
1151 rule->sym->tag);
1152 }
1153 else if (isdigit (c) || c == '-')
1154 {
1155 ungetc (c, finput);
1156 n = read_signed_integer (finput);
1157 c = getc (finput);
1158
1159 if (!type_name && n > 0)
1160 type_name = get_type_name (n, rule);
1161
1162 fprintf (fguard, "yyvsp[%d]", n - stack_offset);
1163 if (type_name)
1164 fprintf (fguard, ".%s", type_name);
1165 if (!type_name && typed)
1166 complain (_("$%d of `%s' has no declared type"),
1167 n, rule->sym->tag);
1168 continue;
1ff442ca 1169 }
a70083a3 1170 else
ff4a34be
AD
1171 {
1172 char buf[] = "$c";
1173 buf[1] = c;
1174 complain (_("%s is invalid"), quote (buf));
1175 }
a70083a3 1176 break;
1ff442ca 1177
a70083a3
AD
1178 case '@':
1179 copy_at (finput, fguard, stack_offset);
1180 break;
1ff442ca 1181
a70083a3
AD
1182 case EOF:
1183 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1184
a70083a3
AD
1185 default:
1186 putc (c, fguard);
1ff442ca 1187 }
a70083a3
AD
1188
1189 if (c != '}' || count != 0)
1190 c = getc (finput);
1ff442ca
NF
1191 }
1192
a70083a3
AD
1193 c = skip_white_space ();
1194
1195 fprintf (fguard, ";\n break;}");
1196 if (c == '{')
1197 copy_action (rule, stack_offset);
1198 else if (c == '=')
1199 {
1200 c = getc (finput); /* why not skip_white_space -wjh */
1201 if (c == '{')
1202 copy_action (rule, stack_offset);
1203 }
1204 else
1205 ungetc (c, finput);
1ff442ca 1206}
a70083a3
AD
1207\f
1208
1209static void
1210record_rule_line (void)
1211{
1212 /* Record each rule's source line number in rline table. */
1ff442ca 1213
a70083a3
AD
1214 if (nrules >= rline_allocated)
1215 {
1216 rline_allocated = nrules * 2;
d7913476 1217 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1218 }
1219 rline[nrules] = lineno;
1220}
1ff442ca
NF
1221
1222
a70083a3
AD
1223/*-------------------------------------------------------------------.
1224| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1225| with the user's names. |
1226`-------------------------------------------------------------------*/
1ff442ca 1227
4a120d45 1228static bucket *
118fb205 1229gensym (void)
1ff442ca 1230{
a70083a3 1231 bucket *sym;
1ff442ca
NF
1232
1233 sprintf (token_buffer, "@%d", ++gensym_count);
a70083a3 1234 sym = getsym (token_buffer);
d7020c20 1235 sym->class = nterm_sym;
1ff442ca 1236 sym->value = nvars++;
36281465 1237 return sym;
1ff442ca
NF
1238}
1239
a70083a3
AD
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  This is unused: it is only called from
   the #if 0 part of readgram.

   The first token must be a TYPENAME; otherwise a complaint is issued
   and that token is returned.  Each following IDENTIFIER gets the
   type name, unless it already has a different one, in which case a
   complaint is issued.  Commas are skipped.  A semicolon ends the
   declaration and the token after it is returned; any other token
   ends the declaration and is itself returned.  */

static int
get_type (void)
{
  int tok;
  char *type;

  tok = lex ();
  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  type = xstrdup (token_buffer);

  for (;;)
    {
      tok = lex ();

      if (tok == SEMICOLON)
        return lex ();

      if (tok == COMMA)
        continue;

      if (tok != IDENTIFIER)
        return tok;

      if (!symval->type_name)
        symval->type_name = type;
      else if (strcmp (type, symval->type_name))
        complain (_("type redeclaration for %s"), symval->tag);
    }
}

#endif
1291\f
1292/*------------------------------------------------------------------.
1293| Parse the input grammar into a one symbol_list structure. Each |
1294| rule is represented by a sequence of symbols: the left hand side |
1295| followed by the contents of the right hand side, followed by a |
1296| null pointer instead of a symbol to terminate the rule. The next |
1297| symbol is the lhs of the following rule. |
1298| |
1299| All guards and actions are copied out to the appropriate files, |
1300| labelled by the rule number they apply to. |
1301`------------------------------------------------------------------*/
1ff442ca 1302
4a120d45 1303static void
118fb205 1304readgram (void)
1ff442ca 1305{
a70083a3
AD
1306 int t;
1307 bucket *lhs = NULL;
1308 symbol_list *p;
1309 symbol_list *p1;
1310 bucket *bp;
1ff442ca 1311
ff4a34be
AD
1312 /* Points to first symbol_list of current rule. its symbol is the
1313 lhs of the rule. */
1314 symbol_list *crule;
1315 /* Points to the symbol_list preceding crule. */
1316 symbol_list *crule1;
1ff442ca
NF
1317
1318 p1 = NULL;
1319
a70083a3 1320 t = lex ();
1ff442ca
NF
1321
1322 while (t != TWO_PERCENTS && t != ENDFILE)
1323 {
1324 if (t == IDENTIFIER || t == BAR)
1325 {
89cab50d 1326 int action_flag = 0;
ff4a34be
AD
1327 /* Number of symbols in rhs of this rule so far */
1328 int rulelength = 0;
1ff442ca
NF
1329 int xactions = 0; /* JF for error checking */
1330 bucket *first_rhs = 0;
1331
1332 if (t == IDENTIFIER)
1333 {
1334 lhs = symval;
943819bf
RS
1335
1336 if (!start_flag)
1337 {
1338 startval = lhs;
1339 start_flag = 1;
1340 }
a083fbbf 1341
a70083a3 1342 t = lex ();
1ff442ca 1343 if (t != COLON)
943819bf 1344 {
a0f6b076 1345 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1346 unlex (t);
943819bf 1347 }
1ff442ca
NF
1348 }
1349
943819bf 1350 if (nrules == 0 && t == BAR)
1ff442ca 1351 {
a0f6b076 1352 complain (_("grammar starts with vertical bar"));
943819bf 1353 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1354 }
1ff442ca
NF
1355 /* start a new rule and record its lhs. */
1356
1357 nrules++;
1358 nitems++;
1359
1360 record_rule_line ();
1361
d7913476 1362 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1363 p->sym = lhs;
1364
1365 crule1 = p1;
1366 if (p1)
1367 p1->next = p;
1368 else
1369 grammar = p;
1370
1371 p1 = p;
1372 crule = p;
1373
1374 /* mark the rule's lhs as a nonterminal if not already so. */
1375
d7020c20 1376 if (lhs->class == unknown_sym)
1ff442ca 1377 {
d7020c20 1378 lhs->class = nterm_sym;
1ff442ca
NF
1379 lhs->value = nvars;
1380 nvars++;
1381 }
d7020c20 1382 else if (lhs->class == token_sym)
a0f6b076 1383 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1384
1385 /* read the rhs of the rule. */
1386
1387 for (;;)
1388 {
a70083a3 1389 t = lex ();
943819bf
RS
1390 if (t == PREC)
1391 {
a70083a3 1392 t = lex ();
943819bf 1393 crule->ruleprec = symval;
a70083a3 1394 t = lex ();
943819bf 1395 }
1ff442ca 1396
a70083a3
AD
1397 if (!(t == IDENTIFIER || t == LEFT_CURLY))
1398 break;
1ff442ca
NF
1399
1400 /* If next token is an identifier, see if a colon follows it.
a70083a3 1401 If one does, exit this rule now. */
1ff442ca
NF
1402 if (t == IDENTIFIER)
1403 {
a70083a3
AD
1404 bucket *ssave;
1405 int t1;
1ff442ca
NF
1406
1407 ssave = symval;
a70083a3
AD
1408 t1 = lex ();
1409 unlex (t1);
1ff442ca 1410 symval = ssave;
a70083a3
AD
1411 if (t1 == COLON)
1412 break;
1ff442ca 1413
a70083a3 1414 if (!first_rhs) /* JF */
1ff442ca
NF
1415 first_rhs = symval;
1416 /* Not followed by colon =>
1417 process as part of this rule's rhs. */
1418 }
1419
1420 /* If we just passed an action, that action was in the middle
a70083a3
AD
1421 of a rule, so make a dummy rule to reduce it to a
1422 non-terminal. */
89cab50d 1423 if (action_flag)
1ff442ca 1424 {
a70083a3 1425 bucket *sdummy;
1ff442ca
NF
1426
1427 /* Since the action was written out with this rule's */
943819bf 1428 /* number, we must give the new rule this number */
1ff442ca
NF
1429 /* by inserting the new rule before it. */
1430
1431 /* Make a dummy nonterminal, a gensym. */
a70083a3 1432 sdummy = gensym ();
1ff442ca
NF
1433
1434 /* Make a new rule, whose body is empty,
1435 before the current one, so that the action
1436 just read can belong to it. */
1437 nrules++;
1438 nitems++;
1439 record_rule_line ();
d7913476 1440 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1441 if (crule1)
1442 crule1->next = p;
a70083a3
AD
1443 else
1444 grammar = p;
1ff442ca 1445 p->sym = sdummy;
d7913476 1446 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1447 p->next = crule1;
1448 crule1->next = crule;
1449
1450 /* insert the dummy generated by that rule into this rule. */
1451 nitems++;
d7913476 1452 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1453 p->sym = sdummy;
1454 p1->next = p;
1455 p1 = p;
1456
89cab50d 1457 action_flag = 0;
1ff442ca
NF
1458 }
1459
1460 if (t == IDENTIFIER)
1461 {
1462 nitems++;
d7913476 1463 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1464 p->sym = symval;
1465 p1->next = p;
1466 p1 = p;
1467 }
a70083a3 1468 else /* handle an action. */
1ff442ca 1469 {
a70083a3 1470 copy_action (crule, rulelength);
89cab50d 1471 action_flag = 1;
1ff442ca
NF
1472 xactions++; /* JF */
1473 }
1474 rulelength++;
a70083a3 1475 } /* end of read rhs of rule */
1ff442ca
NF
1476
1477 /* Put an empty link in the list to mark the end of this rule */
d7913476 1478 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1479 p1->next = p;
1480 p1 = p;
1481
1482 if (t == PREC)
1483 {
a0f6b076 1484 complain (_("two @prec's in a row"));
a70083a3 1485 t = lex ();
1ff442ca 1486 crule->ruleprec = symval;
a70083a3 1487 t = lex ();
1ff442ca
NF
1488 }
1489 if (t == GUARD)
1490 {
a70083a3 1491 if (!semantic_parser)
ff4a34be 1492 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1493
a70083a3
AD
1494 copy_guard (crule, rulelength);
1495 t = lex ();
1ff442ca
NF
1496 }
1497 else if (t == LEFT_CURLY)
1498 {
a70083a3 1499 /* This case never occurs -wjh */
89cab50d 1500 if (action_flag)
a0f6b076 1501 complain (_("two actions at end of one rule"));
a70083a3 1502 copy_action (crule, rulelength);
89cab50d 1503 action_flag = 1;
943819bf 1504 xactions++; /* -wjh */
a70083a3 1505 t = lex ();
1ff442ca 1506 }
a0f6b076 1507 /* If $$ is being set in default way, report if any type
6666f98f
AD
1508 mismatch. */
1509 else if (!xactions
a70083a3 1510 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1511 {
6666f98f
AD
1512 if (lhs->type_name == 0
1513 || first_rhs->type_name == 0
a70083a3 1514 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1515 complain (_("type clash (`%s' `%s') on default action"),
1516 lhs->type_name ? lhs->type_name : "",
a70083a3 1517 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1518 }
1519 /* Warn if there is no default for $$ but we need one. */
1520 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1521 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca 1522 if (t == SEMICOLON)
a70083a3 1523 t = lex ();
a083fbbf 1524 }
943819bf 1525#if 0
a70083a3 1526 /* these things can appear as alternatives to rules. */
943819bf
RS
1527/* NO, they cannot.
1528 a) none of the documentation allows them
1529 b) most of them scan forward until finding a next %
1530 thus they may swallow lots of intervening rules
1531*/
1ff442ca
NF
1532 else if (t == TOKEN)
1533 {
d7020c20 1534 parse_token_decl (token_sym, nterm_sym);
a70083a3 1535 t = lex ();
1ff442ca
NF
1536 }
1537 else if (t == NTERM)
1538 {
d7020c20 1539 parse_token_decl (nterm_sym, token_sym);
a70083a3 1540 t = lex ();
1ff442ca
NF
1541 }
1542 else if (t == TYPE)
1543 {
a70083a3 1544 t = get_type ();
1ff442ca
NF
1545 }
1546 else if (t == UNION)
1547 {
a70083a3
AD
1548 parse_union_decl ();
1549 t = lex ();
1ff442ca
NF
1550 }
1551 else if (t == EXPECT)
1552 {
a70083a3
AD
1553 parse_expect_decl ();
1554 t = lex ();
1ff442ca
NF
1555 }
1556 else if (t == START)
1557 {
a70083a3
AD
1558 parse_start_decl ();
1559 t = lex ();
1ff442ca 1560 }
943819bf
RS
1561#endif
1562
1ff442ca 1563 else
943819bf 1564 {
a0f6b076 1565 complain (_("invalid input: %s"), token_buffer);
a70083a3 1566 t = lex ();
943819bf 1567 }
1ff442ca
NF
1568 }
1569
943819bf
RS
1570 /* grammar has been read. Do some checking */
1571
1ff442ca 1572 if (nsyms > MAXSHORT)
a0f6b076
AD
1573 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1574 MAXSHORT);
1ff442ca 1575 if (nrules == 0)
a0f6b076 1576 fatal (_("no rules in the input grammar"));
1ff442ca 1577
ff4a34be
AD
1578 /* JF put out same default YYSTYPE as YACC does */
1579 if (typed == 0
1ff442ca
NF
1580 && !value_components_used)
1581 {
1582 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
a70083a3
AD
1583 but it seems better to be consistent.
1584 Most programs should declare their own type anyway. */
1585 fprintf (fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca 1586 if (fdefines)
a70083a3 1587 fprintf (fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca
NF
1588 }
1589
1590 /* Report any undefined symbols and consider them nonterminals. */
1591
1592 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1593 if (bp->class == unknown_sym)
1ff442ca 1594 {
a70083a3
AD
1595 complain (_
1596 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1597 bp->tag);
d7020c20 1598 bp->class = nterm_sym;
1ff442ca
NF
1599 bp->value = nvars++;
1600 }
1601
1602 ntokens = nsyms - nvars;
1603}
a70083a3
AD
1604\f
1605/*--------------------------------------------------------------.
1606| For named tokens, but not literal ones, define the name. The |
1607| value is the user token number. |
1608`--------------------------------------------------------------*/
1ff442ca 1609
4a120d45 1610static void
a70083a3 1611output_token_defines (FILE *file)
1ff442ca 1612{
a70083a3
AD
1613 bucket *bp;
1614 char *cp, *symbol;
1615 char c;
1ff442ca 1616
a70083a3 1617 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1618 {
a70083a3
AD
1619 symbol = bp->tag; /* get symbol */
1620
1621 if (bp->value >= ntokens)
1622 continue;
1623 if (bp->user_token_number == SALIAS)
1624 continue;
1625 if ('\'' == *symbol)
1626 continue; /* skip literal character */
1627 if (bp == errtoken)
1628 continue; /* skip error token */
1629 if ('\"' == *symbol)
1ff442ca 1630 {
a70083a3
AD
1631 /* use literal string only if given a symbol with an alias */
1632 if (bp->alias)
1633 symbol = bp->alias->tag;
1634 else
1635 continue;
1636 }
1ff442ca 1637
a70083a3
AD
1638 /* Don't #define nonliteral tokens whose names contain periods. */
1639 cp = symbol;
1640 while ((c = *cp++) && c != '.');
1641 if (c != '\0')
1642 continue;
1ff442ca 1643
a70083a3 1644 fprintf (file, "#define\t%s\t%d\n", symbol,
89cab50d 1645 ((translations && !raw_flag)
a70083a3
AD
1646 ? bp->user_token_number : bp->value));
1647 if (semantic_parser)
1648 fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca 1649 }
a70083a3
AD
1650
1651 putc ('\n', file);
1ff442ca 1652}
1ff442ca
NF
1653
1654
a70083a3
AD
1655/*------------------------------------------------------------------.
1656| Assign symbol numbers, and write definition of token names into |
b2ca4022 1657| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1658| of symbols. |
1659`------------------------------------------------------------------*/
1ff442ca 1660
4a120d45 1661static void
118fb205 1662packsymbols (void)
1ff442ca 1663{
a70083a3
AD
1664 bucket *bp;
1665 int tokno = 1;
1666 int i;
1667 int last_user_token_number;
4a120d45 1668 static char DOLLAR[] = "$";
1ff442ca
NF
1669
1670 /* int lossage = 0; JF set but not used */
1671
d7913476 1672 tags = XCALLOC (char *, nsyms + 1);
4a120d45 1673 tags[0] = DOLLAR;
d7913476 1674 user_toknums = XCALLOC (short, nsyms + 1);
943819bf 1675 user_toknums[0] = 0;
1ff442ca 1676
d7913476
AD
1677 sprec = XCALLOC (short, nsyms);
1678 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1679
1680 max_user_token_number = 256;
1681 last_user_token_number = 256;
1682
1683 for (bp = firstsymbol; bp; bp = bp->next)
1684 {
d7020c20 1685 if (bp->class == nterm_sym)
1ff442ca
NF
1686 {
1687 bp->value += ntokens;
1688 }
943819bf
RS
1689 else if (bp->alias)
1690 {
0a6384c4
AD
1691 /* this symbol and its alias are a single token defn.
1692 allocate a tokno, and assign to both check agreement of
1693 ->prec and ->assoc fields and make both the same */
1694 if (bp->value == 0)
1695 bp->value = bp->alias->value = tokno++;
943819bf 1696
0a6384c4
AD
1697 if (bp->prec != bp->alias->prec)
1698 {
1699 if (bp->prec != 0 && bp->alias->prec != 0
1700 && bp->user_token_number == SALIAS)
a0f6b076
AD
1701 complain (_("conflicting precedences for %s and %s"),
1702 bp->tag, bp->alias->tag);
0a6384c4
AD
1703 if (bp->prec != 0)
1704 bp->alias->prec = bp->prec;
1705 else
1706 bp->prec = bp->alias->prec;
1707 }
943819bf 1708
0a6384c4
AD
1709 if (bp->assoc != bp->alias->assoc)
1710 {
a0f6b076
AD
1711 if (bp->assoc != 0 && bp->alias->assoc != 0
1712 && bp->user_token_number == SALIAS)
1713 complain (_("conflicting assoc values for %s and %s"),
1714 bp->tag, bp->alias->tag);
1715 if (bp->assoc != 0)
1716 bp->alias->assoc = bp->assoc;
1717 else
1718 bp->assoc = bp->alias->assoc;
1719 }
0a6384c4
AD
1720
1721 if (bp->user_token_number == SALIAS)
a70083a3 1722 continue; /* do not do processing below for SALIASs */
943819bf 1723
a70083a3 1724 }
d7020c20 1725 else /* bp->class == token_sym */
943819bf
RS
1726 {
1727 bp->value = tokno++;
1728 }
1729
d7020c20 1730 if (bp->class == token_sym)
1ff442ca
NF
1731 {
1732 if (translations && !(bp->user_token_number))
1733 bp->user_token_number = ++last_user_token_number;
1734 if (bp->user_token_number > max_user_token_number)
1735 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1736 }
1737
1738 tags[bp->value] = bp->tag;
943819bf 1739 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1740 sprec[bp->value] = bp->prec;
1741 sassoc[bp->value] = bp->assoc;
1742
1743 }
1744
1745 if (translations)
1746 {
a70083a3 1747 int j;
1ff442ca 1748
d7913476 1749 token_translations = XCALLOC (short, max_user_token_number + 1);
1ff442ca 1750
0a6384c4 1751 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1752 token number for $undefined., which represents all invalid
1753 inputs. */
4a120d45 1754 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1755 token_translations[j] = 2;
1ff442ca 1756
943819bf 1757 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1758 {
1759 if (bp->value >= ntokens)
1760 continue; /* non-terminal */
1761 if (bp->user_token_number == SALIAS)
0a6384c4 1762 continue;
a70083a3 1763 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1764 complain (_("tokens %s and %s both assigned number %d"),
1765 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1766 bp->tag, bp->user_token_number);
1767 token_translations[bp->user_token_number] = bp->value;
1768 }
1ff442ca
NF
1769 }
1770
1771 error_token_number = errtoken->value;
1772
89cab50d 1773 if (!no_parser_flag)
a70083a3 1774 output_token_defines (ftable);
1ff442ca 1775
d7020c20 1776 if (startval->class == unknown_sym)
a0f6b076 1777 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1778 else if (startval->class == token_sym)
a0f6b076 1779 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1780
1781 start_symbol = startval->value;
1782
89cab50d 1783 if (defines_flag)
1ff442ca 1784 {
a70083a3 1785 output_token_defines (fdefines);
1ff442ca
NF
1786
1787 if (!pure_parser)
1788 {
1789 if (spec_name_prefix)
a70083a3
AD
1790 fprintf (fdefines, "\nextern YYSTYPE %slval;\n",
1791 spec_name_prefix);
1ff442ca 1792 else
a70083a3 1793 fprintf (fdefines, "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1794 }
1795
1796 if (semantic_parser)
1797 for (i = ntokens; i < nsyms; i++)
1798 {
1799 /* don't make these for dummy nonterminals made by gensym. */
1800 if (*tags[i] != '@')
a70083a3 1801 fprintf (fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1802 }
1803#if 0
1804 /* `fdefines' is now a temporary file, so we need to copy its
1805 contents in `done', so we can't close it here. */
a70083a3 1806 fclose (fdefines);
1ff442ca
NF
1807 fdefines = NULL;
1808#endif
1809 }
1810}
a083fbbf 1811
1ff442ca 1812
a70083a3
AD
1813/*---------------------------------------------------------------.
1814| Convert the rules into the representation using RRHS, RLHS and |
1815| RITEMS. |
1816`---------------------------------------------------------------*/
1ff442ca 1817
4a120d45 1818static void
118fb205 1819packgram (void)
1ff442ca 1820{
a70083a3
AD
1821 int itemno;
1822 int ruleno;
1823 symbol_list *p;
1ff442ca
NF
1824
1825 bucket *ruleprec;
1826
d7913476
AD
1827 ritem = XCALLOC (short, nitems + 1);
1828 rlhs = XCALLOC (short, nrules) - 1;
1829 rrhs = XCALLOC (short, nrules) - 1;
1830 rprec = XCALLOC (short, nrules) - 1;
1831 rprecsym = XCALLOC (short, nrules) - 1;
1832 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1833
1834 itemno = 0;
1835 ruleno = 1;
1836
1837 p = grammar;
1838 while (p)
1839 {
1840 rlhs[ruleno] = p->sym->value;
1841 rrhs[ruleno] = itemno;
1842 ruleprec = p->ruleprec;
1843
1844 p = p->next;
1845 while (p && p->sym)
1846 {
1847 ritem[itemno++] = p->sym->value;
1848 /* A rule gets by default the precedence and associativity
1849 of the last token in it. */
d7020c20 1850 if (p->sym->class == token_sym)
1ff442ca
NF
1851 {
1852 rprec[ruleno] = p->sym->prec;
1853 rassoc[ruleno] = p->sym->assoc;
1854 }
a70083a3
AD
1855 if (p)
1856 p = p->next;
1ff442ca
NF
1857 }
1858
1859 /* If this rule has a %prec,
a70083a3 1860 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1861 if (ruleprec)
1862 {
a70083a3
AD
1863 rprec[ruleno] = ruleprec->prec;
1864 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1865 rprecsym[ruleno] = ruleprec->value;
1866 }
1867
1868 ritem[itemno++] = -ruleno;
1869 ruleno++;
1870
a70083a3
AD
1871 if (p)
1872 p = p->next;
1ff442ca
NF
1873 }
1874
1875 ritem[itemno] = 0;
1876}
a70083a3
AD
1877\f
1878/*-------------------------------------------------------------------.
1879| Read in the grammar specification and record it in the format |
1880| described in gram.h. All guards are copied into the FGUARD file |
1881| and all actions into FACTION, in each case forming the body of a C |
1882| function (YYGUARD or YYACTION) which contains a switch statement |
1883| to decide which guard or action to execute. |
1884`-------------------------------------------------------------------*/
1885
1886void
1887reader (void)
1888{
1889 start_flag = 0;
1890 startval = NULL; /* start symbol not specified yet. */
1891
1892#if 0
1893 /* initially assume token number translation not needed. */
1894 translations = 0;
1895#endif
1896 /* Nowadays translations is always set to 1, since we give `error' a
1897 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1898 */
1899 translations = 1;
1900
1901 nsyms = 1;
1902 nvars = 0;
1903 nrules = 0;
1904 nitems = 0;
1905 rline_allocated = 10;
d7913476 1906 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1907
1908 typed = 0;
1909 lastprec = 0;
1910
1911 gensym_count = 0;
1912
1913 semantic_parser = 0;
1914 pure_parser = 0;
a70083a3
AD
1915
1916 grammar = NULL;
1917
1918 init_lex ();
1919 lineno = 1;
1920
1921 /* Initialize the symbol table. */
1922 tabinit ();
1923 /* Construct the error token */
1924 errtoken = getsym ("error");
d7020c20 1925 errtoken->class = token_sym;
a70083a3
AD
1926 errtoken->user_token_number = 256; /* Value specified by POSIX. */
1927 /* Construct a token that represents all undefined literal tokens.
1928 It is always token number 2. */
1929 undeftoken = getsym ("$undefined.");
d7020c20 1930 undeftoken->class = token_sym;
a70083a3
AD
1931 undeftoken->user_token_number = 2;
1932
1933 /* Read the declaration section. Copy %{ ... %} groups to FTABLE
1934 and FDEFINES file. Also notice any %token, %left, etc. found
1935 there. */
1936 putc ('\n', ftable);
1937 fprintf (ftable, "\
1938/* %s, made from %s\n\
1939 by GNU bison %s. */\n\
89cab50d 1940\n", no_parser_flag ? "Bison-generated parse tables" : "A Bison parser", infile, VERSION);
a70083a3
AD
1941
1942 fputs ("#define YYBISON 1 /* Identify Bison output. */\n\n", ftable);
1943 read_declarations ();
1944 /* Start writing the guard and action files, if they are needed. */
1945 output_headers ();
1946 /* Read in the grammar, build grammar in list form. Write out
1947 guards and actions. */
1948 readgram ();
1949 /* Now we know whether we need the line-number stack. If we do,
1950 write its type into the .tab.h file. */
1951 if (fdefines)
1952 reader_output_yylsp (fdefines);
1953 /* Write closing delimiters for actions and guards. */
1954 output_trailers ();
89cab50d 1955 if (locations_flag)
a70083a3
AD
1956 fputs ("#define YYLSP_NEEDED\n\n", ftable);
1957 /* Assign the symbols their symbol numbers. Write #defines for the
1958 token symbols into FDEFINES if requested. */
1959 packsymbols ();
1960 /* Convert the grammar into the format described in gram.h. */
1961 packgram ();
1962 /* Free the symbol table data structure since symbols are now all
1963 referred to by symbol number. */
1964 free_symtab ();
1965}
1966
d7020c20 1967
a70083a3
AD
1968void
1969reader_output_yylsp (FILE *f)
1970{
89cab50d 1971 if (locations_flag)
d7020c20
AD
1972 fputs ("\
1973\n\
1974#ifndef YYLTYPE\n\
89cab50d
AD
1975typedef struct yyltype\n\
1976{\n\
1977 int timestamp;\n\
1978 int first_line;\n\
1979 int first_column;\
d7020c20 1980\n\
89cab50d
AD
1981 int last_line;\n\
1982 int last_column;\n\
1983 char *text;\n\
1984} yyltype;\n\
d7020c20 1985\n\
89cab50d 1986# define YYLTYPE yyltype\n\
d7020c20
AD
1987#endif\n\
1988\n",
1989 f);
a70083a3 1990}