]> git.saurik.com Git - bison.git/blame - src/reader.c
* lib/quote.h (PARAMS): New macro. Use it.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
a70083a3
AD
2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000
3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "quote.h"
1ff442ca 35
1ff442ca 36/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 37static int rline_allocated;
1ff442ca 38
a70083a3
AD
39typedef struct symbol_list
40{
41 struct symbol_list *next;
42 bucket *sym;
43 bucket *ruleprec;
44}
45symbol_list;
118fb205 46
1ff442ca 47int lineno;
1ff442ca 48char **tags;
d019d655 49short *user_toknums;
4a120d45
JT
50static symbol_list *grammar;
51static int start_flag;
52static bucket *startval;
1ff442ca
NF
53
54/* Nonzero if components of semantic values are used, implying
55 they must be unions. */
56static int value_components_used;
57
d7020c20
AD
58/* Nonzero if %union has been seen. */
59static int typed;
1ff442ca 60
d7020c20
AD
61/* Incremented for each %left, %right or %nonassoc seen */
62static int lastprec;
1ff442ca 63
d7020c20
AD
64/* Incremented for each generated symbol */
65static int gensym_count;
1ff442ca
NF
66
67static bucket *errtoken;
5b2e3c89 68static bucket *undeftoken;
0d533154 69\f
a70083a3 70
0d533154
AD
71/*===================\
72| Low level lexing. |
73\===================*/
943819bf
RS
74
75static void
118fb205 76skip_to_char (int target)
943819bf
RS
77{
78 int c;
79 if (target == '\n')
a0f6b076 80 complain (_(" Skipping to next \\n"));
943819bf 81 else
a0f6b076 82 complain (_(" Skipping to next %c"), target);
943819bf
RS
83
84 do
0d533154 85 c = skip_white_space ();
943819bf 86 while (c != target && c != EOF);
a083fbbf 87 if (c != EOF)
0d533154 88 ungetc (c, finput);
943819bf
RS
89}
90
91
0d533154
AD
/* Read a decimal integer, optionally preceded by `-', from STREAM and
   return its value.  The first character that does not belong to the
   number is pushed back onto STREAM.  When no digit is present the
   result is 0 (a lone `-' is still consumed).

   A magnitude that does not fit in an `int' is clamped to INT_MAX
   instead of overflowing, which would be undefined behavior; the
   excess digits are nevertheless consumed.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* True exactly when 10 * n + digit would exceed INT_MAX.  */
      if (n > (INT_MAX - digit) / 10)
	n = INT_MAX;
      else
	n = 10 * n + digit;

      c = getc (stream);
    }

  /* If C is EOF this ungetc fails and leaves STREAM unchanged.  */
  ungetc (c, stream);

  return sign * n;
}
119\f
79282c5a
AD
120/*--------------------------------------------------------------.
121| Get the data type (alternative in the union) of the value for |
122| symbol N in rule RULE. |
123`--------------------------------------------------------------*/
124
125static char *
126get_type_name (int n, symbol_list * rule)
127{
128 int i;
129 symbol_list *rp;
130
131 if (n < 0)
132 {
133 complain (_("invalid $ value"));
134 return NULL;
135 }
136
137 rp = rule;
138 i = 0;
139
140 while (i < n)
141 {
142 rp = rp->next;
143 if (rp == NULL || rp->sym == NULL)
144 {
145 complain (_("invalid $ value"));
146 return NULL;
147 }
148 i++;
149 }
150
151 return rp->sym->type_name;
152}
153\f
0d533154
AD
154/*-------------------------------------------------------------------.
155| Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of |
156| the string (either ' or "). |
157`-------------------------------------------------------------------*/
ae3c3164
AD
158
159static inline void
4a120d45 160copy_string (FILE *fin, FILE *fout, int match)
ae3c3164
AD
161{
162 int c;
163
4a120d45
JT
164 putc (match, fout);
165 c = getc (fin);
ae3c3164
AD
166
167 while (c != match)
168 {
169 if (c == EOF)
170 fatal (_("unterminated string at end of file"));
171 if (c == '\n')
172 {
a0f6b076 173 complain (_("unterminated string"));
4a120d45 174 ungetc (c, fin);
ae3c3164
AD
175 c = match; /* invent terminator */
176 continue;
177 }
178
a70083a3 179 putc (c, fout);
ae3c3164
AD
180
181 if (c == '\\')
182 {
4a120d45 183 c = getc (fin);
ae3c3164
AD
184 if (c == EOF)
185 fatal (_("unterminated string at end of file"));
4a120d45 186 putc (c, fout);
ae3c3164
AD
187 if (c == '\n')
188 lineno++;
189 }
190
a70083a3 191 c = getc (fin);
ae3c3164
AD
192 }
193
a70083a3 194 putc (c, fout);
ae3c3164
AD
195}
196
197
550a72a3
AD
198/*----------------------------------------------------------------.
199| Dump the wannabee comment from IN to OUT1 and OUT2. In fact we |
200| just saw a `/', which might or might not be a comment. In any |
201| case, copy what we saw. |
202| |
203| OUT2 might be NULL. |
204`----------------------------------------------------------------*/
ae3c3164
AD
205
206static inline void
550a72a3 207copy_comment2 (FILE *fin, FILE *out1, FILE *out2)
ae3c3164
AD
208{
209 int cplus_comment;
a70083a3 210 int ended;
550a72a3
AD
211 int c;
212
213 /* We read a `/', output it. */
214 putc ('/', out1);
215 if (out2)
216 putc ('/', out2);
217
218 switch ((c = getc (fin)))
219 {
220 case '/':
221 cplus_comment = 1;
222 break;
223 case '*':
224 cplus_comment = 0;
225 break;
226 default:
227 ungetc (c, fin);
228 return;
229 }
ae3c3164 230
27821bff
AD
231 putc (c, out1);
232 if (out2)
233 putc (c, out2);
550a72a3 234 c = getc (fin);
ae3c3164
AD
235
236 ended = 0;
237 while (!ended)
238 {
239 if (!cplus_comment && c == '*')
240 {
241 while (c == '*')
242 {
27821bff
AD
243 putc (c, out1);
244 if (out2)
245 putc (c, out2);
550a72a3 246 c = getc (fin);
ae3c3164
AD
247 }
248
249 if (c == '/')
250 {
a70083a3 251 putc (c, out1);
27821bff 252 if (out2)
a70083a3 253 putc (c, out2);
ae3c3164
AD
254 ended = 1;
255 }
256 }
257 else if (c == '\n')
258 {
259 lineno++;
27821bff
AD
260 putc (c, out1);
261 if (out2)
262 putc (c, out2);
ae3c3164
AD
263 if (cplus_comment)
264 ended = 1;
265 else
550a72a3 266 c = getc (fin);
ae3c3164
AD
267 }
268 else if (c == EOF)
269 fatal (_("unterminated comment"));
270 else
271 {
27821bff
AD
272 putc (c, out1);
273 if (out2)
274 putc (c, out2);
550a72a3 275 c = getc (fin);
ae3c3164
AD
276 }
277 }
278}
279
280
550a72a3
AD
/* Single-output flavor of copy_comment2: copy the text starting with
   the `/' just read (a comment, or the lone slash) from FIN to FOUT
   only.  */

static inline void
copy_comment (FILE *fin, FILE *fout)
{
  FILE *no_second_output = NULL;

  copy_comment2 (fin, fout, no_second_output);
}
291
292
a70083a3
AD
293/*-----------------------------------------------------------------.
294| FIN is pointing to a location (i.e., a `@'). Output to FOUT a |
295| reference to this location. STACK_OFFSET is the number of values |
296| in the current rule so far, which says where to find `$0' with |
297| respect to the top of the stack. |
298`-----------------------------------------------------------------*/
1ff442ca 299
a70083a3
AD
300static inline void
301copy_at (FILE *fin, FILE *fout, int stack_offset)
1ff442ca 302{
a70083a3 303 int c;
1ff442ca 304
a70083a3
AD
305 c = getc (fin);
306 if (c == '$')
1ff442ca 307 {
a70083a3 308 fprintf (fout, "yyloc");
89cab50d 309 locations_flag = 1;
a70083a3
AD
310 }
311 else if (isdigit (c) || c == '-')
312 {
313 int n;
1ff442ca 314
a70083a3
AD
315 ungetc (c, fin);
316 n = read_signed_integer (fin);
943819bf 317
a70083a3 318 fprintf (fout, "yylsp[%d]", n - stack_offset);
89cab50d 319 locations_flag = 1;
1ff442ca 320 }
a70083a3 321 else
ff4a34be
AD
322 {
323 char buf[] = "@c";
324 buf[1] = c;
325 complain (_("%s is invalid"), quote (buf));
326 }
1ff442ca 327}
79282c5a
AD
328
329
330/*-------------------------------------------------------------------.
331| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
332| |
333| Possible inputs: $[<TYPENAME>]($|integer) |
334| |
335| Output to FOUT a reference to this semantic value. STACK_OFFSET is |
336| the number of values in the current rule so far, which says where |
337| to find `$0' with respect to the top of the stack. |
338`-------------------------------------------------------------------*/
339
340static inline void
341copy_dollar (FILE *fin, FILE *fout,
342 symbol_list *rule, int stack_offset)
343{
344 int c = getc (fin);
345 char *type_name = NULL;
346
f282676b 347 /* Get the type name if explicit. */
79282c5a
AD
348 if (c == '<')
349 {
f282676b 350 read_type_name (fin);
79282c5a
AD
351 type_name = token_buffer;
352 value_components_used = 1;
79282c5a
AD
353 c = getc (fin);
354 }
355
356 if (c == '$')
357 {
358 fprintf (fout, "yyval");
359 if (!type_name)
360 type_name = get_type_name (0, rule);
361 if (type_name)
362 fprintf (fout, ".%s", type_name);
363 if (!type_name && typed)
364 complain (_("$$ of `%s' has no declared type"),
365 rule->sym->tag);
366 }
367 else if (isdigit (c) || c == '-')
368 {
369 int n;
370 ungetc (c, fin);
371 n = read_signed_integer (fin);
372
373 if (!type_name && n > 0)
374 type_name = get_type_name (n, rule);
375
376 fprintf (fout, "yyvsp[%d]", n - stack_offset);
377 if (type_name)
378 fprintf (fout, ".%s", type_name);
379 if (!type_name && typed)
380 complain (_("$%d of `%s' has no declared type"),
381 n, rule->sym->tag);
382 }
383 else
384 {
385 char buf[] = "$c";
386 buf[1] = c;
387 complain (_("%s is invalid"), quote (buf));
388 }
389}
a70083a3
AD
390\f
391/*-------------------------------------------------------------------.
392| Copy the contents of a `%{ ... %}' into the definitions file. The |
393| `%{' has already been read. Return after reading the `%}'. |
394`-------------------------------------------------------------------*/
1ff442ca 395
4a120d45 396static void
118fb205 397copy_definition (void)
1ff442ca 398{
a70083a3 399 int c;
ae3c3164 400 /* -1 while reading a character if prev char was %. */
a70083a3 401 int after_percent;
1ff442ca 402
89cab50d 403 if (!no_lines_flag)
a70083a3 404 fprintf (fattrs, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
405
406 after_percent = 0;
407
ae3c3164 408 c = getc (finput);
1ff442ca
NF
409
410 for (;;)
411 {
412 switch (c)
413 {
414 case '\n':
a70083a3 415 putc (c, fattrs);
1ff442ca
NF
416 lineno++;
417 break;
418
419 case '%':
a70083a3 420 after_percent = -1;
1ff442ca 421 break;
a083fbbf 422
1ff442ca
NF
423 case '\'':
424 case '"':
ae3c3164 425 copy_string (finput, fattrs, c);
1ff442ca
NF
426 break;
427
428 case '/':
550a72a3 429 copy_comment (finput, fattrs);
1ff442ca
NF
430 break;
431
432 case EOF:
a70083a3 433 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
434
435 default:
a70083a3 436 putc (c, fattrs);
1ff442ca
NF
437 }
438
a70083a3 439 c = getc (finput);
1ff442ca
NF
440
441 if (after_percent)
442 {
443 if (c == '}')
444 return;
a70083a3 445 putc ('%', fattrs);
1ff442ca
NF
446 }
447 after_percent = 0;
448
449 }
450
451}
452
453
d7020c20
AD
454/*-------------------------------------------------------------------.
455| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
456| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
457| are reversed. |
458`-------------------------------------------------------------------*/
1ff442ca 459
4a120d45 460static void
d7020c20 461parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 462{
a70083a3
AD
463 int token = 0;
464 char *typename = 0;
1ff442ca 465
1e9798d5
AD
466 /* The symbol being defined. */
467 struct bucket *symbol = NULL;
468
469 /* After `%token' and `%nterm', any number of symbols maybe be
470 defined. */
1ff442ca
NF
471 for (;;)
472 {
e6011337
JT
473 int tmp_char = ungetc (skip_white_space (), finput);
474
1e9798d5
AD
475 /* `%' (for instance from `%token', or from `%%' etc.) is the
476 only valid means to end this declaration. */
e6011337 477 if (tmp_char == '%')
1ff442ca 478 return;
e6011337 479 if (tmp_char == EOF)
a0f6b076 480 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 481
a70083a3 482 token = lex ();
1ff442ca 483 if (token == COMMA)
943819bf
RS
484 {
485 symbol = NULL;
486 continue;
487 }
1ff442ca
NF
488 if (token == TYPENAME)
489 {
95e36146 490 typename = xstrdup (token_buffer);
1ff442ca 491 value_components_used = 1;
943819bf
RS
492 symbol = NULL;
493 }
a70083a3 494 else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
943819bf 495 {
8e03724b
AD
496 if (symval->alias)
497 warn (_("symbol `%s' used more than once as a literal string"),
498 symval->tag);
499 else if (symbol->alias)
500 warn (_("symbol `%s' given more than one literal string"),
501 symbol->tag);
502 else
503 {
504 symval->class = token_sym;
505 symval->type_name = typename;
506 symval->user_token_number = symbol->user_token_number;
507 symbol->user_token_number = SALIAS;
508 symval->alias = symbol;
509 symbol->alias = symval;
510 /* symbol and symval combined are only one symbol */
511 nsyms--;
512 }
943819bf 513 translations = 1;
8e03724b 514 symbol = NULL;
1ff442ca
NF
515 }
516 else if (token == IDENTIFIER)
517 {
518 int oldclass = symval->class;
943819bf 519 symbol = symval;
1ff442ca 520
943819bf 521 if (symbol->class == what_is_not)
a0f6b076 522 complain (_("symbol %s redefined"), symbol->tag);
943819bf 523 symbol->class = what_is;
d7020c20 524 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 525 symbol->value = nvars++;
1ff442ca
NF
526
527 if (typename)
528 {
943819bf
RS
529 if (symbol->type_name == NULL)
530 symbol->type_name = typename;
a70083a3 531 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 532 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
533 }
534 }
943819bf 535 else if (symbol && token == NUMBER)
a70083a3 536 {
943819bf 537 symbol->user_token_number = numval;
1ff442ca 538 translations = 1;
a70083a3 539 }
1ff442ca 540 else
943819bf 541 {
a0f6b076 542 complain (_("`%s' is invalid in %s"),
d7020c20 543 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 544 skip_to_char ('%');
943819bf 545 }
1ff442ca
NF
546 }
547
548}
549
1ff442ca 550
d7020c20
AD
551/*------------------------------.
552| Parse what comes after %start |
553`------------------------------*/
1ff442ca 554
4a120d45 555static void
118fb205 556parse_start_decl (void)
1ff442ca
NF
557{
558 if (start_flag)
27821bff
AD
559 complain (_("multiple %s declarations"), "%start");
560 if (lex () != IDENTIFIER)
561 complain (_("invalid %s declaration"), "%start");
943819bf
RS
562 else
563 {
564 start_flag = 1;
565 startval = symval;
566 }
1ff442ca
NF
567}
568
a70083a3
AD
569/*-----------------------------------------------------------.
570| read in a %type declaration and record its information for |
571| get_type_name to access |
572`-----------------------------------------------------------*/
573
574static void
575parse_type_decl (void)
576{
a70083a3
AD
577 char *name;
578
579 if (lex () != TYPENAME)
580 {
581 complain ("%s", _("%type declaration has no <typename>"));
582 skip_to_char ('%');
583 return;
584 }
585
95e36146 586 name = xstrdup (token_buffer);
a70083a3
AD
587
588 for (;;)
589 {
590 int t;
591 int tmp_char = ungetc (skip_white_space (), finput);
592
593 if (tmp_char == '%')
594 return;
595 if (tmp_char == EOF)
596 fatal (_("Premature EOF after %s"), token_buffer);
597
598 t = lex ();
599
600 switch (t)
1ff442ca
NF
601 {
602
603 case COMMA:
604 case SEMICOLON:
605 break;
606
607 case IDENTIFIER:
608 if (symval->type_name == NULL)
609 symval->type_name = name;
a70083a3 610 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 611 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
612
613 break;
614
615 default:
a0f6b076
AD
616 complain (_("invalid %%type declaration due to item: %s"),
617 token_buffer);
a70083a3 618 skip_to_char ('%');
1ff442ca
NF
619 }
620 }
621}
622
623
624
d7020c20
AD
625/*----------------------------------------------------------------.
626| Read in a %left, %right or %nonassoc declaration and record its |
627| information. |
628`----------------------------------------------------------------*/
1ff442ca 629
4a120d45 630static void
d7020c20 631parse_assoc_decl (associativity assoc)
1ff442ca 632{
a70083a3
AD
633 char *name = NULL;
634 int prev = 0;
1ff442ca 635
a70083a3 636 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 637
1ff442ca
NF
638 for (;;)
639 {
a70083a3 640 int t;
e6011337 641 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 642
e6011337 643 if (tmp_char == '%')
1ff442ca 644 return;
e6011337 645 if (tmp_char == EOF)
a0f6b076 646 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 647
a70083a3 648 t = lex ();
1ff442ca
NF
649
650 switch (t)
651 {
1ff442ca 652 case TYPENAME:
95e36146 653 name = xstrdup (token_buffer);
1ff442ca
NF
654 break;
655
656 case COMMA:
657 break;
658
659 case IDENTIFIER:
660 if (symval->prec != 0)
a0f6b076 661 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
662 symval->prec = lastprec;
663 symval->assoc = assoc;
d7020c20 664 if (symval->class == nterm_sym)
a0f6b076 665 complain (_("symbol %s redefined"), symval->tag);
d7020c20 666 symval->class = token_sym;
1ff442ca 667 if (name)
a70083a3 668 { /* record the type, if one is specified */
1ff442ca
NF
669 if (symval->type_name == NULL)
670 symval->type_name = name;
a70083a3 671 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 672 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
673 }
674 break;
675
676 case NUMBER:
677 if (prev == IDENTIFIER)
a70083a3 678 {
1ff442ca
NF
679 symval->user_token_number = numval;
680 translations = 1;
a70083a3
AD
681 }
682 else
683 {
684 complain (_
685 ("invalid text (%s) - number should be after identifier"),
686token_buffer);
687 skip_to_char ('%');
688 }
1ff442ca
NF
689 break;
690
691 case SEMICOLON:
692 return;
693
694 default:
a0f6b076 695 complain (_("unexpected item: %s"), token_buffer);
a70083a3 696 skip_to_char ('%');
1ff442ca
NF
697 }
698
699 prev = t;
700
701 }
702}
703
704
705
d7020c20
AD
706/*-------------------------------------------------------------------.
707| Copy the union declaration into fattrs (and fdefines), where it is |
708| made into the definition of YYSTYPE, the type of elements of the |
709| parser value stack. |
710`-------------------------------------------------------------------*/
1ff442ca 711
4a120d45 712static void
118fb205 713parse_union_decl (void)
1ff442ca 714{
a70083a3
AD
715 int c;
716 int count = 0;
1ff442ca
NF
717
718 if (typed)
27821bff 719 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
720
721 typed = 1;
722
89cab50d 723 if (!no_lines_flag)
27821bff 724 fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
1ff442ca 725 else
27821bff 726 fprintf (fattrs, "\n");
1ff442ca 727
27821bff 728 fprintf (fattrs, "typedef union");
1ff442ca 729 if (fdefines)
27821bff 730 fprintf (fdefines, "typedef union");
1ff442ca 731
27821bff 732 c = getc (finput);
1ff442ca
NF
733
734 while (c != EOF)
735 {
27821bff 736 putc (c, fattrs);
1ff442ca 737 if (fdefines)
27821bff 738 putc (c, fdefines);
1ff442ca
NF
739
740 switch (c)
741 {
742 case '\n':
743 lineno++;
744 break;
745
746 case '/':
550a72a3 747 copy_comment2 (finput, fattrs, fdefines);
1ff442ca
NF
748 break;
749
1ff442ca
NF
750 case '{':
751 count++;
752 break;
753
754 case '}':
755 if (count == 0)
27821bff 756 complain (_("unmatched %s"), "`}'");
1ff442ca 757 count--;
943819bf 758 if (count <= 0)
1ff442ca 759 {
27821bff 760 fprintf (fattrs, " YYSTYPE;\n");
1ff442ca 761 if (fdefines)
27821bff 762 fprintf (fdefines, " YYSTYPE;\n");
1ff442ca 763 /* JF don't choke on trailing semi */
27821bff
AD
764 c = skip_white_space ();
765 if (c != ';')
a70083a3 766 ungetc (c, finput);
1ff442ca
NF
767 return;
768 }
769 }
770
27821bff 771 c = getc (finput);
1ff442ca
NF
772 }
773}
774
d7020c20
AD
775
776/*-------------------------------------------------------.
777| Parse the declaration %expect N which says to expect N |
778| shift-reduce conflicts. |
779`-------------------------------------------------------*/
1ff442ca 780
4a120d45 781static void
118fb205 782parse_expect_decl (void)
1ff442ca 783{
131e2fef 784 int c = skip_white_space ();
1ff442ca
NF
785 ungetc (c, finput);
786
131e2fef 787 if (!isdigit (c))
79282c5a 788 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
789 else
790 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
791}
792
a70083a3
AD
793
794/*-------------------------------------------------------------------.
795| Parse what comes after %thong. the full syntax is |
796| |
797| %thong <type> token number literal |
798| |
799| the <type> or number may be omitted. The number specifies the |
800| user_token_number. |
801| |
802| Two symbols are entered in the table, one for the token symbol and |
803| one for the literal. Both are given the <type>, if any, from the |
804| declaration. The ->user_token_number of the first is SALIAS and |
805| the ->user_token_number of the second is set to the number, if |
806| any, from the declaration. The two symbols are linked via |
807| pointers in their ->alias fields. |
808| |
809| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
810| only the literal string is retained it is the literal string that |
811| is output to yytname |
812`-------------------------------------------------------------------*/
813
814static void
815parse_thong_decl (void)
7b306f52 816{
a70083a3
AD
817 int token;
818 struct bucket *symbol;
819 char *typename = 0;
95e36146 820 int usrtoknum;
7b306f52 821
a70083a3
AD
822 translations = 1;
823 token = lex (); /* fetch typename or first token */
824 if (token == TYPENAME)
7b306f52 825 {
95e36146 826 typename = xstrdup (token_buffer);
a70083a3
AD
827 value_components_used = 1;
828 token = lex (); /* fetch first token */
7b306f52 829 }
7b306f52 830
a70083a3 831 /* process first token */
7b306f52 832
a70083a3
AD
833 if (token != IDENTIFIER)
834 {
835 complain (_("unrecognized item %s, expected an identifier"),
836 token_buffer);
837 skip_to_char ('%');
838 return;
7b306f52 839 }
d7020c20 840 symval->class = token_sym;
a70083a3
AD
841 symval->type_name = typename;
842 symval->user_token_number = SALIAS;
843 symbol = symval;
7b306f52 844
a70083a3 845 token = lex (); /* get number or literal string */
1ff442ca 846
a70083a3 847 if (token == NUMBER)
943819bf 848 {
a70083a3
AD
849 usrtoknum = numval;
850 token = lex (); /* okay, did number, now get literal */
943819bf 851 }
a70083a3
AD
852 else
853 usrtoknum = 0;
1ff442ca 854
a70083a3 855 /* process literal string token */
1ff442ca 856
a70083a3 857 if (token != IDENTIFIER || *symval->tag != '\"')
1ff442ca 858 {
a70083a3
AD
859 complain (_("expected string constant instead of %s"), token_buffer);
860 skip_to_char ('%');
861 return;
1ff442ca 862 }
d7020c20 863 symval->class = token_sym;
a70083a3
AD
864 symval->type_name = typename;
865 symval->user_token_number = usrtoknum;
1ff442ca 866
a70083a3
AD
867 symval->alias = symbol;
868 symbol->alias = symval;
1ff442ca 869
79282c5a
AD
870 /* symbol and symval combined are only one symbol. */
871 nsyms--;
a70083a3 872}
3cef001a 873
d7020c20 874
a70083a3
AD
875/*----------------------------------------------------------------.
876| Read from finput until `%%' is seen. Discard the `%%'. Handle |
877| any `%' declarations, and copy the contents of any `%{ ... %}' |
878| groups to fattrs. |
879`----------------------------------------------------------------*/
1ff442ca 880
4a120d45 881static void
a70083a3 882read_declarations (void)
1ff442ca 883{
a70083a3
AD
884 int c;
885 int tok;
1ff442ca 886
a70083a3 887 for (;;)
1ff442ca 888 {
a70083a3 889 c = skip_white_space ();
1ff442ca 890
a70083a3
AD
891 if (c == '%')
892 {
893 tok = parse_percent_token ();
1ff442ca 894
a70083a3 895 switch (tok)
943819bf 896 {
a70083a3
AD
897 case TWO_PERCENTS:
898 return;
1ff442ca 899
a70083a3
AD
900 case PERCENT_LEFT_CURLY:
901 copy_definition ();
902 break;
1ff442ca 903
a70083a3 904 case TOKEN:
d7020c20 905 parse_token_decl (token_sym, nterm_sym);
a70083a3 906 break;
1ff442ca 907
a70083a3 908 case NTERM:
d7020c20 909 parse_token_decl (nterm_sym, token_sym);
a70083a3 910 break;
1ff442ca 911
a70083a3
AD
912 case TYPE:
913 parse_type_decl ();
914 break;
1ff442ca 915
a70083a3
AD
916 case START:
917 parse_start_decl ();
918 break;
118fb205 919
a70083a3
AD
920 case UNION:
921 parse_union_decl ();
922 break;
1ff442ca 923
a70083a3
AD
924 case EXPECT:
925 parse_expect_decl ();
926 break;
927 case THONG:
928 parse_thong_decl ();
929 break;
d7020c20 930
a70083a3 931 case LEFT:
d7020c20 932 parse_assoc_decl (left_assoc);
a70083a3 933 break;
1ff442ca 934
a70083a3 935 case RIGHT:
d7020c20 936 parse_assoc_decl (right_assoc);
a70083a3 937 break;
1ff442ca 938
a70083a3 939 case NONASSOC:
d7020c20 940 parse_assoc_decl (non_assoc);
a70083a3 941 break;
1ff442ca 942
a70083a3
AD
943 case SEMANTIC_PARSER:
944 if (semantic_parser == 0)
945 {
946 semantic_parser = 1;
947 open_extra_files ();
948 }
949 break;
1ff442ca 950
a70083a3
AD
951 case PURE_PARSER:
952 pure_parser = 1;
953 break;
1ff442ca 954
a70083a3
AD
955 case NOOP:
956 break;
1ff442ca 957
a70083a3
AD
958 default:
959 complain (_("unrecognized: %s"), token_buffer);
960 skip_to_char ('%');
961 }
962 }
963 else if (c == EOF)
964 fatal (_("no input grammar"));
965 else
966 {
ff4a34be
AD
967 char buf[] = "c";
968 buf[0] = c;
969 complain (_("unknown character: %s"), quote (buf));
a70083a3 970 skip_to_char ('%');
1ff442ca 971 }
1ff442ca 972 }
1ff442ca 973}
a70083a3
AD
974\f
975/*-------------------------------------------------------------------.
976| Assuming that a `{' has just been seen, copy everything up to the |
977| matching `}' into the actions file. STACK_OFFSET is the number of |
978| values in the current rule so far, which says where to find `$0' |
979| with respect to the top of the stack. |
980`-------------------------------------------------------------------*/
1ff442ca 981
4a120d45 982static void
79282c5a 983copy_action (symbol_list *rule, int stack_offset)
1ff442ca 984{
a70083a3 985 int c;
a70083a3 986 int count;
1ff442ca
NF
987
988 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
989 if (semantic_parser)
990 stack_offset = 0;
1ff442ca 991
41aca2e0 992 fprintf (faction, "\ncase %d:\n", nrules);
89cab50d 993 if (!no_lines_flag)
41aca2e0
AD
994 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
995 putc ('{', faction);
1ff442ca
NF
996
997 count = 1;
a70083a3 998 c = getc (finput);
1ff442ca
NF
999
1000 while (count > 0)
1001 {
1002 while (c != '}')
a70083a3
AD
1003 {
1004 switch (c)
1ff442ca
NF
1005 {
1006 case '\n':
a70083a3 1007 putc (c, faction);
1ff442ca
NF
1008 lineno++;
1009 break;
1010
1011 case '{':
a70083a3 1012 putc (c, faction);
1ff442ca
NF
1013 count++;
1014 break;
1015
1016 case '\'':
1017 case '"':
ca36d2ef 1018 copy_string (finput, faction, c);
1ff442ca
NF
1019 break;
1020
1021 case '/':
550a72a3 1022 copy_comment (finput, faction);
1ff442ca
NF
1023 break;
1024
1025 case '$':
79282c5a 1026 copy_dollar (finput, faction, rule, stack_offset);
1ff442ca
NF
1027 break;
1028
1029 case '@':
7b306f52 1030 copy_at (finput, faction, stack_offset);
6666f98f 1031 break;
1ff442ca
NF
1032
1033 case EOF:
27821bff 1034 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1035
1036 default:
a70083a3
AD
1037 putc (c, faction);
1038 }
1039
1040 c = getc (finput);
1041 }
1042
1043 /* above loop exits when c is '}' */
1044
1045 if (--count)
1046 {
1047 putc (c, faction);
1048 c = getc (finput);
1049 }
1050 }
1051
1052 fprintf (faction, ";\n break;}");
1053}
1054\f
1055/*-------------------------------------------------------------------.
1056| After `%guard' is seen in the input file, copy the actual guard |
1057| into the guards file. If the guard is followed by an action, copy |
1058| that into the actions file. STACK_OFFSET is the number of values |
1059| in the current rule so far, which says where to find `$0' with |
1060| respect to the top of the stack, for the simple parser in which |
1061| the stack is not popped until after the guard is run. |
1062`-------------------------------------------------------------------*/
1063
1064static void
79282c5a 1065copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1066{
1067 int c;
a70083a3 1068 int count;
a70083a3
AD
1069 int brace_flag = 0;
1070
1071 /* offset is always 0 if parser has already popped the stack pointer */
1072 if (semantic_parser)
1073 stack_offset = 0;
1074
1075 fprintf (fguard, "\ncase %d:\n", nrules);
89cab50d 1076 if (!no_lines_flag)
a70083a3
AD
1077 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1078 putc ('{', fguard);
1079
1080 count = 0;
1081 c = getc (finput);
1082
1083 while (brace_flag ? (count > 0) : (c != ';'))
1084 {
1085 switch (c)
1086 {
1087 case '\n':
1088 putc (c, fguard);
1089 lineno++;
1090 break;
1091
1092 case '{':
1093 putc (c, fguard);
1094 brace_flag = 1;
1095 count++;
1096 break;
1097
1098 case '}':
1099 putc (c, fguard);
1100 if (count > 0)
1101 count--;
1102 else
1103 {
1104 complain (_("unmatched %s"), "`}'");
1105 c = getc (finput); /* skip it */
1106 }
1107 break;
1108
1109 case '\'':
1110 case '"':
1111 copy_string (finput, fguard, c);
1112 break;
1113
1114 case '/':
550a72a3 1115 copy_comment (finput, fguard);
a70083a3
AD
1116 break;
1117
1118 case '$':
79282c5a 1119 copy_dollar (finput, fguard, rule, stack_offset);
a70083a3 1120 break;
1ff442ca 1121
a70083a3
AD
1122 case '@':
1123 copy_at (finput, fguard, stack_offset);
1124 break;
1ff442ca 1125
a70083a3
AD
1126 case EOF:
1127 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1128
a70083a3
AD
1129 default:
1130 putc (c, fguard);
1ff442ca 1131 }
a70083a3
AD
1132
1133 if (c != '}' || count != 0)
1134 c = getc (finput);
1ff442ca
NF
1135 }
1136
a70083a3
AD
1137 c = skip_white_space ();
1138
1139 fprintf (fguard, ";\n break;}");
1140 if (c == '{')
1141 copy_action (rule, stack_offset);
1142 else if (c == '=')
1143 {
1144 c = getc (finput); /* why not skip_white_space -wjh */
1145 if (c == '{')
1146 copy_action (rule, stack_offset);
1147 }
1148 else
1149 ungetc (c, finput);
1ff442ca 1150}
a70083a3
AD
1151\f
1152
1153static void
1154record_rule_line (void)
1155{
1156 /* Record each rule's source line number in rline table. */
1ff442ca 1157
a70083a3
AD
1158 if (nrules >= rline_allocated)
1159 {
1160 rline_allocated = nrules * 2;
d7913476 1161 rline = XREALLOC (rline, short, rline_allocated);
a70083a3
AD
1162 }
1163 rline[nrules] = lineno;
1164}
1ff442ca
NF
1165
1166
a70083a3
AD
1167/*-------------------------------------------------------------------.
1168| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1169| with the user's names. |
1170`-------------------------------------------------------------------*/
1ff442ca 1171
4a120d45 1172static bucket *
118fb205 1173gensym (void)
1ff442ca 1174{
a70083a3 1175 bucket *sym;
1ff442ca
NF
1176
1177 sprintf (token_buffer, "@%d", ++gensym_count);
a70083a3 1178 sym = getsym (token_buffer);
d7020c20 1179 sym->class = nterm_sym;
1ff442ca 1180 sym->value = nvars++;
36281465 1181 return sym;
1ff442ca
NF
1182}
1183
a70083a3
AD
#if 0
/*-----------------------------------------------------------------.
| Read a %type declaration and record the type name on each        |
| identifier it lists.  Return the first token not consumed.       |
| This is unused: it is only called from the #if 0 part of         |
| readgram.                                                        |
`-----------------------------------------------------------------*/

static int
get_type (void)
{
  int t;
  char *name;

  t = lex ();
  if (t != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return t;
    }

  /* One copy of the type name is shared by every symbol listed.  */
  name = xstrdup (token_buffer);

  for (;;)
    {
      t = lex ();

      /* A semicolon ends the declaration: hand back what follows.  */
      if (t == SEMICOLON)
        return lex ();

      /* Commas between the identifiers are simply skipped.  */
      if (t == COMMA)
        continue;

      /* Anything but an identifier belongs to the caller.  */
      if (t != IDENTIFIER)
        return t;

      if (symval->type_name == NULL)
        symval->type_name = name;
      else if (strcmp (name, symval->type_name) != 0)
        complain (_("type redeclaration for %s"), symval->tag);
    }
}

#endif
1235\f
1236/*------------------------------------------------------------------.
1237| Parse the input grammar into a one symbol_list structure. Each |
1238| rule is represented by a sequence of symbols: the left hand side |
1239| followed by the contents of the right hand side, followed by a |
1240| null pointer instead of a symbol to terminate the rule. The next |
1241| symbol is the lhs of the following rule. |
1242| |
1243| All guards and actions are copied out to the appropriate files, |
1244| labelled by the rule number they apply to. |
1245`------------------------------------------------------------------*/
1ff442ca 1246
4a120d45 1247static void
118fb205 1248readgram (void)
1ff442ca 1249{
a70083a3
AD
1250 int t;
1251 bucket *lhs = NULL;
1252 symbol_list *p;
1253 symbol_list *p1;
1254 bucket *bp;
1ff442ca 1255
ff4a34be
AD
1256 /* Points to first symbol_list of current rule. its symbol is the
1257 lhs of the rule. */
1258 symbol_list *crule;
1259 /* Points to the symbol_list preceding crule. */
1260 symbol_list *crule1;
1ff442ca
NF
1261
1262 p1 = NULL;
1263
a70083a3 1264 t = lex ();
1ff442ca
NF
1265
1266 while (t != TWO_PERCENTS && t != ENDFILE)
1267 {
1268 if (t == IDENTIFIER || t == BAR)
1269 {
89cab50d 1270 int action_flag = 0;
ff4a34be
AD
1271 /* Number of symbols in rhs of this rule so far */
1272 int rulelength = 0;
1ff442ca
NF
1273 int xactions = 0; /* JF for error checking */
1274 bucket *first_rhs = 0;
1275
1276 if (t == IDENTIFIER)
1277 {
1278 lhs = symval;
943819bf
RS
1279
1280 if (!start_flag)
1281 {
1282 startval = lhs;
1283 start_flag = 1;
1284 }
a083fbbf 1285
a70083a3 1286 t = lex ();
1ff442ca 1287 if (t != COLON)
943819bf 1288 {
a0f6b076 1289 complain (_("ill-formed rule: initial symbol not followed by colon"));
a70083a3 1290 unlex (t);
943819bf 1291 }
1ff442ca
NF
1292 }
1293
943819bf 1294 if (nrules == 0 && t == BAR)
1ff442ca 1295 {
a0f6b076 1296 complain (_("grammar starts with vertical bar"));
943819bf 1297 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1298 }
1ff442ca
NF
1299 /* start a new rule and record its lhs. */
1300
1301 nrules++;
1302 nitems++;
1303
1304 record_rule_line ();
1305
d7913476 1306 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1307 p->sym = lhs;
1308
1309 crule1 = p1;
1310 if (p1)
1311 p1->next = p;
1312 else
1313 grammar = p;
1314
1315 p1 = p;
1316 crule = p;
1317
1318 /* mark the rule's lhs as a nonterminal if not already so. */
1319
d7020c20 1320 if (lhs->class == unknown_sym)
1ff442ca 1321 {
d7020c20 1322 lhs->class = nterm_sym;
1ff442ca
NF
1323 lhs->value = nvars;
1324 nvars++;
1325 }
d7020c20 1326 else if (lhs->class == token_sym)
a0f6b076 1327 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1328
1329 /* read the rhs of the rule. */
1330
1331 for (;;)
1332 {
a70083a3 1333 t = lex ();
943819bf
RS
1334 if (t == PREC)
1335 {
a70083a3 1336 t = lex ();
943819bf 1337 crule->ruleprec = symval;
a70083a3 1338 t = lex ();
943819bf 1339 }
1ff442ca 1340
a70083a3
AD
1341 if (!(t == IDENTIFIER || t == LEFT_CURLY))
1342 break;
1ff442ca
NF
1343
1344 /* If next token is an identifier, see if a colon follows it.
a70083a3 1345 If one does, exit this rule now. */
1ff442ca
NF
1346 if (t == IDENTIFIER)
1347 {
a70083a3
AD
1348 bucket *ssave;
1349 int t1;
1ff442ca
NF
1350
1351 ssave = symval;
a70083a3
AD
1352 t1 = lex ();
1353 unlex (t1);
1ff442ca 1354 symval = ssave;
a70083a3
AD
1355 if (t1 == COLON)
1356 break;
1ff442ca 1357
a70083a3 1358 if (!first_rhs) /* JF */
1ff442ca
NF
1359 first_rhs = symval;
1360 /* Not followed by colon =>
1361 process as part of this rule's rhs. */
1362 }
1363
1364 /* If we just passed an action, that action was in the middle
a70083a3
AD
1365 of a rule, so make a dummy rule to reduce it to a
1366 non-terminal. */
89cab50d 1367 if (action_flag)
1ff442ca 1368 {
a70083a3 1369 bucket *sdummy;
1ff442ca 1370
f282676b
AD
1371 /* Since the action was written out with this rule's
1372 number, we must give the new rule this number by
1373 inserting the new rule before it. */
1ff442ca
NF
1374
1375 /* Make a dummy nonterminal, a gensym. */
a70083a3 1376 sdummy = gensym ();
1ff442ca
NF
1377
1378 /* Make a new rule, whose body is empty,
1379 before the current one, so that the action
1380 just read can belong to it. */
1381 nrules++;
1382 nitems++;
1383 record_rule_line ();
d7913476 1384 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1385 if (crule1)
1386 crule1->next = p;
a70083a3
AD
1387 else
1388 grammar = p;
1ff442ca 1389 p->sym = sdummy;
d7913476 1390 crule1 = XCALLOC (symbol_list, 1);
1ff442ca
NF
1391 p->next = crule1;
1392 crule1->next = crule;
1393
f282676b
AD
1394 /* Insert the dummy generated by that rule into this
1395 rule. */
1ff442ca 1396 nitems++;
d7913476 1397 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1398 p->sym = sdummy;
1399 p1->next = p;
1400 p1 = p;
1401
89cab50d 1402 action_flag = 0;
1ff442ca
NF
1403 }
1404
1405 if (t == IDENTIFIER)
1406 {
1407 nitems++;
d7913476 1408 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1409 p->sym = symval;
1410 p1->next = p;
1411 p1 = p;
1412 }
a70083a3 1413 else /* handle an action. */
1ff442ca 1414 {
a70083a3 1415 copy_action (crule, rulelength);
89cab50d 1416 action_flag = 1;
1ff442ca
NF
1417 xactions++; /* JF */
1418 }
1419 rulelength++;
a70083a3 1420 } /* end of read rhs of rule */
1ff442ca
NF
1421
1422 /* Put an empty link in the list to mark the end of this rule */
d7913476 1423 p = XCALLOC (symbol_list, 1);
1ff442ca
NF
1424 p1->next = p;
1425 p1 = p;
1426
1427 if (t == PREC)
1428 {
a0f6b076 1429 complain (_("two @prec's in a row"));
a70083a3 1430 t = lex ();
1ff442ca 1431 crule->ruleprec = symval;
a70083a3 1432 t = lex ();
1ff442ca
NF
1433 }
1434 if (t == GUARD)
1435 {
a70083a3 1436 if (!semantic_parser)
ff4a34be 1437 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1438
a70083a3
AD
1439 copy_guard (crule, rulelength);
1440 t = lex ();
1ff442ca
NF
1441 }
1442 else if (t == LEFT_CURLY)
1443 {
a70083a3 1444 /* This case never occurs -wjh */
89cab50d 1445 if (action_flag)
a0f6b076 1446 complain (_("two actions at end of one rule"));
a70083a3 1447 copy_action (crule, rulelength);
89cab50d 1448 action_flag = 1;
943819bf 1449 xactions++; /* -wjh */
a70083a3 1450 t = lex ();
1ff442ca 1451 }
a0f6b076 1452 /* If $$ is being set in default way, report if any type
6666f98f
AD
1453 mismatch. */
1454 else if (!xactions
a70083a3 1455 && first_rhs && lhs->type_name != first_rhs->type_name)
1ff442ca 1456 {
6666f98f
AD
1457 if (lhs->type_name == 0
1458 || first_rhs->type_name == 0
a70083a3 1459 || strcmp (lhs->type_name, first_rhs->type_name))
a0f6b076
AD
1460 complain (_("type clash (`%s' `%s') on default action"),
1461 lhs->type_name ? lhs->type_name : "",
a70083a3 1462 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1463 }
1464 /* Warn if there is no default for $$ but we need one. */
1465 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1466 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca 1467 if (t == SEMICOLON)
a70083a3 1468 t = lex ();
a083fbbf 1469 }
943819bf 1470#if 0
a70083a3 1471 /* these things can appear as alternatives to rules. */
943819bf
RS
1472/* NO, they cannot.
1473 a) none of the documentation allows them
1474 b) most of them scan forward until finding a next %
1475 thus they may swallow lots of intervening rules
1476*/
1ff442ca
NF
1477 else if (t == TOKEN)
1478 {
d7020c20 1479 parse_token_decl (token_sym, nterm_sym);
a70083a3 1480 t = lex ();
1ff442ca
NF
1481 }
1482 else if (t == NTERM)
1483 {
d7020c20 1484 parse_token_decl (nterm_sym, token_sym);
a70083a3 1485 t = lex ();
1ff442ca
NF
1486 }
1487 else if (t == TYPE)
1488 {
a70083a3 1489 t = get_type ();
1ff442ca
NF
1490 }
1491 else if (t == UNION)
1492 {
a70083a3
AD
1493 parse_union_decl ();
1494 t = lex ();
1ff442ca
NF
1495 }
1496 else if (t == EXPECT)
1497 {
a70083a3
AD
1498 parse_expect_decl ();
1499 t = lex ();
1ff442ca
NF
1500 }
1501 else if (t == START)
1502 {
a70083a3
AD
1503 parse_start_decl ();
1504 t = lex ();
1ff442ca 1505 }
943819bf
RS
1506#endif
1507
1ff442ca 1508 else
943819bf 1509 {
a0f6b076 1510 complain (_("invalid input: %s"), token_buffer);
a70083a3 1511 t = lex ();
943819bf 1512 }
1ff442ca
NF
1513 }
1514
943819bf
RS
1515 /* grammar has been read. Do some checking */
1516
1ff442ca 1517 if (nsyms > MAXSHORT)
a0f6b076
AD
1518 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1519 MAXSHORT);
1ff442ca 1520 if (nrules == 0)
a0f6b076 1521 fatal (_("no rules in the input grammar"));
1ff442ca 1522
ff4a34be
AD
1523 /* JF put out same default YYSTYPE as YACC does */
1524 if (typed == 0
1ff442ca
NF
1525 && !value_components_used)
1526 {
1527 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
a70083a3
AD
1528 but it seems better to be consistent.
1529 Most programs should declare their own type anyway. */
1530 fprintf (fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca 1531 if (fdefines)
a70083a3 1532 fprintf (fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1ff442ca
NF
1533 }
1534
1535 /* Report any undefined symbols and consider them nonterminals. */
1536
1537 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1538 if (bp->class == unknown_sym)
1ff442ca 1539 {
a70083a3
AD
1540 complain (_
1541 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1542 bp->tag);
d7020c20 1543 bp->class = nterm_sym;
1ff442ca
NF
1544 bp->value = nvars++;
1545 }
1546
1547 ntokens = nsyms - nvars;
1548}
a70083a3
AD
1549\f
1550/*--------------------------------------------------------------.
1551| For named tokens, but not literal ones, define the name. The |
1552| value is the user token number. |
1553`--------------------------------------------------------------*/
1ff442ca 1554
4a120d45 1555static void
a70083a3 1556output_token_defines (FILE *file)
1ff442ca 1557{
a70083a3
AD
1558 bucket *bp;
1559 char *cp, *symbol;
1560 char c;
1ff442ca 1561
a70083a3 1562 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1563 {
a70083a3
AD
1564 symbol = bp->tag; /* get symbol */
1565
1566 if (bp->value >= ntokens)
1567 continue;
1568 if (bp->user_token_number == SALIAS)
1569 continue;
1570 if ('\'' == *symbol)
1571 continue; /* skip literal character */
1572 if (bp == errtoken)
1573 continue; /* skip error token */
1574 if ('\"' == *symbol)
1ff442ca 1575 {
a70083a3
AD
1576 /* use literal string only if given a symbol with an alias */
1577 if (bp->alias)
1578 symbol = bp->alias->tag;
1579 else
1580 continue;
1581 }
1ff442ca 1582
a70083a3
AD
1583 /* Don't #define nonliteral tokens whose names contain periods. */
1584 cp = symbol;
1585 while ((c = *cp++) && c != '.');
1586 if (c != '\0')
1587 continue;
1ff442ca 1588
a70083a3 1589 fprintf (file, "#define\t%s\t%d\n", symbol,
89cab50d 1590 ((translations && !raw_flag)
a70083a3
AD
1591 ? bp->user_token_number : bp->value));
1592 if (semantic_parser)
1593 fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca 1594 }
a70083a3
AD
1595
1596 putc ('\n', file);
1ff442ca 1597}
1ff442ca
NF
1598
1599
a70083a3
AD
1600/*------------------------------------------------------------------.
1601| Assign symbol numbers, and write definition of token names into |
b2ca4022 1602| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1603| of symbols. |
1604`------------------------------------------------------------------*/
1ff442ca 1605
4a120d45 1606static void
118fb205 1607packsymbols (void)
1ff442ca 1608{
a70083a3
AD
1609 bucket *bp;
1610 int tokno = 1;
1611 int i;
1612 int last_user_token_number;
4a120d45 1613 static char DOLLAR[] = "$";
1ff442ca
NF
1614
1615 /* int lossage = 0; JF set but not used */
1616
d7913476 1617 tags = XCALLOC (char *, nsyms + 1);
4a120d45 1618 tags[0] = DOLLAR;
d7913476 1619 user_toknums = XCALLOC (short, nsyms + 1);
943819bf 1620 user_toknums[0] = 0;
1ff442ca 1621
d7913476
AD
1622 sprec = XCALLOC (short, nsyms);
1623 sassoc = XCALLOC (short, nsyms);
1ff442ca
NF
1624
1625 max_user_token_number = 256;
1626 last_user_token_number = 256;
1627
1628 for (bp = firstsymbol; bp; bp = bp->next)
1629 {
d7020c20 1630 if (bp->class == nterm_sym)
1ff442ca
NF
1631 {
1632 bp->value += ntokens;
1633 }
943819bf
RS
1634 else if (bp->alias)
1635 {
0a6384c4
AD
1636 /* this symbol and its alias are a single token defn.
1637 allocate a tokno, and assign to both check agreement of
1638 ->prec and ->assoc fields and make both the same */
1639 if (bp->value == 0)
1640 bp->value = bp->alias->value = tokno++;
943819bf 1641
0a6384c4
AD
1642 if (bp->prec != bp->alias->prec)
1643 {
1644 if (bp->prec != 0 && bp->alias->prec != 0
1645 && bp->user_token_number == SALIAS)
a0f6b076
AD
1646 complain (_("conflicting precedences for %s and %s"),
1647 bp->tag, bp->alias->tag);
0a6384c4
AD
1648 if (bp->prec != 0)
1649 bp->alias->prec = bp->prec;
1650 else
1651 bp->prec = bp->alias->prec;
1652 }
943819bf 1653
0a6384c4
AD
1654 if (bp->assoc != bp->alias->assoc)
1655 {
a0f6b076
AD
1656 if (bp->assoc != 0 && bp->alias->assoc != 0
1657 && bp->user_token_number == SALIAS)
1658 complain (_("conflicting assoc values for %s and %s"),
1659 bp->tag, bp->alias->tag);
1660 if (bp->assoc != 0)
1661 bp->alias->assoc = bp->assoc;
1662 else
1663 bp->assoc = bp->alias->assoc;
1664 }
0a6384c4
AD
1665
1666 if (bp->user_token_number == SALIAS)
a70083a3 1667 continue; /* do not do processing below for SALIASs */
943819bf 1668
a70083a3 1669 }
d7020c20 1670 else /* bp->class == token_sym */
943819bf
RS
1671 {
1672 bp->value = tokno++;
1673 }
1674
d7020c20 1675 if (bp->class == token_sym)
1ff442ca
NF
1676 {
1677 if (translations && !(bp->user_token_number))
1678 bp->user_token_number = ++last_user_token_number;
1679 if (bp->user_token_number > max_user_token_number)
1680 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1681 }
1682
1683 tags[bp->value] = bp->tag;
943819bf 1684 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1685 sprec[bp->value] = bp->prec;
1686 sassoc[bp->value] = bp->assoc;
1687
1688 }
1689
1690 if (translations)
1691 {
a70083a3 1692 int j;
1ff442ca 1693
d7913476 1694 token_translations = XCALLOC (short, max_user_token_number + 1);
1ff442ca 1695
0a6384c4 1696 /* initialize all entries for literal tokens to 2, the internal
a70083a3
AD
1697 token number for $undefined., which represents all invalid
1698 inputs. */
4a120d45 1699 for (j = 0; j <= max_user_token_number; j++)
a70083a3 1700 token_translations[j] = 2;
1ff442ca 1701
943819bf 1702 for (bp = firstsymbol; bp; bp = bp->next)
a70083a3
AD
1703 {
1704 if (bp->value >= ntokens)
1705 continue; /* non-terminal */
1706 if (bp->user_token_number == SALIAS)
0a6384c4 1707 continue;
a70083a3 1708 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1709 complain (_("tokens %s and %s both assigned number %d"),
1710 tags[token_translations[bp->user_token_number]],
a70083a3
AD
1711 bp->tag, bp->user_token_number);
1712 token_translations[bp->user_token_number] = bp->value;
1713 }
1ff442ca
NF
1714 }
1715
1716 error_token_number = errtoken->value;
1717
89cab50d 1718 if (!no_parser_flag)
a70083a3 1719 output_token_defines (ftable);
1ff442ca 1720
d7020c20 1721 if (startval->class == unknown_sym)
a0f6b076 1722 fatal (_("the start symbol %s is undefined"), startval->tag);
d7020c20 1723 else if (startval->class == token_sym)
a0f6b076 1724 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1725
1726 start_symbol = startval->value;
1727
89cab50d 1728 if (defines_flag)
1ff442ca 1729 {
a70083a3 1730 output_token_defines (fdefines);
1ff442ca
NF
1731
1732 if (!pure_parser)
1733 {
1734 if (spec_name_prefix)
a70083a3
AD
1735 fprintf (fdefines, "\nextern YYSTYPE %slval;\n",
1736 spec_name_prefix);
1ff442ca 1737 else
a70083a3 1738 fprintf (fdefines, "\nextern YYSTYPE yylval;\n");
1ff442ca
NF
1739 }
1740
1741 if (semantic_parser)
1742 for (i = ntokens; i < nsyms; i++)
1743 {
1744 /* don't make these for dummy nonterminals made by gensym. */
1745 if (*tags[i] != '@')
a70083a3 1746 fprintf (fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1ff442ca
NF
1747 }
1748#if 0
1749 /* `fdefines' is now a temporary file, so we need to copy its
1750 contents in `done', so we can't close it here. */
a70083a3 1751 fclose (fdefines);
1ff442ca
NF
1752 fdefines = NULL;
1753#endif
1754 }
1755}
a083fbbf 1756
1ff442ca 1757
a70083a3
AD
1758/*---------------------------------------------------------------.
1759| Convert the rules into the representation using RRHS, RLHS and |
1760| RITEMS. |
1761`---------------------------------------------------------------*/
1ff442ca 1762
4a120d45 1763static void
118fb205 1764packgram (void)
1ff442ca 1765{
a70083a3
AD
1766 int itemno;
1767 int ruleno;
1768 symbol_list *p;
1ff442ca
NF
1769
1770 bucket *ruleprec;
1771
d7913476
AD
1772 ritem = XCALLOC (short, nitems + 1);
1773 rlhs = XCALLOC (short, nrules) - 1;
1774 rrhs = XCALLOC (short, nrules) - 1;
1775 rprec = XCALLOC (short, nrules) - 1;
1776 rprecsym = XCALLOC (short, nrules) - 1;
1777 rassoc = XCALLOC (short, nrules) - 1;
1ff442ca
NF
1778
1779 itemno = 0;
1780 ruleno = 1;
1781
1782 p = grammar;
1783 while (p)
1784 {
1785 rlhs[ruleno] = p->sym->value;
1786 rrhs[ruleno] = itemno;
1787 ruleprec = p->ruleprec;
1788
1789 p = p->next;
1790 while (p && p->sym)
1791 {
1792 ritem[itemno++] = p->sym->value;
1793 /* A rule gets by default the precedence and associativity
1794 of the last token in it. */
d7020c20 1795 if (p->sym->class == token_sym)
1ff442ca
NF
1796 {
1797 rprec[ruleno] = p->sym->prec;
1798 rassoc[ruleno] = p->sym->assoc;
1799 }
a70083a3
AD
1800 if (p)
1801 p = p->next;
1ff442ca
NF
1802 }
1803
1804 /* If this rule has a %prec,
a70083a3 1805 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1806 if (ruleprec)
1807 {
a70083a3
AD
1808 rprec[ruleno] = ruleprec->prec;
1809 rassoc[ruleno] = ruleprec->assoc;
1ff442ca
NF
1810 rprecsym[ruleno] = ruleprec->value;
1811 }
1812
1813 ritem[itemno++] = -ruleno;
1814 ruleno++;
1815
a70083a3
AD
1816 if (p)
1817 p = p->next;
1ff442ca
NF
1818 }
1819
1820 ritem[itemno] = 0;
1821}
a70083a3
AD
1822\f
1823/*-------------------------------------------------------------------.
1824| Read in the grammar specification and record it in the format |
1825| described in gram.h. All guards are copied into the FGUARD file |
1826| and all actions into FACTION, in each case forming the body of a C |
1827| function (YYGUARD or YYACTION) which contains a switch statement |
1828| to decide which guard or action to execute. |
1829`-------------------------------------------------------------------*/
1830
1831void
1832reader (void)
1833{
1834 start_flag = 0;
1835 startval = NULL; /* start symbol not specified yet. */
1836
1837#if 0
1838 /* initially assume token number translation not needed. */
1839 translations = 0;
1840#endif
1841 /* Nowadays translations is always set to 1, since we give `error' a
1842 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1843 */
1844 translations = 1;
1845
1846 nsyms = 1;
1847 nvars = 0;
1848 nrules = 0;
1849 nitems = 0;
1850 rline_allocated = 10;
d7913476 1851 rline = XCALLOC (short, rline_allocated);
a70083a3
AD
1852
1853 typed = 0;
1854 lastprec = 0;
1855
1856 gensym_count = 0;
1857
1858 semantic_parser = 0;
1859 pure_parser = 0;
a70083a3
AD
1860
1861 grammar = NULL;
1862
1863 init_lex ();
1864 lineno = 1;
1865
1866 /* Initialize the symbol table. */
1867 tabinit ();
1868 /* Construct the error token */
1869 errtoken = getsym ("error");
d7020c20 1870 errtoken->class = token_sym;
a70083a3
AD
1871 errtoken->user_token_number = 256; /* Value specified by POSIX. */
1872 /* Construct a token that represents all undefined literal tokens.
1873 It is always token number 2. */
1874 undeftoken = getsym ("$undefined.");
d7020c20 1875 undeftoken->class = token_sym;
a70083a3
AD
1876 undeftoken->user_token_number = 2;
1877
1878 /* Read the declaration section. Copy %{ ... %} groups to FTABLE
1879 and FDEFINES file. Also notice any %token, %left, etc. found
1880 there. */
1881 putc ('\n', ftable);
1882 fprintf (ftable, "\
1883/* %s, made from %s\n\
1884 by GNU bison %s. */\n\
89cab50d 1885\n", no_parser_flag ? "Bison-generated parse tables" : "A Bison parser", infile, VERSION);
a70083a3
AD
1886
1887 fputs ("#define YYBISON 1 /* Identify Bison output. */\n\n", ftable);
1888 read_declarations ();
1889 /* Start writing the guard and action files, if they are needed. */
1890 output_headers ();
1891 /* Read in the grammar, build grammar in list form. Write out
1892 guards and actions. */
1893 readgram ();
1894 /* Now we know whether we need the line-number stack. If we do,
1895 write its type into the .tab.h file. */
1896 if (fdefines)
1897 reader_output_yylsp (fdefines);
1898 /* Write closing delimiters for actions and guards. */
1899 output_trailers ();
89cab50d 1900 if (locations_flag)
a70083a3
AD
1901 fputs ("#define YYLSP_NEEDED\n\n", ftable);
1902 /* Assign the symbols their symbol numbers. Write #defines for the
1903 token symbols into FDEFINES if requested. */
1904 packsymbols ();
1905 /* Convert the grammar into the format described in gram.h. */
1906 packgram ();
1907 /* Free the symbol table data structure since symbols are now all
1908 referred to by symbol number. */
1909 free_symtab ();
1910}
1911
d7020c20 1912
a70083a3
AD
1913void
1914reader_output_yylsp (FILE *f)
1915{
89cab50d 1916 if (locations_flag)
d7020c20
AD
1917 fputs ("\
1918\n\
1919#ifndef YYLTYPE\n\
89cab50d
AD
1920typedef struct yyltype\n\
1921{\n\
1922 int timestamp;\n\
1923 int first_line;\n\
1924 int first_column;\
d7020c20 1925\n\
89cab50d
AD
1926 int last_line;\n\
1927 int last_column;\n\
1928 char *text;\n\
1929} yyltype;\n\
d7020c20 1930\n\
89cab50d 1931# define YYLTYPE yyltype\n\
d7020c20
AD
1932#endif\n\
1933\n",
1934 f);
a70083a3 1935}