]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/lex.c (literalchar): Simply return the char you decoded, non
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
037ca2f1 2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 38
a70083a3
AD
39typedef struct symbol_list
40{
41 struct symbol_list *next;
42 bucket *sym;
b29b2ed5 43 int line;
a70083a3
AD
44 bucket *ruleprec;
45}
46symbol_list;
118fb205 47
1ff442ca 48int lineno;
1ff442ca 49char **tags;
d019d655 50short *user_toknums;
4a120d45
JT
51static symbol_list *grammar;
52static int start_flag;
53static bucket *startval;
1ff442ca
NF
54
55/* Nonzero if components of semantic values are used, implying
56 they must be unions. */
57static int value_components_used;
58
d7020c20
AD
59/* Nonzero if %union has been seen. */
60static int typed;
1ff442ca 61
d7020c20
AD
62/* Incremented for each %left, %right or %nonassoc seen */
63static int lastprec;
1ff442ca 64
1ff442ca 65static bucket *errtoken;
5b2e3c89 66static bucket *undeftoken;
b29b2ed5
AD
67
68
6255b435 69static symbol_list *
b29b2ed5
AD
70symbol_list_new (bucket *sym)
71{
72 symbol_list *res = XMALLOC (symbol_list, 1);
73 res->next = NULL;
74 res->sym = sym;
75 res->line = lineno;
76 res->ruleprec = NULL;
77 return res;
78}
79
0d533154 80\f
a70083a3 81
0d533154
AD
82/*===================\
83| Low level lexing. |
84\===================*/
943819bf
RS
85
86static void
118fb205 87skip_to_char (int target)
943819bf
RS
88{
89 int c;
90 if (target == '\n')
a0f6b076 91 complain (_(" Skipping to next \\n"));
943819bf 92 else
a0f6b076 93 complain (_(" Skipping to next %c"), target);
943819bf
RS
94
95 do
0d533154 96 c = skip_white_space ();
943819bf 97 while (c != target && c != EOF);
a083fbbf 98 if (c != EOF)
0d533154 99 ungetc (c, finput);
943819bf
RS
100}
101
102
0d533154
AD
/*----------------------------------------------------------------.
| Read a signed decimal integer from STREAM and return its value. |
`----------------------------------------------------------------*/

/* An optional leading `-' is accepted.  The first character that is
   not part of the number is pushed back onto STREAM.  If no digit is
   present the result is 0.

   The magnitude saturates at INT_MAX instead of overflowing: signed
   overflow is undefined behavior, and an absurdly long run of digits
   must not be able to trigger it.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would `10 * n + digit' exceed INT_MAX?  Then stick to
         INT_MAX, but keep consuming the remaining digits.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a no-op.  */
  ungetc (c, stream);

  return sign * n;
}
130\f
79282c5a
AD
131/*--------------------------------------------------------------.
132| Get the data type (alternative in the union) of the value for |
133| symbol N in rule RULE. |
134`--------------------------------------------------------------*/
135
136static char *
b29b2ed5 137get_type_name (int n, symbol_list *rule)
79282c5a
AD
138{
139 int i;
140 symbol_list *rp;
141
142 if (n < 0)
143 {
144 complain (_("invalid $ value"));
145 return NULL;
146 }
147
148 rp = rule;
149 i = 0;
150
151 while (i < n)
152 {
153 rp = rp->next;
154 if (rp == NULL || rp->sym == NULL)
155 {
156 complain (_("invalid $ value"));
157 return NULL;
158 }
159 i++;
160 }
161
162 return rp->sym->type_name;
163}
164\f
337bab46
AD
165/*------------------------------------------------------------.
166| Dump the string from FIN to OOUT if non null. MATCH is the |
167| delimiter of the string (either ' or "). |
168`------------------------------------------------------------*/
ae3c3164
AD
169
170static inline void
b6610515 171copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
172{
173 int c;
174
b6610515
RA
175 if (store)
176 obstack_1grow (oout, match);
8c7ebe49 177
4a120d45 178 c = getc (fin);
ae3c3164
AD
179
180 while (c != match)
181 {
182 if (c == EOF)
183 fatal (_("unterminated string at end of file"));
184 if (c == '\n')
185 {
a0f6b076 186 complain (_("unterminated string"));
4a120d45 187 ungetc (c, fin);
ae3c3164
AD
188 c = match; /* invent terminator */
189 continue;
190 }
191
337bab46 192 obstack_1grow (oout, c);
ae3c3164
AD
193
194 if (c == '\\')
195 {
4a120d45 196 c = getc (fin);
ae3c3164
AD
197 if (c == EOF)
198 fatal (_("unterminated string at end of file"));
337bab46 199 obstack_1grow (oout, c);
8c7ebe49 200
ae3c3164
AD
201 if (c == '\n')
202 lineno++;
203 }
204
a70083a3 205 c = getc (fin);
ae3c3164
AD
206 }
207
b6610515
RA
208 if (store)
209 obstack_1grow (oout, c);
210}
211
/* Copy a quoted literal from FIN to OOUT, delimiters included.
   MATCH is the delimiter (either ' or "); its opening occurrence has
   already been read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
219
b6610515
RA
/* Copy from FIN to OOUT the longest run of alphanumeric characters
   and underscores.  The character that ends the run is pushed back
   onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch;

  for (;;)
    {
      ch = getc (fin);
      if (!isalnum (ch) && ch != '_')
        break;
      obstack_1grow (oout, ch);
    }

  ungetc (ch, fin);
}
ae3c3164 232
337bab46
AD
233/*-----------------------------------------------------------------.
234| Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
235| NULL). In fact we just saw a `/', which might or might not be a |
236| comment. In any case, copy what we saw. |
237| |
238| OUT2 might be NULL. |
239`-----------------------------------------------------------------*/
ae3c3164
AD
240
241static inline void
337bab46 242copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
ae3c3164
AD
243{
244 int cplus_comment;
a70083a3 245 int ended;
550a72a3
AD
246 int c;
247
248 /* We read a `/', output it. */
337bab46 249 obstack_1grow (oout1, '/');
896fe5c1
AD
250 if (oout2)
251 obstack_1grow (oout2, '/');
550a72a3
AD
252
253 switch ((c = getc (fin)))
254 {
255 case '/':
256 cplus_comment = 1;
257 break;
258 case '*':
259 cplus_comment = 0;
260 break;
261 default:
262 ungetc (c, fin);
263 return;
264 }
ae3c3164 265
337bab46 266 obstack_1grow (oout1, c);
896fe5c1
AD
267 if (oout2)
268 obstack_1grow (oout2, c);
550a72a3 269 c = getc (fin);
ae3c3164
AD
270
271 ended = 0;
272 while (!ended)
273 {
274 if (!cplus_comment && c == '*')
275 {
276 while (c == '*')
277 {
337bab46 278 obstack_1grow (oout1, c);
896fe5c1
AD
279 if (oout2)
280 obstack_1grow (oout2, c);
550a72a3 281 c = getc (fin);
ae3c3164
AD
282 }
283
284 if (c == '/')
285 {
337bab46 286 obstack_1grow (oout1, c);
896fe5c1
AD
287 if (oout2)
288 obstack_1grow (oout2, c);
ae3c3164
AD
289 ended = 1;
290 }
291 }
292 else if (c == '\n')
293 {
294 lineno++;
337bab46 295 obstack_1grow (oout1, c);
896fe5c1
AD
296 if (oout2)
297 obstack_1grow (oout2, c);
ae3c3164
AD
298 if (cplus_comment)
299 ended = 1;
300 else
550a72a3 301 c = getc (fin);
ae3c3164
AD
302 }
303 else if (c == EOF)
304 fatal (_("unterminated comment"));
305 else
306 {
337bab46 307 obstack_1grow (oout1, c);
896fe5c1
AD
308 if (oout2)
309 obstack_1grow (oout2, c);
550a72a3 310 c = getc (fin);
ae3c3164
AD
311 }
312 }
313}
314
315
550a72a3
AD
/*-------------------------------------------------------------------.
| Dump the comment (actually the current string starting with a `/') |
| from FIN to OOUT.  The `/' itself has already been read.           |
`-------------------------------------------------------------------*/

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *no_second_output = NULL;

  copy_comment2 (fin, oout, no_second_output);
}
326
327
a70083a3 328/*-----------------------------------------------------------------.
337bab46 329| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
330| reference to this location. STACK_OFFSET is the number of values |
331| in the current rule so far, which says where to find `$0' with |
332| respect to the top of the stack. |
333`-----------------------------------------------------------------*/
1ff442ca 334
a70083a3 335static inline void
337bab46 336copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 337{
a70083a3 338 int c;
1ff442ca 339
a70083a3
AD
340 c = getc (fin);
341 if (c == '$')
1ff442ca 342 {
ff4423cc 343 obstack_sgrow (oout, "yyloc");
89cab50d 344 locations_flag = 1;
a70083a3
AD
345 }
346 else if (isdigit (c) || c == '-')
347 {
348 int n;
1ff442ca 349
a70083a3
AD
350 ungetc (c, fin);
351 n = read_signed_integer (fin);
943819bf 352
337bab46 353 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
89cab50d 354 locations_flag = 1;
1ff442ca 355 }
a70083a3 356 else
ff4a34be
AD
357 {
358 char buf[] = "@c";
359 buf[1] = c;
360 complain (_("%s is invalid"), quote (buf));
361 }
1ff442ca 362}
79282c5a
AD
363
364
365/*-------------------------------------------------------------------.
366| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
367| |
368| Possible inputs: $[<TYPENAME>]($|integer) |
369| |
337bab46 370| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
371| the number of values in the current rule so far, which says where |
372| to find `$0' with respect to the top of the stack. |
373`-------------------------------------------------------------------*/
374
375static inline void
337bab46 376copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
377 symbol_list *rule, int stack_offset)
378{
379 int c = getc (fin);
b0ce6046 380 const char *type_name = NULL;
79282c5a 381
f282676b 382 /* Get the type name if explicit. */
79282c5a
AD
383 if (c == '<')
384 {
f282676b 385 read_type_name (fin);
79282c5a
AD
386 type_name = token_buffer;
387 value_components_used = 1;
79282c5a
AD
388 c = getc (fin);
389 }
390
391 if (c == '$')
392 {
ff4423cc 393 obstack_sgrow (oout, "yyval");
8c7ebe49 394
79282c5a
AD
395 if (!type_name)
396 type_name = get_type_name (0, rule);
397 if (type_name)
337bab46 398 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
399 if (!type_name && typed)
400 complain (_("$$ of `%s' has no declared type"),
401 rule->sym->tag);
402 }
403 else if (isdigit (c) || c == '-')
404 {
405 int n;
406 ungetc (c, fin);
407 n = read_signed_integer (fin);
408
409 if (!type_name && n > 0)
410 type_name = get_type_name (n, rule);
411
337bab46 412 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
8c7ebe49 413
79282c5a 414 if (type_name)
337bab46 415 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
416 if (!type_name && typed)
417 complain (_("$%d of `%s' has no declared type"),
418 n, rule->sym->tag);
419 }
420 else
421 {
422 char buf[] = "$c";
423 buf[1] = c;
424 complain (_("%s is invalid"), quote (buf));
425 }
426}
a70083a3
AD
427\f
428/*-------------------------------------------------------------------.
429| Copy the contents of a `%{ ... %}' into the definitions file. The |
430| `%{' has already been read. Return after reading the `%}'. |
431`-------------------------------------------------------------------*/
1ff442ca 432
4a120d45 433static void
118fb205 434copy_definition (void)
1ff442ca 435{
a70083a3 436 int c;
ae3c3164 437 /* -1 while reading a character if prev char was %. */
a70083a3 438 int after_percent;
1ff442ca 439
89cab50d 440 if (!no_lines_flag)
25b222fa
MA
441 {
442 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 443 lineno, quotearg_style (c_quoting_style,
25b222fa
MA
444 muscle_find("filename")));
445 }
1ff442ca
NF
446
447 after_percent = 0;
448
ae3c3164 449 c = getc (finput);
1ff442ca
NF
450
451 for (;;)
452 {
453 switch (c)
454 {
455 case '\n':
dd60faec 456 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
457 lineno++;
458 break;
459
460 case '%':
a70083a3 461 after_percent = -1;
1ff442ca 462 break;
a083fbbf 463
1ff442ca
NF
464 case '\'':
465 case '"':
337bab46 466 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
467 break;
468
469 case '/':
337bab46 470 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
471 break;
472
473 case EOF:
a70083a3 474 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
475
476 default:
dd60faec 477 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
478 }
479
a70083a3 480 c = getc (finput);
1ff442ca
NF
481
482 if (after_percent)
483 {
484 if (c == '}')
485 return;
dd60faec 486 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
487 }
488 after_percent = 0;
1ff442ca 489 }
1ff442ca
NF
490}
491
492
d7020c20
AD
493/*-------------------------------------------------------------------.
494| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
495| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
496| are reversed. |
497`-------------------------------------------------------------------*/
1ff442ca 498
4a120d45 499static void
d7020c20 500parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 501{
342b8b6e
AD
502 token_t token = tok_undef;
503 char *typename = NULL;
1ff442ca 504
1e9798d5
AD
505 /* The symbol being defined. */
506 struct bucket *symbol = NULL;
507
508 /* After `%token' and `%nterm', any number of symbols maybe be
509 defined. */
1ff442ca
NF
510 for (;;)
511 {
e6011337
JT
512 int tmp_char = ungetc (skip_white_space (), finput);
513
1e9798d5
AD
514 /* `%' (for instance from `%token', or from `%%' etc.) is the
515 only valid means to end this declaration. */
e6011337 516 if (tmp_char == '%')
1ff442ca 517 return;
e6011337 518 if (tmp_char == EOF)
a0f6b076 519 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 520
a70083a3 521 token = lex ();
511e79b3 522 if (token == tok_comma)
943819bf
RS
523 {
524 symbol = NULL;
525 continue;
526 }
511e79b3 527 if (token == tok_typename)
1ff442ca 528 {
95e36146 529 typename = xstrdup (token_buffer);
1ff442ca 530 value_components_used = 1;
943819bf
RS
531 symbol = NULL;
532 }
511e79b3 533 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 534 {
8e03724b
AD
535 if (symval->alias)
536 warn (_("symbol `%s' used more than once as a literal string"),
537 symval->tag);
538 else if (symbol->alias)
539 warn (_("symbol `%s' given more than one literal string"),
540 symbol->tag);
541 else
542 {
543 symval->class = token_sym;
544 symval->type_name = typename;
545 symval->user_token_number = symbol->user_token_number;
546 symbol->user_token_number = SALIAS;
547 symval->alias = symbol;
548 symbol->alias = symval;
549 /* symbol and symval combined are only one symbol */
550 nsyms--;
551 }
8e03724b 552 symbol = NULL;
1ff442ca 553 }
511e79b3 554 else if (token == tok_identifier)
1ff442ca
NF
555 {
556 int oldclass = symval->class;
943819bf 557 symbol = symval;
1ff442ca 558
943819bf 559 if (symbol->class == what_is_not)
a0f6b076 560 complain (_("symbol %s redefined"), symbol->tag);
943819bf 561 symbol->class = what_is;
d7020c20 562 if (what_is == nterm_sym && oldclass != nterm_sym)
943819bf 563 symbol->value = nvars++;
1ff442ca
NF
564
565 if (typename)
566 {
943819bf
RS
567 if (symbol->type_name == NULL)
568 symbol->type_name = typename;
a70083a3 569 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 570 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
571 }
572 }
511e79b3 573 else if (symbol && token == tok_number)
a70083a3 574 {
943819bf 575 symbol->user_token_number = numval;
a70083a3 576 }
1ff442ca 577 else
943819bf 578 {
a0f6b076 579 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
580 token_buffer,
581 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 582 skip_to_char ('%');
943819bf 583 }
1ff442ca
NF
584 }
585
586}
587
1ff442ca 588
d7020c20
AD
589/*------------------------------.
590| Parse what comes after %start |
591`------------------------------*/
1ff442ca 592
4a120d45 593static void
118fb205 594parse_start_decl (void)
1ff442ca
NF
595{
596 if (start_flag)
27821bff 597 complain (_("multiple %s declarations"), "%start");
511e79b3 598 if (lex () != tok_identifier)
27821bff 599 complain (_("invalid %s declaration"), "%start");
943819bf
RS
600 else
601 {
602 start_flag = 1;
603 startval = symval;
604 }
1ff442ca
NF
605}
606
a70083a3
AD
607/*-----------------------------------------------------------.
608| read in a %type declaration and record its information for |
609| get_type_name to access |
610`-----------------------------------------------------------*/
611
612static void
613parse_type_decl (void)
614{
a70083a3
AD
615 char *name;
616
511e79b3 617 if (lex () != tok_typename)
a70083a3
AD
618 {
619 complain ("%s", _("%type declaration has no <typename>"));
620 skip_to_char ('%');
621 return;
622 }
623
95e36146 624 name = xstrdup (token_buffer);
a70083a3
AD
625
626 for (;;)
627 {
f17bcd1f 628 token_t t;
a70083a3
AD
629 int tmp_char = ungetc (skip_white_space (), finput);
630
631 if (tmp_char == '%')
632 return;
633 if (tmp_char == EOF)
634 fatal (_("Premature EOF after %s"), token_buffer);
635
636 t = lex ();
637
638 switch (t)
1ff442ca
NF
639 {
640
511e79b3
AD
641 case tok_comma:
642 case tok_semicolon:
1ff442ca
NF
643 break;
644
511e79b3 645 case tok_identifier:
1ff442ca
NF
646 if (symval->type_name == NULL)
647 symval->type_name = name;
a70083a3 648 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 649 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
650
651 break;
652
653 default:
a0f6b076
AD
654 complain (_("invalid %%type declaration due to item: %s"),
655 token_buffer);
a70083a3 656 skip_to_char ('%');
1ff442ca
NF
657 }
658 }
659}
660
661
662
d7020c20
AD
663/*----------------------------------------------------------------.
664| Read in a %left, %right or %nonassoc declaration and record its |
665| information. |
666`----------------------------------------------------------------*/
1ff442ca 667
4a120d45 668static void
d7020c20 669parse_assoc_decl (associativity assoc)
1ff442ca 670{
a70083a3
AD
671 char *name = NULL;
672 int prev = 0;
1ff442ca 673
a70083a3 674 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 675
1ff442ca
NF
676 for (;;)
677 {
f17bcd1f 678 token_t t;
e6011337 679 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 680
e6011337 681 if (tmp_char == '%')
1ff442ca 682 return;
e6011337 683 if (tmp_char == EOF)
a0f6b076 684 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 685
a70083a3 686 t = lex ();
1ff442ca
NF
687
688 switch (t)
689 {
511e79b3 690 case tok_typename:
95e36146 691 name = xstrdup (token_buffer);
1ff442ca
NF
692 break;
693
511e79b3 694 case tok_comma:
1ff442ca
NF
695 break;
696
511e79b3 697 case tok_identifier:
1ff442ca 698 if (symval->prec != 0)
a0f6b076 699 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
700 symval->prec = lastprec;
701 symval->assoc = assoc;
d7020c20 702 if (symval->class == nterm_sym)
a0f6b076 703 complain (_("symbol %s redefined"), symval->tag);
d7020c20 704 symval->class = token_sym;
1ff442ca 705 if (name)
a70083a3 706 { /* record the type, if one is specified */
1ff442ca
NF
707 if (symval->type_name == NULL)
708 symval->type_name = name;
a70083a3 709 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 710 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
711 }
712 break;
713
511e79b3
AD
714 case tok_number:
715 if (prev == tok_identifier)
a70083a3 716 {
1ff442ca 717 symval->user_token_number = numval;
a70083a3
AD
718 }
719 else
720 {
721 complain (_
722 ("invalid text (%s) - number should be after identifier"),
723token_buffer);
724 skip_to_char ('%');
725 }
1ff442ca
NF
726 break;
727
511e79b3 728 case tok_semicolon:
1ff442ca
NF
729 return;
730
731 default:
a0f6b076 732 complain (_("unexpected item: %s"), token_buffer);
a70083a3 733 skip_to_char ('%');
1ff442ca
NF
734 }
735
736 prev = t;
1ff442ca
NF
737 }
738}
739
740
741
dd60faec 742/*--------------------------------------------------------------.
180d45ba
PB
743| Copy the union declaration into the stype muscle |
744| (and fdefines), where it is made into the definition of |
745| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 746`--------------------------------------------------------------*/
1ff442ca 747
4a120d45 748static void
118fb205 749parse_union_decl (void)
1ff442ca 750{
a70083a3
AD
751 int c;
752 int count = 0;
180d45ba 753 struct obstack union_obstack;
5f7e0832
AD
754 const char *prologue = "\
755#ifndef YYSTYPE\n\
756typedef union";
757 const char *epilogue = "\
758 yystype;\n\
759# define YYSTYPE yystype\n\
760#endif\n";
1ff442ca
NF
761
762 if (typed)
27821bff 763 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
764
765 typed = 1;
766
f6ec6d13
AD
767 /* FIXME: I'm worried: are you sure attrs_obstack is properly
768 filled? */
5f7e0832
AD
769 /* I don't see any reasons to keep this line, because we should
770 create a special skeleton for this option. */
180d45ba 771 if (no_lines_flag)
dd60faec 772 obstack_1grow (&attrs_obstack, '\n');
342b8b6e 773
180d45ba
PB
774 obstack_init (&union_obstack);
775 obstack_sgrow (&union_obstack, "union");
896fe5c1 776 if (defines_flag)
5f7e0832 777 obstack_sgrow (&defines_obstack, prologue);
1ff442ca 778
27821bff 779 c = getc (finput);
1ff442ca
NF
780
781 while (c != EOF)
782 {
342b8b6e
AD
783 /* If C contains '/', it is output by copy_comment (). */
784 if (c != '/')
785 {
f6ec6d13 786 obstack_1grow (&union_obstack, c);
342b8b6e
AD
787 if (defines_flag)
788 obstack_1grow (&defines_obstack, c);
789 }
1ff442ca
NF
790
791 switch (c)
792 {
793 case '\n':
794 lineno++;
795 break;
796
797 case '/':
180d45ba 798 copy_comment2 (finput, &defines_obstack, &union_obstack);
1ff442ca
NF
799 break;
800
1ff442ca
NF
801 case '{':
802 count++;
803 break;
804
805 case '}':
806 if (count == 0)
27821bff 807 complain (_("unmatched %s"), "`}'");
1ff442ca 808 count--;
943819bf 809 if (count <= 0)
1ff442ca 810 {
896fe5c1 811 if (defines_flag)
5f7e0832 812 obstack_sgrow (&defines_obstack, epilogue);
1ff442ca 813 /* JF don't choke on trailing semi */
27821bff
AD
814 c = skip_white_space ();
815 if (c != ';')
a70083a3 816 ungetc (c, finput);
180d45ba
PB
817 obstack_1grow (&union_obstack, 0);
818 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
819 return;
820 }
821 }
822
27821bff 823 c = getc (finput);
1ff442ca 824 }
180d45ba 825
1ff442ca
NF
826}
827
d7020c20
AD
828
829/*-------------------------------------------------------.
830| Parse the declaration %expect N which says to expect N |
831| shift-reduce conflicts. |
832`-------------------------------------------------------*/
1ff442ca 833
4a120d45 834static void
118fb205 835parse_expect_decl (void)
1ff442ca 836{
131e2fef 837 int c = skip_white_space ();
1ff442ca
NF
838 ungetc (c, finput);
839
131e2fef 840 if (!isdigit (c))
79282c5a 841 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
842 else
843 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
844}
845
a70083a3
AD
846
847/*-------------------------------------------------------------------.
848| Parse what comes after %thong. the full syntax is |
849| |
850| %thong <type> token number literal |
851| |
852| the <type> or number may be omitted. The number specifies the |
853| user_token_number. |
854| |
855| Two symbols are entered in the table, one for the token symbol and |
856| one for the literal. Both are given the <type>, if any, from the |
857| declaration. The ->user_token_number of the first is SALIAS and |
858| the ->user_token_number of the second is set to the number, if |
859| any, from the declaration. The two symbols are linked via |
860| pointers in their ->alias fields. |
861| |
862| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
863| only the literal string is retained it is the literal string that |
864| is output to yytname |
865`-------------------------------------------------------------------*/
866
867static void
868parse_thong_decl (void)
7b306f52 869{
f17bcd1f 870 token_t token;
a70083a3
AD
871 struct bucket *symbol;
872 char *typename = 0;
6b7e85b9 873 int usrtoknum = SUNDEF;
7b306f52 874
a70083a3 875 token = lex (); /* fetch typename or first token */
511e79b3 876 if (token == tok_typename)
7b306f52 877 {
95e36146 878 typename = xstrdup (token_buffer);
a70083a3
AD
879 value_components_used = 1;
880 token = lex (); /* fetch first token */
7b306f52 881 }
7b306f52 882
a70083a3 883 /* process first token */
7b306f52 884
511e79b3 885 if (token != tok_identifier)
a70083a3
AD
886 {
887 complain (_("unrecognized item %s, expected an identifier"),
888 token_buffer);
889 skip_to_char ('%');
890 return;
7b306f52 891 }
d7020c20 892 symval->class = token_sym;
a70083a3
AD
893 symval->type_name = typename;
894 symval->user_token_number = SALIAS;
895 symbol = symval;
7b306f52 896
a70083a3 897 token = lex (); /* get number or literal string */
1ff442ca 898
511e79b3 899 if (token == tok_number)
943819bf 900 {
a70083a3
AD
901 usrtoknum = numval;
902 token = lex (); /* okay, did number, now get literal */
943819bf 903 }
1ff442ca 904
a70083a3 905 /* process literal string token */
1ff442ca 906
511e79b3 907 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 908 {
a70083a3
AD
909 complain (_("expected string constant instead of %s"), token_buffer);
910 skip_to_char ('%');
911 return;
1ff442ca 912 }
d7020c20 913 symval->class = token_sym;
a70083a3
AD
914 symval->type_name = typename;
915 symval->user_token_number = usrtoknum;
1ff442ca 916
a70083a3
AD
917 symval->alias = symbol;
918 symbol->alias = symval;
1ff442ca 919
79282c5a
AD
920 /* symbol and symval combined are only one symbol. */
921 nsyms--;
a70083a3 922}
3cef001a 923
b6610515 924static void
11d82f03 925parse_muscle_decl (void)
b6610515
RA
926{
927 int ch = ungetc (skip_white_space (), finput);
11d82f03
MA
928 char* muscle_key;
929 char* muscle_value;
b6610515
RA
930
931 /* Read key. */
932 if (!isalpha (ch) && ch != '_')
933 {
934 complain (_("invalid %s declaration"), "%define");
935 skip_to_char ('%');
936 return;
937 }
11d82f03
MA
938 copy_identifier (finput, &muscle_obstack);
939 obstack_1grow (&muscle_obstack, 0);
940 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 941
b6610515
RA
942 /* Read value. */
943 ch = skip_white_space ();
944 if (ch != '"')
945 {
946 ungetc (ch, finput);
947 if (ch != EOF)
948 {
949 complain (_("invalid %s declaration"), "%define");
950 skip_to_char ('%');
951 return;
952 }
953 else
954 fatal (_("Premature EOF after %s"), "\"");
955 }
11d82f03
MA
956 copy_string2 (finput, &muscle_obstack, '"', 0);
957 obstack_1grow (&muscle_obstack, 0);
958 muscle_value = obstack_finish (&muscle_obstack);
b6610515 959
b6610515 960 /* Store the (key, value) pair in the environment. */
11d82f03 961 muscle_insert (muscle_key, muscle_value);
b6610515
RA
962}
963
2ba3b73c 964
426cf563
MA
965
966/*---------------------------------.
a870c567 967| Parse a double quoted parameter. |
426cf563
MA
968`---------------------------------*/
969
970static const char *
971parse_dquoted_param (const char *from)
972{
973 struct obstack param_obstack;
974 const char *param = NULL;
975 int c;
976
977 obstack_init (&param_obstack);
978 c = skip_white_space ();
979
980 if (c != '"')
981 {
982 complain (_("invalid %s declaration"), from);
983 ungetc (c, finput);
984 skip_to_char ('%');
985 return NULL;
986 }
987
2648a72d
AD
988 while ((c = literalchar ()) != '"')
989 obstack_1grow (&param_obstack, c);
a870c567 990
426cf563
MA
991 obstack_1grow (&param_obstack, '\0');
992 param = obstack_finish (&param_obstack);
993
994 if (c != '"' || strlen (param) == 0)
995 {
996 complain (_("invalid %s declaration"), from);
997 if (c != '"')
998 ungetc (c, finput);
999 skip_to_char ('%');
1000 return NULL;
1001 }
1002
1003 return param;
1004}
1005
2ba3b73c
MA
1006/*----------------------------------.
1007| Parse what comes after %skeleton. |
1008`----------------------------------*/
1009
a870c567 1010static void
2ba3b73c
MA
1011parse_skel_decl (void)
1012{
426cf563 1013 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
1014}
1015
a70083a3
AD
1016/*----------------------------------------------------------------.
1017| Read from finput until `%%' is seen. Discard the `%%'. Handle |
1018| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 1019| groups to ATTRS_OBSTACK. |
a70083a3 1020`----------------------------------------------------------------*/
1ff442ca 1021
4a120d45 1022static void
a70083a3 1023read_declarations (void)
1ff442ca 1024{
a70083a3 1025 for (;;)
1ff442ca 1026 {
951366c1 1027 int c = skip_white_space ();
1ff442ca 1028
a70083a3
AD
1029 if (c == '%')
1030 {
951366c1 1031 token_t tok = parse_percent_token ();
1ff442ca 1032
a70083a3 1033 switch (tok)
943819bf 1034 {
511e79b3 1035 case tok_two_percents:
a70083a3 1036 return;
1ff442ca 1037
511e79b3 1038 case tok_percent_left_curly:
a70083a3
AD
1039 copy_definition ();
1040 break;
1ff442ca 1041
511e79b3 1042 case tok_token:
d7020c20 1043 parse_token_decl (token_sym, nterm_sym);
a70083a3 1044 break;
1ff442ca 1045
511e79b3 1046 case tok_nterm:
d7020c20 1047 parse_token_decl (nterm_sym, token_sym);
a70083a3 1048 break;
1ff442ca 1049
511e79b3 1050 case tok_type:
a70083a3
AD
1051 parse_type_decl ();
1052 break;
1ff442ca 1053
511e79b3 1054 case tok_start:
a70083a3
AD
1055 parse_start_decl ();
1056 break;
118fb205 1057
511e79b3 1058 case tok_union:
a70083a3
AD
1059 parse_union_decl ();
1060 break;
1ff442ca 1061
511e79b3 1062 case tok_expect:
a70083a3
AD
1063 parse_expect_decl ();
1064 break;
6deb4447 1065
511e79b3 1066 case tok_thong:
a70083a3
AD
1067 parse_thong_decl ();
1068 break;
d7020c20 1069
511e79b3 1070 case tok_left:
d7020c20 1071 parse_assoc_decl (left_assoc);
a70083a3 1072 break;
1ff442ca 1073
511e79b3 1074 case tok_right:
d7020c20 1075 parse_assoc_decl (right_assoc);
a70083a3 1076 break;
1ff442ca 1077
511e79b3 1078 case tok_nonassoc:
d7020c20 1079 parse_assoc_decl (non_assoc);
a70083a3 1080 break;
1ff442ca 1081
b6610515 1082 case tok_define:
11d82f03 1083 parse_muscle_decl ();
b6610515 1084 break;
342b8b6e 1085
2ba3b73c
MA
1086 case tok_skel:
1087 parse_skel_decl ();
1088 break;
b6610515 1089
511e79b3 1090 case tok_noop:
a70083a3 1091 break;
1ff442ca 1092
951366c1
AD
1093 case tok_stropt:
1094 case tok_intopt:
1095 case tok_obsolete:
951366c1
AD
1096 abort ();
1097 break;
1098
e0c40012 1099 case tok_illegal:
a70083a3
AD
1100 default:
1101 complain (_("unrecognized: %s"), token_buffer);
1102 skip_to_char ('%');
1103 }
1104 }
1105 else if (c == EOF)
1106 fatal (_("no input grammar"));
1107 else
1108 {
ff4a34be
AD
1109 char buf[] = "c";
1110 buf[0] = c;
1111 complain (_("unknown character: %s"), quote (buf));
a70083a3 1112 skip_to_char ('%');
1ff442ca 1113 }
1ff442ca 1114 }
1ff442ca 1115}
a70083a3
AD
1116\f
1117/*-------------------------------------------------------------------.
1118| Assuming that a `{' has just been seen, copy everything up to the |
1119| matching `}' into the actions file. STACK_OFFSET is the number of |
1120| values in the current rule so far, which says where to find `$0' |
1121| with respect to the top of the stack. |
1122`-------------------------------------------------------------------*/
1ff442ca 1123
4a120d45 1124static void
79282c5a 1125copy_action (symbol_list *rule, int stack_offset)
1ff442ca 1126{
a70083a3 1127 int c;
a70083a3 1128 int count;
1ff442ca
NF
1129
1130 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1131 if (semantic_parser)
1132 stack_offset = 0;
1ff442ca 1133
25b222fa 1134 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
8c7ebe49 1135
89cab50d 1136 if (!no_lines_flag)
8c7ebe49 1137 {
25b222fa 1138 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
342b8b6e 1139 lineno, quotearg_style (c_quoting_style,
25b222fa 1140 muscle_find ("filename")));
8c7ebe49
AD
1141 }
1142 obstack_1grow (&action_obstack, '{');
1ff442ca
NF
1143
1144 count = 1;
a70083a3 1145 c = getc (finput);
1ff442ca
NF
1146
1147 while (count > 0)
1148 {
1149 while (c != '}')
a70083a3
AD
1150 {
1151 switch (c)
1ff442ca
NF
1152 {
1153 case '\n':
8c7ebe49 1154 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1155 lineno++;
1156 break;
1157
1158 case '{':
8c7ebe49 1159 obstack_1grow (&action_obstack, c);
1ff442ca
NF
1160 count++;
1161 break;
1162
1163 case '\'':
1164 case '"':
337bab46 1165 copy_string (finput, &action_obstack, c);
1ff442ca
NF
1166 break;
1167
1168 case '/':
337bab46 1169 copy_comment (finput, &action_obstack);
1ff442ca
NF
1170 break;
1171
1172 case '$':
337bab46 1173 copy_dollar (finput, &action_obstack,
8c7ebe49 1174 rule, stack_offset);
1ff442ca
NF
1175 break;
1176
1177 case '@':
337bab46 1178 copy_at (finput, &action_obstack,
8c7ebe49 1179 stack_offset);
6666f98f 1180 break;
1ff442ca
NF
1181
1182 case EOF:
27821bff 1183 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1184
1185 default:
8c7ebe49 1186 obstack_1grow (&action_obstack, c);
a70083a3
AD
1187 }
1188
1189 c = getc (finput);
1190 }
1191
1192 /* above loop exits when c is '}' */
1193
1194 if (--count)
1195 {
8c7ebe49 1196 obstack_1grow (&action_obstack, c);
a70083a3
AD
1197 c = getc (finput);
1198 }
1199 }
1200
ff4423cc 1201 obstack_sgrow (&action_obstack, ";\n break;}");
a70083a3
AD
1202}
1203\f
1204/*-------------------------------------------------------------------.
1205| After `%guard' is seen in the input file, copy the actual guard |
1206| into the guards file. If the guard is followed by an action, copy |
1207| that into the actions file. STACK_OFFSET is the number of values |
1208| in the current rule so far, which says where to find `$0' with |
1209| respect to the top of the stack, for the simple parser in which |
1210| the stack is not popped until after the guard is run. |
1211`-------------------------------------------------------------------*/
1212
1213static void
79282c5a 1214copy_guard (symbol_list *rule, int stack_offset)
a70083a3
AD
1215{
1216 int c;
a70083a3 1217 int count;
a70083a3
AD
1218 int brace_flag = 0;
1219
1220 /* offset is always 0 if parser has already popped the stack pointer */
1221 if (semantic_parser)
1222 stack_offset = 0;
1223
ea5607fd 1224 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
89cab50d 1225 if (!no_lines_flag)
25b222fa 1226 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
682d48cd 1227 lineno, quotearg_style (c_quoting_style,
11d82f03 1228 muscle_find ("filename")));
ea5607fd 1229 obstack_1grow (&guard_obstack, '{');
a70083a3
AD
1230
1231 count = 0;
1232 c = getc (finput);
1233
1234 while (brace_flag ? (count > 0) : (c != ';'))
1235 {
1236 switch (c)
1237 {
1238 case '\n':
ea5607fd 1239 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1240 lineno++;
1241 break;
1242
1243 case '{':
ea5607fd 1244 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1245 brace_flag = 1;
1246 count++;
1247 break;
1248
1249 case '}':
ea5607fd 1250 obstack_1grow (&guard_obstack, c);
a70083a3
AD
1251 if (count > 0)
1252 count--;
1253 else
1254 {
1255 complain (_("unmatched %s"), "`}'");
1256 c = getc (finput); /* skip it */
1257 }
1258 break;
1259
1260 case '\'':
1261 case '"':
337bab46 1262 copy_string (finput, &guard_obstack, c);
a70083a3
AD
1263 break;
1264
1265 case '/':
337bab46 1266 copy_comment (finput, &guard_obstack);
a70083a3
AD
1267 break;
1268
1269 case '$':
337bab46 1270 copy_dollar (finput, &guard_obstack, rule, stack_offset);
a70083a3 1271 break;
1ff442ca 1272
a70083a3 1273 case '@':
337bab46 1274 copy_at (finput, &guard_obstack, stack_offset);
a70083a3 1275 break;
1ff442ca 1276
a70083a3
AD
1277 case EOF:
1278 fatal ("%s", _("unterminated %guard clause"));
1ff442ca 1279
a70083a3 1280 default:
ea5607fd 1281 obstack_1grow (&guard_obstack, c);
1ff442ca 1282 }
a70083a3
AD
1283
1284 if (c != '}' || count != 0)
1285 c = getc (finput);
1ff442ca
NF
1286 }
1287
a70083a3
AD
1288 c = skip_white_space ();
1289
ff4423cc 1290 obstack_sgrow (&guard_obstack, ";\n break;}");
a70083a3
AD
1291 if (c == '{')
1292 copy_action (rule, stack_offset);
1293 else if (c == '=')
1294 {
1295 c = getc (finput); /* why not skip_white_space -wjh */
1296 if (c == '{')
1297 copy_action (rule, stack_offset);
1298 }
1299 else
1300 ungetc (c, finput);
1ff442ca 1301}
a70083a3
AD
1302\f
1303
a70083a3
AD
1304/*-------------------------------------------------------------------.
1305| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1306| with the user's names. |
1307`-------------------------------------------------------------------*/
1ff442ca 1308
4a120d45 1309static bucket *
118fb205 1310gensym (void)
1ff442ca 1311{
274d42ce
AD
1312 /* Incremented for each generated symbol */
1313 static int gensym_count = 0;
1314 static char buf[256];
1315
a70083a3 1316 bucket *sym;
1ff442ca 1317
274d42ce
AD
1318 sprintf (buf, "@%d", ++gensym_count);
1319 token_buffer = buf;
a70083a3 1320 sym = getsym (token_buffer);
d7020c20 1321 sym->class = nterm_sym;
1ff442ca 1322 sym->value = nvars++;
36281465 1323 return sym;
1ff442ca 1324}
a70083a3 1325\f
107f7dfb
AD
1326/*-------------------------------------------------------------------.
1327| Parse the input grammar into a one symbol_list structure. Each |
1328| rule is represented by a sequence of symbols: the left hand side |
1329| followed by the contents of the right hand side, followed by a |
1330| null pointer instead of a symbol to terminate the rule. The next |
1331| symbol is the lhs of the following rule. |
1332| |
1333| All guards and actions are copied out to the appropriate files, |
1334| labelled by the rule number they apply to. |
1335| |
1336| Bison used to allow some %directives in the rules sections, but |
1337| this is no longer consider appropriate: (i) the documented grammar |
1338| doesn't claim it, (ii), it would promote bad style, (iii), error |
1339| recovery for %directives consists in skipping the junk until a `%' |
1340| is seen and helrp synchronizing. This scheme is definitely wrong |
1341| in the rules section. |
1342`-------------------------------------------------------------------*/
1ff442ca 1343
4a120d45 1344static void
118fb205 1345readgram (void)
1ff442ca 1346{
f17bcd1f 1347 token_t t;
a70083a3 1348 bucket *lhs = NULL;
107f7dfb
AD
1349 symbol_list *p = NULL;
1350 symbol_list *p1 = NULL;
a70083a3 1351 bucket *bp;
1ff442ca 1352
ff4a34be
AD
1353 /* Points to first symbol_list of current rule. its symbol is the
1354 lhs of the rule. */
107f7dfb 1355 symbol_list *crule = NULL;
ff4a34be 1356 /* Points to the symbol_list preceding crule. */
107f7dfb 1357 symbol_list *crule1 = NULL;
1ff442ca 1358
a70083a3 1359 t = lex ();
1ff442ca 1360
511e79b3 1361 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1362 if (t == tok_identifier || t == tok_bar)
1363 {
1364 int action_flag = 0;
1365 /* Number of symbols in rhs of this rule so far */
1366 int rulelength = 0;
1367 int xactions = 0; /* JF for error checking */
1368 bucket *first_rhs = 0;
1369
1370 if (t == tok_identifier)
1371 {
1372 lhs = symval;
1373
1374 if (!start_flag)
1375 {
1376 startval = lhs;
1377 start_flag = 1;
1378 }
1ff442ca 1379
107f7dfb
AD
1380 t = lex ();
1381 if (t != tok_colon)
1382 {
1383 complain (_("ill-formed rule: initial symbol not followed by colon"));
1384 unlex (t);
1385 }
1386 }
1387
1388 if (nrules == 0 && t == tok_bar)
1389 {
1390 complain (_("grammar starts with vertical bar"));
1391 lhs = symval; /* BOGUS: use a random symval */
1392 }
1393 /* start a new rule and record its lhs. */
1394
1395 nrules++;
1396 nitems++;
1397
1398 p = symbol_list_new (lhs);
1399
1400 crule1 = p1;
1401 if (p1)
1402 p1->next = p;
1403 else
1404 grammar = p;
1ff442ca 1405
107f7dfb
AD
1406 p1 = p;
1407 crule = p;
1ff442ca 1408
107f7dfb 1409 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1410
107f7dfb
AD
1411 if (lhs->class == unknown_sym)
1412 {
1413 lhs->class = nterm_sym;
1414 lhs->value = nvars;
1415 nvars++;
1416 }
1417 else if (lhs->class == token_sym)
1418 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1419
107f7dfb 1420 /* read the rhs of the rule. */
1ff442ca 1421
107f7dfb
AD
1422 for (;;)
1423 {
1424 t = lex ();
1425 if (t == tok_prec)
1426 {
1427 t = lex ();
1428 crule->ruleprec = symval;
1429 t = lex ();
1430 }
1431
1432 if (!(t == tok_identifier || t == tok_left_curly))
1433 break;
1ff442ca 1434
107f7dfb
AD
1435 /* If next token is an identifier, see if a colon follows it.
1436 If one does, exit this rule now. */
1437 if (t == tok_identifier)
1438 {
1439 bucket *ssave;
1440 token_t t1;
1441
1442 ssave = symval;
1443 t1 = lex ();
1444 unlex (t1);
1445 symval = ssave;
1446 if (t1 == tok_colon)
1447 break;
1448
1449 if (!first_rhs) /* JF */
1450 first_rhs = symval;
1451 /* Not followed by colon =>
1452 process as part of this rule's rhs. */
1453 }
1454
1455 /* If we just passed an action, that action was in the middle
1456 of a rule, so make a dummy rule to reduce it to a
1457 non-terminal. */
1458 if (action_flag)
1459 {
1460 /* Since the action was written out with this rule's
1461 number, we must give the new rule this number by
1462 inserting the new rule before it. */
1463
1464 /* Make a dummy nonterminal, a gensym. */
1465 bucket *sdummy = gensym ();
1466
1467 /* Make a new rule, whose body is empty, before the
1468 current one, so that the action just read can
1469 belong to it. */
1470 nrules++;
1471 nitems++;
1472 p = symbol_list_new (sdummy);
1473 /* Attach its lineno to that of the host rule. */
1474 p->line = crule->line;
1475 if (crule1)
1476 crule1->next = p;
1477 else
1478 grammar = p;
1479 /* End of the rule. */
1480 crule1 = symbol_list_new (NULL);
1481 crule1->next = crule;
1482
1483 p->next = crule1;
1484
1485 /* Insert the dummy generated by that rule into this
1486 rule. */
1487 nitems++;
1488 p = symbol_list_new (sdummy);
1489 p1->next = p;
1490 p1 = p;
1491
1492 action_flag = 0;
1493 }
1494
1495 if (t == tok_identifier)
1496 {
1497 nitems++;
1498 p = symbol_list_new (symval);
1499 p1->next = p;
1500 p1 = p;
1501 }
1502 else /* handle an action. */
1503 {
1504 copy_action (crule, rulelength);
1505 action_flag = 1;
1506 xactions++; /* JF */
1507 }
1508 rulelength++;
1509 } /* end of read rhs of rule */
1510
1511 /* Put an empty link in the list to mark the end of this rule */
1512 p = symbol_list_new (NULL);
1513 p1->next = p;
1514 p1 = p;
1515
1516 if (t == tok_prec)
1517 {
1518 complain (_("two @prec's in a row"));
1519 t = lex ();
1520 crule->ruleprec = symval;
1521 t = lex ();
1522 }
1523 if (t == tok_guard)
1524 {
1525 if (!semantic_parser)
1526 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1527
107f7dfb 1528 copy_guard (crule, rulelength);
a70083a3 1529 t = lex ();
107f7dfb
AD
1530 }
1531 else if (t == tok_left_curly)
1532 {
1533 /* This case never occurs -wjh */
1534 if (action_flag)
1535 complain (_("two actions at end of one rule"));
1536 copy_action (crule, rulelength);
1537 action_flag = 1;
1538 xactions++; /* -wjh */
1539 t = lex ();
1540 }
1541 /* If $$ is being set in default way, report if any type
1542 mismatch. */
1543 else if (!xactions
1544 && first_rhs && lhs->type_name != first_rhs->type_name)
1545 {
1546 if (lhs->type_name == 0
1547 || first_rhs->type_name == 0
1548 || strcmp (lhs->type_name, first_rhs->type_name))
1549 complain (_("type clash (`%s' `%s') on default action"),
1550 lhs->type_name ? lhs->type_name : "",
1551 first_rhs->type_name ? first_rhs->type_name : "");
1552 }
1553 /* Warn if there is no default for $$ but we need one. */
1554 else if (!xactions && !first_rhs && lhs->type_name != 0)
1555 complain (_("empty rule for typed nonterminal, and no action"));
1556 if (t == tok_semicolon)
a70083a3 1557 t = lex ();
107f7dfb
AD
1558 }
1559 else
1560 {
1561 complain (_("invalid input: %s"), quote (token_buffer));
1562 t = lex ();
1563 }
943819bf 1564
1ff442ca 1565
943819bf
RS
1566 /* grammar has been read. Do some checking */
1567
1ff442ca 1568 if (nsyms > MAXSHORT)
a0f6b076
AD
1569 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1570 MAXSHORT);
1ff442ca 1571 if (nrules == 0)
a0f6b076 1572 fatal (_("no rules in the input grammar"));
1ff442ca 1573
1ff442ca
NF
1574 /* Report any undefined symbols and consider them nonterminals. */
1575
1576 for (bp = firstsymbol; bp; bp = bp->next)
d7020c20 1577 if (bp->class == unknown_sym)
1ff442ca 1578 {
a70083a3
AD
1579 complain (_
1580 ("symbol %s is used, but is not defined as a token and has no rules"),
ff4a34be 1581 bp->tag);
d7020c20 1582 bp->class = nterm_sym;
1ff442ca
NF
1583 bp->value = nvars++;
1584 }
1585
1586 ntokens = nsyms - nvars;
1587}
ff48177d
MA
1588
1589/* At the end of the grammar file, some C source code must
63c2d5de 1590 be stored. It is going to be associated to the epilogue
ff48177d
MA
1591 directive. */
1592static void
1593read_additionnal_code (void)
1594{
1595 char c;
63c2d5de 1596 struct obstack el_obstack;
342b8b6e 1597
63c2d5de 1598 obstack_init (&el_obstack);
ff48177d 1599
710ddc4f
MA
1600 if (!no_lines_flag)
1601 {
1602 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1603 lineno, quotearg_style (c_quoting_style,
1604 muscle_find("filename")));
1605 }
1606
ff48177d 1607 while ((c = getc (finput)) != EOF)
63c2d5de 1608 obstack_1grow (&el_obstack, c);
342b8b6e 1609
63c2d5de 1610 obstack_1grow (&el_obstack, 0);
11d82f03 1611 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1612}
1613
a70083a3
AD
1614\f
1615/*--------------------------------------------------------------.
1616| For named tokens, but not literal ones, define the name. The |
1617| value is the user token number. |
1618`--------------------------------------------------------------*/
1ff442ca 1619
4a120d45 1620static void
896fe5c1 1621output_token_defines (struct obstack *oout)
1ff442ca 1622{
a70083a3
AD
1623 bucket *bp;
1624 char *cp, *symbol;
1625 char c;
1ff442ca 1626
a70083a3 1627 for (bp = firstsymbol; bp; bp = bp->next)
1ff442ca 1628 {
a70083a3
AD
1629 symbol = bp->tag; /* get symbol */
1630
1631 if (bp->value >= ntokens)
1632 continue;
1633 if (bp->user_token_number == SALIAS)
1634 continue;
1635 if ('\'' == *symbol)
1636 continue; /* skip literal character */
1637 if (bp == errtoken)
1638 continue; /* skip error token */
1639 if ('\"' == *symbol)
1ff442ca 1640 {
a70083a3
AD
1641 /* use literal string only if given a symbol with an alias */
1642 if (bp->alias)
1643 symbol = bp->alias->tag;
1644 else
1645 continue;
1646 }
1ff442ca 1647
a70083a3
AD
1648 /* Don't #define nonliteral tokens whose names contain periods. */
1649 cp = symbol;
1650 while ((c = *cp++) && c != '.');
1651 if (c != '\0')
1652 continue;
1ff442ca 1653
0b8afb77 1654 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
342b8b6e 1655 symbol, bp->user_token_number);
a70083a3 1656 if (semantic_parser)
342b8b6e
AD
1657 /* FIXME: This is certainly dead wrong, and should be just as
1658 above. --akim. */
0b8afb77 1659 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1660 }
1661}
1ff442ca
NF
1662
1663
037ca2f1
AD
1664/*------------------------------------------------------------------.
1665| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1666| number. |
1667`------------------------------------------------------------------*/
1668
1669static void
1670token_translations_init (void)
1671{
1672 bucket *bp = NULL;
1673 int i;
1674
1675 token_translations = XCALLOC (short, max_user_token_number + 1);
1676
1677 /* Initialize all entries for literal tokens to 2, the internal
1678 token number for $undefined., which represents all invalid
1679 inputs. */
1680 for (i = 0; i <= max_user_token_number; i++)
1681 token_translations[i] = 2;
1682
1683 for (bp = firstsymbol; bp; bp = bp->next)
1684 {
1685 /* Non-terminal? */
1686 if (bp->value >= ntokens)
1687 continue;
1688 /* A token string alias? */
1689 if (bp->user_token_number == SALIAS)
1690 continue;
6b7e85b9
AD
1691
1692 assert (bp->user_token_number != SUNDEF);
1693
037ca2f1
AD
1694 /* A token which translation has already been set? */
1695 if (token_translations[bp->user_token_number] != 2)
1696 complain (_("tokens %s and %s both assigned number %d"),
1697 tags[token_translations[bp->user_token_number]],
1698 bp->tag, bp->user_token_number);
1699 token_translations[bp->user_token_number] = bp->value;
1700 }
1701}
1702
1703
a70083a3
AD
1704/*------------------------------------------------------------------.
1705| Assign symbol numbers, and write definition of token names into |
b2ca4022 1706| FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
a70083a3
AD
1707| of symbols. |
1708`------------------------------------------------------------------*/
1ff442ca 1709
4a120d45 1710static void
118fb205 1711packsymbols (void)
1ff442ca 1712{
342b8b6e 1713 bucket *bp = NULL;
a70083a3 1714 int tokno = 1;
a70083a3 1715 int last_user_token_number;
4a120d45 1716 static char DOLLAR[] = "$";
1ff442ca 1717
d7913476 1718 tags = XCALLOC (char *, nsyms + 1);
d7913476 1719 user_toknums = XCALLOC (short, nsyms + 1);
1ff442ca 1720
d7913476
AD
1721 sprec = XCALLOC (short, nsyms);
1722 sassoc = XCALLOC (short, nsyms);
1ff442ca 1723
037ca2f1
AD
1724 /* The EOF token. */
1725 tags[0] = DOLLAR;
1726 user_toknums[0] = 0;
1727
1ff442ca
NF
1728 max_user_token_number = 256;
1729 last_user_token_number = 256;
1730
1731 for (bp = firstsymbol; bp; bp = bp->next)
1732 {
d7020c20 1733 if (bp->class == nterm_sym)
1ff442ca
NF
1734 {
1735 bp->value += ntokens;
1736 }
943819bf
RS
1737 else if (bp->alias)
1738 {
0a6384c4
AD
1739 /* this symbol and its alias are a single token defn.
1740 allocate a tokno, and assign to both check agreement of
1741 ->prec and ->assoc fields and make both the same */
1742 if (bp->value == 0)
1743 bp->value = bp->alias->value = tokno++;
943819bf 1744
0a6384c4
AD
1745 if (bp->prec != bp->alias->prec)
1746 {
1747 if (bp->prec != 0 && bp->alias->prec != 0
1748 && bp->user_token_number == SALIAS)
a0f6b076
AD
1749 complain (_("conflicting precedences for %s and %s"),
1750 bp->tag, bp->alias->tag);
0a6384c4
AD
1751 if (bp->prec != 0)
1752 bp->alias->prec = bp->prec;
1753 else
1754 bp->prec = bp->alias->prec;
1755 }
943819bf 1756
0a6384c4
AD
1757 if (bp->assoc != bp->alias->assoc)
1758 {
a0f6b076
AD
1759 if (bp->assoc != 0 && bp->alias->assoc != 0
1760 && bp->user_token_number == SALIAS)
1761 complain (_("conflicting assoc values for %s and %s"),
1762 bp->tag, bp->alias->tag);
1763 if (bp->assoc != 0)
1764 bp->alias->assoc = bp->assoc;
1765 else
1766 bp->assoc = bp->alias->assoc;
1767 }
0a6384c4
AD
1768
1769 if (bp->user_token_number == SALIAS)
a70083a3 1770 continue; /* do not do processing below for SALIASs */
943819bf 1771
a70083a3 1772 }
d7020c20 1773 else /* bp->class == token_sym */
943819bf
RS
1774 {
1775 bp->value = tokno++;
1776 }
1777
d7020c20 1778 if (bp->class == token_sym)
1ff442ca 1779 {
6b7e85b9 1780 if (bp->user_token_number == SUNDEF)
1ff442ca
NF
1781 bp->user_token_number = ++last_user_token_number;
1782 if (bp->user_token_number > max_user_token_number)
1783 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1784 }
1785
1786 tags[bp->value] = bp->tag;
943819bf 1787 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1788 sprec[bp->value] = bp->prec;
1789 sassoc[bp->value] = bp->assoc;
1ff442ca
NF
1790 }
1791
037ca2f1 1792 token_translations_init ();
1ff442ca
NF
1793
1794 error_token_number = errtoken->value;
1795
e3f1699f
AD
1796 if (startval->class == unknown_sym)
1797 fatal (_("the start symbol %s is undefined"), startval->tag);
1798 else if (startval->class == token_sym)
1799 fatal (_("the start symbol %s is a token"), startval->tag);
1800
1801 start_symbol = startval->value;
1802}
1803
1804
1805/*-----------------------------------.
1806| Output definition of token names. |
1807`-----------------------------------*/
1808
1809static void
1810symbols_output (void)
1811{
342b8b6e
AD
1812 {
1813 struct obstack tokendefs;
1814 obstack_init (&tokendefs);
1815 output_token_defines (&tokendefs);
1816 obstack_1grow (&tokendefs, 0);
1817 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1818 obstack_free (&tokendefs, NULL);
1819 }
b6610515 1820
89cab50d 1821 if (defines_flag)
1ff442ca 1822 {
896fe5c1 1823 output_token_defines (&defines_obstack);
1ff442ca
NF
1824
1825 if (!pure_parser)
78af9bbc
AD
1826 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1827 spec_name_prefix);
1ff442ca 1828 if (semantic_parser)
037ca2f1
AD
1829 {
1830 int i;
1831
1832 for (i = ntokens; i < nsyms; i++)
1833 {
1834 /* don't make these for dummy nonterminals made by gensym. */
1835 if (*tags[i] != '@')
1836 obstack_fgrow2 (&defines_obstack,
1837 "# define\tNT%s\t%d\n", tags[i], i);
1838 }
1ff442ca 1839#if 0
037ca2f1
AD
1840 /* `fdefines' is now a temporary file, so we need to copy its
1841 contents in `done', so we can't close it here. */
1842 fclose (fdefines);
1843 fdefines = NULL;
1ff442ca 1844#endif
037ca2f1 1845 }
1ff442ca
NF
1846 }
1847}
a083fbbf 1848
1ff442ca 1849
a70083a3
AD
1850/*---------------------------------------------------------------.
1851| Convert the rules into the representation using RRHS, RLHS and |
1852| RITEMS. |
1853`---------------------------------------------------------------*/
1ff442ca 1854
4a120d45 1855static void
118fb205 1856packgram (void)
1ff442ca 1857{
a70083a3
AD
1858 int itemno;
1859 int ruleno;
1860 symbol_list *p;
1ff442ca 1861
d7913476 1862 ritem = XCALLOC (short, nitems + 1);
b2ed6e58 1863 rule_table = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1864
1865 itemno = 0;
1866 ruleno = 1;
1867
1868 p = grammar;
1869 while (p)
1870 {
b29b2ed5 1871 bucket *ruleprec = p->ruleprec;
b2ed6e58
AD
1872 rule_table[ruleno].lhs = p->sym->value;
1873 rule_table[ruleno].rhs = itemno;
b29b2ed5 1874 rule_table[ruleno].line = p->line;
68f1e3ed 1875 rule_table[ruleno].useful = TRUE;
1ff442ca
NF
1876
1877 p = p->next;
1878 while (p && p->sym)
1879 {
1880 ritem[itemno++] = p->sym->value;
1881 /* A rule gets by default the precedence and associativity
1882 of the last token in it. */
d7020c20 1883 if (p->sym->class == token_sym)
1ff442ca 1884 {
652a871c
AD
1885 rule_table[ruleno].prec = p->sym->prec;
1886 rule_table[ruleno].assoc = p->sym->assoc;
1ff442ca 1887 }
a70083a3
AD
1888 if (p)
1889 p = p->next;
1ff442ca
NF
1890 }
1891
1892 /* If this rule has a %prec,
a70083a3 1893 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1894 if (ruleprec)
1895 {
652a871c
AD
1896 rule_table[ruleno].prec = ruleprec->prec;
1897 rule_table[ruleno].assoc = ruleprec->assoc;
1898 rule_table[ruleno].precsym = ruleprec->value;
1ff442ca
NF
1899 }
1900
1901 ritem[itemno++] = -ruleno;
1902 ruleno++;
1903
a70083a3
AD
1904 if (p)
1905 p = p->next;
1ff442ca
NF
1906 }
1907
1908 ritem[itemno] = 0;
3067fbef
AD
1909
1910 if (trace_flag)
1911 ritem_print (stderr);
1ff442ca 1912}
a70083a3
AD
1913\f
1914/*-------------------------------------------------------------------.
1915| Read in the grammar specification and record it in the format |
ea5607fd 1916| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1917| and all actions into ACTION_OBSTACK, in each case forming the body |
1918| of a C function (YYGUARD or YYACTION) which contains a switch |
1919| statement to decide which guard or action to execute. |
a70083a3
AD
1920`-------------------------------------------------------------------*/
1921
1922void
1923reader (void)
1924{
1925 start_flag = 0;
1926 startval = NULL; /* start symbol not specified yet. */
1927
a70083a3
AD
1928 nsyms = 1;
1929 nvars = 0;
1930 nrules = 0;
1931 nitems = 0;
a70083a3
AD
1932
1933 typed = 0;
1934 lastprec = 0;
1935
a70083a3
AD
1936 semantic_parser = 0;
1937 pure_parser = 0;
a70083a3
AD
1938
1939 grammar = NULL;
1940
342b8b6e 1941 lex_init ();
a70083a3
AD
1942 lineno = 1;
1943
11d82f03
MA
1944 /* Initialize the muscle obstack. */
1945 obstack_init (&muscle_obstack);
82e236e2 1946
a70083a3
AD
1947 /* Initialize the symbol table. */
1948 tabinit ();
b6610515 1949
a70083a3
AD
1950 /* Construct the error token */
1951 errtoken = getsym ("error");
d7020c20 1952 errtoken->class = token_sym;
a70083a3 1953 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1954
a70083a3
AD
1955 /* Construct a token that represents all undefined literal tokens.
1956 It is always token number 2. */
1957 undeftoken = getsym ("$undefined.");
d7020c20 1958 undeftoken->class = token_sym;
a70083a3
AD
1959 undeftoken->user_token_number = 2;
1960
896fe5c1
AD
1961 /* Read the declaration section. Copy %{ ... %} groups to
1962 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1963 etc. found there. */
a70083a3 1964 read_declarations ();
a70083a3
AD
1965 /* Read in the grammar, build grammar in list form. Write out
1966 guards and actions. */
1967 readgram ();
ff48177d
MA
1968 /* Some C code is given at the end of the grammar file. */
1969 read_additionnal_code ();
b0c4483e 1970
a70083a3 1971 /* Now we know whether we need the line-number stack. If we do,
b0c4483e
PB
1972 write its type into the .tab.h file.
1973 This is no longer need with header skeleton. */
1974
a70083a3
AD
1975 /* Assign the symbols their symbol numbers. Write #defines for the
1976 token symbols into FDEFINES if requested. */
1977 packsymbols ();
1978 /* Convert the grammar into the format described in gram.h. */
1979 packgram ();
edad7067
AD
1980 /* Output the headers. */
1981 symbols_output ();
a70083a3 1982}