]> git.saurik.com Git - bison.git/blame - src/reader.c
* data/bison.simple (YYCOPY): Fix [] quoting problem in the non-GCC case.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
/* One cell of a singly linked list of symbols.  */
typedef struct symbol_list
{
  /* Next cell of the list, or NULL at the end.  */
  struct symbol_list *next;
  /* The symbol held by this cell.  */
  symbol_t *sym;
  /* Value of LINENO at the time the cell was created.  */
  int line;

  /* The action is attached to the LHS of a rule. */
  const char *action;
  int action_line;

  /* The guard is attached to the LHS of a rule. */
  const char *guard;
  int guard_line;
  /* NOTE(review): presumably the symbol given by %prec for this rule
     -- confirm against the rule reader.  */
  symbol_t *ruleprec;
} symbol_list;
118fb205 53
/* Current line number in the input; incremented each time a newline
   is consumed.  */
int lineno;
/* NOTE(review): presumably the list of rules read so far -- it is not
   used in this part of the file; confirm against the rule reader.  */
static symbol_list *grammar = NULL;
/* Nonzero once a valid %start declaration has been read.  */
static int start_flag = 0;
/* The symbol named by the %start declaration, if any.  */
static symbol_t *startval = NULL;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

/* Nonzero if %union has been seen.  */
static int typed = 0;

/* Incremented for each %left, %right or %nonassoc seen */
static int lastprec = 0;

/* NOTE(review): presumably the predefined error, undefined and
   end-of-file tokens and the start symbol.  Only EOFTOKEN is assigned
   in this part of the file (when a token is given user number 0);
   confirm where the others are set.  */
symbol_t *errtoken = NULL;
symbol_t *undeftoken = NULL;
symbol_t *eoftoken = NULL;
symbol_t *axiom = NULL;
b29b2ed5 73
6255b435 74static symbol_list *
db8837cb 75symbol_list_new (symbol_t *sym)
b29b2ed5
AD
76{
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
d945f5cd
AD
81 res->action = NULL;
82 res->action_line = 0;
f499b062
AD
83 res->guard = NULL;
84 res->guard_line = 0;
b29b2ed5
AD
85 res->ruleprec = NULL;
86 return res;
87}
88
72a23c97 89/*------------------------.
db8837cb 90| Operations on symbols. |
72a23c97
AD
91`------------------------*/
92
93
94/*-----------------------------------------------------------.
95| If THIS is not defined, report an error, and consider it a |
96| nonterminal. |
97`-----------------------------------------------------------*/
98
99static bool
db8837cb 100symbol_check_defined (symbol_t *this)
72a23c97
AD
101{
102 if (this->class == unknown_sym)
103 {
104 complain
105 (_("symbol %s is used, but is not defined as a token and has no rules"),
106 this->tag);
107 this->class = nterm_sym;
108 this->number = nvars++;
109 }
110
111 return TRUE;
112}
113
114
115/*-------------------------------------------------------------------.
116| Assign a symbol number, and write the definition of the token name |
117| into FDEFINES. Put in SYMBOLS. |
118`-------------------------------------------------------------------*/
119
120static bool
db8837cb 121symbol_make_alias (symbol_t *symbol, char *typename)
72a23c97
AD
122{
123 if (symval->alias)
124 warn (_("symbol `%s' used more than once as a literal string"),
125 symval->tag);
126 else if (symbol->alias)
127 warn (_("symbol `%s' given more than one literal string"),
128 symbol->tag);
129 else
130 {
131 symval->class = token_sym;
132 symval->type_name = typename;
133 symval->user_token_number = symbol->user_token_number;
134 symbol->user_token_number = SALIAS;
135 symval->alias = symbol;
136 symbol->alias = symval;
137 /* symbol and symval combined are only one symbol */
138 nsyms--;
139 ntokens--;
140 assert (ntokens == symbol->number || ntokens == symval->number);
141 symbol->number = symval->number =
142 (symval->number < symbol->number) ? symval->number : symbol->number;
143 }
144
145 return TRUE;
146}
147
148/*---------------------------------------------------------.
149| Check that THIS, and its alias, have same precedence and |
150| associativity. |
151`---------------------------------------------------------*/
152
153static bool
db8837cb 154symbol_check_alias_consistence (symbol_t *this)
72a23c97
AD
155{
156 /* Check only those who _are_ the aliases. */
157 if (this->alias && this->user_token_number == SALIAS)
158 {
159 if (this->prec != this->alias->prec)
160 {
161 if (this->prec != 0 && this->alias->prec != 0)
162 complain (_("conflicting precedences for %s and %s"),
163 this->tag, this->alias->tag);
164 if (this->prec != 0)
165 this->alias->prec = this->prec;
166 else
167 this->prec = this->alias->prec;
168 }
169
170 if (this->assoc != this->alias->assoc)
171 {
172 if (this->assoc != 0 && this->alias->assoc != 0)
173 complain (_("conflicting assoc values for %s and %s"),
174 this->tag, this->alias->tag);
175 if (this->assoc != 0)
176 this->alias->assoc = this->assoc;
177 else
178 this->assoc = this->alias->assoc;
179 }
180 }
181 return TRUE;
182}
183
184
185/*-------------------------------------------------------------------.
186| Assign a symbol number, and write the definition of the token name |
187| into FDEFINES. Put in SYMBOLS. |
188`-------------------------------------------------------------------*/
189
190static bool
db8837cb 191symbol_pack (symbol_t *this)
72a23c97 192{
72a23c97
AD
193 if (this->class == nterm_sym)
194 {
195 this->number += ntokens;
196 }
197 else if (this->alias)
198 {
199 /* This symbol and its alias are a single token defn.
200 Allocate a tokno, and assign to both check agreement of
201 prec and assoc fields and make both the same */
5fbb0954 202 if (this->number == NUMBER_UNDEFINED)
72a23c97
AD
203 {
204 if (this == eoftoken || this->alias == eoftoken)
205 this->number = this->alias->number = 0;
206 else
207 {
5fbb0954 208 assert (this->alias->number != NUMBER_UNDEFINED);
72a23c97
AD
209 this->number = this->alias->number;
210 }
211 }
212 /* Do not do processing below for SALIASs. */
213 if (this->user_token_number == SALIAS)
214 return TRUE;
215 }
216 else /* this->class == token_sym */
217 {
5fbb0954 218 assert (this->number != NUMBER_UNDEFINED);
72a23c97
AD
219 }
220
72a23c97
AD
221 symbols[this->number] = this;
222 return TRUE;
223}
224
225
226
227
228/*--------------------------------------------------.
229| Put THIS in TOKEN_TRANSLATIONS if it is a token. |
230`--------------------------------------------------*/
231
232static bool
db8837cb 233symbol_translation (symbol_t *this)
72a23c97 234{
72a23c97
AD
235 /* Non-terminal? */
236 if (this->class == token_sym
237 && this->user_token_number != SALIAS)
238 {
239 /* A token which translation has already been set? */
007a50a4 240 if (token_translations[this->user_token_number] != undeftoken->number)
72a23c97
AD
241 complain (_("tokens %s and %s both assigned number %d"),
242 symbols[token_translations[this->user_token_number]]->tag,
243 this->tag, this->user_token_number);
244
72a23c97
AD
245 token_translations[this->user_token_number] = this->number;
246 }
247
248 return TRUE;
249}
0d533154 250\f
a70083a3 251
0d533154
AD
252/*===================\
253| Low level lexing. |
254\===================*/
943819bf
RS
255
256static void
118fb205 257skip_to_char (int target)
943819bf
RS
258{
259 int c;
260 if (target == '\n')
a0f6b076 261 complain (_(" Skipping to next \\n"));
943819bf 262 else
a0f6b076 263 complain (_(" Skipping to next %c"), target);
943819bf
RS
264
265 do
0d533154 266 c = skip_white_space ();
943819bf 267 while (c != target && c != EOF);
a083fbbf 268 if (c != EOF)
0d533154 269 ungetc (c, finput);
943819bf
RS
270}
271
272
0d533154
AD
/* Read an optionally `-'-prefixed decimal integer from STREAM and
   return its value.  The first character that is not part of the
   number is pushed back onto STREAM.  When no digit is found the
   result is 0.  A magnitude too large for an int saturates at INT_MAX
   (so the result is INT_MAX or -INT_MAX) instead of overflowing, which
   would be undefined behavior.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would 10 * n + digit exceed INT_MAX?  Test before computing.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a harmless no-op.  */
  ungetc (c, stream);

  return sign * n;
}
300\f
79282c5a
AD
301/*--------------------------------------------------------------.
302| Get the data type (alternative in the union) of the value for |
303| symbol N in rule RULE. |
304`--------------------------------------------------------------*/
305
306static char *
b29b2ed5 307get_type_name (int n, symbol_list *rule)
79282c5a
AD
308{
309 int i;
310 symbol_list *rp;
311
312 if (n < 0)
313 {
314 complain (_("invalid $ value"));
315 return NULL;
316 }
317
318 rp = rule;
319 i = 0;
320
321 while (i < n)
322 {
323 rp = rp->next;
324 if (rp == NULL || rp->sym == NULL)
325 {
326 complain (_("invalid $ value"));
327 return NULL;
328 }
f3849179 329 ++i;
79282c5a
AD
330 }
331
332 return rp->sym->type_name;
333}
334\f
2b7ed18a
RA
/* Append the character C to OOUT, replacing `[' and `]' by the
   quadrigraphs `@<:@' and `@:>@' respectively.  */

static inline void
copy_character (struct obstack *oout, int c)
{
  if (c == '[')
    obstack_sgrow (oout, "@<:@");
  else if (c == ']')
    obstack_sgrow (oout, "@:>@");
  else
    obstack_1grow (oout, c);
}
356
337bab46
AD
357/*------------------------------------------------------------.
358| Dump the string from FIN to OOUT if non null. MATCH is the |
359| delimiter of the string (either ' or "). |
360`------------------------------------------------------------*/
ae3c3164
AD
361
362static inline void
b6610515 363copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
364{
365 int c;
366
b6610515
RA
367 if (store)
368 obstack_1grow (oout, match);
8c7ebe49 369
4a120d45 370 c = getc (fin);
ae3c3164
AD
371
372 while (c != match)
373 {
374 if (c == EOF)
375 fatal (_("unterminated string at end of file"));
376 if (c == '\n')
377 {
a0f6b076 378 complain (_("unterminated string"));
4a120d45 379 ungetc (c, fin);
ae3c3164
AD
380 c = match; /* invent terminator */
381 continue;
382 }
383
2b7ed18a 384 copy_character (oout, c);
ae3c3164
AD
385
386 if (c == '\\')
387 {
4a120d45 388 c = getc (fin);
ae3c3164
AD
389 if (c == EOF)
390 fatal (_("unterminated string at end of file"));
2b7ed18a 391 copy_character (oout, c);
8c7ebe49 392
ae3c3164 393 if (c == '\n')
f3849179 394 ++lineno;
ae3c3164
AD
395 }
396
a70083a3 397 c = getc (fin);
ae3c3164
AD
398 }
399
b6610515
RA
400 if (store)
401 obstack_1grow (oout, c);
402}
403
/* Copy the remainder of a literal delimited by MATCH from FIN to
   OOUT, delimiters included: this is copy_string2 with STORE set.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
411
b6610515
RA
/* Copy from FIN to OOUT the longest run of alphanumeric characters
   and underscores.  The character that ends the run is pushed back
   onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  for (;;)
    {
      int c = getc (fin);

      if (!isalnum (c) && c != '_')
        {
          ungetc (c, fin);
          return;
        }

      obstack_1grow (oout, c);
    }
}
ae3c3164 424
2666f928
AD
425
426/*------------------------------------------------------------------.
427| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
428| `/', which might or might not be a comment. In any case, copy |
429| what we saw. |
430`------------------------------------------------------------------*/
ae3c3164
AD
431
432static inline void
2666f928 433copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
434{
435 int cplus_comment;
a70083a3 436 int ended;
550a72a3
AD
437 int c;
438
439 /* We read a `/', output it. */
2666f928 440 obstack_1grow (oout, '/');
550a72a3
AD
441
442 switch ((c = getc (fin)))
443 {
444 case '/':
445 cplus_comment = 1;
446 break;
447 case '*':
448 cplus_comment = 0;
449 break;
450 default:
451 ungetc (c, fin);
452 return;
453 }
ae3c3164 454
2666f928 455 obstack_1grow (oout, c);
550a72a3 456 c = getc (fin);
ae3c3164
AD
457
458 ended = 0;
459 while (!ended)
460 {
461 if (!cplus_comment && c == '*')
462 {
463 while (c == '*')
464 {
2666f928 465 obstack_1grow (oout, c);
550a72a3 466 c = getc (fin);
ae3c3164
AD
467 }
468
469 if (c == '/')
470 {
2666f928 471 obstack_1grow (oout, c);
ae3c3164
AD
472 ended = 1;
473 }
474 }
475 else if (c == '\n')
476 {
f3849179 477 ++lineno;
2666f928 478 obstack_1grow (oout, c);
ae3c3164
AD
479 if (cplus_comment)
480 ended = 1;
481 else
550a72a3 482 c = getc (fin);
ae3c3164
AD
483 }
484 else if (c == EOF)
485 fatal (_("unterminated comment"));
486 else
487 {
2b7ed18a 488 copy_character (oout, c);
550a72a3 489 c = getc (fin);
ae3c3164
AD
490 }
491 }
492}
493
494
a70083a3 495/*-----------------------------------------------------------------.
337bab46 496| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
497| reference to this location. STACK_OFFSET is the number of values |
498| in the current rule so far, which says where to find `$0' with |
499| respect to the top of the stack. |
500`-----------------------------------------------------------------*/
1ff442ca 501
a70083a3 502static inline void
337bab46 503copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 504{
a70083a3 505 int c;
1ff442ca 506
a70083a3
AD
507 c = getc (fin);
508 if (c == '$')
1ff442ca 509 {
ff4423cc 510 obstack_sgrow (oout, "yyloc");
89cab50d 511 locations_flag = 1;
a70083a3
AD
512 }
513 else if (isdigit (c) || c == '-')
514 {
515 int n;
1ff442ca 516
a70083a3
AD
517 ungetc (c, fin);
518 n = read_signed_integer (fin);
11e2beca
AD
519 if (n > stack_offset)
520 complain (_("invalid value: %s%d"), "@", n);
521 else
522 {
523 /* Offset is always 0 if parser has already popped the stack
524 pointer. */
525 obstack_fgrow1 (oout, "yylsp[%d]",
526 n - (semantic_parser ? 0 : stack_offset));
527 locations_flag = 1;
528 }
1ff442ca 529 }
a70083a3 530 else
ff4a34be
AD
531 {
532 char buf[] = "@c";
533 buf[1] = c;
534 complain (_("%s is invalid"), quote (buf));
535 }
1ff442ca 536}
79282c5a
AD
537
538
539/*-------------------------------------------------------------------.
540| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
541| |
542| Possible inputs: $[<TYPENAME>]($|integer) |
543| |
337bab46 544| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
545| the number of values in the current rule so far, which says where |
546| to find `$0' with respect to the top of the stack. |
547`-------------------------------------------------------------------*/
548
549static inline void
337bab46 550copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
551 symbol_list *rule, int stack_offset)
552{
553 int c = getc (fin);
b0ce6046 554 const char *type_name = NULL;
79282c5a 555
f282676b 556 /* Get the type name if explicit. */
79282c5a
AD
557 if (c == '<')
558 {
f282676b 559 read_type_name (fin);
79282c5a
AD
560 type_name = token_buffer;
561 value_components_used = 1;
79282c5a
AD
562 c = getc (fin);
563 }
564
565 if (c == '$')
566 {
ff4423cc 567 obstack_sgrow (oout, "yyval");
8c7ebe49 568
79282c5a
AD
569 if (!type_name)
570 type_name = get_type_name (0, rule);
571 if (type_name)
337bab46 572 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
573 if (!type_name && typed)
574 complain (_("$$ of `%s' has no declared type"),
575 rule->sym->tag);
576 }
577 else if (isdigit (c) || c == '-')
578 {
579 int n;
580 ungetc (c, fin);
581 n = read_signed_integer (fin);
582
11e2beca
AD
583 if (n > stack_offset)
584 complain (_("invalid value: %s%d"), "$", n);
585 else
586 {
587 if (!type_name && n > 0)
588 type_name = get_type_name (n, rule);
589
590 /* Offset is always 0 if parser has already popped the stack
591 pointer. */
592 obstack_fgrow1 (oout, "yyvsp[%d]",
593 n - (semantic_parser ? 0 : stack_offset));
594
595 if (type_name)
596 obstack_fgrow1 (oout, ".%s", type_name);
597 if (!type_name && typed)
598 complain (_("$%d of `%s' has no declared type"),
599 n, rule->sym->tag);
600 }
79282c5a
AD
601 }
602 else
603 {
604 char buf[] = "$c";
605 buf[1] = c;
606 complain (_("%s is invalid"), quote (buf));
607 }
608}
a70083a3
AD
609\f
610/*-------------------------------------------------------------------.
611| Copy the contents of a `%{ ... %}' into the definitions file. The |
612| `%{' has already been read. Return after reading the `%}'. |
613`-------------------------------------------------------------------*/
1ff442ca 614
4a120d45 615static void
0dd1580a 616copy_definition (struct obstack *oout)
1ff442ca 617{
a70083a3 618 int c;
ae3c3164 619 /* -1 while reading a character if prev char was %. */
a70083a3 620 int after_percent;
1ff442ca 621
89cab50d 622 if (!no_lines_flag)
25b222fa 623 {
0dd1580a 624 obstack_fgrow2 (oout, muscle_find ("linef"),
342b8b6e 625 lineno, quotearg_style (c_quoting_style,
b7c49edf 626 muscle_find ("filename")));
25b222fa 627 }
1ff442ca
NF
628
629 after_percent = 0;
630
ae3c3164 631 c = getc (finput);
1ff442ca
NF
632
633 for (;;)
634 {
635 switch (c)
636 {
637 case '\n':
0dd1580a 638 obstack_1grow (oout, c);
f3849179 639 ++lineno;
1ff442ca
NF
640 break;
641
642 case '%':
a70083a3 643 after_percent = -1;
1ff442ca 644 break;
a083fbbf 645
1ff442ca
NF
646 case '\'':
647 case '"':
0dd1580a 648 copy_string (finput, oout, c);
1ff442ca
NF
649 break;
650
651 case '/':
0dd1580a 652 copy_comment (finput, oout);
1ff442ca
NF
653 break;
654
655 case EOF:
a70083a3 656 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
657
658 default:
2b7ed18a 659 copy_character (oout, c);
1ff442ca
NF
660 }
661
a70083a3 662 c = getc (finput);
1ff442ca
NF
663
664 if (after_percent)
665 {
666 if (c == '}')
667 return;
0dd1580a 668 obstack_1grow (oout, '%');
1ff442ca
NF
669 }
670 after_percent = 0;
1ff442ca 671 }
1ff442ca
NF
672}
673
674
d7020c20
AD
675/*-------------------------------------------------------------------.
676| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
677| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
678| are reversed. |
679`-------------------------------------------------------------------*/
1ff442ca 680
4a120d45 681static void
d7020c20 682parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 683{
342b8b6e
AD
684 token_t token = tok_undef;
685 char *typename = NULL;
1ff442ca 686
1e9798d5 687 /* The symbol being defined. */
db8837cb 688 symbol_t *symbol = NULL;
1e9798d5
AD
689
690 /* After `%token' and `%nterm', any number of symbols maybe be
691 defined. */
1ff442ca
NF
692 for (;;)
693 {
e6011337
JT
694 int tmp_char = ungetc (skip_white_space (), finput);
695
1e9798d5
AD
696 /* `%' (for instance from `%token', or from `%%' etc.) is the
697 only valid means to end this declaration. */
e6011337 698 if (tmp_char == '%')
1ff442ca 699 return;
e6011337 700 if (tmp_char == EOF)
a0f6b076 701 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 702
a70083a3 703 token = lex ();
511e79b3 704 if (token == tok_comma)
943819bf
RS
705 {
706 symbol = NULL;
707 continue;
708 }
511e79b3 709 if (token == tok_typename)
1ff442ca 710 {
95e36146 711 typename = xstrdup (token_buffer);
1ff442ca 712 value_components_used = 1;
943819bf
RS
713 symbol = NULL;
714 }
511e79b3 715 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 716 {
db8837cb 717 symbol_make_alias (symbol, typename);
8e03724b 718 symbol = NULL;
1ff442ca 719 }
511e79b3 720 else if (token == tok_identifier)
1ff442ca
NF
721 {
722 int oldclass = symval->class;
943819bf 723 symbol = symval;
1ff442ca 724
943819bf 725 if (symbol->class == what_is_not)
a0f6b076 726 complain (_("symbol %s redefined"), symbol->tag);
943819bf 727 symbol->class = what_is;
d7020c20 728 if (what_is == nterm_sym && oldclass != nterm_sym)
d9b739c3 729 symbol->number = nvars++;
5fbb0954 730 if (what_is == token_sym && symbol->number == NUMBER_UNDEFINED)
bd02036a 731 symbol->number = ntokens++;
1ff442ca
NF
732
733 if (typename)
734 {
943819bf
RS
735 if (symbol->type_name == NULL)
736 symbol->type_name = typename;
a70083a3 737 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 738 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
739 }
740 }
511e79b3 741 else if (symbol && token == tok_number)
a70083a3 742 {
943819bf 743 symbol->user_token_number = numval;
b7c49edf
AD
744 /* User defined EOF token? */
745 if (numval == 0)
72a23c97
AD
746 {
747 eoftoken = symbol;
748 eoftoken->number = 0;
749 /* It is always mapped to 0, so it was already counted in
750 NTOKENS. */
751 --ntokens;
752 }
a70083a3 753 }
1ff442ca 754 else
943819bf 755 {
a0f6b076 756 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
757 token_buffer,
758 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 759 skip_to_char ('%');
943819bf 760 }
1ff442ca
NF
761 }
762
763}
764
1ff442ca 765
d7020c20
AD
766/*------------------------------.
767| Parse what comes after %start |
768`------------------------------*/
1ff442ca 769
4a120d45 770static void
118fb205 771parse_start_decl (void)
1ff442ca
NF
772{
773 if (start_flag)
27821bff 774 complain (_("multiple %s declarations"), "%start");
511e79b3 775 if (lex () != tok_identifier)
27821bff 776 complain (_("invalid %s declaration"), "%start");
943819bf
RS
777 else
778 {
779 start_flag = 1;
780 startval = symval;
781 }
1ff442ca
NF
782}
783
a70083a3
AD
784/*-----------------------------------------------------------.
785| read in a %type declaration and record its information for |
786| get_type_name to access |
787`-----------------------------------------------------------*/
788
789static void
790parse_type_decl (void)
791{
a70083a3
AD
792 char *name;
793
511e79b3 794 if (lex () != tok_typename)
a70083a3
AD
795 {
796 complain ("%s", _("%type declaration has no <typename>"));
797 skip_to_char ('%');
798 return;
799 }
800
95e36146 801 name = xstrdup (token_buffer);
a70083a3
AD
802
803 for (;;)
804 {
f17bcd1f 805 token_t t;
a70083a3
AD
806 int tmp_char = ungetc (skip_white_space (), finput);
807
808 if (tmp_char == '%')
809 return;
810 if (tmp_char == EOF)
811 fatal (_("Premature EOF after %s"), token_buffer);
812
813 t = lex ();
814
815 switch (t)
1ff442ca
NF
816 {
817
511e79b3
AD
818 case tok_comma:
819 case tok_semicolon:
1ff442ca
NF
820 break;
821
511e79b3 822 case tok_identifier:
1ff442ca
NF
823 if (symval->type_name == NULL)
824 symval->type_name = name;
a70083a3 825 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 826 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
827
828 break;
829
830 default:
a0f6b076
AD
831 complain (_("invalid %%type declaration due to item: %s"),
832 token_buffer);
a70083a3 833 skip_to_char ('%');
1ff442ca
NF
834 }
835 }
836}
837
838
839
d7020c20
AD
840/*----------------------------------------------------------------.
841| Read in a %left, %right or %nonassoc declaration and record its |
842| information. |
843`----------------------------------------------------------------*/
1ff442ca 844
4a120d45 845static void
d7020c20 846parse_assoc_decl (associativity assoc)
1ff442ca 847{
a70083a3
AD
848 char *name = NULL;
849 int prev = 0;
1ff442ca 850
f3849179
AD
851 /* Assign a new precedence level, never 0. */
852 ++lastprec;
1ff442ca 853
1ff442ca
NF
854 for (;;)
855 {
f17bcd1f 856 token_t t;
e6011337 857 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 858
e6011337 859 if (tmp_char == '%')
1ff442ca 860 return;
e6011337 861 if (tmp_char == EOF)
a0f6b076 862 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 863
a70083a3 864 t = lex ();
1ff442ca
NF
865
866 switch (t)
867 {
511e79b3 868 case tok_typename:
95e36146 869 name = xstrdup (token_buffer);
1ff442ca
NF
870 break;
871
511e79b3 872 case tok_comma:
1ff442ca
NF
873 break;
874
511e79b3 875 case tok_identifier:
1ff442ca 876 if (symval->prec != 0)
a0f6b076 877 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
878 symval->prec = lastprec;
879 symval->assoc = assoc;
d7020c20 880 if (symval->class == nterm_sym)
a0f6b076 881 complain (_("symbol %s redefined"), symval->tag);
5fbb0954 882 if (symval->number == NUMBER_UNDEFINED)
72a23c97
AD
883 {
884 symval->number = ntokens++;
885 symval->class = token_sym;
886 }
1ff442ca 887 if (name)
a70083a3 888 { /* record the type, if one is specified */
1ff442ca
NF
889 if (symval->type_name == NULL)
890 symval->type_name = name;
a70083a3 891 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 892 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
893 }
894 break;
895
511e79b3
AD
896 case tok_number:
897 if (prev == tok_identifier)
a70083a3 898 {
1ff442ca 899 symval->user_token_number = numval;
a70083a3
AD
900 }
901 else
902 {
72a23c97
AD
903 complain
904 (_("invalid text (%s) - number should be after identifier"),
905 token_buffer);
a70083a3
AD
906 skip_to_char ('%');
907 }
1ff442ca
NF
908 break;
909
511e79b3 910 case tok_semicolon:
1ff442ca
NF
911 return;
912
913 default:
a0f6b076 914 complain (_("unexpected item: %s"), token_buffer);
a70083a3 915 skip_to_char ('%');
1ff442ca
NF
916 }
917
918 prev = t;
1ff442ca
NF
919 }
920}
921
922
923
dd60faec 924/*--------------------------------------------------------------.
180d45ba
PB
925| Copy the union declaration into the stype muscle |
926| (and fdefines), where it is made into the definition of |
927| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 928`--------------------------------------------------------------*/
1ff442ca 929
4a120d45 930static void
118fb205 931parse_union_decl (void)
1ff442ca 932{
a70083a3
AD
933 int c;
934 int count = 0;
428046f8 935 bool done = FALSE;
180d45ba 936 struct obstack union_obstack;
1ff442ca 937 if (typed)
27821bff 938 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
939
940 typed = 1;
941
642cb8f8 942 MUSCLE_INSERT_INT ("stype_line", lineno);
180d45ba
PB
943 obstack_init (&union_obstack);
944 obstack_sgrow (&union_obstack, "union");
1ff442ca 945
428046f8 946 while (!done)
1ff442ca 947 {
428046f8
AD
948 c = xgetc (finput);
949
342b8b6e
AD
950 /* If C contains '/', it is output by copy_comment (). */
951 if (c != '/')
2666f928 952 obstack_1grow (&union_obstack, c);
1ff442ca
NF
953
954 switch (c)
955 {
956 case '\n':
f3849179 957 ++lineno;
1ff442ca
NF
958 break;
959
960 case '/':
2666f928 961 copy_comment (finput, &union_obstack);
1ff442ca
NF
962 break;
963
1ff442ca 964 case '{':
f3849179 965 ++count;
1ff442ca
NF
966 break;
967
968 case '}':
428046f8 969 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 970 if (count == 0)
27821bff 971 complain (_("unmatched %s"), "`}'");
1ff442ca 972 count--;
428046f8
AD
973 if (!count)
974 done = TRUE;
975 break;
1ff442ca 976 }
1ff442ca 977 }
180d45ba 978
428046f8
AD
979 /* JF don't choke on trailing semi */
980 c = skip_white_space ();
981 if (c != ';')
982 ungetc (c, finput);
983 obstack_1grow (&union_obstack, 0);
984 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
985}
986
d7020c20
AD
987
988/*-------------------------------------------------------.
989| Parse the declaration %expect N which says to expect N |
990| shift-reduce conflicts. |
991`-------------------------------------------------------*/
1ff442ca 992
4a120d45 993static void
118fb205 994parse_expect_decl (void)
1ff442ca 995{
131e2fef 996 int c = skip_white_space ();
1ff442ca
NF
997 ungetc (c, finput);
998
131e2fef 999 if (!isdigit (c))
79282c5a 1000 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
1001 else
1002 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
1003}
1004
a70083a3
AD
1005
1006/*-------------------------------------------------------------------.
1007| Parse what comes after %thong. the full syntax is |
1008| |
1009| %thong <type> token number literal |
1010| |
1011| the <type> or number may be omitted. The number specifies the |
1012| user_token_number. |
1013| |
1014| Two symbols are entered in the table, one for the token symbol and |
1015| one for the literal. Both are given the <type>, if any, from the |
1016| declaration. The ->user_token_number of the first is SALIAS and |
1017| the ->user_token_number of the second is set to the number, if |
1018| any, from the declaration. The two symbols are linked via |
1019| pointers in their ->alias fields. |
1020| |
1021| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
1022| only the literal string is retained it is the literal string that |
1023| is output to yytname |
1024`-------------------------------------------------------------------*/
1025
1026static void
1027parse_thong_decl (void)
7b306f52 1028{
f17bcd1f 1029 token_t token;
db8837cb 1030 symbol_t *symbol;
a70083a3 1031 char *typename = 0;
6b7e85b9 1032 int usrtoknum = SUNDEF;
7b306f52 1033
a70083a3 1034 token = lex (); /* fetch typename or first token */
511e79b3 1035 if (token == tok_typename)
7b306f52 1036 {
95e36146 1037 typename = xstrdup (token_buffer);
a70083a3
AD
1038 value_components_used = 1;
1039 token = lex (); /* fetch first token */
7b306f52 1040 }
7b306f52 1041
a70083a3 1042 /* process first token */
7b306f52 1043
511e79b3 1044 if (token != tok_identifier)
a70083a3
AD
1045 {
1046 complain (_("unrecognized item %s, expected an identifier"),
1047 token_buffer);
1048 skip_to_char ('%');
1049 return;
7b306f52 1050 }
d7020c20 1051 symval->class = token_sym;
a70083a3
AD
1052 symval->type_name = typename;
1053 symval->user_token_number = SALIAS;
1054 symbol = symval;
7b306f52 1055
a70083a3 1056 token = lex (); /* get number or literal string */
1ff442ca 1057
511e79b3 1058 if (token == tok_number)
943819bf 1059 {
a70083a3
AD
1060 usrtoknum = numval;
1061 token = lex (); /* okay, did number, now get literal */
943819bf 1062 }
1ff442ca 1063
a70083a3 1064 /* process literal string token */
1ff442ca 1065
511e79b3 1066 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 1067 {
a70083a3
AD
1068 complain (_("expected string constant instead of %s"), token_buffer);
1069 skip_to_char ('%');
1070 return;
1ff442ca 1071 }
d7020c20 1072 symval->class = token_sym;
a70083a3
AD
1073 symval->type_name = typename;
1074 symval->user_token_number = usrtoknum;
1ff442ca 1075
a70083a3
AD
1076 symval->alias = symbol;
1077 symbol->alias = symval;
1ff442ca 1078
79282c5a
AD
1079 /* symbol and symval combined are only one symbol. */
1080 nsyms--;
a70083a3 1081}
3cef001a 1082
11e2beca 1083
b6610515 1084static void
11d82f03 1085parse_muscle_decl (void)
b6610515
RA
1086{
1087 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
1088 char *muscle_key;
1089 char *muscle_value;
b6610515
RA
1090
1091 /* Read key. */
1092 if (!isalpha (ch) && ch != '_')
1093 {
1094 complain (_("invalid %s declaration"), "%define");
1095 skip_to_char ('%');
1096 return;
1097 }
11d82f03
MA
1098 copy_identifier (finput, &muscle_obstack);
1099 obstack_1grow (&muscle_obstack, 0);
1100 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 1101
b6610515
RA
1102 /* Read value. */
1103 ch = skip_white_space ();
1104 if (ch != '"')
1105 {
1106 ungetc (ch, finput);
1107 if (ch != EOF)
1108 {
1109 complain (_("invalid %s declaration"), "%define");
1110 skip_to_char ('%');
1111 return;
1112 }
1113 else
1114 fatal (_("Premature EOF after %s"), "\"");
1115 }
11d82f03
MA
1116 copy_string2 (finput, &muscle_obstack, '"', 0);
1117 obstack_1grow (&muscle_obstack, 0);
1118 muscle_value = obstack_finish (&muscle_obstack);
b6610515 1119
b6610515 1120 /* Store the (key, value) pair in the environment. */
11d82f03 1121 muscle_insert (muscle_key, muscle_value);
b6610515
RA
1122}
1123
2ba3b73c 1124
426cf563
MA
1125
1126/*---------------------------------.
a870c567 1127| Parse a double quoted parameter. |
426cf563
MA
1128`---------------------------------*/
1129
1130static const char *
1131parse_dquoted_param (const char *from)
1132{
1133 struct obstack param_obstack;
1134 const char *param = NULL;
1135 int c;
1136
1137 obstack_init (&param_obstack);
1138 c = skip_white_space ();
1139
1140 if (c != '"')
1141 {
1142 complain (_("invalid %s declaration"), from);
1143 ungetc (c, finput);
1144 skip_to_char ('%');
1145 return NULL;
1146 }
1147
2648a72d
AD
1148 while ((c = literalchar ()) != '"')
1149 obstack_1grow (&param_obstack, c);
a870c567 1150
426cf563
MA
1151 obstack_1grow (&param_obstack, '\0');
1152 param = obstack_finish (&param_obstack);
1153
1154 if (c != '"' || strlen (param) == 0)
1155 {
1156 complain (_("invalid %s declaration"), from);
1157 if (c != '"')
1158 ungetc (c, finput);
1159 skip_to_char ('%');
1160 return NULL;
1161 }
1162
1163 return param;
1164}
1165
2ba3b73c
MA
1166/*----------------------------------.
1167| Parse what comes after %skeleton. |
1168`----------------------------------*/
1169
a870c567 1170static void
2ba3b73c
MA
1171parse_skel_decl (void)
1172{
426cf563 1173 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
1174}
1175
a70083a3
AD
1176/*----------------------------------------------------------------.
1177| Read from finput until `%%' is seen. Discard the `%%'. Handle |
1178| any `%' declarations, and copy the contents of any `%{ ... %}' |
0dd1580a 1179| groups to PRE_PROLOGUE_OBSTACK or POST_PROLOGUE_OBSTACK. |
a70083a3 1180`----------------------------------------------------------------*/
1ff442ca 1181
4a120d45 1182static void
a70083a3 1183read_declarations (void)
1ff442ca 1184{
a70083a3 1185 for (;;)
1ff442ca 1186 {
951366c1 1187 int c = skip_white_space ();
1ff442ca 1188
a70083a3
AD
1189 if (c == '%')
1190 {
951366c1 1191 token_t tok = parse_percent_token ();
1ff442ca 1192
a70083a3 1193 switch (tok)
943819bf 1194 {
511e79b3 1195 case tok_two_percents:
a70083a3 1196 return;
1ff442ca 1197
511e79b3 1198 case tok_percent_left_curly:
0dd1580a
RA
1199 if (!typed)
1200 copy_definition (&pre_prologue_obstack);
1201 else
1202 copy_definition (&post_prologue_obstack);
a70083a3 1203 break;
1ff442ca 1204
511e79b3 1205 case tok_token:
d7020c20 1206 parse_token_decl (token_sym, nterm_sym);
a70083a3 1207 break;
1ff442ca 1208
511e79b3 1209 case tok_nterm:
d7020c20 1210 parse_token_decl (nterm_sym, token_sym);
a70083a3 1211 break;
1ff442ca 1212
511e79b3 1213 case tok_type:
a70083a3
AD
1214 parse_type_decl ();
1215 break;
1ff442ca 1216
511e79b3 1217 case tok_start:
a70083a3
AD
1218 parse_start_decl ();
1219 break;
118fb205 1220
511e79b3 1221 case tok_union:
a70083a3
AD
1222 parse_union_decl ();
1223 break;
1ff442ca 1224
511e79b3 1225 case tok_expect:
a70083a3
AD
1226 parse_expect_decl ();
1227 break;
6deb4447 1228
511e79b3 1229 case tok_thong:
a70083a3
AD
1230 parse_thong_decl ();
1231 break;
d7020c20 1232
511e79b3 1233 case tok_left:
d7020c20 1234 parse_assoc_decl (left_assoc);
a70083a3 1235 break;
1ff442ca 1236
511e79b3 1237 case tok_right:
d7020c20 1238 parse_assoc_decl (right_assoc);
a70083a3 1239 break;
1ff442ca 1240
511e79b3 1241 case tok_nonassoc:
d7020c20 1242 parse_assoc_decl (non_assoc);
a70083a3 1243 break;
1ff442ca 1244
b6610515 1245 case tok_define:
11d82f03 1246 parse_muscle_decl ();
b6610515 1247 break;
342b8b6e 1248
2ba3b73c
MA
1249 case tok_skel:
1250 parse_skel_decl ();
1251 break;
b6610515 1252
511e79b3 1253 case tok_noop:
a70083a3 1254 break;
1ff442ca 1255
951366c1
AD
1256 case tok_stropt:
1257 case tok_intopt:
1258 case tok_obsolete:
72a23c97 1259 assert (0);
951366c1
AD
1260 break;
1261
e0c40012 1262 case tok_illegal:
a70083a3
AD
1263 default:
1264 complain (_("unrecognized: %s"), token_buffer);
1265 skip_to_char ('%');
1266 }
1267 }
1268 else if (c == EOF)
1269 fatal (_("no input grammar"));
1270 else
1271 {
ff4a34be
AD
1272 char buf[] = "c";
1273 buf[0] = c;
1274 complain (_("unknown character: %s"), quote (buf));
a70083a3 1275 skip_to_char ('%');
1ff442ca 1276 }
1ff442ca 1277 }
1ff442ca 1278}
a70083a3
AD
1279\f
1280/*-------------------------------------------------------------------.
1281| Assuming that a `{' has just been seen, copy everything up to the |
1282| matching `}' into the actions file. STACK_OFFSET is the number of |
1283| values in the current rule so far, which says where to find `$0' |
1284| with respect to the top of the stack. |
14d293ac 1285| |
11e2beca
AD
1286| This routine is used both for actions and guards. Only |
1287| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1288| pointers to relevant portions inside this obstack. |
a70083a3 1289`-------------------------------------------------------------------*/
1ff442ca 1290
4a120d45 1291static void
14d293ac 1292parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1293{
a70083a3 1294 int c;
a70083a3 1295 int count;
1ff442ca 1296
1ff442ca 1297 count = 1;
1ff442ca
NF
1298 while (count > 0)
1299 {
14d293ac
AD
1300 while ((c = getc (finput)) != '}')
1301 switch (c)
1302 {
1303 case '\n':
1304 obstack_1grow (&action_obstack, c);
f3849179 1305 ++lineno;
14d293ac 1306 break;
1ff442ca 1307
14d293ac
AD
1308 case '{':
1309 obstack_1grow (&action_obstack, c);
f3849179 1310 ++count;
14d293ac 1311 break;
1ff442ca 1312
14d293ac
AD
1313 case '\'':
1314 case '"':
1315 copy_string (finput, &action_obstack, c);
1316 break;
1ff442ca 1317
14d293ac
AD
1318 case '/':
1319 copy_comment (finput, &action_obstack);
1320 break;
1ff442ca 1321
14d293ac
AD
1322 case '$':
1323 copy_dollar (finput, &action_obstack,
1324 rule, stack_offset);
1325 break;
1ff442ca 1326
14d293ac
AD
1327 case '@':
1328 copy_at (finput, &action_obstack,
1329 stack_offset);
1330 break;
a70083a3 1331
14d293ac
AD
1332 case EOF:
1333 fatal (_("unmatched %s"), "`{'");
a70083a3 1334
14d293ac
AD
1335 default:
1336 obstack_1grow (&action_obstack, c);
1337 }
a70083a3 1338
14d293ac 1339 /* Above loop exits when C is '}'. */
a70083a3 1340 if (--count)
2b25d624 1341 obstack_1grow (&action_obstack, c);
a70083a3
AD
1342 }
1343
3f96f4dc 1344 obstack_1grow (&action_obstack, '\0');
a70083a3 1345}
14d293ac 1346
a70083a3
AD
1347
1348static void
14d293ac 1349parse_action (symbol_list *rule, int stack_offset)
a70083a3 1350{
14d293ac
AD
1351 rule->action_line = lineno;
1352 parse_braces (rule, stack_offset);
1353 rule->action = obstack_finish (&action_obstack);
1354}
a70083a3 1355
a70083a3 1356
14d293ac
AD
1357static void
1358parse_guard (symbol_list *rule, int stack_offset)
1359{
1360 token_t t = lex ();
1361 if (t != tok_left_curly)
1362 complain (_("invalid %s declaration"), "%guard");
f499b062 1363 rule->guard_line = lineno;
14d293ac
AD
1364 parse_braces (rule, stack_offset);
1365 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1366}
14d293ac 1367
a70083a3
AD
1368\f
1369
a70083a3
AD
1370/*-------------------------------------------------------------------.
1371| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1372| with the user's names. |
1373`-------------------------------------------------------------------*/
1ff442ca 1374
db8837cb 1375static symbol_t *
118fb205 1376gensym (void)
1ff442ca 1377{
274d42ce
AD
1378 /* Incremented for each generated symbol */
1379 static int gensym_count = 0;
1380 static char buf[256];
1381
db8837cb 1382 symbol_t *sym;
1ff442ca 1383
274d42ce
AD
1384 sprintf (buf, "@%d", ++gensym_count);
1385 token_buffer = buf;
a70083a3 1386 sym = getsym (token_buffer);
d7020c20 1387 sym->class = nterm_sym;
d9b739c3 1388 sym->number = nvars++;
36281465 1389 return sym;
1ff442ca 1390}
a70083a3 1391\f
107f7dfb
AD
1392/*-------------------------------------------------------------------.
1393| Parse the input grammar into a one symbol_list structure. Each |
1394| rule is represented by a sequence of symbols: the left hand side |
1395| followed by the contents of the right hand side, followed by a |
1396| null pointer instead of a symbol to terminate the rule. The next |
1397| symbol is the lhs of the following rule. |
1398| |
1399| All guards and actions are copied out to the appropriate files, |
1400| labelled by the rule number they apply to. |
1401| |
1402| Bison used to allow some %directives in the rules sections, but |
1403| this is no longer consider appropriate: (i) the documented grammar |
1404| doesn't claim it, (ii), it would promote bad style, (iii), error |
1405| recovery for %directives consists in skipping the junk until a `%' |
1406| is seen and helrp synchronizing. This scheme is definitely wrong |
1407| in the rules section. |
1408`-------------------------------------------------------------------*/
1ff442ca 1409
4a120d45 1410static void
118fb205 1411readgram (void)
1ff442ca 1412{
f17bcd1f 1413 token_t t;
db8837cb 1414 symbol_t *lhs = NULL;
107f7dfb
AD
1415 symbol_list *p = NULL;
1416 symbol_list *p1 = NULL;
1ff442ca 1417
ff4a34be
AD
1418 /* Points to first symbol_list of current rule. its symbol is the
1419 lhs of the rule. */
107f7dfb 1420 symbol_list *crule = NULL;
ff4a34be 1421 /* Points to the symbol_list preceding crule. */
107f7dfb 1422 symbol_list *crule1 = NULL;
1ff442ca 1423
a70083a3 1424 t = lex ();
1ff442ca 1425
511e79b3 1426 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1427 if (t == tok_identifier || t == tok_bar)
1428 {
1429 int action_flag = 0;
1430 /* Number of symbols in rhs of this rule so far */
1431 int rulelength = 0;
1432 int xactions = 0; /* JF for error checking */
db8837cb 1433 symbol_t *first_rhs = 0;
107f7dfb
AD
1434
1435 if (t == tok_identifier)
1436 {
1437 lhs = symval;
1438
1439 if (!start_flag)
1440 {
1441 startval = lhs;
1442 start_flag = 1;
1443 }
1ff442ca 1444
107f7dfb
AD
1445 t = lex ();
1446 if (t != tok_colon)
1447 {
1448 complain (_("ill-formed rule: initial symbol not followed by colon"));
1449 unlex (t);
1450 }
1451 }
1452
1453 if (nrules == 0 && t == tok_bar)
1454 {
1455 complain (_("grammar starts with vertical bar"));
1456 lhs = symval; /* BOGUS: use a random symval */
1457 }
1458 /* start a new rule and record its lhs. */
1459
f3849179 1460 ++nrules;
5123689b 1461 ++nritems;
107f7dfb
AD
1462
1463 p = symbol_list_new (lhs);
1464
1465 crule1 = p1;
1466 if (p1)
1467 p1->next = p;
1468 else
1469 grammar = p;
1ff442ca 1470
107f7dfb
AD
1471 p1 = p;
1472 crule = p;
1ff442ca 1473
107f7dfb 1474 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1475
107f7dfb
AD
1476 if (lhs->class == unknown_sym)
1477 {
1478 lhs->class = nterm_sym;
d9b739c3 1479 lhs->number = nvars;
f3849179 1480 ++nvars;
107f7dfb
AD
1481 }
1482 else if (lhs->class == token_sym)
1483 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1484
107f7dfb 1485 /* read the rhs of the rule. */
1ff442ca 1486
107f7dfb
AD
1487 for (;;)
1488 {
1489 t = lex ();
1490 if (t == tok_prec)
1491 {
1492 t = lex ();
1493 crule->ruleprec = symval;
1494 t = lex ();
1495 }
1496
1497 if (!(t == tok_identifier || t == tok_left_curly))
1498 break;
1ff442ca 1499
107f7dfb
AD
1500 /* If next token is an identifier, see if a colon follows it.
1501 If one does, exit this rule now. */
1502 if (t == tok_identifier)
1503 {
db8837cb 1504 symbol_t *ssave;
107f7dfb
AD
1505 token_t t1;
1506
1507 ssave = symval;
1508 t1 = lex ();
1509 unlex (t1);
1510 symval = ssave;
1511 if (t1 == tok_colon)
e5352bc7 1512 {
fff9bf0b 1513 warn (_("previous rule lacks an ending `;'"));
e5352bc7
AD
1514 break;
1515 }
107f7dfb
AD
1516
1517 if (!first_rhs) /* JF */
1518 first_rhs = symval;
1519 /* Not followed by colon =>
1520 process as part of this rule's rhs. */
1521 }
1522
1523 /* If we just passed an action, that action was in the middle
1524 of a rule, so make a dummy rule to reduce it to a
1525 non-terminal. */
1526 if (action_flag)
1527 {
1528 /* Since the action was written out with this rule's
1529 number, we must give the new rule this number by
1530 inserting the new rule before it. */
1531
1532 /* Make a dummy nonterminal, a gensym. */
db8837cb 1533 symbol_t *sdummy = gensym ();
107f7dfb
AD
1534
1535 /* Make a new rule, whose body is empty, before the
1536 current one, so that the action just read can
1537 belong to it. */
f3849179 1538 ++nrules;
5123689b 1539 ++nritems;
107f7dfb
AD
1540 p = symbol_list_new (sdummy);
1541 /* Attach its lineno to that of the host rule. */
1542 p->line = crule->line;
82c035a8
AD
1543 /* Move the action from the host rule to this one. */
1544 p->action = crule->action;
1545 p->action_line = crule->action_line;
1546 crule->action = NULL;
1547
107f7dfb
AD
1548 if (crule1)
1549 crule1->next = p;
1550 else
1551 grammar = p;
1552 /* End of the rule. */
1553 crule1 = symbol_list_new (NULL);
1554 crule1->next = crule;
1555
1556 p->next = crule1;
1557
1558 /* Insert the dummy generated by that rule into this
1559 rule. */
5123689b 1560 ++nritems;
107f7dfb
AD
1561 p = symbol_list_new (sdummy);
1562 p1->next = p;
1563 p1 = p;
1564
1565 action_flag = 0;
1566 }
1567
1568 if (t == tok_identifier)
1569 {
5123689b 1570 ++nritems;
107f7dfb
AD
1571 p = symbol_list_new (symval);
1572 p1->next = p;
1573 p1 = p;
1574 }
1575 else /* handle an action. */
1576 {
14d293ac 1577 parse_action (crule, rulelength);
107f7dfb 1578 action_flag = 1;
f3849179 1579 ++xactions; /* JF */
107f7dfb 1580 }
f3849179 1581 ++rulelength;
107f7dfb
AD
1582 } /* end of read rhs of rule */
1583
1584 /* Put an empty link in the list to mark the end of this rule */
1585 p = symbol_list_new (NULL);
1586 p1->next = p;
1587 p1 = p;
1588
1589 if (t == tok_prec)
1590 {
1591 complain (_("two @prec's in a row"));
1592 t = lex ();
1593 crule->ruleprec = symval;
1594 t = lex ();
1595 }
f499b062 1596
107f7dfb
AD
1597 if (t == tok_guard)
1598 {
1599 if (!semantic_parser)
1600 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1601
14d293ac 1602 parse_guard (crule, rulelength);
a70083a3 1603 t = lex ();
107f7dfb 1604 }
f499b062
AD
1605
1606 if (t == tok_left_curly)
107f7dfb
AD
1607 {
1608 /* This case never occurs -wjh */
1609 if (action_flag)
1610 complain (_("two actions at end of one rule"));
14d293ac 1611 parse_action (crule, rulelength);
107f7dfb 1612 action_flag = 1;
f3849179 1613 ++xactions; /* -wjh */
107f7dfb
AD
1614 t = lex ();
1615 }
1616 /* If $$ is being set in default way, report if any type
1617 mismatch. */
1618 else if (!xactions
1619 && first_rhs && lhs->type_name != first_rhs->type_name)
1620 {
1621 if (lhs->type_name == 0
1622 || first_rhs->type_name == 0
1623 || strcmp (lhs->type_name, first_rhs->type_name))
1624 complain (_("type clash (`%s' `%s') on default action"),
1625 lhs->type_name ? lhs->type_name : "",
1626 first_rhs->type_name ? first_rhs->type_name : "");
1627 }
1628 /* Warn if there is no default for $$ but we need one. */
1629 else if (!xactions && !first_rhs && lhs->type_name != 0)
1630 complain (_("empty rule for typed nonterminal, and no action"));
bfcf1f3a 1631 if (t == tok_two_percents || t == tok_eof)
fff9bf0b 1632 warn (_("previous rule lacks an ending `;'"));
107f7dfb 1633 if (t == tok_semicolon)
a70083a3 1634 t = lex ();
107f7dfb
AD
1635 }
1636 else
1637 {
1638 complain (_("invalid input: %s"), quote (token_buffer));
1639 t = lex ();
1640 }
943819bf 1641
b68e7744
AD
1642 /* grammar has been read. Do some checking */
1643
1644 if (nrules == 0)
1645 fatal (_("no rules in the input grammar"));
1646
1647 /* Report any undefined symbols and consider them nonterminals. */
db8837cb 1648 symbols_do (symbol_check_defined, NULL);
b68e7744 1649
ff442794
AD
1650 /* Insert the initial rule, which line is that of the first rule
1651 (not that of the start symbol):
30171f79
AD
1652
1653 axiom: %start EOF. */
1654 p = symbol_list_new (axiom);
ff442794 1655 p->line = grammar->line;
30171f79
AD
1656 p->next = symbol_list_new (startval);
1657 p->next->next = symbol_list_new (eoftoken);
1658 p->next->next->next = symbol_list_new (NULL);
1659 p->next->next->next->next = grammar;
1660 nrules += 1;
5123689b 1661 nritems += 3;
30171f79
AD
1662 grammar = p;
1663 startval = axiom;
1ff442ca 1664
62a3e4f0 1665 if (nsyms > SHRT_MAX)
a0f6b076 1666 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
62a3e4f0 1667 SHRT_MAX);
1ff442ca 1668
72a23c97 1669 assert (nsyms == ntokens + nvars);
1ff442ca 1670}
ff48177d
MA
1671
1672/* At the end of the grammar file, some C source code must
63c2d5de 1673 be stored. It is going to be associated to the epilogue
ff48177d
MA
1674 directive. */
1675static void
1676read_additionnal_code (void)
1677{
9101a310 1678 int c;
63c2d5de 1679 struct obstack el_obstack;
342b8b6e 1680
63c2d5de 1681 obstack_init (&el_obstack);
ff48177d 1682
710ddc4f
MA
1683 if (!no_lines_flag)
1684 {
1685 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1686 lineno, quotearg_style (c_quoting_style,
b7c49edf 1687 muscle_find ("filename")));
710ddc4f
MA
1688 }
1689
ff48177d 1690 while ((c = getc (finput)) != EOF)
2b7ed18a 1691 copy_character (&el_obstack, c);
342b8b6e 1692
63c2d5de 1693 obstack_1grow (&el_obstack, 0);
11d82f03 1694 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1695}
1696
a70083a3 1697\f
037ca2f1
AD
1698/*------------------------------------------------------------------.
1699| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1700| number. |
1701`------------------------------------------------------------------*/
1702
1703static void
1704token_translations_init (void)
1705{
23c5a174 1706 int num_256_available_p = TRUE;
037ca2f1
AD
1707 int i;
1708
23c5a174
AD
1709 /* Find the highest user token number, and whether 256, the POSIX
1710 preferred user token number for the error token, is used. */
1711 max_user_token_number = 0;
1712 for (i = 0; i < ntokens; ++i)
1713 {
1714 symbol_t *this = symbols[i];
1715 if (this->user_token_number != SUNDEF)
1716 {
1717 if (this->user_token_number > max_user_token_number)
1718 max_user_token_number = this->user_token_number;
1719 if (this->user_token_number == 256)
1720 num_256_available_p = FALSE;
1721 }
1722 }
1723
1724 /* If 256 is not used, assign it to error, to follow POSIX. */
1725 if (num_256_available_p && errtoken->user_token_number == SUNDEF)
1726 errtoken->user_token_number = 256;
1727
1728 /* Set the missing user numbers. */
1729 if (max_user_token_number < 256)
1730 max_user_token_number = 256;
1731
72a23c97
AD
1732 for (i = 0; i < ntokens; ++i)
1733 {
db8837cb 1734 symbol_t *this = symbols[i];
72a23c97 1735 if (this->user_token_number == SUNDEF)
23c5a174 1736 this->user_token_number = ++max_user_token_number;
72a23c97
AD
1737 if (this->user_token_number > max_user_token_number)
1738 max_user_token_number = this->user_token_number;
72a23c97
AD
1739 }
1740
680e8701 1741 token_translations = XCALLOC (token_number_t, max_user_token_number + 1);
037ca2f1
AD
1742
1743 /* Initialize all entries for literal tokens to 2, the internal
1744 token number for $undefined., which represents all invalid
1745 inputs. */
18bcecb0 1746 for (i = 0; i < max_user_token_number + 1; i++)
007a50a4 1747 token_translations[i] = undeftoken->number;
db8837cb 1748 symbols_do (symbol_translation, NULL);
037ca2f1
AD
1749}
1750
1751
0e78e603
AD
1752/*----------------------------------------------------------------.
1753| Assign symbol numbers, and write definition of token names into |
1754| FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1755`----------------------------------------------------------------*/
1ff442ca 1756
4a120d45 1757static void
118fb205 1758packsymbols (void)
1ff442ca 1759{
db8837cb 1760 symbols = XCALLOC (symbol_t *, nsyms);
1ff442ca 1761
db8837cb
AD
1762 symbols_do (symbol_check_alias_consistence, NULL);
1763 symbols_do (symbol_pack, NULL);
1ff442ca 1764
037ca2f1 1765 token_translations_init ();
1ff442ca 1766
e3f1699f
AD
1767 if (startval->class == unknown_sym)
1768 fatal (_("the start symbol %s is undefined"), startval->tag);
1769 else if (startval->class == token_sym)
1770 fatal (_("the start symbol %s is a token"), startval->tag);
1771
d9b739c3 1772 start_symbol = startval->number;
e3f1699f
AD
1773}
1774
1775
a70083a3
AD
1776/*---------------------------------------------------------------.
1777| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 1778| RITEM. |
a70083a3 1779`---------------------------------------------------------------*/
1ff442ca 1780
4a120d45 1781static void
118fb205 1782packgram (void)
1ff442ca 1783{
a70083a3
AD
1784 int itemno;
1785 int ruleno;
1786 symbol_list *p;
1ff442ca 1787
62a3e4f0 1788 ritem = XCALLOC (item_number_t, nritems + 1);
1a2b5d37 1789 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1790
1791 itemno = 0;
1792 ruleno = 1;
1793
1794 p = grammar;
1795 while (p)
1796 {
db8837cb 1797 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 1798 rules[ruleno].user_number = ruleno;
c3b407f4 1799 rules[ruleno].number = ruleno;
bba97eb2 1800 rules[ruleno].lhs = p->sym;
99013900 1801 rules[ruleno].rhs = ritem + itemno;
1a2b5d37
AD
1802 rules[ruleno].line = p->line;
1803 rules[ruleno].useful = TRUE;
1804 rules[ruleno].action = p->action;
1805 rules[ruleno].action_line = p->action_line;
1806 rules[ruleno].guard = p->guard;
1807 rules[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1808
1809 p = p->next;
1810 while (p && p->sym)
1811 {
5fbb0954
AD
1812 /* item_number_t = token_number_t.
1813 But the former needs to contain more: negative rule numbers. */
1814 ritem[itemno++] = token_number_as_item_number (p->sym->number);
1ff442ca
NF
1815 /* A rule gets by default the precedence and associativity
1816 of the last token in it. */
d7020c20 1817 if (p->sym->class == token_sym)
03b31c0c 1818 rules[ruleno].prec = p->sym;
a70083a3
AD
1819 if (p)
1820 p = p->next;
1ff442ca
NF
1821 }
1822
1823 /* If this rule has a %prec,
a70083a3 1824 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1825 if (ruleprec)
1826 {
03b31c0c
AD
1827 rules[ruleno].precsym = ruleprec;
1828 rules[ruleno].prec = ruleprec;
1ff442ca 1829 }
1ff442ca 1830 ritem[itemno++] = -ruleno;
f3849179 1831 ++ruleno;
1ff442ca 1832
a70083a3
AD
1833 if (p)
1834 p = p->next;
1ff442ca
NF
1835 }
1836
1837 ritem[itemno] = 0;
5123689b 1838 assert (itemno == nritems);
3067fbef
AD
1839
1840 if (trace_flag)
1841 ritem_print (stderr);
1ff442ca 1842}
a70083a3
AD
1843\f
1844/*-------------------------------------------------------------------.
1845| Read in the grammar specification and record it in the format |
ea5607fd 1846| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1847| and all actions into ACTION_OBSTACK, in each case forming the body |
1848| of a C function (YYGUARD or YYACTION) which contains a switch |
1849| statement to decide which guard or action to execute. |
a70083a3
AD
1850`-------------------------------------------------------------------*/
1851
1852void
1853reader (void)
1854{
342b8b6e 1855 lex_init ();
a70083a3
AD
1856 lineno = 1;
1857
11d82f03
MA
1858 /* Initialize the muscle obstack. */
1859 obstack_init (&muscle_obstack);
82e236e2 1860
a70083a3 1861 /* Initialize the symbol table. */
db8837cb 1862 symbols_new ();
b6610515 1863
30171f79
AD
1864 /* Construct the axiom symbol. */
1865 axiom = getsym ("$axiom");
1866 axiom->class = nterm_sym;
d9b739c3 1867 axiom->number = nvars++;
30171f79 1868
a70083a3
AD
1869 /* Construct the error token */
1870 errtoken = getsym ("error");
d7020c20 1871 errtoken->class = token_sym;
72a23c97 1872 errtoken->number = ntokens++;
b6610515 1873
a70083a3
AD
1874 /* Construct a token that represents all undefined literal tokens.
1875 It is always token number 2. */
1876 undeftoken = getsym ("$undefined.");
d7020c20 1877 undeftoken->class = token_sym;
72a23c97 1878 undeftoken->number = ntokens++;
a70083a3 1879
331dbc1b
AD
1880 /* Initialize the obstacks. */
1881 obstack_init (&action_obstack);
331dbc1b 1882 obstack_init (&output_obstack);
0dd1580a
RA
1883 obstack_init (&pre_prologue_obstack);
1884 obstack_init (&post_prologue_obstack);
331dbc1b
AD
1885
1886 finput = xfopen (infile, "r");
1887
896fe5c1
AD
1888 /* Read the declaration section. Copy %{ ... %} groups to
1889 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1890 etc. found there. */
a70083a3 1891 read_declarations ();
b7c49edf
AD
1892
1893 /* If the user did not define her EOFTOKEN, do it now. */
1894 if (!eoftoken)
1895 {
1896 eoftoken = getsym ("$");
1897 eoftoken->class = token_sym;
72a23c97 1898 eoftoken->number = 0;
b7c49edf
AD
1899 /* Value specified by POSIX. */
1900 eoftoken->user_token_number = 0;
1901 }
1902
a70083a3
AD
1903 /* Read in the grammar, build grammar in list form. Write out
1904 guards and actions. */
1905 readgram ();
ff48177d
MA
1906 /* Some C code is given at the end of the grammar file. */
1907 read_additionnal_code ();
b0c4483e 1908
331dbc1b
AD
1909 lex_free ();
1910 xfclose (finput);
1911
a70083a3
AD
1912 /* Assign the symbols their symbol numbers. Write #defines for the
1913 token symbols into FDEFINES if requested. */
1914 packsymbols ();
93ede233 1915
a70083a3
AD
1916 /* Convert the grammar into the format described in gram.h. */
1917 packgram ();
8419d367
AD
1918
1919 /* The grammar as a symbol_list is no longer needed. */
1920 LIST_FREE (symbol_list, grammar);
a70083a3 1921}
76514394
AD
1922
1923void
1924grammar_free (void)
1925{
1926 XFREE (ritem);
1927 free (rules + 1);
1928 /* Free the symbol table data structure. */
db8837cb 1929 symbols_free ();
76514394 1930}