]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/reader.c, symtab.c: Remove debugging code.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
db8837cb 41 symbol_t *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
db8837cb 51 symbol_t *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
280a38c3
AD
55static symbol_list *grammar = NULL;
56static int start_flag = 0;
db8837cb 57static symbol_t *startval = NULL;
1ff442ca
NF
58
59/* Nonzero if components of semantic values are used, implying
60 they must be unions. */
61static int value_components_used;
62
d7020c20 63/* Nonzero if %union has been seen. */
280a38c3 64static int typed = 0;
1ff442ca 65
d7020c20 66/* Incremented for each %left, %right or %nonassoc seen */
280a38c3 67static int lastprec = 0;
1ff442ca 68
db8837cb
AD
69symbol_t *errtoken = NULL;
70symbol_t *undeftoken = NULL;
71symbol_t *eoftoken = NULL;
72symbol_t *axiom = NULL;
b29b2ed5 73
6255b435 74static symbol_list *
db8837cb 75symbol_list_new (symbol_t *sym)
b29b2ed5
AD
76{
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
d945f5cd
AD
81 res->action = NULL;
82 res->action_line = 0;
f499b062
AD
83 res->guard = NULL;
84 res->guard_line = 0;
b29b2ed5
AD
85 res->ruleprec = NULL;
86 return res;
87}
88
72a23c97 89/*------------------------.
db8837cb 90| Operations on symbols. |
72a23c97
AD
91`------------------------*/
92
93
94/*-----------------------------------------------------------.
95| If THIS is not defined, report an error, and consider it a |
96| nonterminal. |
97`-----------------------------------------------------------*/
98
99static bool
db8837cb 100symbol_check_defined (symbol_t *this)
72a23c97
AD
101{
102 if (this->class == unknown_sym)
103 {
104 complain
105 (_("symbol %s is used, but is not defined as a token and has no rules"),
106 this->tag);
107 this->class = nterm_sym;
108 this->number = nvars++;
109 }
110
111 return TRUE;
112}
113
114
/*-------------------------------------------------------------------.
| Make SYMBOL and SYMVAL (its literal string) aliases of each other: |
| SYMVAL becomes a token of type TYPENAME, takes over SYMBOL's user  |
| token number, and both end up sharing a single symbol number.      |
`-------------------------------------------------------------------*/
119
120static bool
db8837cb 121symbol_make_alias (symbol_t *symbol, char *typename)
72a23c97
AD
122{
123 if (symval->alias)
124 warn (_("symbol `%s' used more than once as a literal string"),
125 symval->tag);
126 else if (symbol->alias)
127 warn (_("symbol `%s' given more than one literal string"),
128 symbol->tag);
129 else
130 {
131 symval->class = token_sym;
132 symval->type_name = typename;
133 symval->user_token_number = symbol->user_token_number;
134 symbol->user_token_number = SALIAS;
135 symval->alias = symbol;
136 symbol->alias = symval;
137 /* symbol and symval combined are only one symbol */
138 nsyms--;
139 ntokens--;
140 assert (ntokens == symbol->number || ntokens == symval->number);
141 symbol->number = symval->number =
142 (symval->number < symbol->number) ? symval->number : symbol->number;
143 }
144
145 return TRUE;
146}
147
148/*---------------------------------------------------------.
149| Check that THIS, and its alias, have same precedence and |
150| associativity. |
151`---------------------------------------------------------*/
152
153static bool
db8837cb 154symbol_check_alias_consistence (symbol_t *this)
72a23c97
AD
155{
156 /* Check only those who _are_ the aliases. */
157 if (this->alias && this->user_token_number == SALIAS)
158 {
159 if (this->prec != this->alias->prec)
160 {
161 if (this->prec != 0 && this->alias->prec != 0)
162 complain (_("conflicting precedences for %s and %s"),
163 this->tag, this->alias->tag);
164 if (this->prec != 0)
165 this->alias->prec = this->prec;
166 else
167 this->prec = this->alias->prec;
168 }
169
170 if (this->assoc != this->alias->assoc)
171 {
172 if (this->assoc != 0 && this->alias->assoc != 0)
173 complain (_("conflicting assoc values for %s and %s"),
174 this->tag, this->alias->tag);
175 if (this->assoc != 0)
176 this->alias->assoc = this->assoc;
177 else
178 this->assoc = this->alias->assoc;
179 }
180 }
181 return TRUE;
182}
183
184
/*-------------------------------------------------------------------.
| Give THIS its final symbol number (nonterminals are moved after    |
| the tokens, aliased symbols share one number) and, unless it is    |
| marked SALIAS, record it in SYMBOLS.                               |
`-------------------------------------------------------------------*/
189
190static bool
db8837cb 191symbol_pack (symbol_t *this)
72a23c97 192{
72a23c97
AD
193 if (this->class == nterm_sym)
194 {
195 this->number += ntokens;
196 }
197 else if (this->alias)
198 {
199 /* This symbol and its alias are a single token defn.
200 Allocate a tokno, and assign to both check agreement of
201 prec and assoc fields and make both the same */
202 if (this->number == -1)
203 {
204 if (this == eoftoken || this->alias == eoftoken)
205 this->number = this->alias->number = 0;
206 else
207 {
208 assert (this->alias->number != -1);
209 this->number = this->alias->number;
210 }
211 }
212 /* Do not do processing below for SALIASs. */
213 if (this->user_token_number == SALIAS)
214 return TRUE;
215 }
216 else /* this->class == token_sym */
217 {
218 assert (this->number != -1);
219 }
220
72a23c97
AD
221 symbols[this->number] = this;
222 return TRUE;
223}
224
225
226
227
228/*--------------------------------------------------.
229| Put THIS in TOKEN_TRANSLATIONS if it is a token. |
230`--------------------------------------------------*/
231
232static bool
db8837cb 233symbol_translation (symbol_t *this)
72a23c97 234{
72a23c97
AD
235 /* Non-terminal? */
236 if (this->class == token_sym
237 && this->user_token_number != SALIAS)
238 {
239 /* A token which translation has already been set? */
240 if (token_translations[this->user_token_number] != 2)
241 complain (_("tokens %s and %s both assigned number %d"),
242 symbols[token_translations[this->user_token_number]]->tag,
243 this->tag, this->user_token_number);
244
72a23c97
AD
245 token_translations[this->user_token_number] = this->number;
246 }
247
248 return TRUE;
249}
0d533154 250\f
a70083a3 251
0d533154
AD
252/*===================\
253| Low level lexing. |
254\===================*/
943819bf
RS
255
256static void
118fb205 257skip_to_char (int target)
943819bf
RS
258{
259 int c;
260 if (target == '\n')
a0f6b076 261 complain (_(" Skipping to next \\n"));
943819bf 262 else
a0f6b076 263 complain (_(" Skipping to next %c"), target);
943819bf
RS
264
265 do
0d533154 266 c = skip_white_space ();
943819bf 267 while (c != target && c != EOF);
a083fbbf 268 if (c != EOF)
0d533154 269 ungetc (c, finput);
943819bf
RS
270}
271
272
0d533154
AD
273/*---------------------------------------------------------.
274| Read a signed integer from STREAM and return its value. |
275`---------------------------------------------------------*/
276
static inline int
read_signed_integer (FILE *stream)
{
  /* Read an optional `-' followed by decimal digits from STREAM and
     return the value.  The first character which is not part of the
     number is pushed back.  A missing digit sequence yields 0.

     The magnitude saturates at INT_MAX: the text comes straight from
     the user's grammar, and letting `10 * n + digit' overflow a
     signed int would be undefined behavior.  All the digits are
     consumed nevertheless.  */
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  if (c == '-')
    {
      negative = 1;
      c = getc (stream);
    }

  for (; isdigit (c); c = getc (stream))
    {
      int digit = c - '0';

      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;
    }

  /* Pushing back EOF is a harmless no-op.  */
  ungetc (c, stream);

  return negative ? -n : n;
}
300\f
79282c5a
AD
301/*--------------------------------------------------------------.
302| Get the data type (alternative in the union) of the value for |
303| symbol N in rule RULE. |
304`--------------------------------------------------------------*/
305
306static char *
b29b2ed5 307get_type_name (int n, symbol_list *rule)
79282c5a
AD
308{
309 int i;
310 symbol_list *rp;
311
312 if (n < 0)
313 {
314 complain (_("invalid $ value"));
315 return NULL;
316 }
317
318 rp = rule;
319 i = 0;
320
321 while (i < n)
322 {
323 rp = rp->next;
324 if (rp == NULL || rp->sym == NULL)
325 {
326 complain (_("invalid $ value"));
327 return NULL;
328 }
329 i++;
330 }
331
332 return rp->sym->type_name;
333}
334\f
337bab46
AD
/*-------------------------------------------------------------------.
| Copy a string from FIN to OOUT.  MATCH is the delimiter of the     |
| string (either ' or "), whose opening occurrence has already been  |
| read.  The delimiters are copied too only if STORE is nonzero.     |
`-------------------------------------------------------------------*/
ae3c3164
AD
339
340static inline void
b6610515 341copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
342{
343 int c;
344
b6610515
RA
345 if (store)
346 obstack_1grow (oout, match);
8c7ebe49 347
4a120d45 348 c = getc (fin);
ae3c3164
AD
349
350 while (c != match)
351 {
352 if (c == EOF)
353 fatal (_("unterminated string at end of file"));
354 if (c == '\n')
355 {
a0f6b076 356 complain (_("unterminated string"));
4a120d45 357 ungetc (c, fin);
ae3c3164
AD
358 c = match; /* invent terminator */
359 continue;
360 }
361
337bab46 362 obstack_1grow (oout, c);
ae3c3164
AD
363
364 if (c == '\\')
365 {
4a120d45 366 c = getc (fin);
ae3c3164
AD
367 if (c == EOF)
368 fatal (_("unterminated string at end of file"));
337bab46 369 obstack_1grow (oout, c);
8c7ebe49 370
ae3c3164
AD
371 if (c == '\n')
372 lineno++;
373 }
374
a70083a3 375 c = getc (fin);
ae3c3164
AD
376 }
377
b6610515
RA
378 if (store)
379 obstack_1grow (oout, c);
380}
381
382/* FIXME. */
383
static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  /* Same as copy_string2, always keeping the delimiters.  */
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
389
b6610515
RA
390/* FIXME. */
391
static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  /* Append to OOUT the longest run of letters, digits and `_' found
     in FIN; the character which ends the run is left unread.  */
  int c = getc (fin);

  while (c == '_' || isalnum (c))
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
ae3c3164 402
2666f928
AD
403
404/*------------------------------------------------------------------.
405| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
406| `/', which might or might not be a comment. In any case, copy |
407| what we saw. |
408`------------------------------------------------------------------*/
ae3c3164
AD
409
410static inline void
2666f928 411copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
412{
413 int cplus_comment;
a70083a3 414 int ended;
550a72a3
AD
415 int c;
416
417 /* We read a `/', output it. */
2666f928 418 obstack_1grow (oout, '/');
550a72a3
AD
419
420 switch ((c = getc (fin)))
421 {
422 case '/':
423 cplus_comment = 1;
424 break;
425 case '*':
426 cplus_comment = 0;
427 break;
428 default:
429 ungetc (c, fin);
430 return;
431 }
ae3c3164 432
2666f928 433 obstack_1grow (oout, c);
550a72a3 434 c = getc (fin);
ae3c3164
AD
435
436 ended = 0;
437 while (!ended)
438 {
439 if (!cplus_comment && c == '*')
440 {
441 while (c == '*')
442 {
2666f928 443 obstack_1grow (oout, c);
550a72a3 444 c = getc (fin);
ae3c3164
AD
445 }
446
447 if (c == '/')
448 {
2666f928 449 obstack_1grow (oout, c);
ae3c3164
AD
450 ended = 1;
451 }
452 }
453 else if (c == '\n')
454 {
455 lineno++;
2666f928 456 obstack_1grow (oout, c);
ae3c3164
AD
457 if (cplus_comment)
458 ended = 1;
459 else
550a72a3 460 c = getc (fin);
ae3c3164
AD
461 }
462 else if (c == EOF)
463 fatal (_("unterminated comment"));
464 else
465 {
2666f928 466 obstack_1grow (oout, c);
550a72a3 467 c = getc (fin);
ae3c3164
AD
468 }
469 }
470}
471
472
a70083a3 473/*-----------------------------------------------------------------.
337bab46 474| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
475| reference to this location. STACK_OFFSET is the number of values |
476| in the current rule so far, which says where to find `$0' with |
477| respect to the top of the stack. |
478`-----------------------------------------------------------------*/
1ff442ca 479
a70083a3 480static inline void
337bab46 481copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 482{
a70083a3 483 int c;
1ff442ca 484
a70083a3
AD
485 c = getc (fin);
486 if (c == '$')
1ff442ca 487 {
ff4423cc 488 obstack_sgrow (oout, "yyloc");
89cab50d 489 locations_flag = 1;
a70083a3
AD
490 }
491 else if (isdigit (c) || c == '-')
492 {
493 int n;
1ff442ca 494
a70083a3
AD
495 ungetc (c, fin);
496 n = read_signed_integer (fin);
11e2beca
AD
497 if (n > stack_offset)
498 complain (_("invalid value: %s%d"), "@", n);
499 else
500 {
501 /* Offset is always 0 if parser has already popped the stack
502 pointer. */
503 obstack_fgrow1 (oout, "yylsp[%d]",
504 n - (semantic_parser ? 0 : stack_offset));
505 locations_flag = 1;
506 }
1ff442ca 507 }
a70083a3 508 else
ff4a34be
AD
509 {
510 char buf[] = "@c";
511 buf[1] = c;
512 complain (_("%s is invalid"), quote (buf));
513 }
1ff442ca 514}
79282c5a
AD
515
516
517/*-------------------------------------------------------------------.
518| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
519| |
520| Possible inputs: $[<TYPENAME>]($|integer) |
521| |
337bab46 522| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
523| the number of values in the current rule so far, which says where |
524| to find `$0' with respect to the top of the stack. |
525`-------------------------------------------------------------------*/
526
527static inline void
337bab46 528copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
529 symbol_list *rule, int stack_offset)
530{
531 int c = getc (fin);
b0ce6046 532 const char *type_name = NULL;
79282c5a 533
f282676b 534 /* Get the type name if explicit. */
79282c5a
AD
535 if (c == '<')
536 {
f282676b 537 read_type_name (fin);
79282c5a
AD
538 type_name = token_buffer;
539 value_components_used = 1;
79282c5a
AD
540 c = getc (fin);
541 }
542
543 if (c == '$')
544 {
ff4423cc 545 obstack_sgrow (oout, "yyval");
8c7ebe49 546
79282c5a
AD
547 if (!type_name)
548 type_name = get_type_name (0, rule);
549 if (type_name)
337bab46 550 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
551 if (!type_name && typed)
552 complain (_("$$ of `%s' has no declared type"),
553 rule->sym->tag);
554 }
555 else if (isdigit (c) || c == '-')
556 {
557 int n;
558 ungetc (c, fin);
559 n = read_signed_integer (fin);
560
11e2beca
AD
561 if (n > stack_offset)
562 complain (_("invalid value: %s%d"), "$", n);
563 else
564 {
565 if (!type_name && n > 0)
566 type_name = get_type_name (n, rule);
567
568 /* Offset is always 0 if parser has already popped the stack
569 pointer. */
570 obstack_fgrow1 (oout, "yyvsp[%d]",
571 n - (semantic_parser ? 0 : stack_offset));
572
573 if (type_name)
574 obstack_fgrow1 (oout, ".%s", type_name);
575 if (!type_name && typed)
576 complain (_("$%d of `%s' has no declared type"),
577 n, rule->sym->tag);
578 }
79282c5a
AD
579 }
580 else
581 {
582 char buf[] = "$c";
583 buf[1] = c;
584 complain (_("%s is invalid"), quote (buf));
585 }
586}
a70083a3
AD
587\f
588/*-------------------------------------------------------------------.
589| Copy the contents of a `%{ ... %}' into the definitions file. The |
590| `%{' has already been read. Return after reading the `%}'. |
591`-------------------------------------------------------------------*/
1ff442ca 592
4a120d45 593static void
118fb205 594copy_definition (void)
1ff442ca 595{
a70083a3 596 int c;
ae3c3164 597 /* -1 while reading a character if prev char was %. */
a70083a3 598 int after_percent;
1ff442ca 599
89cab50d 600 if (!no_lines_flag)
25b222fa
MA
601 {
602 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 603 lineno, quotearg_style (c_quoting_style,
b7c49edf 604 muscle_find ("filename")));
25b222fa 605 }
1ff442ca
NF
606
607 after_percent = 0;
608
ae3c3164 609 c = getc (finput);
1ff442ca
NF
610
611 for (;;)
612 {
613 switch (c)
614 {
615 case '\n':
dd60faec 616 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
617 lineno++;
618 break;
619
620 case '%':
a70083a3 621 after_percent = -1;
1ff442ca 622 break;
a083fbbf 623
1ff442ca
NF
624 case '\'':
625 case '"':
337bab46 626 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
627 break;
628
629 case '/':
337bab46 630 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
631 break;
632
633 case EOF:
a70083a3 634 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
635
636 default:
dd60faec 637 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
638 }
639
a70083a3 640 c = getc (finput);
1ff442ca
NF
641
642 if (after_percent)
643 {
644 if (c == '}')
645 return;
dd60faec 646 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
647 }
648 after_percent = 0;
1ff442ca 649 }
1ff442ca
NF
650}
651
652
d7020c20
AD
653/*-------------------------------------------------------------------.
654| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
655| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
656| are reversed. |
657`-------------------------------------------------------------------*/
1ff442ca 658
4a120d45 659static void
d7020c20 660parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 661{
342b8b6e
AD
662 token_t token = tok_undef;
663 char *typename = NULL;
1ff442ca 664
1e9798d5 665 /* The symbol being defined. */
db8837cb 666 symbol_t *symbol = NULL;
1e9798d5
AD
667
668 /* After `%token' and `%nterm', any number of symbols maybe be
669 defined. */
1ff442ca
NF
670 for (;;)
671 {
e6011337
JT
672 int tmp_char = ungetc (skip_white_space (), finput);
673
1e9798d5
AD
674 /* `%' (for instance from `%token', or from `%%' etc.) is the
675 only valid means to end this declaration. */
e6011337 676 if (tmp_char == '%')
1ff442ca 677 return;
e6011337 678 if (tmp_char == EOF)
a0f6b076 679 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 680
a70083a3 681 token = lex ();
511e79b3 682 if (token == tok_comma)
943819bf
RS
683 {
684 symbol = NULL;
685 continue;
686 }
511e79b3 687 if (token == tok_typename)
1ff442ca 688 {
95e36146 689 typename = xstrdup (token_buffer);
1ff442ca 690 value_components_used = 1;
943819bf
RS
691 symbol = NULL;
692 }
511e79b3 693 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 694 {
db8837cb 695 symbol_make_alias (symbol, typename);
8e03724b 696 symbol = NULL;
1ff442ca 697 }
511e79b3 698 else if (token == tok_identifier)
1ff442ca
NF
699 {
700 int oldclass = symval->class;
943819bf 701 symbol = symval;
1ff442ca 702
943819bf 703 if (symbol->class == what_is_not)
a0f6b076 704 complain (_("symbol %s redefined"), symbol->tag);
943819bf 705 symbol->class = what_is;
d7020c20 706 if (what_is == nterm_sym && oldclass != nterm_sym)
d9b739c3 707 symbol->number = nvars++;
72a23c97 708 if (what_is == token_sym && symbol->number == -1)
bd02036a 709 symbol->number = ntokens++;
1ff442ca
NF
710
711 if (typename)
712 {
943819bf
RS
713 if (symbol->type_name == NULL)
714 symbol->type_name = typename;
a70083a3 715 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 716 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
717 }
718 }
511e79b3 719 else if (symbol && token == tok_number)
a70083a3 720 {
943819bf 721 symbol->user_token_number = numval;
b7c49edf
AD
722 /* User defined EOF token? */
723 if (numval == 0)
72a23c97
AD
724 {
725 eoftoken = symbol;
726 eoftoken->number = 0;
727 /* It is always mapped to 0, so it was already counted in
728 NTOKENS. */
729 --ntokens;
730 }
a70083a3 731 }
1ff442ca 732 else
943819bf 733 {
a0f6b076 734 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
735 token_buffer,
736 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 737 skip_to_char ('%');
943819bf 738 }
1ff442ca
NF
739 }
740
741}
742
1ff442ca 743
d7020c20
AD
744/*------------------------------.
745| Parse what comes after %start |
746`------------------------------*/
1ff442ca 747
4a120d45 748static void
118fb205 749parse_start_decl (void)
1ff442ca
NF
750{
751 if (start_flag)
27821bff 752 complain (_("multiple %s declarations"), "%start");
511e79b3 753 if (lex () != tok_identifier)
27821bff 754 complain (_("invalid %s declaration"), "%start");
943819bf
RS
755 else
756 {
757 start_flag = 1;
758 startval = symval;
759 }
1ff442ca
NF
760}
761
a70083a3
AD
762/*-----------------------------------------------------------.
763| read in a %type declaration and record its information for |
764| get_type_name to access |
765`-----------------------------------------------------------*/
766
767static void
768parse_type_decl (void)
769{
a70083a3
AD
770 char *name;
771
511e79b3 772 if (lex () != tok_typename)
a70083a3
AD
773 {
774 complain ("%s", _("%type declaration has no <typename>"));
775 skip_to_char ('%');
776 return;
777 }
778
95e36146 779 name = xstrdup (token_buffer);
a70083a3
AD
780
781 for (;;)
782 {
f17bcd1f 783 token_t t;
a70083a3
AD
784 int tmp_char = ungetc (skip_white_space (), finput);
785
786 if (tmp_char == '%')
787 return;
788 if (tmp_char == EOF)
789 fatal (_("Premature EOF after %s"), token_buffer);
790
791 t = lex ();
792
793 switch (t)
1ff442ca
NF
794 {
795
511e79b3
AD
796 case tok_comma:
797 case tok_semicolon:
1ff442ca
NF
798 break;
799
511e79b3 800 case tok_identifier:
1ff442ca
NF
801 if (symval->type_name == NULL)
802 symval->type_name = name;
a70083a3 803 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 804 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
805
806 break;
807
808 default:
a0f6b076
AD
809 complain (_("invalid %%type declaration due to item: %s"),
810 token_buffer);
a70083a3 811 skip_to_char ('%');
1ff442ca
NF
812 }
813 }
814}
815
816
817
d7020c20
AD
818/*----------------------------------------------------------------.
819| Read in a %left, %right or %nonassoc declaration and record its |
820| information. |
821`----------------------------------------------------------------*/
1ff442ca 822
4a120d45 823static void
d7020c20 824parse_assoc_decl (associativity assoc)
1ff442ca 825{
a70083a3
AD
826 char *name = NULL;
827 int prev = 0;
1ff442ca 828
a70083a3 829 lastprec++; /* Assign a new precedence level, never 0. */
1ff442ca 830
1ff442ca
NF
831 for (;;)
832 {
f17bcd1f 833 token_t t;
e6011337 834 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 835
e6011337 836 if (tmp_char == '%')
1ff442ca 837 return;
e6011337 838 if (tmp_char == EOF)
a0f6b076 839 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 840
a70083a3 841 t = lex ();
1ff442ca
NF
842
843 switch (t)
844 {
511e79b3 845 case tok_typename:
95e36146 846 name = xstrdup (token_buffer);
1ff442ca
NF
847 break;
848
511e79b3 849 case tok_comma:
1ff442ca
NF
850 break;
851
511e79b3 852 case tok_identifier:
1ff442ca 853 if (symval->prec != 0)
a0f6b076 854 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
855 symval->prec = lastprec;
856 symval->assoc = assoc;
d7020c20 857 if (symval->class == nterm_sym)
a0f6b076 858 complain (_("symbol %s redefined"), symval->tag);
72a23c97
AD
859 if (symval->number == -1)
860 {
861 symval->number = ntokens++;
862 symval->class = token_sym;
863 }
1ff442ca 864 if (name)
a70083a3 865 { /* record the type, if one is specified */
1ff442ca
NF
866 if (symval->type_name == NULL)
867 symval->type_name = name;
a70083a3 868 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 869 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
870 }
871 break;
872
511e79b3
AD
873 case tok_number:
874 if (prev == tok_identifier)
a70083a3 875 {
1ff442ca 876 symval->user_token_number = numval;
a70083a3
AD
877 }
878 else
879 {
72a23c97
AD
880 complain
881 (_("invalid text (%s) - number should be after identifier"),
882 token_buffer);
a70083a3
AD
883 skip_to_char ('%');
884 }
1ff442ca
NF
885 break;
886
511e79b3 887 case tok_semicolon:
1ff442ca
NF
888 return;
889
890 default:
a0f6b076 891 complain (_("unexpected item: %s"), token_buffer);
a70083a3 892 skip_to_char ('%');
1ff442ca
NF
893 }
894
895 prev = t;
1ff442ca
NF
896 }
897}
898
899
900
dd60faec 901/*--------------------------------------------------------------.
180d45ba
PB
902| Copy the union declaration into the stype muscle |
903| (and fdefines), where it is made into the definition of |
904| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 905`--------------------------------------------------------------*/
1ff442ca 906
4a120d45 907static void
118fb205 908parse_union_decl (void)
1ff442ca 909{
a70083a3
AD
910 int c;
911 int count = 0;
428046f8 912 bool done = FALSE;
180d45ba 913 struct obstack union_obstack;
1ff442ca 914 if (typed)
27821bff 915 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
916
917 typed = 1;
918
642cb8f8 919 MUSCLE_INSERT_INT ("stype_line", lineno);
180d45ba
PB
920 obstack_init (&union_obstack);
921 obstack_sgrow (&union_obstack, "union");
1ff442ca 922
428046f8 923 while (!done)
1ff442ca 924 {
428046f8
AD
925 c = xgetc (finput);
926
342b8b6e
AD
927 /* If C contains '/', it is output by copy_comment (). */
928 if (c != '/')
2666f928 929 obstack_1grow (&union_obstack, c);
1ff442ca
NF
930
931 switch (c)
932 {
933 case '\n':
934 lineno++;
935 break;
936
937 case '/':
2666f928 938 copy_comment (finput, &union_obstack);
1ff442ca
NF
939 break;
940
1ff442ca
NF
941 case '{':
942 count++;
943 break;
944
945 case '}':
428046f8 946 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 947 if (count == 0)
27821bff 948 complain (_("unmatched %s"), "`}'");
1ff442ca 949 count--;
428046f8
AD
950 if (!count)
951 done = TRUE;
952 break;
1ff442ca 953 }
1ff442ca 954 }
180d45ba 955
428046f8
AD
956 /* JF don't choke on trailing semi */
957 c = skip_white_space ();
958 if (c != ';')
959 ungetc (c, finput);
960 obstack_1grow (&union_obstack, 0);
961 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
962}
963
d7020c20
AD
964
965/*-------------------------------------------------------.
966| Parse the declaration %expect N which says to expect N |
967| shift-reduce conflicts. |
968`-------------------------------------------------------*/
1ff442ca 969
4a120d45 970static void
118fb205 971parse_expect_decl (void)
1ff442ca 972{
131e2fef 973 int c = skip_white_space ();
1ff442ca
NF
974 ungetc (c, finput);
975
131e2fef 976 if (!isdigit (c))
79282c5a 977 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
978 else
979 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
980}
981
a70083a3
AD
982
/*-------------------------------------------------------------------.
| Parse what comes after %thong.  The full syntax is                 |
|                                                                    |
|              %thong <type> token number literal                    |
|                                                                    |
| The <type> or number may be omitted.  The number specifies the     |
| user_token_number.                                                 |
|                                                                    |
| Two symbols are entered in the table, one for the token symbol and |
| one for the literal.  Both are given the <type>, if any, from the  |
| declaration.  The ->user_token_number of the first is SALIAS and   |
| the ->user_token_number of the second is set to the number, if     |
| any, from the declaration.  The two symbols are linked via         |
| pointers in their ->alias fields.                                  |
|                                                                    |
| During OUTPUT_DEFINES_TABLE, the symbol is reported; thereafter,   |
| only the literal string is retained: it is the literal string      |
| that is output to yytname.                                         |
`-------------------------------------------------------------------*/
1002
1003static void
1004parse_thong_decl (void)
7b306f52 1005{
f17bcd1f 1006 token_t token;
db8837cb 1007 symbol_t *symbol;
a70083a3 1008 char *typename = 0;
6b7e85b9 1009 int usrtoknum = SUNDEF;
7b306f52 1010
a70083a3 1011 token = lex (); /* fetch typename or first token */
511e79b3 1012 if (token == tok_typename)
7b306f52 1013 {
95e36146 1014 typename = xstrdup (token_buffer);
a70083a3
AD
1015 value_components_used = 1;
1016 token = lex (); /* fetch first token */
7b306f52 1017 }
7b306f52 1018
a70083a3 1019 /* process first token */
7b306f52 1020
511e79b3 1021 if (token != tok_identifier)
a70083a3
AD
1022 {
1023 complain (_("unrecognized item %s, expected an identifier"),
1024 token_buffer);
1025 skip_to_char ('%');
1026 return;
7b306f52 1027 }
d7020c20 1028 symval->class = token_sym;
a70083a3
AD
1029 symval->type_name = typename;
1030 symval->user_token_number = SALIAS;
1031 symbol = symval;
7b306f52 1032
a70083a3 1033 token = lex (); /* get number or literal string */
1ff442ca 1034
511e79b3 1035 if (token == tok_number)
943819bf 1036 {
a70083a3
AD
1037 usrtoknum = numval;
1038 token = lex (); /* okay, did number, now get literal */
943819bf 1039 }
1ff442ca 1040
a70083a3 1041 /* process literal string token */
1ff442ca 1042
511e79b3 1043 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 1044 {
a70083a3
AD
1045 complain (_("expected string constant instead of %s"), token_buffer);
1046 skip_to_char ('%');
1047 return;
1ff442ca 1048 }
d7020c20 1049 symval->class = token_sym;
a70083a3
AD
1050 symval->type_name = typename;
1051 symval->user_token_number = usrtoknum;
1ff442ca 1052
a70083a3
AD
1053 symval->alias = symbol;
1054 symbol->alias = symval;
1ff442ca 1055
79282c5a
AD
1056 /* symbol and symval combined are only one symbol. */
1057 nsyms--;
a70083a3 1058}
3cef001a 1059
11e2beca 1060
b6610515 1061static void
11d82f03 1062parse_muscle_decl (void)
b6610515
RA
1063{
1064 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
1065 char *muscle_key;
1066 char *muscle_value;
b6610515
RA
1067
1068 /* Read key. */
1069 if (!isalpha (ch) && ch != '_')
1070 {
1071 complain (_("invalid %s declaration"), "%define");
1072 skip_to_char ('%');
1073 return;
1074 }
11d82f03
MA
1075 copy_identifier (finput, &muscle_obstack);
1076 obstack_1grow (&muscle_obstack, 0);
1077 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 1078
b6610515
RA
1079 /* Read value. */
1080 ch = skip_white_space ();
1081 if (ch != '"')
1082 {
1083 ungetc (ch, finput);
1084 if (ch != EOF)
1085 {
1086 complain (_("invalid %s declaration"), "%define");
1087 skip_to_char ('%');
1088 return;
1089 }
1090 else
1091 fatal (_("Premature EOF after %s"), "\"");
1092 }
11d82f03
MA
1093 copy_string2 (finput, &muscle_obstack, '"', 0);
1094 obstack_1grow (&muscle_obstack, 0);
1095 muscle_value = obstack_finish (&muscle_obstack);
b6610515 1096
b6610515 1097 /* Store the (key, value) pair in the environment. */
11d82f03 1098 muscle_insert (muscle_key, muscle_value);
b6610515
RA
1099}
1100
2ba3b73c 1101
426cf563
MA
1102
1103/*---------------------------------.
a870c567 1104| Parse a double quoted parameter. |
426cf563
MA
1105`---------------------------------*/
1106
1107static const char *
1108parse_dquoted_param (const char *from)
1109{
1110 struct obstack param_obstack;
1111 const char *param = NULL;
1112 int c;
1113
1114 obstack_init (&param_obstack);
1115 c = skip_white_space ();
1116
1117 if (c != '"')
1118 {
1119 complain (_("invalid %s declaration"), from);
1120 ungetc (c, finput);
1121 skip_to_char ('%');
1122 return NULL;
1123 }
1124
2648a72d
AD
1125 while ((c = literalchar ()) != '"')
1126 obstack_1grow (&param_obstack, c);
a870c567 1127
426cf563
MA
1128 obstack_1grow (&param_obstack, '\0');
1129 param = obstack_finish (&param_obstack);
1130
1131 if (c != '"' || strlen (param) == 0)
1132 {
1133 complain (_("invalid %s declaration"), from);
1134 if (c != '"')
1135 ungetc (c, finput);
1136 skip_to_char ('%');
1137 return NULL;
1138 }
1139
1140 return param;
1141}
1142
2ba3b73c
MA
1143/*----------------------------------.
1144| Parse what comes after %skeleton. |
1145`----------------------------------*/
1146
a870c567 1147static void
2ba3b73c
MA
1148parse_skel_decl (void)
1149{
426cf563 1150 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
1151}
1152
a70083a3
AD
1153/*----------------------------------------------------------------.
1154| Read from finput until `%%' is seen. Discard the `%%'. Handle |
1155| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 1156| groups to ATTRS_OBSTACK. |
a70083a3 1157`----------------------------------------------------------------*/
1ff442ca 1158
4a120d45 1159static void
a70083a3 1160read_declarations (void)
1ff442ca 1161{
a70083a3 1162 for (;;)
1ff442ca 1163 {
951366c1 1164 int c = skip_white_space ();
1ff442ca 1165
a70083a3
AD
1166 if (c == '%')
1167 {
951366c1 1168 token_t tok = parse_percent_token ();
1ff442ca 1169
a70083a3 1170 switch (tok)
943819bf 1171 {
511e79b3 1172 case tok_two_percents:
a70083a3 1173 return;
1ff442ca 1174
511e79b3 1175 case tok_percent_left_curly:
a70083a3
AD
1176 copy_definition ();
1177 break;
1ff442ca 1178
511e79b3 1179 case tok_token:
d7020c20 1180 parse_token_decl (token_sym, nterm_sym);
a70083a3 1181 break;
1ff442ca 1182
511e79b3 1183 case tok_nterm:
d7020c20 1184 parse_token_decl (nterm_sym, token_sym);
a70083a3 1185 break;
1ff442ca 1186
511e79b3 1187 case tok_type:
a70083a3
AD
1188 parse_type_decl ();
1189 break;
1ff442ca 1190
511e79b3 1191 case tok_start:
a70083a3
AD
1192 parse_start_decl ();
1193 break;
118fb205 1194
511e79b3 1195 case tok_union:
a70083a3
AD
1196 parse_union_decl ();
1197 break;
1ff442ca 1198
511e79b3 1199 case tok_expect:
a70083a3
AD
1200 parse_expect_decl ();
1201 break;
6deb4447 1202
511e79b3 1203 case tok_thong:
a70083a3
AD
1204 parse_thong_decl ();
1205 break;
d7020c20 1206
511e79b3 1207 case tok_left:
d7020c20 1208 parse_assoc_decl (left_assoc);
a70083a3 1209 break;
1ff442ca 1210
511e79b3 1211 case tok_right:
d7020c20 1212 parse_assoc_decl (right_assoc);
a70083a3 1213 break;
1ff442ca 1214
511e79b3 1215 case tok_nonassoc:
d7020c20 1216 parse_assoc_decl (non_assoc);
a70083a3 1217 break;
1ff442ca 1218
b6610515 1219 case tok_define:
11d82f03 1220 parse_muscle_decl ();
b6610515 1221 break;
342b8b6e 1222
2ba3b73c
MA
1223 case tok_skel:
1224 parse_skel_decl ();
1225 break;
b6610515 1226
511e79b3 1227 case tok_noop:
a70083a3 1228 break;
1ff442ca 1229
951366c1
AD
1230 case tok_stropt:
1231 case tok_intopt:
1232 case tok_obsolete:
72a23c97 1233 assert (0);
951366c1
AD
1234 break;
1235
e0c40012 1236 case tok_illegal:
a70083a3
AD
1237 default:
1238 complain (_("unrecognized: %s"), token_buffer);
1239 skip_to_char ('%');
1240 }
1241 }
1242 else if (c == EOF)
1243 fatal (_("no input grammar"));
1244 else
1245 {
ff4a34be
AD
1246 char buf[] = "c";
1247 buf[0] = c;
1248 complain (_("unknown character: %s"), quote (buf));
a70083a3 1249 skip_to_char ('%');
1ff442ca 1250 }
1ff442ca 1251 }
1ff442ca 1252}
a70083a3
AD
1253\f
1254/*-------------------------------------------------------------------.
1255| Assuming that a `{' has just been seen, copy everything up to the |
1256| matching `}' into the actions file. STACK_OFFSET is the number of |
1257| values in the current rule so far, which says where to find `$0' |
1258| with respect to the top of the stack. |
14d293ac 1259| |
11e2beca
AD
1260| This routine is used both for actions and guards. Only |
1261| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1262| pointers to relevant portions inside this obstack. |
a70083a3 1263`-------------------------------------------------------------------*/
1ff442ca 1264
4a120d45 1265static void
14d293ac 1266parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1267{
a70083a3 1268 int c;
a70083a3 1269 int count;
1ff442ca 1270
1ff442ca 1271 count = 1;
1ff442ca
NF
1272 while (count > 0)
1273 {
14d293ac
AD
1274 while ((c = getc (finput)) != '}')
1275 switch (c)
1276 {
1277 case '\n':
1278 obstack_1grow (&action_obstack, c);
1279 lineno++;
1280 break;
1ff442ca 1281
14d293ac
AD
1282 case '{':
1283 obstack_1grow (&action_obstack, c);
1284 count++;
1285 break;
1ff442ca 1286
14d293ac
AD
1287 case '\'':
1288 case '"':
1289 copy_string (finput, &action_obstack, c);
1290 break;
1ff442ca 1291
14d293ac
AD
1292 case '/':
1293 copy_comment (finput, &action_obstack);
1294 break;
1ff442ca 1295
14d293ac
AD
1296 case '$':
1297 copy_dollar (finput, &action_obstack,
1298 rule, stack_offset);
1299 break;
1ff442ca 1300
14d293ac
AD
1301 case '@':
1302 copy_at (finput, &action_obstack,
1303 stack_offset);
1304 break;
a70083a3 1305
14d293ac
AD
1306 case EOF:
1307 fatal (_("unmatched %s"), "`{'");
a70083a3 1308
14d293ac
AD
1309 default:
1310 obstack_1grow (&action_obstack, c);
1311 }
a70083a3 1312
14d293ac 1313 /* Above loop exits when C is '}'. */
a70083a3 1314 if (--count)
2b25d624 1315 obstack_1grow (&action_obstack, c);
a70083a3
AD
1316 }
1317
3f96f4dc 1318 obstack_1grow (&action_obstack, '\0');
a70083a3 1319}
14d293ac 1320
a70083a3
AD
1321
1322static void
14d293ac 1323parse_action (symbol_list *rule, int stack_offset)
a70083a3 1324{
14d293ac
AD
1325 rule->action_line = lineno;
1326 parse_braces (rule, stack_offset);
1327 rule->action = obstack_finish (&action_obstack);
1328}
a70083a3 1329
a70083a3 1330
14d293ac
AD
1331static void
1332parse_guard (symbol_list *rule, int stack_offset)
1333{
1334 token_t t = lex ();
1335 if (t != tok_left_curly)
1336 complain (_("invalid %s declaration"), "%guard");
f499b062 1337 rule->guard_line = lineno;
14d293ac
AD
1338 parse_braces (rule, stack_offset);
1339 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1340}
14d293ac 1341
a70083a3
AD
1342\f
1343
a70083a3
AD
1344/*-------------------------------------------------------------------.
1345| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1346| with the user's names. |
1347`-------------------------------------------------------------------*/
1ff442ca 1348
db8837cb 1349static symbol_t *
118fb205 1350gensym (void)
1ff442ca 1351{
274d42ce
AD
1352 /* Incremented for each generated symbol */
1353 static int gensym_count = 0;
1354 static char buf[256];
1355
db8837cb 1356 symbol_t *sym;
1ff442ca 1357
274d42ce
AD
1358 sprintf (buf, "@%d", ++gensym_count);
1359 token_buffer = buf;
a70083a3 1360 sym = getsym (token_buffer);
d7020c20 1361 sym->class = nterm_sym;
d9b739c3 1362 sym->number = nvars++;
36281465 1363 return sym;
1ff442ca 1364}
a70083a3 1365\f
107f7dfb
AD
1366/*-------------------------------------------------------------------.
1367| Parse the input grammar into a one symbol_list structure. Each |
1368| rule is represented by a sequence of symbols: the left hand side |
1369| followed by the contents of the right hand side, followed by a |
1370| null pointer instead of a symbol to terminate the rule. The next |
1371| symbol is the lhs of the following rule. |
1372| |
1373| All guards and actions are copied out to the appropriate files, |
1374| labelled by the rule number they apply to. |
1375| |
1376| Bison used to allow some %directives in the rules sections, but |
1377| this is no longer consider appropriate: (i) the documented grammar |
1378| doesn't claim it, (ii), it would promote bad style, (iii), error |
1379| recovery for %directives consists in skipping the junk until a `%' |
1380| is seen and helrp synchronizing. This scheme is definitely wrong |
1381| in the rules section. |
1382`-------------------------------------------------------------------*/
1ff442ca 1383
4a120d45 1384static void
118fb205 1385readgram (void)
1ff442ca 1386{
f17bcd1f 1387 token_t t;
db8837cb 1388 symbol_t *lhs = NULL;
107f7dfb
AD
1389 symbol_list *p = NULL;
1390 symbol_list *p1 = NULL;
1ff442ca 1391
ff4a34be
AD
1392 /* Points to first symbol_list of current rule. its symbol is the
1393 lhs of the rule. */
107f7dfb 1394 symbol_list *crule = NULL;
ff4a34be 1395 /* Points to the symbol_list preceding crule. */
107f7dfb 1396 symbol_list *crule1 = NULL;
1ff442ca 1397
a70083a3 1398 t = lex ();
1ff442ca 1399
511e79b3 1400 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1401 if (t == tok_identifier || t == tok_bar)
1402 {
1403 int action_flag = 0;
1404 /* Number of symbols in rhs of this rule so far */
1405 int rulelength = 0;
1406 int xactions = 0; /* JF for error checking */
db8837cb 1407 symbol_t *first_rhs = 0;
107f7dfb
AD
1408
1409 if (t == tok_identifier)
1410 {
1411 lhs = symval;
1412
1413 if (!start_flag)
1414 {
1415 startval = lhs;
1416 start_flag = 1;
1417 }
1ff442ca 1418
107f7dfb
AD
1419 t = lex ();
1420 if (t != tok_colon)
1421 {
1422 complain (_("ill-formed rule: initial symbol not followed by colon"));
1423 unlex (t);
1424 }
1425 }
1426
1427 if (nrules == 0 && t == tok_bar)
1428 {
1429 complain (_("grammar starts with vertical bar"));
1430 lhs = symval; /* BOGUS: use a random symval */
1431 }
1432 /* start a new rule and record its lhs. */
1433
1434 nrules++;
1435 nitems++;
1436
1437 p = symbol_list_new (lhs);
1438
1439 crule1 = p1;
1440 if (p1)
1441 p1->next = p;
1442 else
1443 grammar = p;
1ff442ca 1444
107f7dfb
AD
1445 p1 = p;
1446 crule = p;
1ff442ca 1447
107f7dfb 1448 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1449
107f7dfb
AD
1450 if (lhs->class == unknown_sym)
1451 {
1452 lhs->class = nterm_sym;
d9b739c3 1453 lhs->number = nvars;
107f7dfb
AD
1454 nvars++;
1455 }
1456 else if (lhs->class == token_sym)
1457 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1458
107f7dfb 1459 /* read the rhs of the rule. */
1ff442ca 1460
107f7dfb
AD
1461 for (;;)
1462 {
1463 t = lex ();
1464 if (t == tok_prec)
1465 {
1466 t = lex ();
1467 crule->ruleprec = symval;
1468 t = lex ();
1469 }
1470
1471 if (!(t == tok_identifier || t == tok_left_curly))
1472 break;
1ff442ca 1473
107f7dfb
AD
1474 /* If next token is an identifier, see if a colon follows it.
1475 If one does, exit this rule now. */
1476 if (t == tok_identifier)
1477 {
db8837cb 1478 symbol_t *ssave;
107f7dfb
AD
1479 token_t t1;
1480
1481 ssave = symval;
1482 t1 = lex ();
1483 unlex (t1);
1484 symval = ssave;
1485 if (t1 == tok_colon)
e5352bc7 1486 {
fff9bf0b 1487 warn (_("previous rule lacks an ending `;'"));
e5352bc7
AD
1488 break;
1489 }
107f7dfb
AD
1490
1491 if (!first_rhs) /* JF */
1492 first_rhs = symval;
1493 /* Not followed by colon =>
1494 process as part of this rule's rhs. */
1495 }
1496
1497 /* If we just passed an action, that action was in the middle
1498 of a rule, so make a dummy rule to reduce it to a
1499 non-terminal. */
1500 if (action_flag)
1501 {
1502 /* Since the action was written out with this rule's
1503 number, we must give the new rule this number by
1504 inserting the new rule before it. */
1505
1506 /* Make a dummy nonterminal, a gensym. */
db8837cb 1507 symbol_t *sdummy = gensym ();
107f7dfb
AD
1508
1509 /* Make a new rule, whose body is empty, before the
1510 current one, so that the action just read can
1511 belong to it. */
1512 nrules++;
1513 nitems++;
1514 p = symbol_list_new (sdummy);
1515 /* Attach its lineno to that of the host rule. */
1516 p->line = crule->line;
82c035a8
AD
1517 /* Move the action from the host rule to this one. */
1518 p->action = crule->action;
1519 p->action_line = crule->action_line;
1520 crule->action = NULL;
1521
107f7dfb
AD
1522 if (crule1)
1523 crule1->next = p;
1524 else
1525 grammar = p;
1526 /* End of the rule. */
1527 crule1 = symbol_list_new (NULL);
1528 crule1->next = crule;
1529
1530 p->next = crule1;
1531
1532 /* Insert the dummy generated by that rule into this
1533 rule. */
1534 nitems++;
1535 p = symbol_list_new (sdummy);
1536 p1->next = p;
1537 p1 = p;
1538
1539 action_flag = 0;
1540 }
1541
1542 if (t == tok_identifier)
1543 {
1544 nitems++;
1545 p = symbol_list_new (symval);
1546 p1->next = p;
1547 p1 = p;
1548 }
1549 else /* handle an action. */
1550 {
14d293ac 1551 parse_action (crule, rulelength);
107f7dfb
AD
1552 action_flag = 1;
1553 xactions++; /* JF */
1554 }
1555 rulelength++;
1556 } /* end of read rhs of rule */
1557
1558 /* Put an empty link in the list to mark the end of this rule */
1559 p = symbol_list_new (NULL);
1560 p1->next = p;
1561 p1 = p;
1562
1563 if (t == tok_prec)
1564 {
1565 complain (_("two @prec's in a row"));
1566 t = lex ();
1567 crule->ruleprec = symval;
1568 t = lex ();
1569 }
f499b062 1570
107f7dfb
AD
1571 if (t == tok_guard)
1572 {
1573 if (!semantic_parser)
1574 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1575
14d293ac 1576 parse_guard (crule, rulelength);
a70083a3 1577 t = lex ();
107f7dfb 1578 }
f499b062
AD
1579
1580 if (t == tok_left_curly)
107f7dfb
AD
1581 {
1582 /* This case never occurs -wjh */
1583 if (action_flag)
1584 complain (_("two actions at end of one rule"));
14d293ac 1585 parse_action (crule, rulelength);
107f7dfb
AD
1586 action_flag = 1;
1587 xactions++; /* -wjh */
1588 t = lex ();
1589 }
1590 /* If $$ is being set in default way, report if any type
1591 mismatch. */
1592 else if (!xactions
1593 && first_rhs && lhs->type_name != first_rhs->type_name)
1594 {
1595 if (lhs->type_name == 0
1596 || first_rhs->type_name == 0
1597 || strcmp (lhs->type_name, first_rhs->type_name))
1598 complain (_("type clash (`%s' `%s') on default action"),
1599 lhs->type_name ? lhs->type_name : "",
1600 first_rhs->type_name ? first_rhs->type_name : "");
1601 }
1602 /* Warn if there is no default for $$ but we need one. */
1603 else if (!xactions && !first_rhs && lhs->type_name != 0)
1604 complain (_("empty rule for typed nonterminal, and no action"));
bfcf1f3a 1605 if (t == tok_two_percents || t == tok_eof)
fff9bf0b 1606 warn (_("previous rule lacks an ending `;'"));
107f7dfb 1607 if (t == tok_semicolon)
a70083a3 1608 t = lex ();
107f7dfb
AD
1609 }
1610 else
1611 {
1612 complain (_("invalid input: %s"), quote (token_buffer));
1613 t = lex ();
1614 }
943819bf 1615
b68e7744
AD
1616 /* grammar has been read. Do some checking */
1617
1618 if (nrules == 0)
1619 fatal (_("no rules in the input grammar"));
1620
1621 /* Report any undefined symbols and consider them nonterminals. */
db8837cb 1622 symbols_do (symbol_check_defined, NULL);
b68e7744 1623
ff442794
AD
1624 /* Insert the initial rule, which line is that of the first rule
1625 (not that of the start symbol):
30171f79
AD
1626
1627 axiom: %start EOF. */
1628 p = symbol_list_new (axiom);
ff442794 1629 p->line = grammar->line;
30171f79
AD
1630 p->next = symbol_list_new (startval);
1631 p->next->next = symbol_list_new (eoftoken);
1632 p->next->next->next = symbol_list_new (NULL);
1633 p->next->next->next->next = grammar;
1634 nrules += 1;
1635 nitems += 3;
1636 grammar = p;
1637 startval = axiom;
1ff442ca
NF
1638
1639 if (nsyms > MAXSHORT)
a0f6b076
AD
1640 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1641 MAXSHORT);
1ff442ca 1642
72a23c97 1643 assert (nsyms == ntokens + nvars);
1ff442ca 1644}
ff48177d
MA
1645
1646/* At the end of the grammar file, some C source code must
63c2d5de 1647 be stored. It is going to be associated to the epilogue
ff48177d
MA
1648 directive. */
1649static void
1650read_additionnal_code (void)
1651{
9101a310 1652 int c;
63c2d5de 1653 struct obstack el_obstack;
342b8b6e 1654
63c2d5de 1655 obstack_init (&el_obstack);
ff48177d 1656
710ddc4f
MA
1657 if (!no_lines_flag)
1658 {
1659 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1660 lineno, quotearg_style (c_quoting_style,
b7c49edf 1661 muscle_find ("filename")));
710ddc4f
MA
1662 }
1663
ff48177d 1664 while ((c = getc (finput)) != EOF)
63c2d5de 1665 obstack_1grow (&el_obstack, c);
342b8b6e 1666
63c2d5de 1667 obstack_1grow (&el_obstack, 0);
11d82f03 1668 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1669}
1670
a70083a3 1671\f
037ca2f1
AD
1672/*------------------------------------------------------------------.
1673| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1674| number. |
1675`------------------------------------------------------------------*/
1676
1677static void
1678token_translations_init (void)
1679{
72a23c97 1680 int last_user_token_number = 256;
037ca2f1
AD
1681 int i;
1682
72a23c97
AD
1683 /* Set the user numbers. */
1684 for (i = 0; i < ntokens; ++i)
1685 {
db8837cb 1686 symbol_t *this = symbols[i];
72a23c97
AD
1687 if (this->user_token_number == SUNDEF)
1688 this->user_token_number = ++last_user_token_number;
1689 if (this->user_token_number > max_user_token_number)
1690 max_user_token_number = this->user_token_number;
72a23c97
AD
1691 }
1692
037ca2f1
AD
1693 token_translations = XCALLOC (short, max_user_token_number + 1);
1694
1695 /* Initialize all entries for literal tokens to 2, the internal
1696 token number for $undefined., which represents all invalid
1697 inputs. */
18bcecb0 1698 for (i = 0; i < max_user_token_number + 1; i++)
037ca2f1
AD
1699 token_translations[i] = 2;
1700
db8837cb 1701 symbols_do (symbol_translation, NULL);
037ca2f1
AD
1702}
1703
1704
0e78e603
AD
1705/*----------------------------------------------------------------.
1706| Assign symbol numbers, and write definition of token names into |
1707| FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1708`----------------------------------------------------------------*/
1ff442ca 1709
4a120d45 1710static void
118fb205 1711packsymbols (void)
1ff442ca 1712{
db8837cb 1713 symbols = XCALLOC (symbol_t *, nsyms);
1ff442ca 1714
db8837cb
AD
1715 symbols_do (symbol_check_alias_consistence, NULL);
1716 symbols_do (symbol_pack, NULL);
1ff442ca 1717
037ca2f1 1718 token_translations_init ();
1ff442ca 1719
d9b739c3 1720 error_token_number = errtoken->number;
1ff442ca 1721
e3f1699f
AD
1722 if (startval->class == unknown_sym)
1723 fatal (_("the start symbol %s is undefined"), startval->tag);
1724 else if (startval->class == token_sym)
1725 fatal (_("the start symbol %s is a token"), startval->tag);
1726
d9b739c3 1727 start_symbol = startval->number;
e3f1699f
AD
1728}
1729
1730
a70083a3
AD
1731/*---------------------------------------------------------------.
1732| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 1733| RITEM. |
a70083a3 1734`---------------------------------------------------------------*/
1ff442ca 1735
4a120d45 1736static void
118fb205 1737packgram (void)
1ff442ca 1738{
a70083a3
AD
1739 int itemno;
1740 int ruleno;
1741 symbol_list *p;
1ff442ca 1742
adc8c848
AD
1743 /* We use short to index items. */
1744 if (nitems >= MAXSHORT)
1745 fatal (_("too many items (max %d)"), MAXSHORT);
1746
d7913476 1747 ritem = XCALLOC (short, nitems + 1);
1a2b5d37 1748 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1749
1750 itemno = 0;
1751 ruleno = 1;
1752
1753 p = grammar;
1754 while (p)
1755 {
db8837cb 1756 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 1757 rules[ruleno].user_number = ruleno;
c3b407f4 1758 rules[ruleno].number = ruleno;
bba97eb2 1759 rules[ruleno].lhs = p->sym;
99013900 1760 rules[ruleno].rhs = ritem + itemno;
1a2b5d37
AD
1761 rules[ruleno].line = p->line;
1762 rules[ruleno].useful = TRUE;
1763 rules[ruleno].action = p->action;
1764 rules[ruleno].action_line = p->action_line;
1765 rules[ruleno].guard = p->guard;
1766 rules[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1767
1768 p = p->next;
1769 while (p && p->sym)
1770 {
d9b739c3 1771 ritem[itemno++] = p->sym->number;
1ff442ca
NF
1772 /* A rule gets by default the precedence and associativity
1773 of the last token in it. */
d7020c20 1774 if (p->sym->class == token_sym)
03b31c0c 1775 rules[ruleno].prec = p->sym;
a70083a3
AD
1776 if (p)
1777 p = p->next;
1ff442ca
NF
1778 }
1779
1780 /* If this rule has a %prec,
a70083a3 1781 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1782 if (ruleprec)
1783 {
03b31c0c
AD
1784 rules[ruleno].precsym = ruleprec;
1785 rules[ruleno].prec = ruleprec;
1ff442ca 1786 }
1ff442ca
NF
1787 ritem[itemno++] = -ruleno;
1788 ruleno++;
1789
a70083a3
AD
1790 if (p)
1791 p = p->next;
1ff442ca
NF
1792 }
1793
1794 ritem[itemno] = 0;
75142d45
AD
1795 nritems = itemno;
1796 assert (nritems == nitems);
3067fbef
AD
1797
1798 if (trace_flag)
1799 ritem_print (stderr);
1ff442ca 1800}
a70083a3
AD
1801\f
1802/*-------------------------------------------------------------------.
1803| Read in the grammar specification and record it in the format |
ea5607fd 1804| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1805| and all actions into ACTION_OBSTACK, in each case forming the body |
1806| of a C function (YYGUARD or YYACTION) which contains a switch |
1807| statement to decide which guard or action to execute. |
a70083a3
AD
1808`-------------------------------------------------------------------*/
1809
1810void
1811reader (void)
1812{
342b8b6e 1813 lex_init ();
a70083a3
AD
1814 lineno = 1;
1815
11d82f03
MA
1816 /* Initialize the muscle obstack. */
1817 obstack_init (&muscle_obstack);
82e236e2 1818
a70083a3 1819 /* Initialize the symbol table. */
db8837cb 1820 symbols_new ();
b6610515 1821
30171f79
AD
1822 /* Construct the axiom symbol. */
1823 axiom = getsym ("$axiom");
1824 axiom->class = nterm_sym;
d9b739c3 1825 axiom->number = nvars++;
30171f79 1826
a70083a3
AD
1827 /* Construct the error token */
1828 errtoken = getsym ("error");
d7020c20 1829 errtoken->class = token_sym;
72a23c97 1830 errtoken->number = ntokens++;
a70083a3 1831 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1832
a70083a3
AD
1833 /* Construct a token that represents all undefined literal tokens.
1834 It is always token number 2. */
1835 undeftoken = getsym ("$undefined.");
d7020c20 1836 undeftoken->class = token_sym;
72a23c97 1837 undeftoken->number = ntokens++;
a70083a3
AD
1838 undeftoken->user_token_number = 2;
1839
331dbc1b
AD
1840 /* Initialize the obstacks. */
1841 obstack_init (&action_obstack);
1842 obstack_init (&attrs_obstack);
331dbc1b
AD
1843 obstack_init (&output_obstack);
1844
1845 finput = xfopen (infile, "r");
1846
896fe5c1
AD
1847 /* Read the declaration section. Copy %{ ... %} groups to
1848 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1849 etc. found there. */
a70083a3 1850 read_declarations ();
b7c49edf
AD
1851
1852 /* If the user did not define her EOFTOKEN, do it now. */
1853 if (!eoftoken)
1854 {
1855 eoftoken = getsym ("$");
1856 eoftoken->class = token_sym;
72a23c97 1857 eoftoken->number = 0;
b7c49edf
AD
1858 /* Value specified by POSIX. */
1859 eoftoken->user_token_number = 0;
1860 }
1861
a70083a3
AD
1862 /* Read in the grammar, build grammar in list form. Write out
1863 guards and actions. */
1864 readgram ();
ff48177d
MA
1865 /* Some C code is given at the end of the grammar file. */
1866 read_additionnal_code ();
b0c4483e 1867
331dbc1b
AD
1868 lex_free ();
1869 xfclose (finput);
1870
a70083a3
AD
1871 /* Assign the symbols their symbol numbers. Write #defines for the
1872 token symbols into FDEFINES if requested. */
1873 packsymbols ();
93ede233 1874
a70083a3
AD
1875 /* Convert the grammar into the format described in gram.h. */
1876 packgram ();
8419d367
AD
1877
1878 /* The grammar as a symbol_list is no longer needed. */
1879 LIST_FREE (symbol_list, grammar);
a70083a3 1880}
76514394
AD
1881
1882void
1883grammar_free (void)
1884{
1885 XFREE (ritem);
1886 free (rules + 1);
1887 /* Free the symbol table data structure. */
db8837cb 1888 symbols_free ();
76514394 1889}