]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/system.h: No longer using strndup.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
db8837cb 41 symbol_t *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
db8837cb 51 symbol_t *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
280a38c3
AD
55static symbol_list *grammar = NULL;
56static int start_flag = 0;
db8837cb 57static symbol_t *startval = NULL;
1ff442ca
NF
58
59/* Nonzero if components of semantic values are used, implying
60 they must be unions. */
61static int value_components_used;
62
d7020c20 63/* Nonzero if %union has been seen. */
280a38c3 64static int typed = 0;
1ff442ca 65
d7020c20 66/* Incremented for each %left, %right or %nonassoc seen */
280a38c3 67static int lastprec = 0;
1ff442ca 68
db8837cb
AD
69symbol_t *errtoken = NULL;
70symbol_t *undeftoken = NULL;
71symbol_t *eoftoken = NULL;
72symbol_t *axiom = NULL;
b29b2ed5 73
6255b435 74static symbol_list *
db8837cb 75symbol_list_new (symbol_t *sym)
b29b2ed5
AD
76{
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
d945f5cd
AD
81 res->action = NULL;
82 res->action_line = 0;
f499b062
AD
83 res->guard = NULL;
84 res->guard_line = 0;
b29b2ed5
AD
85 res->ruleprec = NULL;
86 return res;
87}
88
72a23c97 89/*------------------------.
db8837cb 90| Operations on symbols. |
72a23c97
AD
91`------------------------*/
92
93
94/*-----------------------------------------------------------.
95| If THIS is not defined, report an error, and consider it a |
96| nonterminal. |
97`-----------------------------------------------------------*/
98
99static bool
db8837cb 100symbol_check_defined (symbol_t *this)
72a23c97
AD
101{
102 if (this->class == unknown_sym)
103 {
104 complain
105 (_("symbol %s is used, but is not defined as a token and has no rules"),
106 this->tag);
107 this->class = nterm_sym;
108 this->number = nvars++;
109 }
110
111 return TRUE;
112}
113
114
115/*-------------------------------------------------------------------.
116| Assign a symbol number, and write the definition of the token name |
117| into FDEFINES. Put in SYMBOLS. |
118`-------------------------------------------------------------------*/
119
120static bool
db8837cb 121symbol_make_alias (symbol_t *symbol, char *typename)
72a23c97
AD
122{
123 if (symval->alias)
124 warn (_("symbol `%s' used more than once as a literal string"),
125 symval->tag);
126 else if (symbol->alias)
127 warn (_("symbol `%s' given more than one literal string"),
128 symbol->tag);
129 else
130 {
131 symval->class = token_sym;
132 symval->type_name = typename;
133 symval->user_token_number = symbol->user_token_number;
134 symbol->user_token_number = SALIAS;
135 symval->alias = symbol;
136 symbol->alias = symval;
137 /* symbol and symval combined are only one symbol */
138 nsyms--;
139 ntokens--;
140 assert (ntokens == symbol->number || ntokens == symval->number);
141 symbol->number = symval->number =
142 (symval->number < symbol->number) ? symval->number : symbol->number;
143 }
144
145 return TRUE;
146}
147
148/*---------------------------------------------------------.
149| Check that THIS, and its alias, have same precedence and |
150| associativity. |
151`---------------------------------------------------------*/
152
153static bool
db8837cb 154symbol_check_alias_consistence (symbol_t *this)
72a23c97
AD
155{
156 /* Check only those who _are_ the aliases. */
157 if (this->alias && this->user_token_number == SALIAS)
158 {
159 if (this->prec != this->alias->prec)
160 {
161 if (this->prec != 0 && this->alias->prec != 0)
162 complain (_("conflicting precedences for %s and %s"),
163 this->tag, this->alias->tag);
164 if (this->prec != 0)
165 this->alias->prec = this->prec;
166 else
167 this->prec = this->alias->prec;
168 }
169
170 if (this->assoc != this->alias->assoc)
171 {
172 if (this->assoc != 0 && this->alias->assoc != 0)
173 complain (_("conflicting assoc values for %s and %s"),
174 this->tag, this->alias->tag);
175 if (this->assoc != 0)
176 this->alias->assoc = this->assoc;
177 else
178 this->assoc = this->alias->assoc;
179 }
180 }
181 return TRUE;
182}
183
184
185/*-------------------------------------------------------------------.
186| Assign a symbol number, and write the definition of the token name |
187| into FDEFINES. Put in SYMBOLS. |
188`-------------------------------------------------------------------*/
189
190static bool
db8837cb 191symbol_pack (symbol_t *this)
72a23c97 192{
72a23c97
AD
193 if (this->class == nterm_sym)
194 {
195 this->number += ntokens;
196 }
197 else if (this->alias)
198 {
199 /* This symbol and its alias are a single token defn.
200 Allocate a tokno, and assign to both check agreement of
201 prec and assoc fields and make both the same */
202 if (this->number == -1)
203 {
204 if (this == eoftoken || this->alias == eoftoken)
205 this->number = this->alias->number = 0;
206 else
207 {
208 assert (this->alias->number != -1);
209 this->number = this->alias->number;
210 }
211 }
212 /* Do not do processing below for SALIASs. */
213 if (this->user_token_number == SALIAS)
214 return TRUE;
215 }
216 else /* this->class == token_sym */
217 {
218 assert (this->number != -1);
219 }
220
72a23c97
AD
221 symbols[this->number] = this;
222 return TRUE;
223}
224
225
226
227
228/*--------------------------------------------------.
229| Put THIS in TOKEN_TRANSLATIONS if it is a token. |
230`--------------------------------------------------*/
231
232static bool
db8837cb 233symbol_translation (symbol_t *this)
72a23c97 234{
72a23c97
AD
235 /* Non-terminal? */
236 if (this->class == token_sym
237 && this->user_token_number != SALIAS)
238 {
239 /* A token which translation has already been set? */
240 if (token_translations[this->user_token_number] != 2)
241 complain (_("tokens %s and %s both assigned number %d"),
242 symbols[token_translations[this->user_token_number]]->tag,
243 this->tag, this->user_token_number);
244
72a23c97
AD
245 token_translations[this->user_token_number] = this->number;
246 }
247
248 return TRUE;
249}
0d533154 250\f
a70083a3 251
0d533154
AD
252/*===================\
253| Low level lexing. |
254\===================*/
943819bf
RS
255
256static void
118fb205 257skip_to_char (int target)
943819bf
RS
258{
259 int c;
260 if (target == '\n')
a0f6b076 261 complain (_(" Skipping to next \\n"));
943819bf 262 else
a0f6b076 263 complain (_(" Skipping to next %c"), target);
943819bf
RS
264
265 do
0d533154 266 c = skip_white_space ();
943819bf 267 while (c != target && c != EOF);
a083fbbf 268 if (c != EOF)
0d533154 269 ungetc (c, finput);
943819bf
RS
270}
271
272
0d533154
AD
/* Read a signed decimal integer from STREAM and return its value.

   The number is an optional `-' followed by decimal digits.  The
   first character that is not part of the number is pushed back on
   STREAM.  If there are no digits the result is 0 (a lone `-' is
   consumed nonetheless).

   The digits come straight from the user's grammar file, so the
   magnitude is clamped to INT_MAX instead of being allowed to
   overflow, which would be undefined behavior.  All the digits are
   consumed even once the value has saturated.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would 10 * n + digit exceed INT_MAX?  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a no-op.  */
  ungetc (c, stream);

  /* N is in [0, INT_MAX], so the product cannot overflow.  */
  return sign * n;
}
300\f
79282c5a
AD
301/*--------------------------------------------------------------.
302| Get the data type (alternative in the union) of the value for |
303| symbol N in rule RULE. |
304`--------------------------------------------------------------*/
305
306static char *
b29b2ed5 307get_type_name (int n, symbol_list *rule)
79282c5a
AD
308{
309 int i;
310 symbol_list *rp;
311
312 if (n < 0)
313 {
314 complain (_("invalid $ value"));
315 return NULL;
316 }
317
318 rp = rule;
319 i = 0;
320
321 while (i < n)
322 {
323 rp = rp->next;
324 if (rp == NULL || rp->sym == NULL)
325 {
326 complain (_("invalid $ value"));
327 return NULL;
328 }
f3849179 329 ++i;
79282c5a
AD
330 }
331
332 return rp->sym->type_name;
333}
334\f
337bab46
AD
335/*------------------------------------------------------------.
336| Dump the string from FIN to OOUT if non null. MATCH is the |
337| delimiter of the string (either ' or "). |
338`------------------------------------------------------------*/
ae3c3164
AD
339
340static inline void
b6610515 341copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
342{
343 int c;
344
b6610515
RA
345 if (store)
346 obstack_1grow (oout, match);
8c7ebe49 347
4a120d45 348 c = getc (fin);
ae3c3164
AD
349
350 while (c != match)
351 {
352 if (c == EOF)
353 fatal (_("unterminated string at end of file"));
354 if (c == '\n')
355 {
a0f6b076 356 complain (_("unterminated string"));
4a120d45 357 ungetc (c, fin);
ae3c3164
AD
358 c = match; /* invent terminator */
359 continue;
360 }
361
337bab46 362 obstack_1grow (oout, c);
ae3c3164
AD
363
364 if (c == '\\')
365 {
4a120d45 366 c = getc (fin);
ae3c3164
AD
367 if (c == EOF)
368 fatal (_("unterminated string at end of file"));
337bab46 369 obstack_1grow (oout, c);
8c7ebe49 370
ae3c3164 371 if (c == '\n')
f3849179 372 ++lineno;
ae3c3164
AD
373 }
374
a70083a3 375 c = getc (fin);
ae3c3164
AD
376 }
377
b6610515
RA
378 if (store)
379 obstack_1grow (oout, c);
380}
381
/* Copy a string literal delimited by MATCH from FIN to OOUT, both
   delimiters included.  The opening delimiter has already been
   read.  (FIXME note inherited from the original.)  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  /* Nonzero: keep the delimiters in the output.  */
  const int store_delimiters = 1;

  copy_string2 (fin, oout, match, store_delimiters);
}
389
b6610515
RA
/* Copy from FIN to OOUT the longest run of alphanumeric characters
   and underscores.  The first character that does not belong to the
   run is pushed back on FIN.  (FIXME note inherited from the
   original.)  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c = getc (fin);

  while (isalnum (c) || c == '_')
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
ae3c3164 402
2666f928
AD
403
404/*------------------------------------------------------------------.
405| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
406| `/', which might or might not be a comment. In any case, copy |
407| what we saw. |
408`------------------------------------------------------------------*/
ae3c3164
AD
409
410static inline void
2666f928 411copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
412{
413 int cplus_comment;
a70083a3 414 int ended;
550a72a3
AD
415 int c;
416
417 /* We read a `/', output it. */
2666f928 418 obstack_1grow (oout, '/');
550a72a3
AD
419
420 switch ((c = getc (fin)))
421 {
422 case '/':
423 cplus_comment = 1;
424 break;
425 case '*':
426 cplus_comment = 0;
427 break;
428 default:
429 ungetc (c, fin);
430 return;
431 }
ae3c3164 432
2666f928 433 obstack_1grow (oout, c);
550a72a3 434 c = getc (fin);
ae3c3164
AD
435
436 ended = 0;
437 while (!ended)
438 {
439 if (!cplus_comment && c == '*')
440 {
441 while (c == '*')
442 {
2666f928 443 obstack_1grow (oout, c);
550a72a3 444 c = getc (fin);
ae3c3164
AD
445 }
446
447 if (c == '/')
448 {
2666f928 449 obstack_1grow (oout, c);
ae3c3164
AD
450 ended = 1;
451 }
452 }
453 else if (c == '\n')
454 {
f3849179 455 ++lineno;
2666f928 456 obstack_1grow (oout, c);
ae3c3164
AD
457 if (cplus_comment)
458 ended = 1;
459 else
550a72a3 460 c = getc (fin);
ae3c3164
AD
461 }
462 else if (c == EOF)
463 fatal (_("unterminated comment"));
464 else
465 {
2666f928 466 obstack_1grow (oout, c);
550a72a3 467 c = getc (fin);
ae3c3164
AD
468 }
469 }
470}
471
472
a70083a3 473/*-----------------------------------------------------------------.
337bab46 474| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
475| reference to this location. STACK_OFFSET is the number of values |
476| in the current rule so far, which says where to find `$0' with |
477| respect to the top of the stack. |
478`-----------------------------------------------------------------*/
1ff442ca 479
a70083a3 480static inline void
337bab46 481copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 482{
a70083a3 483 int c;
1ff442ca 484
a70083a3
AD
485 c = getc (fin);
486 if (c == '$')
1ff442ca 487 {
ff4423cc 488 obstack_sgrow (oout, "yyloc");
89cab50d 489 locations_flag = 1;
a70083a3
AD
490 }
491 else if (isdigit (c) || c == '-')
492 {
493 int n;
1ff442ca 494
a70083a3
AD
495 ungetc (c, fin);
496 n = read_signed_integer (fin);
11e2beca
AD
497 if (n > stack_offset)
498 complain (_("invalid value: %s%d"), "@", n);
499 else
500 {
501 /* Offset is always 0 if parser has already popped the stack
502 pointer. */
503 obstack_fgrow1 (oout, "yylsp[%d]",
504 n - (semantic_parser ? 0 : stack_offset));
505 locations_flag = 1;
506 }
1ff442ca 507 }
a70083a3 508 else
ff4a34be
AD
509 {
510 char buf[] = "@c";
511 buf[1] = c;
512 complain (_("%s is invalid"), quote (buf));
513 }
1ff442ca 514}
79282c5a
AD
515
516
517/*-------------------------------------------------------------------.
518| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
519| |
520| Possible inputs: $[<TYPENAME>]($|integer) |
521| |
337bab46 522| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
523| the number of values in the current rule so far, which says where |
524| to find `$0' with respect to the top of the stack. |
525`-------------------------------------------------------------------*/
526
527static inline void
337bab46 528copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
529 symbol_list *rule, int stack_offset)
530{
531 int c = getc (fin);
b0ce6046 532 const char *type_name = NULL;
79282c5a 533
f282676b 534 /* Get the type name if explicit. */
79282c5a
AD
535 if (c == '<')
536 {
f282676b 537 read_type_name (fin);
79282c5a
AD
538 type_name = token_buffer;
539 value_components_used = 1;
79282c5a
AD
540 c = getc (fin);
541 }
542
543 if (c == '$')
544 {
ff4423cc 545 obstack_sgrow (oout, "yyval");
8c7ebe49 546
79282c5a
AD
547 if (!type_name)
548 type_name = get_type_name (0, rule);
549 if (type_name)
337bab46 550 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
551 if (!type_name && typed)
552 complain (_("$$ of `%s' has no declared type"),
553 rule->sym->tag);
554 }
555 else if (isdigit (c) || c == '-')
556 {
557 int n;
558 ungetc (c, fin);
559 n = read_signed_integer (fin);
560
11e2beca
AD
561 if (n > stack_offset)
562 complain (_("invalid value: %s%d"), "$", n);
563 else
564 {
565 if (!type_name && n > 0)
566 type_name = get_type_name (n, rule);
567
568 /* Offset is always 0 if parser has already popped the stack
569 pointer. */
570 obstack_fgrow1 (oout, "yyvsp[%d]",
571 n - (semantic_parser ? 0 : stack_offset));
572
573 if (type_name)
574 obstack_fgrow1 (oout, ".%s", type_name);
575 if (!type_name && typed)
576 complain (_("$%d of `%s' has no declared type"),
577 n, rule->sym->tag);
578 }
79282c5a
AD
579 }
580 else
581 {
582 char buf[] = "$c";
583 buf[1] = c;
584 complain (_("%s is invalid"), quote (buf));
585 }
586}
a70083a3
AD
587\f
588/*-------------------------------------------------------------------.
589| Copy the contents of a `%{ ... %}' into the definitions file. The |
590| `%{' has already been read. Return after reading the `%}'. |
591`-------------------------------------------------------------------*/
1ff442ca 592
4a120d45 593static void
118fb205 594copy_definition (void)
1ff442ca 595{
a70083a3 596 int c;
ae3c3164 597 /* -1 while reading a character if prev char was %. */
a70083a3 598 int after_percent;
1ff442ca 599
89cab50d 600 if (!no_lines_flag)
25b222fa
MA
601 {
602 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
342b8b6e 603 lineno, quotearg_style (c_quoting_style,
b7c49edf 604 muscle_find ("filename")));
25b222fa 605 }
1ff442ca
NF
606
607 after_percent = 0;
608
ae3c3164 609 c = getc (finput);
1ff442ca
NF
610
611 for (;;)
612 {
613 switch (c)
614 {
615 case '\n':
dd60faec 616 obstack_1grow (&attrs_obstack, c);
f3849179 617 ++lineno;
1ff442ca
NF
618 break;
619
620 case '%':
a70083a3 621 after_percent = -1;
1ff442ca 622 break;
a083fbbf 623
1ff442ca
NF
624 case '\'':
625 case '"':
337bab46 626 copy_string (finput, &attrs_obstack, c);
1ff442ca
NF
627 break;
628
629 case '/':
337bab46 630 copy_comment (finput, &attrs_obstack);
1ff442ca
NF
631 break;
632
633 case EOF:
a70083a3 634 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
635
636 default:
dd60faec 637 obstack_1grow (&attrs_obstack, c);
1ff442ca
NF
638 }
639
a70083a3 640 c = getc (finput);
1ff442ca
NF
641
642 if (after_percent)
643 {
644 if (c == '}')
645 return;
dd60faec 646 obstack_1grow (&attrs_obstack, '%');
1ff442ca
NF
647 }
648 after_percent = 0;
1ff442ca 649 }
1ff442ca
NF
650}
651
652
d7020c20
AD
653/*-------------------------------------------------------------------.
654| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
655| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
656| are reversed. |
657`-------------------------------------------------------------------*/
1ff442ca 658
4a120d45 659static void
d7020c20 660parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 661{
342b8b6e
AD
662 token_t token = tok_undef;
663 char *typename = NULL;
1ff442ca 664
1e9798d5 665 /* The symbol being defined. */
db8837cb 666 symbol_t *symbol = NULL;
1e9798d5
AD
667
668 /* After `%token' and `%nterm', any number of symbols maybe be
669 defined. */
1ff442ca
NF
670 for (;;)
671 {
e6011337
JT
672 int tmp_char = ungetc (skip_white_space (), finput);
673
1e9798d5
AD
674 /* `%' (for instance from `%token', or from `%%' etc.) is the
675 only valid means to end this declaration. */
e6011337 676 if (tmp_char == '%')
1ff442ca 677 return;
e6011337 678 if (tmp_char == EOF)
a0f6b076 679 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 680
a70083a3 681 token = lex ();
511e79b3 682 if (token == tok_comma)
943819bf
RS
683 {
684 symbol = NULL;
685 continue;
686 }
511e79b3 687 if (token == tok_typename)
1ff442ca 688 {
95e36146 689 typename = xstrdup (token_buffer);
1ff442ca 690 value_components_used = 1;
943819bf
RS
691 symbol = NULL;
692 }
511e79b3 693 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 694 {
db8837cb 695 symbol_make_alias (symbol, typename);
8e03724b 696 symbol = NULL;
1ff442ca 697 }
511e79b3 698 else if (token == tok_identifier)
1ff442ca
NF
699 {
700 int oldclass = symval->class;
943819bf 701 symbol = symval;
1ff442ca 702
943819bf 703 if (symbol->class == what_is_not)
a0f6b076 704 complain (_("symbol %s redefined"), symbol->tag);
943819bf 705 symbol->class = what_is;
d7020c20 706 if (what_is == nterm_sym && oldclass != nterm_sym)
d9b739c3 707 symbol->number = nvars++;
72a23c97 708 if (what_is == token_sym && symbol->number == -1)
bd02036a 709 symbol->number = ntokens++;
1ff442ca
NF
710
711 if (typename)
712 {
943819bf
RS
713 if (symbol->type_name == NULL)
714 symbol->type_name = typename;
a70083a3 715 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 716 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
717 }
718 }
511e79b3 719 else if (symbol && token == tok_number)
a70083a3 720 {
943819bf 721 symbol->user_token_number = numval;
b7c49edf
AD
722 /* User defined EOF token? */
723 if (numval == 0)
72a23c97
AD
724 {
725 eoftoken = symbol;
726 eoftoken->number = 0;
727 /* It is always mapped to 0, so it was already counted in
728 NTOKENS. */
729 --ntokens;
730 }
a70083a3 731 }
1ff442ca 732 else
943819bf 733 {
a0f6b076 734 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
735 token_buffer,
736 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 737 skip_to_char ('%');
943819bf 738 }
1ff442ca
NF
739 }
740
741}
742
1ff442ca 743
d7020c20
AD
744/*------------------------------.
745| Parse what comes after %start |
746`------------------------------*/
1ff442ca 747
4a120d45 748static void
118fb205 749parse_start_decl (void)
1ff442ca
NF
750{
751 if (start_flag)
27821bff 752 complain (_("multiple %s declarations"), "%start");
511e79b3 753 if (lex () != tok_identifier)
27821bff 754 complain (_("invalid %s declaration"), "%start");
943819bf
RS
755 else
756 {
757 start_flag = 1;
758 startval = symval;
759 }
1ff442ca
NF
760}
761
a70083a3
AD
762/*-----------------------------------------------------------.
763| read in a %type declaration and record its information for |
764| get_type_name to access |
765`-----------------------------------------------------------*/
766
767static void
768parse_type_decl (void)
769{
a70083a3
AD
770 char *name;
771
511e79b3 772 if (lex () != tok_typename)
a70083a3
AD
773 {
774 complain ("%s", _("%type declaration has no <typename>"));
775 skip_to_char ('%');
776 return;
777 }
778
95e36146 779 name = xstrdup (token_buffer);
a70083a3
AD
780
781 for (;;)
782 {
f17bcd1f 783 token_t t;
a70083a3
AD
784 int tmp_char = ungetc (skip_white_space (), finput);
785
786 if (tmp_char == '%')
787 return;
788 if (tmp_char == EOF)
789 fatal (_("Premature EOF after %s"), token_buffer);
790
791 t = lex ();
792
793 switch (t)
1ff442ca
NF
794 {
795
511e79b3
AD
796 case tok_comma:
797 case tok_semicolon:
1ff442ca
NF
798 break;
799
511e79b3 800 case tok_identifier:
1ff442ca
NF
801 if (symval->type_name == NULL)
802 symval->type_name = name;
a70083a3 803 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 804 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
805
806 break;
807
808 default:
a0f6b076
AD
809 complain (_("invalid %%type declaration due to item: %s"),
810 token_buffer);
a70083a3 811 skip_to_char ('%');
1ff442ca
NF
812 }
813 }
814}
815
816
817
d7020c20
AD
818/*----------------------------------------------------------------.
819| Read in a %left, %right or %nonassoc declaration and record its |
820| information. |
821`----------------------------------------------------------------*/
1ff442ca 822
4a120d45 823static void
d7020c20 824parse_assoc_decl (associativity assoc)
1ff442ca 825{
a70083a3
AD
826 char *name = NULL;
827 int prev = 0;
1ff442ca 828
f3849179
AD
829 /* Assign a new precedence level, never 0. */
830 ++lastprec;
1ff442ca 831
1ff442ca
NF
832 for (;;)
833 {
f17bcd1f 834 token_t t;
e6011337 835 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 836
e6011337 837 if (tmp_char == '%')
1ff442ca 838 return;
e6011337 839 if (tmp_char == EOF)
a0f6b076 840 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 841
a70083a3 842 t = lex ();
1ff442ca
NF
843
844 switch (t)
845 {
511e79b3 846 case tok_typename:
95e36146 847 name = xstrdup (token_buffer);
1ff442ca
NF
848 break;
849
511e79b3 850 case tok_comma:
1ff442ca
NF
851 break;
852
511e79b3 853 case tok_identifier:
1ff442ca 854 if (symval->prec != 0)
a0f6b076 855 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
856 symval->prec = lastprec;
857 symval->assoc = assoc;
d7020c20 858 if (symval->class == nterm_sym)
a0f6b076 859 complain (_("symbol %s redefined"), symval->tag);
72a23c97
AD
860 if (symval->number == -1)
861 {
862 symval->number = ntokens++;
863 symval->class = token_sym;
864 }
1ff442ca 865 if (name)
a70083a3 866 { /* record the type, if one is specified */
1ff442ca
NF
867 if (symval->type_name == NULL)
868 symval->type_name = name;
a70083a3 869 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 870 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
871 }
872 break;
873
511e79b3
AD
874 case tok_number:
875 if (prev == tok_identifier)
a70083a3 876 {
1ff442ca 877 symval->user_token_number = numval;
a70083a3
AD
878 }
879 else
880 {
72a23c97
AD
881 complain
882 (_("invalid text (%s) - number should be after identifier"),
883 token_buffer);
a70083a3
AD
884 skip_to_char ('%');
885 }
1ff442ca
NF
886 break;
887
511e79b3 888 case tok_semicolon:
1ff442ca
NF
889 return;
890
891 default:
a0f6b076 892 complain (_("unexpected item: %s"), token_buffer);
a70083a3 893 skip_to_char ('%');
1ff442ca
NF
894 }
895
896 prev = t;
1ff442ca
NF
897 }
898}
899
900
901
dd60faec 902/*--------------------------------------------------------------.
180d45ba
PB
903| Copy the union declaration into the stype muscle |
904| (and fdefines), where it is made into the definition of |
905| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 906`--------------------------------------------------------------*/
1ff442ca 907
4a120d45 908static void
118fb205 909parse_union_decl (void)
1ff442ca 910{
a70083a3
AD
911 int c;
912 int count = 0;
428046f8 913 bool done = FALSE;
180d45ba 914 struct obstack union_obstack;
1ff442ca 915 if (typed)
27821bff 916 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
917
918 typed = 1;
919
642cb8f8 920 MUSCLE_INSERT_INT ("stype_line", lineno);
180d45ba
PB
921 obstack_init (&union_obstack);
922 obstack_sgrow (&union_obstack, "union");
1ff442ca 923
428046f8 924 while (!done)
1ff442ca 925 {
428046f8
AD
926 c = xgetc (finput);
927
342b8b6e
AD
928 /* If C contains '/', it is output by copy_comment (). */
929 if (c != '/')
2666f928 930 obstack_1grow (&union_obstack, c);
1ff442ca
NF
931
932 switch (c)
933 {
934 case '\n':
f3849179 935 ++lineno;
1ff442ca
NF
936 break;
937
938 case '/':
2666f928 939 copy_comment (finput, &union_obstack);
1ff442ca
NF
940 break;
941
1ff442ca 942 case '{':
f3849179 943 ++count;
1ff442ca
NF
944 break;
945
946 case '}':
428046f8 947 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 948 if (count == 0)
27821bff 949 complain (_("unmatched %s"), "`}'");
1ff442ca 950 count--;
428046f8
AD
951 if (!count)
952 done = TRUE;
953 break;
1ff442ca 954 }
1ff442ca 955 }
180d45ba 956
428046f8
AD
957 /* JF don't choke on trailing semi */
958 c = skip_white_space ();
959 if (c != ';')
960 ungetc (c, finput);
961 obstack_1grow (&union_obstack, 0);
962 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
963}
964
d7020c20
AD
965
966/*-------------------------------------------------------.
967| Parse the declaration %expect N which says to expect N |
968| shift-reduce conflicts. |
969`-------------------------------------------------------*/
1ff442ca 970
4a120d45 971static void
118fb205 972parse_expect_decl (void)
1ff442ca 973{
131e2fef 974 int c = skip_white_space ();
1ff442ca
NF
975 ungetc (c, finput);
976
131e2fef 977 if (!isdigit (c))
79282c5a 978 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
979 else
980 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
981}
982
a70083a3
AD
983
984/*-------------------------------------------------------------------.
985| Parse what comes after %thong. the full syntax is |
986| |
987| %thong <type> token number literal |
988| |
989| the <type> or number may be omitted. The number specifies the |
990| user_token_number. |
991| |
992| Two symbols are entered in the table, one for the token symbol and |
993| one for the literal. Both are given the <type>, if any, from the |
994| declaration. The ->user_token_number of the first is SALIAS and |
995| the ->user_token_number of the second is set to the number, if |
996| any, from the declaration. The two symbols are linked via |
997| pointers in their ->alias fields. |
998| |
999| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
1000| only the literal string is retained it is the literal string that |
1001| is output to yytname |
1002`-------------------------------------------------------------------*/
1003
1004static void
1005parse_thong_decl (void)
7b306f52 1006{
f17bcd1f 1007 token_t token;
db8837cb 1008 symbol_t *symbol;
a70083a3 1009 char *typename = 0;
6b7e85b9 1010 int usrtoknum = SUNDEF;
7b306f52 1011
a70083a3 1012 token = lex (); /* fetch typename or first token */
511e79b3 1013 if (token == tok_typename)
7b306f52 1014 {
95e36146 1015 typename = xstrdup (token_buffer);
a70083a3
AD
1016 value_components_used = 1;
1017 token = lex (); /* fetch first token */
7b306f52 1018 }
7b306f52 1019
a70083a3 1020 /* process first token */
7b306f52 1021
511e79b3 1022 if (token != tok_identifier)
a70083a3
AD
1023 {
1024 complain (_("unrecognized item %s, expected an identifier"),
1025 token_buffer);
1026 skip_to_char ('%');
1027 return;
7b306f52 1028 }
d7020c20 1029 symval->class = token_sym;
a70083a3
AD
1030 symval->type_name = typename;
1031 symval->user_token_number = SALIAS;
1032 symbol = symval;
7b306f52 1033
a70083a3 1034 token = lex (); /* get number or literal string */
1ff442ca 1035
511e79b3 1036 if (token == tok_number)
943819bf 1037 {
a70083a3
AD
1038 usrtoknum = numval;
1039 token = lex (); /* okay, did number, now get literal */
943819bf 1040 }
1ff442ca 1041
a70083a3 1042 /* process literal string token */
1ff442ca 1043
511e79b3 1044 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 1045 {
a70083a3
AD
1046 complain (_("expected string constant instead of %s"), token_buffer);
1047 skip_to_char ('%');
1048 return;
1ff442ca 1049 }
d7020c20 1050 symval->class = token_sym;
a70083a3
AD
1051 symval->type_name = typename;
1052 symval->user_token_number = usrtoknum;
1ff442ca 1053
a70083a3
AD
1054 symval->alias = symbol;
1055 symbol->alias = symval;
1ff442ca 1056
79282c5a
AD
1057 /* symbol and symval combined are only one symbol. */
1058 nsyms--;
a70083a3 1059}
3cef001a 1060
11e2beca 1061
b6610515 1062static void
11d82f03 1063parse_muscle_decl (void)
b6610515
RA
1064{
1065 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
1066 char *muscle_key;
1067 char *muscle_value;
b6610515
RA
1068
1069 /* Read key. */
1070 if (!isalpha (ch) && ch != '_')
1071 {
1072 complain (_("invalid %s declaration"), "%define");
1073 skip_to_char ('%');
1074 return;
1075 }
11d82f03
MA
1076 copy_identifier (finput, &muscle_obstack);
1077 obstack_1grow (&muscle_obstack, 0);
1078 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 1079
b6610515
RA
1080 /* Read value. */
1081 ch = skip_white_space ();
1082 if (ch != '"')
1083 {
1084 ungetc (ch, finput);
1085 if (ch != EOF)
1086 {
1087 complain (_("invalid %s declaration"), "%define");
1088 skip_to_char ('%');
1089 return;
1090 }
1091 else
1092 fatal (_("Premature EOF after %s"), "\"");
1093 }
11d82f03
MA
1094 copy_string2 (finput, &muscle_obstack, '"', 0);
1095 obstack_1grow (&muscle_obstack, 0);
1096 muscle_value = obstack_finish (&muscle_obstack);
b6610515 1097
b6610515 1098 /* Store the (key, value) pair in the environment. */
11d82f03 1099 muscle_insert (muscle_key, muscle_value);
b6610515
RA
1100}
1101
2ba3b73c 1102
426cf563
MA
1103
1104/*---------------------------------.
a870c567 1105| Parse a double quoted parameter. |
426cf563
MA
1106`---------------------------------*/
1107
1108static const char *
1109parse_dquoted_param (const char *from)
1110{
1111 struct obstack param_obstack;
1112 const char *param = NULL;
1113 int c;
1114
1115 obstack_init (&param_obstack);
1116 c = skip_white_space ();
1117
1118 if (c != '"')
1119 {
1120 complain (_("invalid %s declaration"), from);
1121 ungetc (c, finput);
1122 skip_to_char ('%');
1123 return NULL;
1124 }
1125
2648a72d
AD
1126 while ((c = literalchar ()) != '"')
1127 obstack_1grow (&param_obstack, c);
a870c567 1128
426cf563
MA
1129 obstack_1grow (&param_obstack, '\0');
1130 param = obstack_finish (&param_obstack);
1131
1132 if (c != '"' || strlen (param) == 0)
1133 {
1134 complain (_("invalid %s declaration"), from);
1135 if (c != '"')
1136 ungetc (c, finput);
1137 skip_to_char ('%');
1138 return NULL;
1139 }
1140
1141 return param;
1142}
1143
2ba3b73c
MA
1144/*----------------------------------.
1145| Parse what comes after %skeleton. |
1146`----------------------------------*/
1147
a870c567 1148static void
2ba3b73c
MA
1149parse_skel_decl (void)
1150{
426cf563 1151 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
1152}
1153
a70083a3
AD
1154/*----------------------------------------------------------------.
1155| Read from finput until `%%' is seen. Discard the `%%'. Handle |
1156| any `%' declarations, and copy the contents of any `%{ ... %}' |
dd60faec 1157| groups to ATTRS_OBSTACK. |
a70083a3 1158`----------------------------------------------------------------*/
1ff442ca 1159
4a120d45 1160static void
a70083a3 1161read_declarations (void)
1ff442ca 1162{
a70083a3 1163 for (;;)
1ff442ca 1164 {
951366c1 1165 int c = skip_white_space ();
1ff442ca 1166
a70083a3
AD
1167 if (c == '%')
1168 {
951366c1 1169 token_t tok = parse_percent_token ();
1ff442ca 1170
a70083a3 1171 switch (tok)
943819bf 1172 {
511e79b3 1173 case tok_two_percents:
a70083a3 1174 return;
1ff442ca 1175
511e79b3 1176 case tok_percent_left_curly:
a70083a3
AD
1177 copy_definition ();
1178 break;
1ff442ca 1179
511e79b3 1180 case tok_token:
d7020c20 1181 parse_token_decl (token_sym, nterm_sym);
a70083a3 1182 break;
1ff442ca 1183
511e79b3 1184 case tok_nterm:
d7020c20 1185 parse_token_decl (nterm_sym, token_sym);
a70083a3 1186 break;
1ff442ca 1187
511e79b3 1188 case tok_type:
a70083a3
AD
1189 parse_type_decl ();
1190 break;
1ff442ca 1191
511e79b3 1192 case tok_start:
a70083a3
AD
1193 parse_start_decl ();
1194 break;
118fb205 1195
511e79b3 1196 case tok_union:
a70083a3
AD
1197 parse_union_decl ();
1198 break;
1ff442ca 1199
511e79b3 1200 case tok_expect:
a70083a3
AD
1201 parse_expect_decl ();
1202 break;
6deb4447 1203
511e79b3 1204 case tok_thong:
a70083a3
AD
1205 parse_thong_decl ();
1206 break;
d7020c20 1207
511e79b3 1208 case tok_left:
d7020c20 1209 parse_assoc_decl (left_assoc);
a70083a3 1210 break;
1ff442ca 1211
511e79b3 1212 case tok_right:
d7020c20 1213 parse_assoc_decl (right_assoc);
a70083a3 1214 break;
1ff442ca 1215
511e79b3 1216 case tok_nonassoc:
d7020c20 1217 parse_assoc_decl (non_assoc);
a70083a3 1218 break;
1ff442ca 1219
b6610515 1220 case tok_define:
11d82f03 1221 parse_muscle_decl ();
b6610515 1222 break;
342b8b6e 1223
2ba3b73c
MA
1224 case tok_skel:
1225 parse_skel_decl ();
1226 break;
b6610515 1227
511e79b3 1228 case tok_noop:
a70083a3 1229 break;
1ff442ca 1230
951366c1
AD
1231 case tok_stropt:
1232 case tok_intopt:
1233 case tok_obsolete:
72a23c97 1234 assert (0);
951366c1
AD
1235 break;
1236
e0c40012 1237 case tok_illegal:
a70083a3
AD
1238 default:
1239 complain (_("unrecognized: %s"), token_buffer);
1240 skip_to_char ('%');
1241 }
1242 }
1243 else if (c == EOF)
1244 fatal (_("no input grammar"));
1245 else
1246 {
ff4a34be
AD
1247 char buf[] = "c";
1248 buf[0] = c;
1249 complain (_("unknown character: %s"), quote (buf));
a70083a3 1250 skip_to_char ('%');
1ff442ca 1251 }
1ff442ca 1252 }
1ff442ca 1253}
a70083a3
AD
1254\f
1255/*-------------------------------------------------------------------.
1256| Assuming that a `{' has just been seen, copy everything up to the |
1257| matching `}' into the actions file. STACK_OFFSET is the number of |
1258| values in the current rule so far, which says where to find `$0' |
1259| with respect to the top of the stack. |
14d293ac 1260| |
11e2beca
AD
1261| This routine is used both for actions and guards. Only |
1262| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1263| pointers to relevant portions inside this obstack. |
a70083a3 1264`-------------------------------------------------------------------*/
1ff442ca 1265
4a120d45 1266static void
14d293ac 1267parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1268{
a70083a3 1269 int c;
a70083a3 1270 int count;
1ff442ca 1271
1ff442ca 1272 count = 1;
1ff442ca
NF
1273 while (count > 0)
1274 {
14d293ac
AD
1275 while ((c = getc (finput)) != '}')
1276 switch (c)
1277 {
1278 case '\n':
1279 obstack_1grow (&action_obstack, c);
f3849179 1280 ++lineno;
14d293ac 1281 break;
1ff442ca 1282
14d293ac
AD
1283 case '{':
1284 obstack_1grow (&action_obstack, c);
f3849179 1285 ++count;
14d293ac 1286 break;
1ff442ca 1287
14d293ac
AD
1288 case '\'':
1289 case '"':
1290 copy_string (finput, &action_obstack, c);
1291 break;
1ff442ca 1292
14d293ac
AD
1293 case '/':
1294 copy_comment (finput, &action_obstack);
1295 break;
1ff442ca 1296
14d293ac
AD
1297 case '$':
1298 copy_dollar (finput, &action_obstack,
1299 rule, stack_offset);
1300 break;
1ff442ca 1301
14d293ac
AD
1302 case '@':
1303 copy_at (finput, &action_obstack,
1304 stack_offset);
1305 break;
a70083a3 1306
14d293ac
AD
1307 case EOF:
1308 fatal (_("unmatched %s"), "`{'");
a70083a3 1309
14d293ac
AD
1310 default:
1311 obstack_1grow (&action_obstack, c);
1312 }
a70083a3 1313
14d293ac 1314 /* Above loop exits when C is '}'. */
a70083a3 1315 if (--count)
2b25d624 1316 obstack_1grow (&action_obstack, c);
a70083a3
AD
1317 }
1318
3f96f4dc 1319 obstack_1grow (&action_obstack, '\0');
a70083a3 1320}
14d293ac 1321
a70083a3
AD
1322
1323static void
14d293ac 1324parse_action (symbol_list *rule, int stack_offset)
a70083a3 1325{
14d293ac
AD
1326 rule->action_line = lineno;
1327 parse_braces (rule, stack_offset);
1328 rule->action = obstack_finish (&action_obstack);
1329}
a70083a3 1330
a70083a3 1331
14d293ac
AD
1332static void
1333parse_guard (symbol_list *rule, int stack_offset)
1334{
1335 token_t t = lex ();
1336 if (t != tok_left_curly)
1337 complain (_("invalid %s declaration"), "%guard");
f499b062 1338 rule->guard_line = lineno;
14d293ac
AD
1339 parse_braces (rule, stack_offset);
1340 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1341}
14d293ac 1342
a70083a3
AD
1343\f
1344
a70083a3
AD
1345/*-------------------------------------------------------------------.
1346| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1347| with the user's names. |
1348`-------------------------------------------------------------------*/
1ff442ca 1349
db8837cb 1350static symbol_t *
118fb205 1351gensym (void)
1ff442ca 1352{
274d42ce
AD
1353 /* Incremented for each generated symbol */
1354 static int gensym_count = 0;
1355 static char buf[256];
1356
db8837cb 1357 symbol_t *sym;
1ff442ca 1358
274d42ce
AD
1359 sprintf (buf, "@%d", ++gensym_count);
1360 token_buffer = buf;
a70083a3 1361 sym = getsym (token_buffer);
d7020c20 1362 sym->class = nterm_sym;
d9b739c3 1363 sym->number = nvars++;
36281465 1364 return sym;
1ff442ca 1365}
a70083a3 1366\f
107f7dfb
AD
1367/*-------------------------------------------------------------------.
1368| Parse the input grammar into one symbol_list structure. Each |
1369| rule is represented by a sequence of symbols: the left hand side |
1370| followed by the contents of the right hand side, followed by a |
1371| null pointer instead of a symbol to terminate the rule. The next |
1372| symbol is the lhs of the following rule. |
1373| |
1374| Actions and guards are attached to the symbol_list node that |
1375| starts the rule they belong to (see parse_action, parse_guard). |
1376| |
1377| Bison used to allow some %directives in the rules sections, but |
1378| this is no longer considered appropriate: (i) the documented |
1379| grammar doesn't claim it, (ii), it would promote bad style, (iii), |
1380| error recovery for %directives consists in skipping the junk until |
1381| a `%' is seen, to help synchronizing. This scheme is definitely |
1382| wrong in the rules section. |
1383`-------------------------------------------------------------------*/
1ff442ca 1384
4a120d45 1385static void
118fb205 1386readgram (void)
1ff442ca 1387{
f17bcd1f 1388 token_t t;
db8837cb 1389 symbol_t *lhs = NULL;
107f7dfb
AD
/* P is the node being built, P1 the current tail of the whole list. */
1390 symbol_list *p = NULL;
1391 symbol_list *p1 = NULL;
1ff442ca 1392
ff4a34be
AD
1393 /* Points to first symbol_list of current rule. its symbol is the
1394 lhs of the rule. */
107f7dfb 1395 symbol_list *crule = NULL;
ff4a34be 1396 /* Points to the symbol_list preceding crule. */
107f7dfb 1397 symbol_list *crule1 = NULL;
1ff442ca 1398
a70083a3 1399 t = lex ();
1ff442ca 1400
511e79b3 1401 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
/* An identifier starts a rule with a new lhs; a bar starts another
   rule for the lhs of the previous one. */
1402 if (t == tok_identifier || t == tok_bar)
1403 {
1404 int action_flag = 0;
1405 /* Number of symbols in rhs of this rule so far */
1406 int rulelength = 0;
1407 int xactions = 0; /* JF for error checking */
db8837cb 1408 symbol_t *first_rhs = 0;
107f7dfb
AD
1409
1410 if (t == tok_identifier)
1411 {
/* NOTE(review): symval is presumably set by lex () to the symbol of
   the identifier just read -- confirm in lex.c. */
1412 lhs = symval;
1413
/* Without a start symbol so far, the lhs of the first rule is it. */
1414 if (!start_flag)
1415 {
1416 startval = lhs;
1417 start_flag = 1;
1418 }
1ff442ca 1419
107f7dfb
AD
1420 t = lex ();
1421 if (t != tok_colon)
1422 {
1423 complain (_("ill-formed rule: initial symbol not followed by colon"));
1424 unlex (t);
1425 }
1426 }
1427
1428 if (nrules == 0 && t == tok_bar)
1429 {
1430 complain (_("grammar starts with vertical bar"));
/* NOTE(review): if no identifier was lexed before this point, symval
   may be null and the lhs->class test below would dereference it --
   to be confirmed. */
1431 lhs = symval; /* BOGUS: use a random symval */
1432 }
1433 /* start a new rule and record its lhs. */
1434
f3849179 1435 ++nrules;
5123689b 1436 ++nritems;
107f7dfb
AD
1437
1438 p = symbol_list_new (lhs);
1439
/* Append the new rule to the list, remembering its predecessor in
   CRULE1 so that a dummy rule can later be inserted before it. */
1440 crule1 = p1;
1441 if (p1)
1442 p1->next = p;
1443 else
1444 grammar = p;
1ff442ca 1445
107f7dfb
AD
1446 p1 = p;
1447 crule = p;
1ff442ca 1448
107f7dfb 1449 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1450
107f7dfb
AD
1451 if (lhs->class == unknown_sym)
1452 {
1453 lhs->class = nterm_sym;
d9b739c3 1454 lhs->number = nvars;
f3849179 1455 ++nvars;
107f7dfb
AD
1456 }
1457 else if (lhs->class == token_sym)
1458 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1459
107f7dfb 1460 /* read the rhs of the rule. */
1ff442ca 1461
107f7dfb
AD
1462 for (;;)
1463 {
1464 t = lex ();
1465 if (t == tok_prec)
1466 {
/* NOTE(review): the token read after %prec is not checked to be an
   identifier before symval is used. */
1467 t = lex ();
1468 crule->ruleprec = symval;
1469 t = lex ();
1470 }
1471
1472 if (!(t == tok_identifier || t == tok_left_curly))
1473 break;
1ff442ca 1474
107f7dfb
AD
1475 /* If next token is an identifier, see if a colon follows it.
1476 If one does, exit this rule now. */
1477 if (t == tok_identifier)
1478 {
db8837cb 1479 symbol_t *ssave;
107f7dfb
AD
1480 token_t t1;
1481
/* Peek at the next token: read it, push it back, and restore symval
   which is saved around the call to lex (). */
1482 ssave = symval;
1483 t1 = lex ();
1484 unlex (t1);
1485 symval = ssave;
1486 if (t1 == tok_colon)
e5352bc7 1487 {
fff9bf0b 1488 warn (_("previous rule lacks an ending `;'"));
e5352bc7
AD
1489 break;
1490 }
107f7dfb
AD
1491
1492 if (!first_rhs) /* JF */
1493 first_rhs = symval;
1494 /* Not followed by colon =>
1495 process as part of this rule's rhs. */
1496 }
1497
1498 /* If we just passed an action, that action was in the middle
1499 of a rule, so make a dummy rule to reduce it to a
1500 non-terminal. */
1501 if (action_flag)
1502 {
1503 /* Since the action was written out with this rule's
1504 number, we must give the new rule this number by
1505 inserting the new rule before it. */
1506
1507 /* Make a dummy nonterminal, a gensym. */
db8837cb 1508 symbol_t *sdummy = gensym ();
107f7dfb
AD
1509
1510 /* Make a new rule, whose body is empty, before the
1511 current one, so that the action just read can
1512 belong to it. */
f3849179 1513 ++nrules;
5123689b 1514 ++nritems;
107f7dfb
AD
1515 p = symbol_list_new (sdummy);
1516 /* Attach its lineno to that of the host rule. */
1517 p->line = crule->line;
82c035a8
AD
1518 /* Move the action from the host rule to this one. */
1519 p->action = crule->action;
1520 p->action_line = crule->action_line;
1521 crule->action = NULL;
1522
107f7dfb
AD
1523 if (crule1)
1524 crule1->next = p;
1525 else
1526 grammar = p;
1527 /* End of the rule. */
1528 crule1 = symbol_list_new (NULL);
1529 crule1->next = crule;
1530
1531 p->next = crule1;
1532
1533 /* Insert the dummy generated by that rule into this
1534 rule. */
5123689b 1535 ++nritems;
107f7dfb
AD
1536 p = symbol_list_new (sdummy);
1537 p1->next = p;
1538 p1 = p;
1539
1540 action_flag = 0;
1541 }
1542
1543 if (t == tok_identifier)
1544 {
5123689b 1545 ++nritems;
107f7dfb
AD
1546 p = symbol_list_new (symval);
1547 p1->next = p;
1548 p1 = p;
1549 }
1550 else /* handle an action. */
1551 {
14d293ac 1552 parse_action (crule, rulelength);
107f7dfb 1553 action_flag = 1;
f3849179 1554 ++xactions; /* JF */
107f7dfb 1555 }
f3849179 1556 ++rulelength;
107f7dfb
AD
1557 } /* end of read rhs of rule */
1558
1559 /* Put an empty link in the list to mark the end of this rule */
1560 p = symbol_list_new (NULL);
1561 p1->next = p;
1562 p1 = p;
1563
1564 if (t == tok_prec)
1565 {
1566 complain (_("two @prec's in a row"));
1567 t = lex ();
1568 crule->ruleprec = symval;
1569 t = lex ();
1570 }
f499b062 1571
107f7dfb
AD
1572 if (t == tok_guard)
1573 {
1574 if (!semantic_parser)
1575 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1576
14d293ac 1577 parse_guard (crule, rulelength);
a70083a3 1578 t = lex ();
107f7dfb 1579 }
f499b062
AD
1580
1581 if (t == tok_left_curly)
107f7dfb
AD
1582 {
1583 /* This case never occurs -wjh */
1584 if (action_flag)
1585 complain (_("two actions at end of one rule"));
14d293ac 1586 parse_action (crule, rulelength);
107f7dfb 1587 action_flag = 1;
f3849179 1588 ++xactions; /* -wjh */
107f7dfb
AD
1589 t = lex ();
1590 }
1591 /* If $$ is being set in default way, report if any type
1592 mismatch. */
1593 else if (!xactions
1594 && first_rhs && lhs->type_name != first_rhs->type_name)
1595 {
1596 if (lhs->type_name == 0
1597 || first_rhs->type_name == 0
1598 || strcmp (lhs->type_name, first_rhs->type_name))
1599 complain (_("type clash (`%s' `%s') on default action"),
1600 lhs->type_name ? lhs->type_name : "",
1601 first_rhs->type_name ? first_rhs->type_name : "");
1602 }
1603 /* Warn if there is no default for $$ but we need one. */
1604 else if (!xactions && !first_rhs && lhs->type_name != 0)
1605 complain (_("empty rule for typed nonterminal, and no action"));
bfcf1f3a 1606 if (t == tok_two_percents || t == tok_eof)
fff9bf0b 1607 warn (_("previous rule lacks an ending `;'"));
107f7dfb 1608 if (t == tok_semicolon)
a70083a3 1609 t = lex ();
107f7dfb
AD
1610 }
1611 else
1612 {
1613 complain (_("invalid input: %s"), quote (token_buffer));
1614 t = lex ();
1615 }
943819bf 1616
b68e7744
AD
1617 /* grammar has been read. Do some checking */
1618
1619 if (nrules == 0)
1620 fatal (_("no rules in the input grammar"));
1621
1622 /* Report any undefined symbols and consider them nonterminals. */
db8837cb 1623 symbols_do (symbol_check_defined, NULL);
b68e7744 1624
ff442794
AD
1625 /* Insert the initial rule, which line is that of the first rule
1626 (not that of the start symbol):
30171f79
AD
1627
1628 axiom: %start EOF. */
1629 p = symbol_list_new (axiom);
ff442794 1630 p->line = grammar->line;
30171f79
AD
1631 p->next = symbol_list_new (startval);
1632 p->next->next = symbol_list_new (eoftoken);
1633 p->next->next->next = symbol_list_new (NULL);
1634 p->next->next->next->next = grammar;
/* One more rule; the three items are its two rhs symbols and its end
   marker, counted the same way as for the user's rules above. */
1635 nrules += 1;
5123689b 1636 nritems += 3;
30171f79
AD
1637 grammar = p;
1638 startval = axiom;
1ff442ca
NF
1639
1640 if (nsyms > MAXSHORT)
a0f6b076
AD
1641 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1642 MAXSHORT);
1ff442ca 1643
72a23c97 1644 assert (nsyms == ntokens + nvars);
1ff442ca 1645}
ff48177d
MA
1646
1647/* At the end of the grammar file, some C source code must
63c2d5de 1648 be stored. It is going to be associated to the epilogue
ff48177d
MA
1649 directive. */
1650static void
1651read_additionnal_code (void)
1652{
9101a310 1653 int c;
63c2d5de 1654 struct obstack el_obstack;
342b8b6e 1655
63c2d5de 1656 obstack_init (&el_obstack);
ff48177d 1657
710ddc4f
MA
1658 if (!no_lines_flag)
1659 {
1660 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1661 lineno, quotearg_style (c_quoting_style,
b7c49edf 1662 muscle_find ("filename")));
710ddc4f
MA
1663 }
1664
ff48177d 1665 while ((c = getc (finput)) != EOF)
63c2d5de 1666 obstack_1grow (&el_obstack, c);
342b8b6e 1667
63c2d5de 1668 obstack_1grow (&el_obstack, 0);
11d82f03 1669 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1670}
1671
a70083a3 1672\f
037ca2f1
AD
1673/*------------------------------------------------------------------.
1674| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1675| number. |
1676`------------------------------------------------------------------*/
1677
1678static void
1679token_translations_init (void)
1680{
72a23c97 1681 int last_user_token_number = 256;
037ca2f1
AD
1682 int i;
1683
72a23c97
AD
1684 /* Set the user numbers. */
1685 for (i = 0; i < ntokens; ++i)
1686 {
db8837cb 1687 symbol_t *this = symbols[i];
72a23c97
AD
1688 if (this->user_token_number == SUNDEF)
1689 this->user_token_number = ++last_user_token_number;
1690 if (this->user_token_number > max_user_token_number)
1691 max_user_token_number = this->user_token_number;
72a23c97
AD
1692 }
1693
680e8701 1694 token_translations = XCALLOC (token_number_t, max_user_token_number + 1);
037ca2f1
AD
1695
1696 /* Initialize all entries for literal tokens to 2, the internal
1697 token number for $undefined., which represents all invalid
1698 inputs. */
18bcecb0 1699 for (i = 0; i < max_user_token_number + 1; i++)
037ca2f1
AD
1700 token_translations[i] = 2;
1701
db8837cb 1702 symbols_do (symbol_translation, NULL);
037ca2f1
AD
1703}
1704
1705
0e78e603
AD
1706/*----------------------------------------------------------------.
1707| Assign symbol numbers, and write definition of token names into |
1708| FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1709`----------------------------------------------------------------*/
1ff442ca 1710
4a120d45 1711static void
118fb205 1712packsymbols (void)
1ff442ca 1713{
db8837cb 1714 symbols = XCALLOC (symbol_t *, nsyms);
1ff442ca 1715
db8837cb
AD
1716 symbols_do (symbol_check_alias_consistence, NULL);
1717 symbols_do (symbol_pack, NULL);
1ff442ca 1718
037ca2f1 1719 token_translations_init ();
1ff442ca 1720
d9b739c3 1721 error_token_number = errtoken->number;
1ff442ca 1722
e3f1699f
AD
1723 if (startval->class == unknown_sym)
1724 fatal (_("the start symbol %s is undefined"), startval->tag);
1725 else if (startval->class == token_sym)
1726 fatal (_("the start symbol %s is a token"), startval->tag);
1727
d9b739c3 1728 start_symbol = startval->number;
e3f1699f
AD
1729}
1730
1731
a70083a3
AD
1732/*---------------------------------------------------------------.
1733| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 1734| RITEM. |
a70083a3 1735`---------------------------------------------------------------*/
1ff442ca 1736
4a120d45 1737static void
118fb205 1738packgram (void)
1ff442ca 1739{
a70083a3
AD
1740 int itemno;
1741 int ruleno;
1742 symbol_list *p;
1ff442ca 1743
adc8c848 1744 /* We use short to index items. */
5123689b 1745 if (nritems >= MAXSHORT)
adc8c848
AD
1746 fatal (_("too many items (max %d)"), MAXSHORT);
1747
5123689b 1748 ritem = XCALLOC (short, nritems + 1);
1a2b5d37 1749 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1750
1751 itemno = 0;
1752 ruleno = 1;
1753
1754 p = grammar;
1755 while (p)
1756 {
db8837cb 1757 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 1758 rules[ruleno].user_number = ruleno;
c3b407f4 1759 rules[ruleno].number = ruleno;
bba97eb2 1760 rules[ruleno].lhs = p->sym;
99013900 1761 rules[ruleno].rhs = ritem + itemno;
1a2b5d37
AD
1762 rules[ruleno].line = p->line;
1763 rules[ruleno].useful = TRUE;
1764 rules[ruleno].action = p->action;
1765 rules[ruleno].action_line = p->action_line;
1766 rules[ruleno].guard = p->guard;
1767 rules[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1768
1769 p = p->next;
1770 while (p && p->sym)
1771 {
d9b739c3 1772 ritem[itemno++] = p->sym->number;
1ff442ca
NF
1773 /* A rule gets by default the precedence and associativity
1774 of the last token in it. */
d7020c20 1775 if (p->sym->class == token_sym)
03b31c0c 1776 rules[ruleno].prec = p->sym;
a70083a3
AD
1777 if (p)
1778 p = p->next;
1ff442ca
NF
1779 }
1780
1781 /* If this rule has a %prec,
a70083a3 1782 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1783 if (ruleprec)
1784 {
03b31c0c
AD
1785 rules[ruleno].precsym = ruleprec;
1786 rules[ruleno].prec = ruleprec;
1ff442ca 1787 }
1ff442ca 1788 ritem[itemno++] = -ruleno;
f3849179 1789 ++ruleno;
1ff442ca 1790
a70083a3
AD
1791 if (p)
1792 p = p->next;
1ff442ca
NF
1793 }
1794
1795 ritem[itemno] = 0;
5123689b 1796 assert (itemno == nritems);
3067fbef
AD
1797
1798 if (trace_flag)
1799 ritem_print (stderr);
1ff442ca 1800}
a70083a3
AD
1801\f
1802/*-------------------------------------------------------------------.
1803| Read in the grammar specification and record it in the format |
ea5607fd 1804| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1805| and all actions into ACTION_OBSTACK, in each case forming the body |
1806| of a C function (YYGUARD or YYACTION) which contains a switch |
1807| statement to decide which guard or action to execute. |
a70083a3
AD
1808`-------------------------------------------------------------------*/
1809
1810void
1811reader (void)
1812{
342b8b6e 1813 lex_init ();
a70083a3
AD
1814 lineno = 1;
1815
11d82f03
MA
1816 /* Initialize the muscle obstack. */
1817 obstack_init (&muscle_obstack);
82e236e2 1818
a70083a3 1819 /* Initialize the symbol table. */
db8837cb 1820 symbols_new ();
b6610515 1821
30171f79
AD
1822 /* Construct the axiom symbol. */
1823 axiom = getsym ("$axiom");
1824 axiom->class = nterm_sym;
d9b739c3 1825 axiom->number = nvars++;
30171f79 1826
a70083a3
AD
1827 /* Construct the error token */
1828 errtoken = getsym ("error");
d7020c20 1829 errtoken->class = token_sym;
72a23c97 1830 errtoken->number = ntokens++;
a70083a3 1831 errtoken->user_token_number = 256; /* Value specified by POSIX. */
b6610515 1832
a70083a3
AD
1833 /* Construct a token that represents all undefined literal tokens.
1834 It is always token number 2. */
1835 undeftoken = getsym ("$undefined.");
d7020c20 1836 undeftoken->class = token_sym;
72a23c97 1837 undeftoken->number = ntokens++;
a70083a3
AD
1838 undeftoken->user_token_number = 2;
1839
331dbc1b
AD
1840 /* Initialize the obstacks. */
1841 obstack_init (&action_obstack);
1842 obstack_init (&attrs_obstack);
331dbc1b
AD
1843 obstack_init (&output_obstack);
1844
1845 finput = xfopen (infile, "r");
1846
896fe5c1
AD
1847 /* Read the declaration section. Copy %{ ... %} groups to
1848 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1849 etc. found there. */
a70083a3 1850 read_declarations ();
b7c49edf
AD
1851
1852 /* If the user did not define her EOFTOKEN, do it now. */
1853 if (!eoftoken)
1854 {
1855 eoftoken = getsym ("$");
1856 eoftoken->class = token_sym;
72a23c97 1857 eoftoken->number = 0;
b7c49edf
AD
1858 /* Value specified by POSIX. */
1859 eoftoken->user_token_number = 0;
1860 }
1861
a70083a3
AD
1862 /* Read in the grammar, build grammar in list form. Write out
1863 guards and actions. */
1864 readgram ();
ff48177d
MA
1865 /* Some C code is given at the end of the grammar file. */
1866 read_additionnal_code ();
b0c4483e 1867
331dbc1b
AD
1868 lex_free ();
1869 xfclose (finput);
1870
a70083a3
AD
1871 /* Assign the symbols their symbol numbers. Write #defines for the
1872 token symbols into FDEFINES if requested. */
1873 packsymbols ();
93ede233 1874
a70083a3
AD
1875 /* Convert the grammar into the format described in gram.h. */
1876 packgram ();
8419d367
AD
1877
1878 /* The grammar as a symbol_list is no longer needed. */
1879 LIST_FREE (symbol_list, grammar);
a70083a3 1880}
76514394
AD
1881
1882void
1883grammar_free (void)
1884{
1885 XFREE (ritem);
1886 free (rules + 1);
1887 /* Free the symbol table data structure. */
db8837cb 1888 symbols_free ();
76514394 1889}