]> git.saurik.com Git - bison.git/blame - src/reader.c
* tests/sets.at (AT_EXTRACT_SETS): Sed portability issue: no space
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
db8837cb 41 symbol_t *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062
AD
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
db8837cb 51 symbol_t *ruleprec;
d945f5cd 52} symbol_list;
118fb205 53
1ff442ca 54int lineno;
280a38c3
AD
55static symbol_list *grammar = NULL;
56static int start_flag = 0;
db8837cb 57static symbol_t *startval = NULL;
1ff442ca
NF
58
59/* Nonzero if components of semantic values are used, implying
60 they must be unions. */
61static int value_components_used;
62
d7020c20 63/* Nonzero if %union has been seen. */
280a38c3 64static int typed = 0;
1ff442ca 65
d7020c20 66/* Incremented for each %left, %right or %nonassoc seen */
280a38c3 67static int lastprec = 0;
1ff442ca 68
db8837cb
AD
69symbol_t *errtoken = NULL;
70symbol_t *undeftoken = NULL;
71symbol_t *eoftoken = NULL;
72symbol_t *axiom = NULL;
b29b2ed5 73
6255b435 74static symbol_list *
db8837cb 75symbol_list_new (symbol_t *sym)
b29b2ed5
AD
76{
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
d945f5cd
AD
81 res->action = NULL;
82 res->action_line = 0;
f499b062
AD
83 res->guard = NULL;
84 res->guard_line = 0;
b29b2ed5
AD
85 res->ruleprec = NULL;
86 return res;
87}
88
72a23c97 89/*------------------------.
db8837cb 90| Operations on symbols. |
72a23c97
AD
91`------------------------*/
92
93
94/*-----------------------------------------------------------.
95| If THIS is not defined, report an error, and consider it a |
96| nonterminal. |
97`-----------------------------------------------------------*/
98
99static bool
db8837cb 100symbol_check_defined (symbol_t *this)
72a23c97
AD
101{
102 if (this->class == unknown_sym)
103 {
104 complain
105 (_("symbol %s is used, but is not defined as a token and has no rules"),
106 this->tag);
107 this->class = nterm_sym;
108 this->number = nvars++;
109 }
110
111 return TRUE;
112}
113
114
115/*-------------------------------------------------------------------.
116| Make SYMBOL and the literal string symbol SYMVAL aliases of one    |
117| another, giving SYMVAL the type TYPENAME; both count as one token. |
118`-------------------------------------------------------------------*/
119
120static bool
db8837cb 121symbol_make_alias (symbol_t *symbol, char *typename)
72a23c97
AD
122{
123 if (symval->alias)
124 warn (_("symbol `%s' used more than once as a literal string"),
125 symval->tag);
126 else if (symbol->alias)
127 warn (_("symbol `%s' given more than one literal string"),
128 symbol->tag);
129 else
130 {
131 symval->class = token_sym;
132 symval->type_name = typename;
133 symval->user_token_number = symbol->user_token_number;
134 symbol->user_token_number = SALIAS;
135 symval->alias = symbol;
136 symbol->alias = symval;
137 /* symbol and symval combined are only one symbol */
138 nsyms--;
139 ntokens--;
140 assert (ntokens == symbol->number || ntokens == symval->number);
141 symbol->number = symval->number =
142 (symval->number < symbol->number) ? symval->number : symbol->number;
143 }
144
145 return TRUE;
146}
147
148/*---------------------------------------------------------.
149| Check that THIS, and its alias, have same precedence and |
150| associativity. |
151`---------------------------------------------------------*/
152
153static bool
db8837cb 154symbol_check_alias_consistence (symbol_t *this)
72a23c97
AD
155{
156 /* Check only those who _are_ the aliases. */
157 if (this->alias && this->user_token_number == SALIAS)
158 {
159 if (this->prec != this->alias->prec)
160 {
161 if (this->prec != 0 && this->alias->prec != 0)
162 complain (_("conflicting precedences for %s and %s"),
163 this->tag, this->alias->tag);
164 if (this->prec != 0)
165 this->alias->prec = this->prec;
166 else
167 this->prec = this->alias->prec;
168 }
169
170 if (this->assoc != this->alias->assoc)
171 {
172 if (this->assoc != 0 && this->alias->assoc != 0)
173 complain (_("conflicting assoc values for %s and %s"),
174 this->tag, this->alias->tag);
175 if (this->assoc != 0)
176 this->alias->assoc = this->assoc;
177 else
178 this->assoc = this->alias->assoc;
179 }
180 }
181 return TRUE;
182}
183
184
185/*-------------------------------------------------------------------.
186| Settle the final number of THIS (nonterminals are numbered after    |
187| the tokens, aliased tokens share one number) and put it in SYMBOLS. |
188`-------------------------------------------------------------------*/
189
190static bool
db8837cb 191symbol_pack (symbol_t *this)
72a23c97 192{
72a23c97
AD
193 if (this->class == nterm_sym)
194 {
195 this->number += ntokens;
196 }
197 else if (this->alias)
198 {
199 /* This symbol and its alias are a single token defn.
200 Allocate a tokno, and assign to both check agreement of
201 prec and assoc fields and make both the same */
5fbb0954 202 if (this->number == NUMBER_UNDEFINED)
72a23c97
AD
203 {
204 if (this == eoftoken || this->alias == eoftoken)
205 this->number = this->alias->number = 0;
206 else
207 {
5fbb0954 208 assert (this->alias->number != NUMBER_UNDEFINED);
72a23c97
AD
209 this->number = this->alias->number;
210 }
211 }
212 /* Do not do processing below for SALIASs. */
213 if (this->user_token_number == SALIAS)
214 return TRUE;
215 }
216 else /* this->class == token_sym */
217 {
5fbb0954 218 assert (this->number != NUMBER_UNDEFINED);
72a23c97
AD
219 }
220
72a23c97
AD
221 symbols[this->number] = this;
222 return TRUE;
223}
224
225
226
227
228/*--------------------------------------------------.
229| Put THIS in TOKEN_TRANSLATIONS if it is a token. |
230`--------------------------------------------------*/
231
232static bool
db8837cb 233symbol_translation (symbol_t *this)
72a23c97 234{
72a23c97
AD
235 /* Non-terminal? */
236 if (this->class == token_sym
237 && this->user_token_number != SALIAS)
238 {
239 /* A token which translation has already been set? */
007a50a4 240 if (token_translations[this->user_token_number] != undeftoken->number)
72a23c97
AD
241 complain (_("tokens %s and %s both assigned number %d"),
242 symbols[token_translations[this->user_token_number]]->tag,
243 this->tag, this->user_token_number);
244
72a23c97
AD
245 token_translations[this->user_token_number] = this->number;
246 }
247
248 return TRUE;
249}
0d533154 250\f
a70083a3 251
0d533154
AD
252/*===================\
253| Low level lexing. |
254\===================*/
943819bf
RS
255
256static void
118fb205 257skip_to_char (int target)
943819bf
RS
258{
259 int c;
260 if (target == '\n')
a0f6b076 261 complain (_(" Skipping to next \\n"));
943819bf 262 else
a0f6b076 263 complain (_(" Skipping to next %c"), target);
943819bf
RS
264
265 do
0d533154 266 c = skip_white_space ();
943819bf 267 while (c != target && c != EOF);
a083fbbf 268 if (c != EOF)
0d533154 269 ungetc (c, finput);
943819bf
RS
270}
271
272
0d533154
AD
273/*---------------------------------------------------------.
274| Read a signed integer from STREAM and return its value. |
275`---------------------------------------------------------*/
276
/* Read an optionally `-'-prefixed decimal integer from STREAM and
   return its value.  The first character that is not part of the
   number is pushed back onto STREAM.  If there are no digits, the
   result is 0 (a leading `-' is still consumed).

   The magnitude saturates at INT_MAX instead of overflowing, since
   signed overflow is undefined behavior in C.  */
static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  if (c == '-')
    {
      negative = 1;
      c = getc (stream);
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit would exceed INT_MAX: clamp.  Once clamped,
         any further digit keeps the value at INT_MAX.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Harmless when C is EOF: ungetc then does nothing.  */
  ungetc (c, stream);

  return negative ? -n : n;
}
300\f
79282c5a
AD
301/*--------------------------------------------------------------.
302| Get the data type (alternative in the union) of the value for |
303| symbol N in rule RULE. |
304`--------------------------------------------------------------*/
305
306static char *
b29b2ed5 307get_type_name (int n, symbol_list *rule)
79282c5a
AD
308{
309 int i;
310 symbol_list *rp;
311
312 if (n < 0)
313 {
314 complain (_("invalid $ value"));
315 return NULL;
316 }
317
318 rp = rule;
319 i = 0;
320
321 while (i < n)
322 {
323 rp = rp->next;
324 if (rp == NULL || rp->sym == NULL)
325 {
326 complain (_("invalid $ value"));
327 return NULL;
328 }
f3849179 329 ++i;
79282c5a
AD
330 }
331
332 return rp->sym->type_name;
333}
334\f
337bab46
AD
335/*------------------------------------------------------------.
336| Copy a string from FIN to OOUT. MATCH is its delimiter (either |
337| ' or "); the delimiters are copied too only if STORE is nonzero. |
338`------------------------------------------------------------*/
ae3c3164
AD
339
340static inline void
b6610515 341copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
342{
343 int c;
344
b6610515
RA
345 if (store)
346 obstack_1grow (oout, match);
8c7ebe49 347
4a120d45 348 c = getc (fin);
ae3c3164
AD
349
350 while (c != match)
351 {
352 if (c == EOF)
353 fatal (_("unterminated string at end of file"));
354 if (c == '\n')
355 {
a0f6b076 356 complain (_("unterminated string"));
4a120d45 357 ungetc (c, fin);
ae3c3164
AD
358 c = match; /* invent terminator */
359 continue;
360 }
361
337bab46 362 obstack_1grow (oout, c);
ae3c3164
AD
363
364 if (c == '\\')
365 {
4a120d45 366 c = getc (fin);
ae3c3164
AD
367 if (c == EOF)
368 fatal (_("unterminated string at end of file"));
337bab46 369 obstack_1grow (oout, c);
8c7ebe49 370
ae3c3164 371 if (c == '\n')
f3849179 372 ++lineno;
ae3c3164
AD
373 }
374
a70083a3 375 c = getc (fin);
ae3c3164
AD
376 }
377
b6610515
RA
378 if (store)
379 obstack_1grow (oout, c);
380}
381
382/* FIXME. */
383
/* Copy a string delimited by MATCH from FIN to OOUT, delimiters
   included: this is copy_string2 with STORE set.  */
static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int store_delimiters = 1;

  copy_string2 (fin, oout, match, store_delimiters);
}
389
b6610515
RA
390/* FIXME. */
391
/* Copy from FIN to OOUT the longest run of alphanumeric characters
   and underscores.  The first character that does not belong to the
   run is pushed back onto FIN.  */
static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c = getc (fin);

  while (isalnum (c) || c == '_')
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
ae3c3164 402
2666f928
AD
403
404/*------------------------------------------------------------------.
405| Copy the would-be comment from FIN to OOUT. In fact we just saw a |
406| `/', which might or might not start a comment. In any case, copy |
407| what we saw. |
408`------------------------------------------------------------------*/
ae3c3164
AD
409
410static inline void
2666f928 411copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
412{
413 int cplus_comment;
a70083a3 414 int ended;
550a72a3
AD
415 int c;
416
417 /* We read a `/', output it. */
2666f928 418 obstack_1grow (oout, '/');
550a72a3
AD
419
420 switch ((c = getc (fin)))
421 {
422 case '/':
423 cplus_comment = 1;
424 break;
425 case '*':
426 cplus_comment = 0;
427 break;
428 default:
429 ungetc (c, fin);
430 return;
431 }
ae3c3164 432
2666f928 433 obstack_1grow (oout, c);
550a72a3 434 c = getc (fin);
ae3c3164
AD
435
436 ended = 0;
437 while (!ended)
438 {
439 if (!cplus_comment && c == '*')
440 {
441 while (c == '*')
442 {
2666f928 443 obstack_1grow (oout, c);
550a72a3 444 c = getc (fin);
ae3c3164
AD
445 }
446
447 if (c == '/')
448 {
2666f928 449 obstack_1grow (oout, c);
ae3c3164
AD
450 ended = 1;
451 }
452 }
453 else if (c == '\n')
454 {
f3849179 455 ++lineno;
2666f928 456 obstack_1grow (oout, c);
ae3c3164
AD
457 if (cplus_comment)
458 ended = 1;
459 else
550a72a3 460 c = getc (fin);
ae3c3164
AD
461 }
462 else if (c == EOF)
463 fatal (_("unterminated comment"));
464 else
465 {
2666f928 466 obstack_1grow (oout, c);
550a72a3 467 c = getc (fin);
ae3c3164
AD
468 }
469 }
470}
471
472
a70083a3 473/*-----------------------------------------------------------------.
337bab46 474| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
a70083a3
AD
475| reference to this location. STACK_OFFSET is the number of values |
476| in the current rule so far, which says where to find `$0' with |
477| respect to the top of the stack. |
478`-----------------------------------------------------------------*/
1ff442ca 479
a70083a3 480static inline void
337bab46 481copy_at (FILE *fin, struct obstack *oout, int stack_offset)
1ff442ca 482{
a70083a3 483 int c;
1ff442ca 484
a70083a3
AD
485 c = getc (fin);
486 if (c == '$')
1ff442ca 487 {
ff4423cc 488 obstack_sgrow (oout, "yyloc");
89cab50d 489 locations_flag = 1;
a70083a3
AD
490 }
491 else if (isdigit (c) || c == '-')
492 {
493 int n;
1ff442ca 494
a70083a3
AD
495 ungetc (c, fin);
496 n = read_signed_integer (fin);
11e2beca
AD
497 if (n > stack_offset)
498 complain (_("invalid value: %s%d"), "@", n);
499 else
500 {
501 /* Offset is always 0 if parser has already popped the stack
502 pointer. */
503 obstack_fgrow1 (oout, "yylsp[%d]",
504 n - (semantic_parser ? 0 : stack_offset));
505 locations_flag = 1;
506 }
1ff442ca 507 }
a70083a3 508 else
ff4a34be
AD
509 {
510 char buf[] = "@c";
511 buf[1] = c;
512 complain (_("%s is invalid"), quote (buf));
513 }
1ff442ca 514}
79282c5a
AD
515
516
517/*-------------------------------------------------------------------.
518| FIN is pointing to a would-be semantic value (i.e., a `$'). |
519| |
520| Possible inputs: $[<TYPENAME>]($|integer) |
521| |
337bab46 522| Output to OOUT a reference to this semantic value. STACK_OFFSET is |
79282c5a
AD
523| the number of values in the current rule so far, which says where |
524| to find `$0' with respect to the top of the stack. |
525`-------------------------------------------------------------------*/
526
527static inline void
337bab46 528copy_dollar (FILE *fin, struct obstack *oout,
79282c5a
AD
529 symbol_list *rule, int stack_offset)
530{
531 int c = getc (fin);
b0ce6046 532 const char *type_name = NULL;
79282c5a 533
f282676b 534 /* Get the type name if explicit. */
79282c5a
AD
535 if (c == '<')
536 {
f282676b 537 read_type_name (fin);
79282c5a
AD
538 type_name = token_buffer;
539 value_components_used = 1;
79282c5a
AD
540 c = getc (fin);
541 }
542
543 if (c == '$')
544 {
ff4423cc 545 obstack_sgrow (oout, "yyval");
8c7ebe49 546
79282c5a
AD
547 if (!type_name)
548 type_name = get_type_name (0, rule);
549 if (type_name)
337bab46 550 obstack_fgrow1 (oout, ".%s", type_name);
79282c5a
AD
551 if (!type_name && typed)
552 complain (_("$$ of `%s' has no declared type"),
553 rule->sym->tag);
554 }
555 else if (isdigit (c) || c == '-')
556 {
557 int n;
558 ungetc (c, fin);
559 n = read_signed_integer (fin);
560
11e2beca
AD
561 if (n > stack_offset)
562 complain (_("invalid value: %s%d"), "$", n);
563 else
564 {
565 if (!type_name && n > 0)
566 type_name = get_type_name (n, rule);
567
568 /* Offset is always 0 if parser has already popped the stack
569 pointer. */
570 obstack_fgrow1 (oout, "yyvsp[%d]",
571 n - (semantic_parser ? 0 : stack_offset));
572
573 if (type_name)
574 obstack_fgrow1 (oout, ".%s", type_name);
575 if (!type_name && typed)
576 complain (_("$%d of `%s' has no declared type"),
577 n, rule->sym->tag);
578 }
79282c5a
AD
579 }
580 else
581 {
582 char buf[] = "$c";
583 buf[1] = c;
584 complain (_("%s is invalid"), quote (buf));
585 }
586}
a70083a3
AD
587\f
588/*-------------------------------------------------------------------.
589| Copy the contents of a `%{ ... %}' into the definitions file. The |
590| `%{' has already been read. Return after reading the `%}'. |
591`-------------------------------------------------------------------*/
1ff442ca 592
4a120d45 593static void
0dd1580a 594copy_definition (struct obstack *oout)
1ff442ca 595{
a70083a3 596 int c;
ae3c3164 597 /* -1 while reading a character if prev char was %. */
a70083a3 598 int after_percent;
1ff442ca 599
89cab50d 600 if (!no_lines_flag)
25b222fa 601 {
0dd1580a 602 obstack_fgrow2 (oout, muscle_find ("linef"),
342b8b6e 603 lineno, quotearg_style (c_quoting_style,
b7c49edf 604 muscle_find ("filename")));
25b222fa 605 }
1ff442ca
NF
606
607 after_percent = 0;
608
ae3c3164 609 c = getc (finput);
1ff442ca
NF
610
611 for (;;)
612 {
613 switch (c)
614 {
615 case '\n':
0dd1580a 616 obstack_1grow (oout, c);
f3849179 617 ++lineno;
1ff442ca
NF
618 break;
619
620 case '%':
a70083a3 621 after_percent = -1;
1ff442ca 622 break;
a083fbbf 623
1ff442ca
NF
624 case '\'':
625 case '"':
0dd1580a 626 copy_string (finput, oout, c);
1ff442ca
NF
627 break;
628
629 case '/':
0dd1580a 630 copy_comment (finput, oout);
1ff442ca
NF
631 break;
632
633 case EOF:
a70083a3 634 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
635
636 default:
0dd1580a 637 obstack_1grow (oout, c);
1ff442ca
NF
638 }
639
a70083a3 640 c = getc (finput);
1ff442ca
NF
641
642 if (after_percent)
643 {
644 if (c == '}')
645 return;
0dd1580a 646 obstack_1grow (oout, '%');
1ff442ca
NF
647 }
648 after_percent = 0;
1ff442ca 649 }
1ff442ca
NF
650}
651
652
d7020c20
AD
653/*-------------------------------------------------------------------.
654| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
655| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
656| are reversed. |
657`-------------------------------------------------------------------*/
1ff442ca 658
4a120d45 659static void
d7020c20 660parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 661{
342b8b6e
AD
662 token_t token = tok_undef;
663 char *typename = NULL;
1ff442ca 664
1e9798d5 665 /* The symbol being defined. */
db8837cb 666 symbol_t *symbol = NULL;
1e9798d5
AD
667
668 /* After `%token' and `%nterm', any number of symbols maybe be
669 defined. */
1ff442ca
NF
670 for (;;)
671 {
e6011337
JT
672 int tmp_char = ungetc (skip_white_space (), finput);
673
1e9798d5
AD
674 /* `%' (for instance from `%token', or from `%%' etc.) is the
675 only valid means to end this declaration. */
e6011337 676 if (tmp_char == '%')
1ff442ca 677 return;
e6011337 678 if (tmp_char == EOF)
a0f6b076 679 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 680
a70083a3 681 token = lex ();
511e79b3 682 if (token == tok_comma)
943819bf
RS
683 {
684 symbol = NULL;
685 continue;
686 }
511e79b3 687 if (token == tok_typename)
1ff442ca 688 {
95e36146 689 typename = xstrdup (token_buffer);
1ff442ca 690 value_components_used = 1;
943819bf
RS
691 symbol = NULL;
692 }
511e79b3 693 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 694 {
db8837cb 695 symbol_make_alias (symbol, typename);
8e03724b 696 symbol = NULL;
1ff442ca 697 }
511e79b3 698 else if (token == tok_identifier)
1ff442ca
NF
699 {
700 int oldclass = symval->class;
943819bf 701 symbol = symval;
1ff442ca 702
943819bf 703 if (symbol->class == what_is_not)
a0f6b076 704 complain (_("symbol %s redefined"), symbol->tag);
943819bf 705 symbol->class = what_is;
d7020c20 706 if (what_is == nterm_sym && oldclass != nterm_sym)
d9b739c3 707 symbol->number = nvars++;
5fbb0954 708 if (what_is == token_sym && symbol->number == NUMBER_UNDEFINED)
bd02036a 709 symbol->number = ntokens++;
1ff442ca
NF
710
711 if (typename)
712 {
943819bf
RS
713 if (symbol->type_name == NULL)
714 symbol->type_name = typename;
a70083a3 715 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 716 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
717 }
718 }
511e79b3 719 else if (symbol && token == tok_number)
a70083a3 720 {
943819bf 721 symbol->user_token_number = numval;
b7c49edf
AD
722 /* User defined EOF token? */
723 if (numval == 0)
72a23c97
AD
724 {
725 eoftoken = symbol;
726 eoftoken->number = 0;
727 /* It is always mapped to 0, so it was already counted in
728 NTOKENS. */
729 --ntokens;
730 }
a70083a3 731 }
1ff442ca 732 else
943819bf 733 {
a0f6b076 734 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
735 token_buffer,
736 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 737 skip_to_char ('%');
943819bf 738 }
1ff442ca
NF
739 }
740
741}
742
1ff442ca 743
d7020c20
AD
744/*------------------------------.
745| Parse what comes after %start |
746`------------------------------*/
1ff442ca 747
4a120d45 748static void
118fb205 749parse_start_decl (void)
1ff442ca
NF
750{
751 if (start_flag)
27821bff 752 complain (_("multiple %s declarations"), "%start");
511e79b3 753 if (lex () != tok_identifier)
27821bff 754 complain (_("invalid %s declaration"), "%start");
943819bf
RS
755 else
756 {
757 start_flag = 1;
758 startval = symval;
759 }
1ff442ca
NF
760}
761
a70083a3
AD
762/*-----------------------------------------------------------.
763| read in a %type declaration and record its information for |
764| get_type_name to access |
765`-----------------------------------------------------------*/
766
767static void
768parse_type_decl (void)
769{
a70083a3
AD
770 char *name;
771
511e79b3 772 if (lex () != tok_typename)
a70083a3
AD
773 {
774 complain ("%s", _("%type declaration has no <typename>"));
775 skip_to_char ('%');
776 return;
777 }
778
95e36146 779 name = xstrdup (token_buffer);
a70083a3
AD
780
781 for (;;)
782 {
f17bcd1f 783 token_t t;
a70083a3
AD
784 int tmp_char = ungetc (skip_white_space (), finput);
785
786 if (tmp_char == '%')
787 return;
788 if (tmp_char == EOF)
789 fatal (_("Premature EOF after %s"), token_buffer);
790
791 t = lex ();
792
793 switch (t)
1ff442ca
NF
794 {
795
511e79b3
AD
796 case tok_comma:
797 case tok_semicolon:
1ff442ca
NF
798 break;
799
511e79b3 800 case tok_identifier:
1ff442ca
NF
801 if (symval->type_name == NULL)
802 symval->type_name = name;
a70083a3 803 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 804 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
805
806 break;
807
808 default:
a0f6b076
AD
809 complain (_("invalid %%type declaration due to item: %s"),
810 token_buffer);
a70083a3 811 skip_to_char ('%');
1ff442ca
NF
812 }
813 }
814}
815
816
817
d7020c20
AD
818/*----------------------------------------------------------------.
819| Read in a %left, %right or %nonassoc declaration and record its |
820| information. |
821`----------------------------------------------------------------*/
1ff442ca 822
4a120d45 823static void
d7020c20 824parse_assoc_decl (associativity assoc)
1ff442ca 825{
a70083a3
AD
826 char *name = NULL;
827 int prev = 0;
1ff442ca 828
f3849179
AD
829 /* Assign a new precedence level, never 0. */
830 ++lastprec;
1ff442ca 831
1ff442ca
NF
832 for (;;)
833 {
f17bcd1f 834 token_t t;
e6011337 835 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 836
e6011337 837 if (tmp_char == '%')
1ff442ca 838 return;
e6011337 839 if (tmp_char == EOF)
a0f6b076 840 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 841
a70083a3 842 t = lex ();
1ff442ca
NF
843
844 switch (t)
845 {
511e79b3 846 case tok_typename:
95e36146 847 name = xstrdup (token_buffer);
1ff442ca
NF
848 break;
849
511e79b3 850 case tok_comma:
1ff442ca
NF
851 break;
852
511e79b3 853 case tok_identifier:
1ff442ca 854 if (symval->prec != 0)
a0f6b076 855 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
856 symval->prec = lastprec;
857 symval->assoc = assoc;
d7020c20 858 if (symval->class == nterm_sym)
a0f6b076 859 complain (_("symbol %s redefined"), symval->tag);
5fbb0954 860 if (symval->number == NUMBER_UNDEFINED)
72a23c97
AD
861 {
862 symval->number = ntokens++;
863 symval->class = token_sym;
864 }
1ff442ca 865 if (name)
a70083a3 866 { /* record the type, if one is specified */
1ff442ca
NF
867 if (symval->type_name == NULL)
868 symval->type_name = name;
a70083a3 869 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 870 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
871 }
872 break;
873
511e79b3
AD
874 case tok_number:
875 if (prev == tok_identifier)
a70083a3 876 {
1ff442ca 877 symval->user_token_number = numval;
a70083a3
AD
878 }
879 else
880 {
72a23c97
AD
881 complain
882 (_("invalid text (%s) - number should be after identifier"),
883 token_buffer);
a70083a3
AD
884 skip_to_char ('%');
885 }
1ff442ca
NF
886 break;
887
511e79b3 888 case tok_semicolon:
1ff442ca
NF
889 return;
890
891 default:
a0f6b076 892 complain (_("unexpected item: %s"), token_buffer);
a70083a3 893 skip_to_char ('%');
1ff442ca
NF
894 }
895
896 prev = t;
1ff442ca
NF
897 }
898}
899
900
901
dd60faec 902/*--------------------------------------------------------------.
180d45ba
PB
903| Copy the union declaration into the stype muscle |
904| (and fdefines), where it is made into the definition of |
905| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 906`--------------------------------------------------------------*/
1ff442ca 907
4a120d45 908static void
118fb205 909parse_union_decl (void)
1ff442ca 910{
a70083a3
AD
911 int c;
912 int count = 0;
428046f8 913 bool done = FALSE;
180d45ba 914 struct obstack union_obstack;
1ff442ca 915 if (typed)
27821bff 916 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
917
918 typed = 1;
919
642cb8f8 920 MUSCLE_INSERT_INT ("stype_line", lineno);
180d45ba
PB
921 obstack_init (&union_obstack);
922 obstack_sgrow (&union_obstack, "union");
1ff442ca 923
428046f8 924 while (!done)
1ff442ca 925 {
428046f8
AD
926 c = xgetc (finput);
927
342b8b6e
AD
928 /* If C contains '/', it is output by copy_comment (). */
929 if (c != '/')
2666f928 930 obstack_1grow (&union_obstack, c);
1ff442ca
NF
931
932 switch (c)
933 {
934 case '\n':
f3849179 935 ++lineno;
1ff442ca
NF
936 break;
937
938 case '/':
2666f928 939 copy_comment (finput, &union_obstack);
1ff442ca
NF
940 break;
941
1ff442ca 942 case '{':
f3849179 943 ++count;
1ff442ca
NF
944 break;
945
946 case '}':
428046f8 947 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 948 if (count == 0)
27821bff 949 complain (_("unmatched %s"), "`}'");
1ff442ca 950 count--;
428046f8
AD
951 if (!count)
952 done = TRUE;
953 break;
1ff442ca 954 }
1ff442ca 955 }
180d45ba 956
428046f8
AD
957 /* JF don't choke on trailing semi */
958 c = skip_white_space ();
959 if (c != ';')
960 ungetc (c, finput);
961 obstack_1grow (&union_obstack, 0);
962 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
963}
964
d7020c20
AD
965
966/*-------------------------------------------------------.
967| Parse the declaration %expect N which says to expect N |
968| shift-reduce conflicts. |
969`-------------------------------------------------------*/
1ff442ca 970
4a120d45 971static void
118fb205 972parse_expect_decl (void)
1ff442ca 973{
131e2fef 974 int c = skip_white_space ();
1ff442ca
NF
975 ungetc (c, finput);
976
131e2fef 977 if (!isdigit (c))
79282c5a 978 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
979 else
980 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
981}
982
a70083a3
AD
983
984/*-------------------------------------------------------------------.
985| Parse what comes after %thong. the full syntax is |
986| |
987| %thong <type> token number literal |
988| |
989| the <type> or number may be omitted. The number specifies the |
990| user_token_number. |
991| |
992| Two symbols are entered in the table, one for the token symbol and |
993| one for the literal. Both are given the <type>, if any, from the |
994| declaration. The ->user_token_number of the first is SALIAS and |
995| the ->user_token_number of the second is set to the number, if |
996| any, from the declaration. The two symbols are linked via |
997| pointers in their ->alias fields. |
998| |
999| During OUTPUT_DEFINES_TABLE, the symbol is reported; thereafter, |
1000| only the literal string is retained: it is the literal string that |
1001| is output to yytname. |
1002`-------------------------------------------------------------------*/
1003
1004static void
1005parse_thong_decl (void)
7b306f52 1006{
f17bcd1f 1007 token_t token;
db8837cb 1008 symbol_t *symbol;
a70083a3 1009 char *typename = 0;
6b7e85b9 1010 int usrtoknum = SUNDEF;
7b306f52 1011
a70083a3 1012 token = lex (); /* fetch typename or first token */
511e79b3 1013 if (token == tok_typename)
7b306f52 1014 {
95e36146 1015 typename = xstrdup (token_buffer);
a70083a3
AD
1016 value_components_used = 1;
1017 token = lex (); /* fetch first token */
7b306f52 1018 }
7b306f52 1019
a70083a3 1020 /* process first token */
7b306f52 1021
511e79b3 1022 if (token != tok_identifier)
a70083a3
AD
1023 {
1024 complain (_("unrecognized item %s, expected an identifier"),
1025 token_buffer);
1026 skip_to_char ('%');
1027 return;
7b306f52 1028 }
d7020c20 1029 symval->class = token_sym;
a70083a3
AD
1030 symval->type_name = typename;
1031 symval->user_token_number = SALIAS;
1032 symbol = symval;
7b306f52 1033
a70083a3 1034 token = lex (); /* get number or literal string */
1ff442ca 1035
511e79b3 1036 if (token == tok_number)
943819bf 1037 {
a70083a3
AD
1038 usrtoknum = numval;
1039 token = lex (); /* okay, did number, now get literal */
943819bf 1040 }
1ff442ca 1041
a70083a3 1042 /* process literal string token */
1ff442ca 1043
511e79b3 1044 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 1045 {
a70083a3
AD
1046 complain (_("expected string constant instead of %s"), token_buffer);
1047 skip_to_char ('%');
1048 return;
1ff442ca 1049 }
d7020c20 1050 symval->class = token_sym;
a70083a3
AD
1051 symval->type_name = typename;
1052 symval->user_token_number = usrtoknum;
1ff442ca 1053
a70083a3
AD
1054 symval->alias = symbol;
1055 symbol->alias = symval;
1ff442ca 1056
79282c5a
AD
1057 /* symbol and symval combined are only one symbol. */
1058 nsyms--;
a70083a3 1059}
3cef001a 1060
11e2beca 1061
b6610515 1062static void
11d82f03 1063parse_muscle_decl (void)
b6610515
RA
1064{
1065 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
1066 char *muscle_key;
1067 char *muscle_value;
b6610515
RA
1068
1069 /* Read key. */
1070 if (!isalpha (ch) && ch != '_')
1071 {
1072 complain (_("invalid %s declaration"), "%define");
1073 skip_to_char ('%');
1074 return;
1075 }
11d82f03
MA
1076 copy_identifier (finput, &muscle_obstack);
1077 obstack_1grow (&muscle_obstack, 0);
1078 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 1079
b6610515
RA
1080 /* Read value. */
1081 ch = skip_white_space ();
1082 if (ch != '"')
1083 {
1084 ungetc (ch, finput);
1085 if (ch != EOF)
1086 {
1087 complain (_("invalid %s declaration"), "%define");
1088 skip_to_char ('%');
1089 return;
1090 }
1091 else
1092 fatal (_("Premature EOF after %s"), "\"");
1093 }
11d82f03
MA
1094 copy_string2 (finput, &muscle_obstack, '"', 0);
1095 obstack_1grow (&muscle_obstack, 0);
1096 muscle_value = obstack_finish (&muscle_obstack);
b6610515 1097
b6610515 1098 /* Store the (key, value) pair in the environment. */
11d82f03 1099 muscle_insert (muscle_key, muscle_value);
b6610515
RA
1100}
1101
2ba3b73c 1102
426cf563
MA
1103
1104/*---------------------------------.
a870c567 1105| Parse a double quoted parameter. |
426cf563
MA
1106`---------------------------------*/
1107
1108static const char *
1109parse_dquoted_param (const char *from)
1110{
1111 struct obstack param_obstack;
1112 const char *param = NULL;
1113 int c;
1114
1115 obstack_init (&param_obstack);
1116 c = skip_white_space ();
1117
1118 if (c != '"')
1119 {
1120 complain (_("invalid %s declaration"), from);
1121 ungetc (c, finput);
1122 skip_to_char ('%');
1123 return NULL;
1124 }
1125
2648a72d
AD
1126 while ((c = literalchar ()) != '"')
1127 obstack_1grow (&param_obstack, c);
a870c567 1128
426cf563
MA
1129 obstack_1grow (&param_obstack, '\0');
1130 param = obstack_finish (&param_obstack);
1131
1132 if (c != '"' || strlen (param) == 0)
1133 {
1134 complain (_("invalid %s declaration"), from);
1135 if (c != '"')
1136 ungetc (c, finput);
1137 skip_to_char ('%');
1138 return NULL;
1139 }
1140
1141 return param;
1142}
1143
2ba3b73c
MA
1144/*----------------------------------.
1145| Parse what comes after %skeleton. |
1146`----------------------------------*/
1147
a870c567 1148static void
2ba3b73c
MA
1149parse_skel_decl (void)
1150{
426cf563 1151 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
1152}
1153
a70083a3
AD
1154/*----------------------------------------------------------------.
1155| Read from finput until `%%' is seen. Discard the `%%'. Handle |
1156| any `%' declarations, and copy the contents of any `%{ ... %}' |
0dd1580a 1157| groups to PRE_PROLOGUE_OBSTACK or POST_PROLOGUE_OBSTACK. |
a70083a3 1158`----------------------------------------------------------------*/
1ff442ca 1159
4a120d45 1160static void
a70083a3 1161read_declarations (void)
1ff442ca 1162{
a70083a3 1163 for (;;)
1ff442ca 1164 {
951366c1 1165 int c = skip_white_space ();
1ff442ca 1166
a70083a3
AD
1167 if (c == '%')
1168 {
951366c1 1169 token_t tok = parse_percent_token ();
1ff442ca 1170
a70083a3 1171 switch (tok)
943819bf 1172 {
511e79b3 1173 case tok_two_percents:
a70083a3 1174 return;
1ff442ca 1175
511e79b3 1176 case tok_percent_left_curly:
0dd1580a
RA
1177 if (!typed)
1178 copy_definition (&pre_prologue_obstack);
1179 else
1180 copy_definition (&post_prologue_obstack);
a70083a3 1181 break;
1ff442ca 1182
511e79b3 1183 case tok_token:
d7020c20 1184 parse_token_decl (token_sym, nterm_sym);
a70083a3 1185 break;
1ff442ca 1186
511e79b3 1187 case tok_nterm:
d7020c20 1188 parse_token_decl (nterm_sym, token_sym);
a70083a3 1189 break;
1ff442ca 1190
511e79b3 1191 case tok_type:
a70083a3
AD
1192 parse_type_decl ();
1193 break;
1ff442ca 1194
511e79b3 1195 case tok_start:
a70083a3
AD
1196 parse_start_decl ();
1197 break;
118fb205 1198
511e79b3 1199 case tok_union:
a70083a3
AD
1200 parse_union_decl ();
1201 break;
1ff442ca 1202
511e79b3 1203 case tok_expect:
a70083a3
AD
1204 parse_expect_decl ();
1205 break;
6deb4447 1206
511e79b3 1207 case tok_thong:
a70083a3
AD
1208 parse_thong_decl ();
1209 break;
d7020c20 1210
511e79b3 1211 case tok_left:
d7020c20 1212 parse_assoc_decl (left_assoc);
a70083a3 1213 break;
1ff442ca 1214
511e79b3 1215 case tok_right:
d7020c20 1216 parse_assoc_decl (right_assoc);
a70083a3 1217 break;
1ff442ca 1218
511e79b3 1219 case tok_nonassoc:
d7020c20 1220 parse_assoc_decl (non_assoc);
a70083a3 1221 break;
1ff442ca 1222
b6610515 1223 case tok_define:
11d82f03 1224 parse_muscle_decl ();
b6610515 1225 break;
342b8b6e 1226
2ba3b73c
MA
1227 case tok_skel:
1228 parse_skel_decl ();
1229 break;
b6610515 1230
511e79b3 1231 case tok_noop:
a70083a3 1232 break;
1ff442ca 1233
951366c1
AD
1234 case tok_stropt:
1235 case tok_intopt:
1236 case tok_obsolete:
72a23c97 1237 assert (0);
951366c1
AD
1238 break;
1239
e0c40012 1240 case tok_illegal:
a70083a3
AD
1241 default:
1242 complain (_("unrecognized: %s"), token_buffer);
1243 skip_to_char ('%');
1244 }
1245 }
1246 else if (c == EOF)
1247 fatal (_("no input grammar"));
1248 else
1249 {
ff4a34be
AD
1250 char buf[] = "c";
1251 buf[0] = c;
1252 complain (_("unknown character: %s"), quote (buf));
a70083a3 1253 skip_to_char ('%');
1ff442ca 1254 }
1ff442ca 1255 }
1ff442ca 1256}
a70083a3
AD
1257\f
1258/*-------------------------------------------------------------------.
1259| Assuming that a `{' has just been seen, copy everything up to the |
1260| matching `}' into the actions file. STACK_OFFSET is the number of |
1261| values in the current rule so far, which says where to find `$0' |
1262| with respect to the top of the stack. |
14d293ac 1263| |
11e2beca
AD
1264| This routine is used both for actions and guards. Only |
1265| ACTION_OBSTACK is used, but this is fine, since we use only |
14d293ac 1266| pointers to relevant portions inside this obstack. |
a70083a3 1267`-------------------------------------------------------------------*/
1ff442ca 1268
4a120d45 1269static void
14d293ac 1270parse_braces (symbol_list *rule, int stack_offset)
1ff442ca 1271{
a70083a3 1272 int c;
a70083a3 1273 int count;
1ff442ca 1274
1ff442ca 1275 count = 1;
1ff442ca
NF
1276 while (count > 0)
1277 {
14d293ac
AD
1278 while ((c = getc (finput)) != '}')
1279 switch (c)
1280 {
1281 case '\n':
1282 obstack_1grow (&action_obstack, c);
f3849179 1283 ++lineno;
14d293ac 1284 break;
1ff442ca 1285
14d293ac
AD
1286 case '{':
1287 obstack_1grow (&action_obstack, c);
f3849179 1288 ++count;
14d293ac 1289 break;
1ff442ca 1290
14d293ac
AD
1291 case '\'':
1292 case '"':
1293 copy_string (finput, &action_obstack, c);
1294 break;
1ff442ca 1295
14d293ac
AD
1296 case '/':
1297 copy_comment (finput, &action_obstack);
1298 break;
1ff442ca 1299
14d293ac
AD
1300 case '$':
1301 copy_dollar (finput, &action_obstack,
1302 rule, stack_offset);
1303 break;
1ff442ca 1304
14d293ac
AD
1305 case '@':
1306 copy_at (finput, &action_obstack,
1307 stack_offset);
1308 break;
a70083a3 1309
14d293ac
AD
1310 case EOF:
1311 fatal (_("unmatched %s"), "`{'");
a70083a3 1312
14d293ac
AD
1313 default:
1314 obstack_1grow (&action_obstack, c);
1315 }
a70083a3 1316
14d293ac 1317 /* Above loop exits when C is '}'. */
a70083a3 1318 if (--count)
2b25d624 1319 obstack_1grow (&action_obstack, c);
a70083a3
AD
1320 }
1321
3f96f4dc 1322 obstack_1grow (&action_obstack, '\0');
a70083a3 1323}
14d293ac 1324
a70083a3
AD
1325
1326static void
14d293ac 1327parse_action (symbol_list *rule, int stack_offset)
a70083a3 1328{
14d293ac
AD
1329 rule->action_line = lineno;
1330 parse_braces (rule, stack_offset);
1331 rule->action = obstack_finish (&action_obstack);
1332}
a70083a3 1333
a70083a3 1334
14d293ac
AD
1335static void
1336parse_guard (symbol_list *rule, int stack_offset)
1337{
1338 token_t t = lex ();
1339 if (t != tok_left_curly)
1340 complain (_("invalid %s declaration"), "%guard");
f499b062 1341 rule->guard_line = lineno;
14d293ac
AD
1342 parse_braces (rule, stack_offset);
1343 rule->guard = obstack_finish (&action_obstack);
1ff442ca 1344}
14d293ac 1345
a70083a3
AD
1346\f
1347
a70083a3
AD
1348/*-------------------------------------------------------------------.
1349| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1350| with the user's names. |
1351`-------------------------------------------------------------------*/
1ff442ca 1352
db8837cb 1353static symbol_t *
118fb205 1354gensym (void)
1ff442ca 1355{
274d42ce
AD
1356 /* Incremented for each generated symbol */
1357 static int gensym_count = 0;
1358 static char buf[256];
1359
db8837cb 1360 symbol_t *sym;
1ff442ca 1361
274d42ce
AD
1362 sprintf (buf, "@%d", ++gensym_count);
1363 token_buffer = buf;
a70083a3 1364 sym = getsym (token_buffer);
d7020c20 1365 sym->class = nterm_sym;
d9b739c3 1366 sym->number = nvars++;
36281465 1367 return sym;
1ff442ca 1368}
a70083a3 1369\f
107f7dfb
AD
1370/*-------------------------------------------------------------------.
1371| Parse the input grammar into a one symbol_list structure. Each |
1372| rule is represented by a sequence of symbols: the left hand side |
1373| followed by the contents of the right hand side, followed by a |
1374| null pointer instead of a symbol to terminate the rule. The next |
1375| symbol is the lhs of the following rule. |
1376| |
1377| All guards and actions are copied out to the appropriate files, |
1378| labelled by the rule number they apply to. |
1379| |
1380| Bison used to allow some %directives in the rules sections, but |
1381| this is no longer consider appropriate: (i) the documented grammar |
1382| doesn't claim it, (ii), it would promote bad style, (iii), error |
1383| recovery for %directives consists in skipping the junk until a `%' |
1384| is seen and helrp synchronizing. This scheme is definitely wrong |
1385| in the rules section. |
1386`-------------------------------------------------------------------*/
1ff442ca 1387
4a120d45 1388static void
118fb205 1389readgram (void)
1ff442ca 1390{
f17bcd1f 1391 token_t t;
db8837cb 1392 symbol_t *lhs = NULL;
107f7dfb
AD
1393 symbol_list *p = NULL;
1394 symbol_list *p1 = NULL;
1ff442ca 1395
ff4a34be
AD
1396 /* Points to first symbol_list of current rule. its symbol is the
1397 lhs of the rule. */
107f7dfb 1398 symbol_list *crule = NULL;
ff4a34be 1399 /* Points to the symbol_list preceding crule. */
107f7dfb 1400 symbol_list *crule1 = NULL;
1ff442ca 1401
a70083a3 1402 t = lex ();
1ff442ca 1403
511e79b3 1404 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1405 if (t == tok_identifier || t == tok_bar)
1406 {
1407 int action_flag = 0;
1408 /* Number of symbols in rhs of this rule so far */
1409 int rulelength = 0;
1410 int xactions = 0; /* JF for error checking */
db8837cb 1411 symbol_t *first_rhs = 0;
107f7dfb
AD
1412
1413 if (t == tok_identifier)
1414 {
1415 lhs = symval;
1416
1417 if (!start_flag)
1418 {
1419 startval = lhs;
1420 start_flag = 1;
1421 }
1ff442ca 1422
107f7dfb
AD
1423 t = lex ();
1424 if (t != tok_colon)
1425 {
1426 complain (_("ill-formed rule: initial symbol not followed by colon"));
1427 unlex (t);
1428 }
1429 }
1430
1431 if (nrules == 0 && t == tok_bar)
1432 {
1433 complain (_("grammar starts with vertical bar"));
1434 lhs = symval; /* BOGUS: use a random symval */
1435 }
1436 /* start a new rule and record its lhs. */
1437
f3849179 1438 ++nrules;
5123689b 1439 ++nritems;
107f7dfb
AD
1440
1441 p = symbol_list_new (lhs);
1442
1443 crule1 = p1;
1444 if (p1)
1445 p1->next = p;
1446 else
1447 grammar = p;
1ff442ca 1448
107f7dfb
AD
1449 p1 = p;
1450 crule = p;
1ff442ca 1451
107f7dfb 1452 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1453
107f7dfb
AD
1454 if (lhs->class == unknown_sym)
1455 {
1456 lhs->class = nterm_sym;
d9b739c3 1457 lhs->number = nvars;
f3849179 1458 ++nvars;
107f7dfb
AD
1459 }
1460 else if (lhs->class == token_sym)
1461 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1462
107f7dfb 1463 /* read the rhs of the rule. */
1ff442ca 1464
107f7dfb
AD
1465 for (;;)
1466 {
1467 t = lex ();
1468 if (t == tok_prec)
1469 {
1470 t = lex ();
1471 crule->ruleprec = symval;
1472 t = lex ();
1473 }
1474
1475 if (!(t == tok_identifier || t == tok_left_curly))
1476 break;
1ff442ca 1477
107f7dfb
AD
1478 /* If next token is an identifier, see if a colon follows it.
1479 If one does, exit this rule now. */
1480 if (t == tok_identifier)
1481 {
db8837cb 1482 symbol_t *ssave;
107f7dfb
AD
1483 token_t t1;
1484
1485 ssave = symval;
1486 t1 = lex ();
1487 unlex (t1);
1488 symval = ssave;
1489 if (t1 == tok_colon)
e5352bc7 1490 {
fff9bf0b 1491 warn (_("previous rule lacks an ending `;'"));
e5352bc7
AD
1492 break;
1493 }
107f7dfb
AD
1494
1495 if (!first_rhs) /* JF */
1496 first_rhs = symval;
1497 /* Not followed by colon =>
1498 process as part of this rule's rhs. */
1499 }
1500
1501 /* If we just passed an action, that action was in the middle
1502 of a rule, so make a dummy rule to reduce it to a
1503 non-terminal. */
1504 if (action_flag)
1505 {
1506 /* Since the action was written out with this rule's
1507 number, we must give the new rule this number by
1508 inserting the new rule before it. */
1509
1510 /* Make a dummy nonterminal, a gensym. */
db8837cb 1511 symbol_t *sdummy = gensym ();
107f7dfb
AD
1512
1513 /* Make a new rule, whose body is empty, before the
1514 current one, so that the action just read can
1515 belong to it. */
f3849179 1516 ++nrules;
5123689b 1517 ++nritems;
107f7dfb
AD
1518 p = symbol_list_new (sdummy);
1519 /* Attach its lineno to that of the host rule. */
1520 p->line = crule->line;
82c035a8
AD
1521 /* Move the action from the host rule to this one. */
1522 p->action = crule->action;
1523 p->action_line = crule->action_line;
1524 crule->action = NULL;
1525
107f7dfb
AD
1526 if (crule1)
1527 crule1->next = p;
1528 else
1529 grammar = p;
1530 /* End of the rule. */
1531 crule1 = symbol_list_new (NULL);
1532 crule1->next = crule;
1533
1534 p->next = crule1;
1535
1536 /* Insert the dummy generated by that rule into this
1537 rule. */
5123689b 1538 ++nritems;
107f7dfb
AD
1539 p = symbol_list_new (sdummy);
1540 p1->next = p;
1541 p1 = p;
1542
1543 action_flag = 0;
1544 }
1545
1546 if (t == tok_identifier)
1547 {
5123689b 1548 ++nritems;
107f7dfb
AD
1549 p = symbol_list_new (symval);
1550 p1->next = p;
1551 p1 = p;
1552 }
1553 else /* handle an action. */
1554 {
14d293ac 1555 parse_action (crule, rulelength);
107f7dfb 1556 action_flag = 1;
f3849179 1557 ++xactions; /* JF */
107f7dfb 1558 }
f3849179 1559 ++rulelength;
107f7dfb
AD
1560 } /* end of read rhs of rule */
1561
1562 /* Put an empty link in the list to mark the end of this rule */
1563 p = symbol_list_new (NULL);
1564 p1->next = p;
1565 p1 = p;
1566
1567 if (t == tok_prec)
1568 {
1569 complain (_("two @prec's in a row"));
1570 t = lex ();
1571 crule->ruleprec = symval;
1572 t = lex ();
1573 }
f499b062 1574
107f7dfb
AD
1575 if (t == tok_guard)
1576 {
1577 if (!semantic_parser)
1578 complain (_("%%guard present but %%semantic_parser not specified"));
1ff442ca 1579
14d293ac 1580 parse_guard (crule, rulelength);
a70083a3 1581 t = lex ();
107f7dfb 1582 }
f499b062
AD
1583
1584 if (t == tok_left_curly)
107f7dfb
AD
1585 {
1586 /* This case never occurs -wjh */
1587 if (action_flag)
1588 complain (_("two actions at end of one rule"));
14d293ac 1589 parse_action (crule, rulelength);
107f7dfb 1590 action_flag = 1;
f3849179 1591 ++xactions; /* -wjh */
107f7dfb
AD
1592 t = lex ();
1593 }
1594 /* If $$ is being set in default way, report if any type
1595 mismatch. */
1596 else if (!xactions
1597 && first_rhs && lhs->type_name != first_rhs->type_name)
1598 {
1599 if (lhs->type_name == 0
1600 || first_rhs->type_name == 0
1601 || strcmp (lhs->type_name, first_rhs->type_name))
1602 complain (_("type clash (`%s' `%s') on default action"),
1603 lhs->type_name ? lhs->type_name : "",
1604 first_rhs->type_name ? first_rhs->type_name : "");
1605 }
1606 /* Warn if there is no default for $$ but we need one. */
1607 else if (!xactions && !first_rhs && lhs->type_name != 0)
1608 complain (_("empty rule for typed nonterminal, and no action"));
bfcf1f3a 1609 if (t == tok_two_percents || t == tok_eof)
fff9bf0b 1610 warn (_("previous rule lacks an ending `;'"));
107f7dfb 1611 if (t == tok_semicolon)
a70083a3 1612 t = lex ();
107f7dfb
AD
1613 }
1614 else
1615 {
1616 complain (_("invalid input: %s"), quote (token_buffer));
1617 t = lex ();
1618 }
943819bf 1619
b68e7744
AD
1620 /* grammar has been read. Do some checking */
1621
1622 if (nrules == 0)
1623 fatal (_("no rules in the input grammar"));
1624
1625 /* Report any undefined symbols and consider them nonterminals. */
db8837cb 1626 symbols_do (symbol_check_defined, NULL);
b68e7744 1627
ff442794
AD
1628 /* Insert the initial rule, which line is that of the first rule
1629 (not that of the start symbol):
30171f79
AD
1630
1631 axiom: %start EOF. */
1632 p = symbol_list_new (axiom);
ff442794 1633 p->line = grammar->line;
30171f79
AD
1634 p->next = symbol_list_new (startval);
1635 p->next->next = symbol_list_new (eoftoken);
1636 p->next->next->next = symbol_list_new (NULL);
1637 p->next->next->next->next = grammar;
1638 nrules += 1;
5123689b 1639 nritems += 3;
30171f79
AD
1640 grammar = p;
1641 startval = axiom;
1ff442ca 1642
62a3e4f0 1643 if (nsyms > SHRT_MAX)
a0f6b076 1644 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
62a3e4f0 1645 SHRT_MAX);
1ff442ca 1646
72a23c97 1647 assert (nsyms == ntokens + nvars);
1ff442ca 1648}
ff48177d
MA
1649
1650/* At the end of the grammar file, some C source code must
63c2d5de 1651 be stored. It is going to be associated to the epilogue
ff48177d
MA
1652 directive. */
1653static void
1654read_additionnal_code (void)
1655{
9101a310 1656 int c;
63c2d5de 1657 struct obstack el_obstack;
342b8b6e 1658
63c2d5de 1659 obstack_init (&el_obstack);
ff48177d 1660
710ddc4f
MA
1661 if (!no_lines_flag)
1662 {
1663 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1664 lineno, quotearg_style (c_quoting_style,
b7c49edf 1665 muscle_find ("filename")));
710ddc4f
MA
1666 }
1667
ff48177d 1668 while ((c = getc (finput)) != EOF)
63c2d5de 1669 obstack_1grow (&el_obstack, c);
342b8b6e 1670
63c2d5de 1671 obstack_1grow (&el_obstack, 0);
11d82f03 1672 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1673}
1674
a70083a3 1675\f
037ca2f1
AD
1676/*------------------------------------------------------------------.
1677| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1678| number. |
1679`------------------------------------------------------------------*/
1680
1681static void
1682token_translations_init (void)
1683{
23c5a174 1684 int num_256_available_p = TRUE;
037ca2f1
AD
1685 int i;
1686
23c5a174
AD
1687 /* Find the highest user token number, and whether 256, the POSIX
1688 preferred user token number for the error token, is used. */
1689 max_user_token_number = 0;
1690 for (i = 0; i < ntokens; ++i)
1691 {
1692 symbol_t *this = symbols[i];
1693 if (this->user_token_number != SUNDEF)
1694 {
1695 if (this->user_token_number > max_user_token_number)
1696 max_user_token_number = this->user_token_number;
1697 if (this->user_token_number == 256)
1698 num_256_available_p = FALSE;
1699 }
1700 }
1701
1702 /* If 256 is not used, assign it to error, to follow POSIX. */
1703 if (num_256_available_p && errtoken->user_token_number == SUNDEF)
1704 errtoken->user_token_number = 256;
1705
1706 /* Set the missing user numbers. */
1707 if (max_user_token_number < 256)
1708 max_user_token_number = 256;
1709
72a23c97
AD
1710 for (i = 0; i < ntokens; ++i)
1711 {
db8837cb 1712 symbol_t *this = symbols[i];
72a23c97 1713 if (this->user_token_number == SUNDEF)
23c5a174 1714 this->user_token_number = ++max_user_token_number;
72a23c97
AD
1715 if (this->user_token_number > max_user_token_number)
1716 max_user_token_number = this->user_token_number;
72a23c97
AD
1717 }
1718
680e8701 1719 token_translations = XCALLOC (token_number_t, max_user_token_number + 1);
037ca2f1
AD
1720
1721 /* Initialize all entries for literal tokens to 2, the internal
1722 token number for $undefined., which represents all invalid
1723 inputs. */
18bcecb0 1724 for (i = 0; i < max_user_token_number + 1; i++)
007a50a4 1725 token_translations[i] = undeftoken->number;
db8837cb 1726 symbols_do (symbol_translation, NULL);
037ca2f1
AD
1727}
1728
1729
0e78e603
AD
1730/*----------------------------------------------------------------.
1731| Assign symbol numbers, and write definition of token names into |
1732| FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1733`----------------------------------------------------------------*/
1ff442ca 1734
4a120d45 1735static void
118fb205 1736packsymbols (void)
1ff442ca 1737{
db8837cb 1738 symbols = XCALLOC (symbol_t *, nsyms);
1ff442ca 1739
db8837cb
AD
1740 symbols_do (symbol_check_alias_consistence, NULL);
1741 symbols_do (symbol_pack, NULL);
1ff442ca 1742
037ca2f1 1743 token_translations_init ();
1ff442ca 1744
e3f1699f
AD
1745 if (startval->class == unknown_sym)
1746 fatal (_("the start symbol %s is undefined"), startval->tag);
1747 else if (startval->class == token_sym)
1748 fatal (_("the start symbol %s is a token"), startval->tag);
1749
d9b739c3 1750 start_symbol = startval->number;
e3f1699f
AD
1751}
1752
1753
a70083a3
AD
1754/*---------------------------------------------------------------.
1755| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 1756| RITEM. |
a70083a3 1757`---------------------------------------------------------------*/
1ff442ca 1758
4a120d45 1759static void
118fb205 1760packgram (void)
1ff442ca 1761{
a70083a3
AD
1762 int itemno;
1763 int ruleno;
1764 symbol_list *p;
1ff442ca 1765
62a3e4f0 1766 ritem = XCALLOC (item_number_t, nritems + 1);
1a2b5d37 1767 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1768
1769 itemno = 0;
1770 ruleno = 1;
1771
1772 p = grammar;
1773 while (p)
1774 {
db8837cb 1775 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 1776 rules[ruleno].user_number = ruleno;
c3b407f4 1777 rules[ruleno].number = ruleno;
bba97eb2 1778 rules[ruleno].lhs = p->sym;
99013900 1779 rules[ruleno].rhs = ritem + itemno;
1a2b5d37
AD
1780 rules[ruleno].line = p->line;
1781 rules[ruleno].useful = TRUE;
1782 rules[ruleno].action = p->action;
1783 rules[ruleno].action_line = p->action_line;
1784 rules[ruleno].guard = p->guard;
1785 rules[ruleno].guard_line = p->guard_line;
1ff442ca
NF
1786
1787 p = p->next;
1788 while (p && p->sym)
1789 {
5fbb0954
AD
1790 /* item_number_t = token_number_t.
1791 But the former needs to contain more: negative rule numbers. */
1792 ritem[itemno++] = token_number_as_item_number (p->sym->number);
1ff442ca
NF
1793 /* A rule gets by default the precedence and associativity
1794 of the last token in it. */
d7020c20 1795 if (p->sym->class == token_sym)
03b31c0c 1796 rules[ruleno].prec = p->sym;
a70083a3
AD
1797 if (p)
1798 p = p->next;
1ff442ca
NF
1799 }
1800
1801 /* If this rule has a %prec,
a70083a3 1802 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1803 if (ruleprec)
1804 {
03b31c0c
AD
1805 rules[ruleno].precsym = ruleprec;
1806 rules[ruleno].prec = ruleprec;
1ff442ca 1807 }
1ff442ca 1808 ritem[itemno++] = -ruleno;
f3849179 1809 ++ruleno;
1ff442ca 1810
a70083a3
AD
1811 if (p)
1812 p = p->next;
1ff442ca
NF
1813 }
1814
1815 ritem[itemno] = 0;
5123689b 1816 assert (itemno == nritems);
3067fbef
AD
1817
1818 if (trace_flag)
1819 ritem_print (stderr);
1ff442ca 1820}
a70083a3
AD
1821\f
1822/*-------------------------------------------------------------------.
1823| Read in the grammar specification and record it in the format |
ea5607fd 1824| described in gram.h. All guards are copied into the GUARD_OBSTACK |
8c7ebe49
AD
1825| and all actions into ACTION_OBSTACK, in each case forming the body |
1826| of a C function (YYGUARD or YYACTION) which contains a switch |
1827| statement to decide which guard or action to execute. |
a70083a3
AD
1828`-------------------------------------------------------------------*/
1829
1830void
1831reader (void)
1832{
342b8b6e 1833 lex_init ();
a70083a3
AD
1834 lineno = 1;
1835
11d82f03
MA
1836 /* Initialize the muscle obstack. */
1837 obstack_init (&muscle_obstack);
82e236e2 1838
a70083a3 1839 /* Initialize the symbol table. */
db8837cb 1840 symbols_new ();
b6610515 1841
30171f79
AD
1842 /* Construct the axiom symbol. */
1843 axiom = getsym ("$axiom");
1844 axiom->class = nterm_sym;
d9b739c3 1845 axiom->number = nvars++;
30171f79 1846
a70083a3
AD
1847 /* Construct the error token */
1848 errtoken = getsym ("error");
d7020c20 1849 errtoken->class = token_sym;
72a23c97 1850 errtoken->number = ntokens++;
b6610515 1851
a70083a3
AD
1852 /* Construct a token that represents all undefined literal tokens.
1853 It is always token number 2. */
1854 undeftoken = getsym ("$undefined.");
d7020c20 1855 undeftoken->class = token_sym;
72a23c97 1856 undeftoken->number = ntokens++;
a70083a3 1857
331dbc1b
AD
1858 /* Initialize the obstacks. */
1859 obstack_init (&action_obstack);
331dbc1b 1860 obstack_init (&output_obstack);
0dd1580a
RA
1861 obstack_init (&pre_prologue_obstack);
1862 obstack_init (&post_prologue_obstack);
331dbc1b
AD
1863
1864 finput = xfopen (infile, "r");
1865
896fe5c1
AD
1866 /* Read the declaration section. Copy %{ ... %} groups to
1867 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1868 etc. found there. */
a70083a3 1869 read_declarations ();
b7c49edf
AD
1870
1871 /* If the user did not define her EOFTOKEN, do it now. */
1872 if (!eoftoken)
1873 {
1874 eoftoken = getsym ("$");
1875 eoftoken->class = token_sym;
72a23c97 1876 eoftoken->number = 0;
b7c49edf
AD
1877 /* Value specified by POSIX. */
1878 eoftoken->user_token_number = 0;
1879 }
1880
a70083a3
AD
1881 /* Read in the grammar, build grammar in list form. Write out
1882 guards and actions. */
1883 readgram ();
ff48177d
MA
1884 /* Some C code is given at the end of the grammar file. */
1885 read_additionnal_code ();
b0c4483e 1886
331dbc1b
AD
1887 lex_free ();
1888 xfclose (finput);
1889
a70083a3
AD
1890 /* Assign the symbols their symbol numbers. Write #defines for the
1891 token symbols into FDEFINES if requested. */
1892 packsymbols ();
93ede233 1893
a70083a3
AD
1894 /* Convert the grammar into the format described in gram.h. */
1895 packgram ();
8419d367
AD
1896
1897 /* The grammar as a symbol_list is no longer needed. */
1898 LIST_FREE (symbol_list, grammar);
a70083a3 1899}
76514394
AD
1900
1901void
1902grammar_free (void)
1903{
1904 XFREE (ritem);
1905 free (rules + 1);
1906 /* Free the symbol table data structure. */
db8837cb 1907 symbols_free ();
76514394 1908}