]> git.saurik.com Git - bison.git/blame - src/reader.c
* data/m4sugar/m4sugar.m4: Update from CVS Autoconf.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
1ff442ca 37
a70083a3
AD
38typedef struct symbol_list
39{
40 struct symbol_list *next;
db8837cb 41 symbol_t *sym;
b29b2ed5 42 int line;
f499b062 43
3f96f4dc
AD
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
f499b062 47
db8837cb 48 symbol_t *ruleprec;
d945f5cd 49} symbol_list;
118fb205 50
1ff442ca 51int lineno;
280a38c3
AD
52static symbol_list *grammar = NULL;
53static int start_flag = 0;
db8837cb 54static symbol_t *startval = NULL;
1ff442ca
NF
55
56/* Nonzero if components of semantic values are used, implying
57 they must be unions. */
58static int value_components_used;
59
d7020c20 60/* Nonzero if %union has been seen. */
280a38c3 61static int typed = 0;
1ff442ca 62
d7020c20 63/* Incremented for each %left, %right or %nonassoc seen */
280a38c3 64static int lastprec = 0;
1ff442ca 65
db8837cb
AD
66symbol_t *errtoken = NULL;
67symbol_t *undeftoken = NULL;
68symbol_t *eoftoken = NULL;
69symbol_t *axiom = NULL;
b29b2ed5 70
6255b435 71static symbol_list *
db8837cb 72symbol_list_new (symbol_t *sym)
b29b2ed5
AD
73{
74 symbol_list *res = XMALLOC (symbol_list, 1);
75 res->next = NULL;
76 res->sym = sym;
77 res->line = lineno;
d945f5cd
AD
78 res->action = NULL;
79 res->action_line = 0;
b29b2ed5
AD
80 res->ruleprec = NULL;
81 return res;
82}
83
72a23c97 84/*------------------------.
db8837cb 85| Operations on symbols. |
72a23c97
AD
86`------------------------*/
87
88
89/*-----------------------------------------------------------.
90| If THIS is not defined, report an error, and consider it a |
91| nonterminal. |
92`-----------------------------------------------------------*/
93
94static bool
db8837cb 95symbol_check_defined (symbol_t *this)
72a23c97
AD
96{
97 if (this->class == unknown_sym)
98 {
99 complain
100 (_("symbol %s is used, but is not defined as a token and has no rules"),
101 this->tag);
102 this->class = nterm_sym;
103 this->number = nvars++;
104 }
105
106 return TRUE;
107}
108
109
110/*-------------------------------------------------------------------.
111| Assign a symbol number, and write the definition of the token name |
112| into FDEFINES. Put in SYMBOLS. |
113`-------------------------------------------------------------------*/
114
115static bool
db8837cb 116symbol_make_alias (symbol_t *symbol, char *typename)
72a23c97
AD
117{
118 if (symval->alias)
119 warn (_("symbol `%s' used more than once as a literal string"),
120 symval->tag);
121 else if (symbol->alias)
122 warn (_("symbol `%s' given more than one literal string"),
123 symbol->tag);
124 else
125 {
126 symval->class = token_sym;
127 symval->type_name = typename;
128 symval->user_token_number = symbol->user_token_number;
129 symbol->user_token_number = SALIAS;
130 symval->alias = symbol;
131 symbol->alias = symval;
132 /* symbol and symval combined are only one symbol */
133 nsyms--;
134 ntokens--;
135 assert (ntokens == symbol->number || ntokens == symval->number);
136 symbol->number = symval->number =
137 (symval->number < symbol->number) ? symval->number : symbol->number;
138 }
139
140 return TRUE;
141}
142
143/*---------------------------------------------------------.
144| Check that THIS, and its alias, have same precedence and |
145| associativity. |
146`---------------------------------------------------------*/
147
148static bool
db8837cb 149symbol_check_alias_consistence (symbol_t *this)
72a23c97
AD
150{
151 /* Check only those who _are_ the aliases. */
152 if (this->alias && this->user_token_number == SALIAS)
153 {
154 if (this->prec != this->alias->prec)
155 {
156 if (this->prec != 0 && this->alias->prec != 0)
157 complain (_("conflicting precedences for %s and %s"),
158 this->tag, this->alias->tag);
159 if (this->prec != 0)
160 this->alias->prec = this->prec;
161 else
162 this->prec = this->alias->prec;
163 }
164
165 if (this->assoc != this->alias->assoc)
166 {
167 if (this->assoc != 0 && this->alias->assoc != 0)
168 complain (_("conflicting assoc values for %s and %s"),
169 this->tag, this->alias->tag);
170 if (this->assoc != 0)
171 this->alias->assoc = this->assoc;
172 else
173 this->assoc = this->alias->assoc;
174 }
175 }
176 return TRUE;
177}
178
179
180/*-------------------------------------------------------------------.
181| Assign a symbol number, and write the definition of the token name |
182| into FDEFINES. Put in SYMBOLS. |
183`-------------------------------------------------------------------*/
184
185static bool
db8837cb 186symbol_pack (symbol_t *this)
72a23c97 187{
72a23c97
AD
188 if (this->class == nterm_sym)
189 {
190 this->number += ntokens;
191 }
192 else if (this->alias)
193 {
194 /* This symbol and its alias are a single token defn.
195 Allocate a tokno, and assign to both check agreement of
196 prec and assoc fields and make both the same */
5fbb0954 197 if (this->number == NUMBER_UNDEFINED)
72a23c97
AD
198 {
199 if (this == eoftoken || this->alias == eoftoken)
200 this->number = this->alias->number = 0;
201 else
202 {
5fbb0954 203 assert (this->alias->number != NUMBER_UNDEFINED);
72a23c97
AD
204 this->number = this->alias->number;
205 }
206 }
207 /* Do not do processing below for SALIASs. */
208 if (this->user_token_number == SALIAS)
209 return TRUE;
210 }
211 else /* this->class == token_sym */
212 {
5fbb0954 213 assert (this->number != NUMBER_UNDEFINED);
72a23c97
AD
214 }
215
72a23c97
AD
216 symbols[this->number] = this;
217 return TRUE;
218}
219
220
221
222
223/*--------------------------------------------------.
224| Put THIS in TOKEN_TRANSLATIONS if it is a token. |
225`--------------------------------------------------*/
226
227static bool
db8837cb 228symbol_translation (symbol_t *this)
72a23c97 229{
72a23c97
AD
230 /* Non-terminal? */
231 if (this->class == token_sym
232 && this->user_token_number != SALIAS)
233 {
234 /* A token which translation has already been set? */
007a50a4 235 if (token_translations[this->user_token_number] != undeftoken->number)
72a23c97
AD
236 complain (_("tokens %s and %s both assigned number %d"),
237 symbols[token_translations[this->user_token_number]]->tag,
238 this->tag, this->user_token_number);
239
72a23c97
AD
240 token_translations[this->user_token_number] = this->number;
241 }
242
243 return TRUE;
244}
0d533154 245\f
a70083a3 246
0d533154
AD
247/*===================\
248| Low level lexing. |
249\===================*/
943819bf
RS
250
251static void
118fb205 252skip_to_char (int target)
943819bf
RS
253{
254 int c;
255 if (target == '\n')
a0f6b076 256 complain (_(" Skipping to next \\n"));
943819bf 257 else
a0f6b076 258 complain (_(" Skipping to next %c"), target);
943819bf
RS
259
260 do
0d533154 261 c = skip_white_space ();
943819bf 262 while (c != target && c != EOF);
a083fbbf 263 if (c != EOF)
0d533154 264 ungetc (c, finput);
943819bf
RS
265}
266
267
0d533154
AD
/*-------------------------------------------------------------------.
| Read a signed integer from STREAM and return its value.            |
|                                                                    |
| The accepted form is an optional `-' followed by decimal digits.   |
| The first character that is not part of the number is pushed back  |
| onto STREAM.  If there are no digits at all, 0 is returned.  A     |
| magnitude too large for an int saturates at INT_MAX instead of     |
| overflowing, which would be undefined behavior.                    |
`-------------------------------------------------------------------*/

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  if (c == '-')
    {
      negative = 1;
      c = getc (stream);
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit would exceed INT_MAX: clamp.  Once clamped,
	 this test stays true for every further digit.  */
      if (n > (INT_MAX - digit) / 10)
	n = INT_MAX;
      else
	n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a harmless no-op.  */
  ungetc (c, stream);

  return negative ? -n : n;
}
295\f
79282c5a
AD
296/*--------------------------------------------------------------.
297| Get the data type (alternative in the union) of the value for |
298| symbol N in rule RULE. |
299`--------------------------------------------------------------*/
300
301static char *
b29b2ed5 302get_type_name (int n, symbol_list *rule)
79282c5a
AD
303{
304 int i;
305 symbol_list *rp;
306
307 if (n < 0)
308 {
309 complain (_("invalid $ value"));
310 return NULL;
311 }
312
313 rp = rule;
314 i = 0;
315
316 while (i < n)
317 {
318 rp = rp->next;
319 if (rp == NULL || rp->sym == NULL)
320 {
321 complain (_("invalid $ value"));
322 return NULL;
323 }
f3849179 324 ++i;
79282c5a
AD
325 }
326
327 return rp->sym->type_name;
328}
329\f
2b7ed18a
RA
/*--------------------------------------------------------------------.
| Append the character C to OOUT.  `[' and `]' are not copied as is:  |
| they are replaced by the quadrigraphs `@<:@' and `@:>@'.            |
`--------------------------------------------------------------------*/

static inline void
copy_character (struct obstack *oout, int c)
{
  if (c == '[')
    {
      obstack_sgrow (oout, "@<:@");
    }
  else if (c == ']')
    {
      obstack_sgrow (oout, "@:>@");
    }
  else
    {
      obstack_1grow (oout, c);
    }
}
351
337bab46
AD
352/*------------------------------------------------------------.
353| Dump the string from FIN to OOUT if non null. MATCH is the |
354| delimiter of the string (either ' or "). |
355`------------------------------------------------------------*/
ae3c3164
AD
356
357static inline void
b6610515 358copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
ae3c3164
AD
359{
360 int c;
361
b6610515
RA
362 if (store)
363 obstack_1grow (oout, match);
8c7ebe49 364
4a120d45 365 c = getc (fin);
ae3c3164
AD
366
367 while (c != match)
368 {
369 if (c == EOF)
370 fatal (_("unterminated string at end of file"));
371 if (c == '\n')
372 {
a0f6b076 373 complain (_("unterminated string"));
4a120d45 374 ungetc (c, fin);
ae3c3164
AD
375 c = match; /* invent terminator */
376 continue;
377 }
378
2b7ed18a 379 copy_character (oout, c);
ae3c3164
AD
380
381 if (c == '\\')
382 {
4a120d45 383 c = getc (fin);
ae3c3164
AD
384 if (c == EOF)
385 fatal (_("unterminated string at end of file"));
2b7ed18a 386 copy_character (oout, c);
8c7ebe49 387
ae3c3164 388 if (c == '\n')
f3849179 389 ++lineno;
ae3c3164
AD
390 }
391
a70083a3 392 c = getc (fin);
ae3c3164
AD
393 }
394
b6610515
RA
395 if (store)
396 obstack_1grow (oout, c);
397}
398
/* Copy a string literal delimited by MATCH from FIN to OOUT, keeping
   the delimiters in the output.  This is copy_string2 with STORE
   set.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
406
b6610515
RA
/* Copy the longest run of alphanumeric characters and underscores
   from FIN to OOUT.  The first character that does not belong to the
   run is pushed back onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c;

  for (;;)
    {
      c = getc (fin);
      if (!isalnum (c) && c != '_')
	break;
      obstack_1grow (oout, c);
    }

  ungetc (c, fin);
}
ae3c3164 419
2666f928
AD
420
421/*------------------------------------------------------------------.
422| Dump the wannabee comment from IN to OOUT. In fact we just saw a |
423| `/', which might or might not be a comment. In any case, copy |
424| what we saw. |
425`------------------------------------------------------------------*/
ae3c3164
AD
426
427static inline void
2666f928 428copy_comment (FILE *fin, struct obstack *oout)
ae3c3164
AD
429{
430 int cplus_comment;
a70083a3 431 int ended;
550a72a3
AD
432 int c;
433
434 /* We read a `/', output it. */
2666f928 435 obstack_1grow (oout, '/');
550a72a3
AD
436
437 switch ((c = getc (fin)))
438 {
439 case '/':
440 cplus_comment = 1;
441 break;
442 case '*':
443 cplus_comment = 0;
444 break;
445 default:
446 ungetc (c, fin);
447 return;
448 }
ae3c3164 449
2666f928 450 obstack_1grow (oout, c);
550a72a3 451 c = getc (fin);
ae3c3164
AD
452
453 ended = 0;
454 while (!ended)
455 {
456 if (!cplus_comment && c == '*')
457 {
458 while (c == '*')
459 {
2666f928 460 obstack_1grow (oout, c);
550a72a3 461 c = getc (fin);
ae3c3164
AD
462 }
463
464 if (c == '/')
465 {
2666f928 466 obstack_1grow (oout, c);
ae3c3164
AD
467 ended = 1;
468 }
469 }
470 else if (c == '\n')
471 {
f3849179 472 ++lineno;
2666f928 473 obstack_1grow (oout, c);
ae3c3164
AD
474 if (cplus_comment)
475 ended = 1;
476 else
550a72a3 477 c = getc (fin);
ae3c3164
AD
478 }
479 else if (c == EOF)
480 fatal (_("unterminated comment"));
481 else
482 {
2b7ed18a 483 copy_character (oout, c);
550a72a3 484 c = getc (fin);
ae3c3164
AD
485 }
486 }
487}
488
489
82b6cb3f
AD
490/*-------------------------------------------------------------------.
491| FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
492| reference to this location. RULE_LENGTH is the number of values in |
493| the current rule so far, which says where to find `$0' with |
494| respect to the top of the stack. |
495`-------------------------------------------------------------------*/
1ff442ca 496
a70083a3 497static inline void
82b6cb3f 498copy_at (FILE *fin, struct obstack *oout, int rule_length)
1ff442ca 499{
82b6cb3f
AD
500 int c = getc (fin);
501 locations_flag = 1;
1ff442ca 502
a70083a3 503 if (c == '$')
1ff442ca 504 {
82b6cb3f 505 obstack_sgrow (oout, "]b4_lhs_location[");
a70083a3
AD
506 }
507 else if (isdigit (c) || c == '-')
508 {
509 int n;
1ff442ca 510
a70083a3
AD
511 ungetc (c, fin);
512 n = read_signed_integer (fin);
82b6cb3f 513 if (n > rule_length)
11e2beca
AD
514 complain (_("invalid value: %s%d"), "@", n);
515 else
82b6cb3f
AD
516 obstack_fgrow2 (oout, "]b4_rhs_location([%d], [%d])[",
517 rule_length, n);
1ff442ca 518 }
a70083a3 519 else
ff4a34be
AD
520 {
521 char buf[] = "@c";
522 buf[1] = c;
523 complain (_("%s is invalid"), quote (buf));
524 }
1ff442ca 525}
79282c5a
AD
526
527
82b6cb3f
AD
528/*------------------------------------------------------------------.
529| FIN is pointing to a wannabee semantic value (i.e., a `$'). |
530| |
531| Possible inputs: $[<TYPENAME>]($|integer) |
532| |
533| Output to OOUT a reference to this semantic value. RULE_LENGTH is |
534| the number of values in the current rule so far, which says where |
535| to find `$0' with respect to the top of the stack. |
536`------------------------------------------------------------------*/
79282c5a
AD
537
538static inline void
337bab46 539copy_dollar (FILE *fin, struct obstack *oout,
82b6cb3f 540 symbol_list *rule, int rule_length)
79282c5a
AD
541{
542 int c = getc (fin);
b0ce6046 543 const char *type_name = NULL;
79282c5a 544
f282676b 545 /* Get the type name if explicit. */
79282c5a
AD
546 if (c == '<')
547 {
f282676b 548 read_type_name (fin);
79282c5a
AD
549 type_name = token_buffer;
550 value_components_used = 1;
79282c5a
AD
551 c = getc (fin);
552 }
553
554 if (c == '$')
555 {
79282c5a
AD
556 if (!type_name)
557 type_name = get_type_name (0, rule);
79282c5a
AD
558 if (!type_name && typed)
559 complain (_("$$ of `%s' has no declared type"),
560 rule->sym->tag);
82b6cb3f
AD
561 if (!type_name)
562 type_name = "";
563 obstack_fgrow1 (oout,
564 "]b4_lhs_value([%s])[", type_name);
79282c5a
AD
565 }
566 else if (isdigit (c) || c == '-')
567 {
568 int n;
569 ungetc (c, fin);
570 n = read_signed_integer (fin);
571
82b6cb3f 572 if (n > rule_length)
11e2beca
AD
573 complain (_("invalid value: %s%d"), "$", n);
574 else
575 {
576 if (!type_name && n > 0)
577 type_name = get_type_name (n, rule);
11e2beca
AD
578 if (!type_name && typed)
579 complain (_("$%d of `%s' has no declared type"),
580 n, rule->sym->tag);
82b6cb3f
AD
581 if (!type_name)
582 type_name = "";
583 obstack_fgrow3 (oout, "]b4_rhs_value([%d], [%d], [%s])[",
584 rule_length, n, type_name);
11e2beca 585 }
79282c5a
AD
586 }
587 else
588 {
589 char buf[] = "$c";
590 buf[1] = c;
591 complain (_("%s is invalid"), quote (buf));
592 }
593}
a70083a3
AD
594\f
595/*-------------------------------------------------------------------.
596| Copy the contents of a `%{ ... %}' into the definitions file. The |
597| `%{' has already been read. Return after reading the `%}'. |
598`-------------------------------------------------------------------*/
1ff442ca 599
4a120d45 600static void
0dd1580a 601copy_definition (struct obstack *oout)
1ff442ca 602{
a70083a3 603 int c;
ae3c3164 604 /* -1 while reading a character if prev char was %. */
a70083a3 605 int after_percent;
1ff442ca 606
89cab50d 607 if (!no_lines_flag)
25b222fa 608 {
0dd1580a 609 obstack_fgrow2 (oout, muscle_find ("linef"),
342b8b6e 610 lineno, quotearg_style (c_quoting_style,
b7c49edf 611 muscle_find ("filename")));
25b222fa 612 }
1ff442ca
NF
613
614 after_percent = 0;
615
ae3c3164 616 c = getc (finput);
1ff442ca
NF
617
618 for (;;)
619 {
620 switch (c)
621 {
622 case '\n':
0dd1580a 623 obstack_1grow (oout, c);
f3849179 624 ++lineno;
1ff442ca
NF
625 break;
626
627 case '%':
a70083a3 628 after_percent = -1;
1ff442ca 629 break;
a083fbbf 630
1ff442ca
NF
631 case '\'':
632 case '"':
0dd1580a 633 copy_string (finput, oout, c);
1ff442ca
NF
634 break;
635
636 case '/':
0dd1580a 637 copy_comment (finput, oout);
1ff442ca
NF
638 break;
639
640 case EOF:
a70083a3 641 fatal ("%s", _("unterminated `%{' definition"));
1ff442ca
NF
642
643 default:
2b7ed18a 644 copy_character (oout, c);
1ff442ca
NF
645 }
646
a70083a3 647 c = getc (finput);
1ff442ca
NF
648
649 if (after_percent)
650 {
651 if (c == '}')
652 return;
0dd1580a 653 obstack_1grow (oout, '%');
1ff442ca
NF
654 }
655 after_percent = 0;
1ff442ca 656 }
1ff442ca
NF
657}
658
659
d7020c20
AD
660/*-------------------------------------------------------------------.
661| Parse what comes after %token or %nterm. For %token, WHAT_IS is |
662| token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
663| are reversed. |
664`-------------------------------------------------------------------*/
1ff442ca 665
4a120d45 666static void
d7020c20 667parse_token_decl (symbol_class what_is, symbol_class what_is_not)
1ff442ca 668{
342b8b6e
AD
669 token_t token = tok_undef;
670 char *typename = NULL;
1ff442ca 671
1e9798d5 672 /* The symbol being defined. */
db8837cb 673 symbol_t *symbol = NULL;
1e9798d5
AD
674
675 /* After `%token' and `%nterm', any number of symbols maybe be
676 defined. */
1ff442ca
NF
677 for (;;)
678 {
e6011337
JT
679 int tmp_char = ungetc (skip_white_space (), finput);
680
1e9798d5
AD
681 /* `%' (for instance from `%token', or from `%%' etc.) is the
682 only valid means to end this declaration. */
e6011337 683 if (tmp_char == '%')
1ff442ca 684 return;
e6011337 685 if (tmp_char == EOF)
a0f6b076 686 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 687
a70083a3 688 token = lex ();
511e79b3 689 if (token == tok_comma)
943819bf
RS
690 {
691 symbol = NULL;
692 continue;
693 }
511e79b3 694 if (token == tok_typename)
1ff442ca 695 {
95e36146 696 typename = xstrdup (token_buffer);
1ff442ca 697 value_components_used = 1;
943819bf
RS
698 symbol = NULL;
699 }
511e79b3 700 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
943819bf 701 {
db8837cb 702 symbol_make_alias (symbol, typename);
8e03724b 703 symbol = NULL;
1ff442ca 704 }
511e79b3 705 else if (token == tok_identifier)
1ff442ca
NF
706 {
707 int oldclass = symval->class;
943819bf 708 symbol = symval;
1ff442ca 709
943819bf 710 if (symbol->class == what_is_not)
a0f6b076 711 complain (_("symbol %s redefined"), symbol->tag);
943819bf 712 symbol->class = what_is;
d7020c20 713 if (what_is == nterm_sym && oldclass != nterm_sym)
d9b739c3 714 symbol->number = nvars++;
5fbb0954 715 if (what_is == token_sym && symbol->number == NUMBER_UNDEFINED)
bd02036a 716 symbol->number = ntokens++;
1ff442ca
NF
717
718 if (typename)
719 {
943819bf
RS
720 if (symbol->type_name == NULL)
721 symbol->type_name = typename;
a70083a3 722 else if (strcmp (typename, symbol->type_name) != 0)
a0f6b076 723 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
724 }
725 }
511e79b3 726 else if (symbol && token == tok_number)
a70083a3 727 {
943819bf 728 symbol->user_token_number = numval;
b7c49edf
AD
729 /* User defined EOF token? */
730 if (numval == 0)
72a23c97
AD
731 {
732 eoftoken = symbol;
733 eoftoken->number = 0;
734 /* It is always mapped to 0, so it was already counted in
735 NTOKENS. */
736 --ntokens;
737 }
a70083a3 738 }
1ff442ca 739 else
943819bf 740 {
a0f6b076 741 complain (_("`%s' is invalid in %s"),
b29b2ed5
AD
742 token_buffer,
743 (what_is == token_sym) ? "%token" : "%nterm");
a70083a3 744 skip_to_char ('%');
943819bf 745 }
1ff442ca
NF
746 }
747
748}
749
1ff442ca 750
d7020c20
AD
751/*------------------------------.
752| Parse what comes after %start |
753`------------------------------*/
1ff442ca 754
4a120d45 755static void
118fb205 756parse_start_decl (void)
1ff442ca
NF
757{
758 if (start_flag)
27821bff 759 complain (_("multiple %s declarations"), "%start");
511e79b3 760 if (lex () != tok_identifier)
27821bff 761 complain (_("invalid %s declaration"), "%start");
943819bf
RS
762 else
763 {
764 start_flag = 1;
765 startval = symval;
766 }
1ff442ca
NF
767}
768
a70083a3
AD
769/*-----------------------------------------------------------.
770| read in a %type declaration and record its information for |
771| get_type_name to access |
772`-----------------------------------------------------------*/
773
774static void
775parse_type_decl (void)
776{
a70083a3
AD
777 char *name;
778
511e79b3 779 if (lex () != tok_typename)
a70083a3
AD
780 {
781 complain ("%s", _("%type declaration has no <typename>"));
782 skip_to_char ('%');
783 return;
784 }
785
95e36146 786 name = xstrdup (token_buffer);
a70083a3
AD
787
788 for (;;)
789 {
f17bcd1f 790 token_t t;
a70083a3
AD
791 int tmp_char = ungetc (skip_white_space (), finput);
792
793 if (tmp_char == '%')
794 return;
795 if (tmp_char == EOF)
796 fatal (_("Premature EOF after %s"), token_buffer);
797
798 t = lex ();
799
800 switch (t)
1ff442ca
NF
801 {
802
511e79b3
AD
803 case tok_comma:
804 case tok_semicolon:
1ff442ca
NF
805 break;
806
511e79b3 807 case tok_identifier:
1ff442ca
NF
808 if (symval->type_name == NULL)
809 symval->type_name = name;
a70083a3 810 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 811 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
812
813 break;
814
815 default:
a0f6b076
AD
816 complain (_("invalid %%type declaration due to item: %s"),
817 token_buffer);
a70083a3 818 skip_to_char ('%');
1ff442ca
NF
819 }
820 }
821}
822
823
824
d7020c20
AD
825/*----------------------------------------------------------------.
826| Read in a %left, %right or %nonassoc declaration and record its |
827| information. |
828`----------------------------------------------------------------*/
1ff442ca 829
4a120d45 830static void
d7020c20 831parse_assoc_decl (associativity assoc)
1ff442ca 832{
a70083a3
AD
833 char *name = NULL;
834 int prev = 0;
1ff442ca 835
f3849179
AD
836 /* Assign a new precedence level, never 0. */
837 ++lastprec;
1ff442ca 838
1ff442ca
NF
839 for (;;)
840 {
f17bcd1f 841 token_t t;
e6011337 842 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 843
e6011337 844 if (tmp_char == '%')
1ff442ca 845 return;
e6011337 846 if (tmp_char == EOF)
a0f6b076 847 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 848
a70083a3 849 t = lex ();
1ff442ca
NF
850
851 switch (t)
852 {
511e79b3 853 case tok_typename:
95e36146 854 name = xstrdup (token_buffer);
1ff442ca
NF
855 break;
856
511e79b3 857 case tok_comma:
1ff442ca
NF
858 break;
859
511e79b3 860 case tok_identifier:
1ff442ca 861 if (symval->prec != 0)
a0f6b076 862 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
863 symval->prec = lastprec;
864 symval->assoc = assoc;
d7020c20 865 if (symval->class == nterm_sym)
a0f6b076 866 complain (_("symbol %s redefined"), symval->tag);
5fbb0954 867 if (symval->number == NUMBER_UNDEFINED)
72a23c97
AD
868 {
869 symval->number = ntokens++;
870 symval->class = token_sym;
871 }
1ff442ca 872 if (name)
a70083a3 873 { /* record the type, if one is specified */
1ff442ca
NF
874 if (symval->type_name == NULL)
875 symval->type_name = name;
a70083a3 876 else if (strcmp (name, symval->type_name) != 0)
a0f6b076 877 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
878 }
879 break;
880
511e79b3
AD
881 case tok_number:
882 if (prev == tok_identifier)
a70083a3 883 {
1ff442ca 884 symval->user_token_number = numval;
a70083a3
AD
885 }
886 else
887 {
72a23c97
AD
888 complain
889 (_("invalid text (%s) - number should be after identifier"),
890 token_buffer);
a70083a3
AD
891 skip_to_char ('%');
892 }
1ff442ca
NF
893 break;
894
511e79b3 895 case tok_semicolon:
1ff442ca
NF
896 return;
897
898 default:
a0f6b076 899 complain (_("unexpected item: %s"), token_buffer);
a70083a3 900 skip_to_char ('%');
1ff442ca
NF
901 }
902
903 prev = t;
1ff442ca
NF
904 }
905}
906
907
908
dd60faec 909/*--------------------------------------------------------------.
180d45ba
PB
910| Copy the union declaration into the stype muscle |
911| (and fdefines), where it is made into the definition of |
912| YYSTYPE, the type of elements of the parser value stack. |
dd60faec 913`--------------------------------------------------------------*/
1ff442ca 914
4a120d45 915static void
118fb205 916parse_union_decl (void)
1ff442ca 917{
a70083a3
AD
918 int c;
919 int count = 0;
428046f8 920 bool done = FALSE;
180d45ba 921 struct obstack union_obstack;
1ff442ca 922 if (typed)
27821bff 923 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
924
925 typed = 1;
926
642cb8f8 927 MUSCLE_INSERT_INT ("stype_line", lineno);
180d45ba
PB
928 obstack_init (&union_obstack);
929 obstack_sgrow (&union_obstack, "union");
1ff442ca 930
428046f8 931 while (!done)
1ff442ca 932 {
428046f8
AD
933 c = xgetc (finput);
934
342b8b6e
AD
935 /* If C contains '/', it is output by copy_comment (). */
936 if (c != '/')
2666f928 937 obstack_1grow (&union_obstack, c);
1ff442ca
NF
938
939 switch (c)
940 {
941 case '\n':
f3849179 942 ++lineno;
1ff442ca
NF
943 break;
944
945 case '/':
2666f928 946 copy_comment (finput, &union_obstack);
1ff442ca
NF
947 break;
948
1ff442ca 949 case '{':
f3849179 950 ++count;
1ff442ca
NF
951 break;
952
953 case '}':
428046f8 954 /* FIXME: Errr. How could this happen???. --akim */
1ff442ca 955 if (count == 0)
27821bff 956 complain (_("unmatched %s"), "`}'");
1ff442ca 957 count--;
428046f8
AD
958 if (!count)
959 done = TRUE;
960 break;
1ff442ca 961 }
1ff442ca 962 }
180d45ba 963
428046f8
AD
964 /* JF don't choke on trailing semi */
965 c = skip_white_space ();
966 if (c != ';')
967 ungetc (c, finput);
968 obstack_1grow (&union_obstack, 0);
969 muscle_insert ("stype", obstack_finish (&union_obstack));
1ff442ca
NF
970}
971
d7020c20
AD
972
973/*-------------------------------------------------------.
974| Parse the declaration %expect N which says to expect N |
975| shift-reduce conflicts. |
976`-------------------------------------------------------*/
1ff442ca 977
4a120d45 978static void
118fb205 979parse_expect_decl (void)
1ff442ca 980{
131e2fef 981 int c = skip_white_space ();
1ff442ca
NF
982 ungetc (c, finput);
983
131e2fef 984 if (!isdigit (c))
79282c5a 985 complain (_("argument of %%expect is not an integer"));
131e2fef
AD
986 else
987 expected_conflicts = read_signed_integer (finput);
1ff442ca
NF
988}
989
a70083a3
AD
990
991/*-------------------------------------------------------------------.
992| Parse what comes after %thong. the full syntax is |
993| |
994| %thong <type> token number literal |
995| |
996| the <type> or number may be omitted. The number specifies the |
997| user_token_number. |
998| |
999| Two symbols are entered in the table, one for the token symbol and |
1000| one for the literal. Both are given the <type>, if any, from the |
1001| declaration. The ->user_token_number of the first is SALIAS and |
1002| the ->user_token_number of the second is set to the number, if |
1003| any, from the declaration. The two symbols are linked via |
1004| pointers in their ->alias fields. |
1005| |
1006| During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
1007| only the literal string is retained it is the literal string that |
1008| is output to yytname |
1009`-------------------------------------------------------------------*/
1010
1011static void
1012parse_thong_decl (void)
7b306f52 1013{
f17bcd1f 1014 token_t token;
db8837cb 1015 symbol_t *symbol;
a70083a3 1016 char *typename = 0;
6b7e85b9 1017 int usrtoknum = SUNDEF;
7b306f52 1018
a70083a3 1019 token = lex (); /* fetch typename or first token */
511e79b3 1020 if (token == tok_typename)
7b306f52 1021 {
95e36146 1022 typename = xstrdup (token_buffer);
a70083a3
AD
1023 value_components_used = 1;
1024 token = lex (); /* fetch first token */
7b306f52 1025 }
7b306f52 1026
a70083a3 1027 /* process first token */
7b306f52 1028
511e79b3 1029 if (token != tok_identifier)
a70083a3
AD
1030 {
1031 complain (_("unrecognized item %s, expected an identifier"),
1032 token_buffer);
1033 skip_to_char ('%');
1034 return;
7b306f52 1035 }
d7020c20 1036 symval->class = token_sym;
a70083a3
AD
1037 symval->type_name = typename;
1038 symval->user_token_number = SALIAS;
1039 symbol = symval;
7b306f52 1040
a70083a3 1041 token = lex (); /* get number or literal string */
1ff442ca 1042
511e79b3 1043 if (token == tok_number)
943819bf 1044 {
a70083a3
AD
1045 usrtoknum = numval;
1046 token = lex (); /* okay, did number, now get literal */
943819bf 1047 }
1ff442ca 1048
a70083a3 1049 /* process literal string token */
1ff442ca 1050
511e79b3 1051 if (token != tok_identifier || *symval->tag != '\"')
1ff442ca 1052 {
a70083a3
AD
1053 complain (_("expected string constant instead of %s"), token_buffer);
1054 skip_to_char ('%');
1055 return;
1ff442ca 1056 }
d7020c20 1057 symval->class = token_sym;
a70083a3
AD
1058 symval->type_name = typename;
1059 symval->user_token_number = usrtoknum;
1ff442ca 1060
a70083a3
AD
1061 symval->alias = symbol;
1062 symbol->alias = symval;
1ff442ca 1063
79282c5a
AD
1064 /* symbol and symval combined are only one symbol. */
1065 nsyms--;
a70083a3 1066}
3cef001a 1067
11e2beca 1068
b6610515 1069static void
11d82f03 1070parse_muscle_decl (void)
b6610515
RA
1071{
1072 int ch = ungetc (skip_white_space (), finput);
b7c49edf
AD
1073 char *muscle_key;
1074 char *muscle_value;
b6610515
RA
1075
1076 /* Read key. */
1077 if (!isalpha (ch) && ch != '_')
1078 {
1079 complain (_("invalid %s declaration"), "%define");
1080 skip_to_char ('%');
1081 return;
1082 }
11d82f03
MA
1083 copy_identifier (finput, &muscle_obstack);
1084 obstack_1grow (&muscle_obstack, 0);
1085 muscle_key = obstack_finish (&muscle_obstack);
342b8b6e 1086
b6610515
RA
1087 /* Read value. */
1088 ch = skip_white_space ();
1089 if (ch != '"')
1090 {
1091 ungetc (ch, finput);
1092 if (ch != EOF)
1093 {
1094 complain (_("invalid %s declaration"), "%define");
1095 skip_to_char ('%');
1096 return;
1097 }
1098 else
1099 fatal (_("Premature EOF after %s"), "\"");
1100 }
11d82f03
MA
1101 copy_string2 (finput, &muscle_obstack, '"', 0);
1102 obstack_1grow (&muscle_obstack, 0);
1103 muscle_value = obstack_finish (&muscle_obstack);
b6610515 1104
b6610515 1105 /* Store the (key, value) pair in the environment. */
11d82f03 1106 muscle_insert (muscle_key, muscle_value);
b6610515
RA
1107}
1108
2ba3b73c 1109
426cf563
MA
1110
1111/*---------------------------------.
a870c567 1112| Parse a double quoted parameter. |
426cf563
MA
1113`---------------------------------*/
1114
1115static const char *
1116parse_dquoted_param (const char *from)
1117{
1118 struct obstack param_obstack;
1119 const char *param = NULL;
1120 int c;
1121
1122 obstack_init (&param_obstack);
1123 c = skip_white_space ();
1124
1125 if (c != '"')
1126 {
1127 complain (_("invalid %s declaration"), from);
1128 ungetc (c, finput);
1129 skip_to_char ('%');
1130 return NULL;
1131 }
1132
2648a72d
AD
1133 while ((c = literalchar ()) != '"')
1134 obstack_1grow (&param_obstack, c);
a870c567 1135
426cf563
MA
1136 obstack_1grow (&param_obstack, '\0');
1137 param = obstack_finish (&param_obstack);
1138
1139 if (c != '"' || strlen (param) == 0)
1140 {
1141 complain (_("invalid %s declaration"), from);
1142 if (c != '"')
1143 ungetc (c, finput);
1144 skip_to_char ('%');
1145 return NULL;
1146 }
1147
1148 return param;
1149}
1150
2ba3b73c
MA
1151/*----------------------------------.
1152| Parse what comes after %skeleton. |
1153`----------------------------------*/
1154
a870c567 1155static void
2ba3b73c
MA
1156parse_skel_decl (void)
1157{
426cf563 1158 skeleton = parse_dquoted_param ("%skeleton");
2ba3b73c
MA
1159}
1160
a70083a3
AD
1161/*----------------------------------------------------------------.
1162| Read from finput until `%%' is seen. Discard the `%%'. Handle |
1163| any `%' declarations, and copy the contents of any `%{ ... %}' |
0dd1580a 1164| groups to PRE_PROLOGUE_OBSTACK or POST_PROLOGUE_OBSTACK. |
a70083a3 1165`----------------------------------------------------------------*/
1ff442ca 1166
4a120d45 1167static void
a70083a3 1168read_declarations (void)
1ff442ca 1169{
a70083a3 1170 for (;;)
1ff442ca 1171 {
951366c1 1172 int c = skip_white_space ();
1ff442ca 1173
a70083a3
AD
1174 if (c == '%')
1175 {
951366c1 1176 token_t tok = parse_percent_token ();
1ff442ca 1177
a70083a3 1178 switch (tok)
943819bf 1179 {
511e79b3 1180 case tok_two_percents:
a70083a3 1181 return;
1ff442ca 1182
511e79b3 1183 case tok_percent_left_curly:
0dd1580a
RA
1184 if (!typed)
1185 copy_definition (&pre_prologue_obstack);
1186 else
1187 copy_definition (&post_prologue_obstack);
a70083a3 1188 break;
1ff442ca 1189
511e79b3 1190 case tok_token:
d7020c20 1191 parse_token_decl (token_sym, nterm_sym);
a70083a3 1192 break;
1ff442ca 1193
511e79b3 1194 case tok_nterm:
d7020c20 1195 parse_token_decl (nterm_sym, token_sym);
a70083a3 1196 break;
1ff442ca 1197
511e79b3 1198 case tok_type:
a70083a3
AD
1199 parse_type_decl ();
1200 break;
1ff442ca 1201
511e79b3 1202 case tok_start:
a70083a3
AD
1203 parse_start_decl ();
1204 break;
118fb205 1205
511e79b3 1206 case tok_union:
a70083a3
AD
1207 parse_union_decl ();
1208 break;
1ff442ca 1209
511e79b3 1210 case tok_expect:
a70083a3
AD
1211 parse_expect_decl ();
1212 break;
6deb4447 1213
511e79b3 1214 case tok_thong:
a70083a3
AD
1215 parse_thong_decl ();
1216 break;
d7020c20 1217
511e79b3 1218 case tok_left:
d7020c20 1219 parse_assoc_decl (left_assoc);
a70083a3 1220 break;
1ff442ca 1221
511e79b3 1222 case tok_right:
d7020c20 1223 parse_assoc_decl (right_assoc);
a70083a3 1224 break;
1ff442ca 1225
511e79b3 1226 case tok_nonassoc:
d7020c20 1227 parse_assoc_decl (non_assoc);
a70083a3 1228 break;
1ff442ca 1229
b6610515 1230 case tok_define:
11d82f03 1231 parse_muscle_decl ();
b6610515 1232 break;
342b8b6e 1233
2ba3b73c
MA
1234 case tok_skel:
1235 parse_skel_decl ();
1236 break;
b6610515 1237
511e79b3 1238 case tok_noop:
a70083a3 1239 break;
1ff442ca 1240
951366c1
AD
1241 case tok_stropt:
1242 case tok_intopt:
1243 case tok_obsolete:
72a23c97 1244 assert (0);
951366c1
AD
1245 break;
1246
e0c40012 1247 case tok_illegal:
a70083a3
AD
1248 default:
1249 complain (_("unrecognized: %s"), token_buffer);
1250 skip_to_char ('%');
1251 }
1252 }
1253 else if (c == EOF)
1254 fatal (_("no input grammar"));
1255 else
1256 {
ff4a34be
AD
1257 char buf[] = "c";
1258 buf[0] = c;
1259 complain (_("unknown character: %s"), quote (buf));
a70083a3 1260 skip_to_char ('%');
1ff442ca 1261 }
1ff442ca 1262 }
1ff442ca 1263}
a70083a3 1264\f
82b6cb3f
AD
1265/*------------------------------------------------------------------.
1266| Assuming that a `{' has just been seen, copy everything up to the |
3a8b4109
AD
1267| matching `}' into ACTION_OBSTACK. |
1268| |
1269| RULE_LENGTH is the number of values in the current rule so far, |
1270| which says where to find `$0' with respect to the top of the |
1271| stack. It is not the same as the rule->length in the case of mid |
1272| rule actions. |
82b6cb3f 1273| |
fdbcd8e2 1274| This routine is used for actions. |
82b6cb3f 1275`------------------------------------------------------------------*/
1ff442ca 1276
4a120d45 1277static void
3a8b4109 1278parse_action (symbol_list *rule, int rule_length)
1ff442ca 1279{
3a8b4109
AD
1280 int count = 1;
1281 rule->action_line = lineno;
1ff442ca
NF
1282 while (count > 0)
1283 {
3a8b4109 1284 int c;
14d293ac
AD
1285 while ((c = getc (finput)) != '}')
1286 switch (c)
1287 {
1288 case '\n':
1289 obstack_1grow (&action_obstack, c);
f3849179 1290 ++lineno;
14d293ac 1291 break;
1ff442ca 1292
14d293ac
AD
1293 case '{':
1294 obstack_1grow (&action_obstack, c);
f3849179 1295 ++count;
14d293ac 1296 break;
1ff442ca 1297
14d293ac
AD
1298 case '\'':
1299 case '"':
1300 copy_string (finput, &action_obstack, c);
1301 break;
1ff442ca 1302
14d293ac
AD
1303 case '/':
1304 copy_comment (finput, &action_obstack);
1305 break;
1ff442ca 1306
14d293ac 1307 case '$':
82b6cb3f 1308 copy_dollar (finput, &action_obstack, rule, rule_length);
14d293ac 1309 break;
1ff442ca 1310
14d293ac 1311 case '@':
82b6cb3f 1312 copy_at (finput, &action_obstack, rule_length);
14d293ac 1313 break;
a70083a3 1314
14d293ac
AD
1315 case EOF:
1316 fatal (_("unmatched %s"), "`{'");
a70083a3 1317
14d293ac
AD
1318 default:
1319 obstack_1grow (&action_obstack, c);
1320 }
a70083a3 1321
14d293ac 1322 /* Above loop exits when C is '}'. */
a70083a3 1323 if (--count)
2b25d624 1324 obstack_1grow (&action_obstack, c);
a70083a3
AD
1325 }
1326
3f96f4dc 1327 obstack_1grow (&action_obstack, '\0');
14d293ac
AD
1328 rule->action = obstack_finish (&action_obstack);
1329}
a70083a3 1330
a70083a3
AD
1331\f
1332
a70083a3
AD
1333/*-------------------------------------------------------------------.
1334| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1335| with the user's names. |
1336`-------------------------------------------------------------------*/
1ff442ca 1337
db8837cb 1338static symbol_t *
118fb205 1339gensym (void)
1ff442ca 1340{
274d42ce
AD
1341 /* Incremented for each generated symbol */
1342 static int gensym_count = 0;
1343 static char buf[256];
1344
db8837cb 1345 symbol_t *sym;
1ff442ca 1346
274d42ce
AD
1347 sprintf (buf, "@%d", ++gensym_count);
1348 token_buffer = buf;
a70083a3 1349 sym = getsym (token_buffer);
d7020c20 1350 sym->class = nterm_sym;
d9b739c3 1351 sym->number = nvars++;
36281465 1352 return sym;
1ff442ca 1353}
a70083a3 1354\f
107f7dfb
AD
1355/*-------------------------------------------------------------------.
| Parse the input grammar into a single symbol_list structure.  Each |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| The text of each action is attached to the first symbol_list of    |
| the rule it belongs to.  An action in the middle of a rule is      |
| moved to a dummy empty rule, inserted before the host rule, whose  |
| lhs is a generated symbol.                                         |
|                                                                    |
| Once the rules are read, the initial rule `axiom: %start EOF' is   |
| prepended to the list.                                             |
|                                                                    |
| Bison used to allow some %directives in the rules section, but     |
| this is no longer considered appropriate: (i) the documented       |
| grammar doesn't claim it, (ii) it would promote bad style, (iii)   |
| error recovery for %directives consists in skipping the junk until |
| a `%' is seen, which helps synchronizing.  This scheme is          |
| definitely wrong in the rules section.                             |
`-------------------------------------------------------------------*/
1ff442ca 1372
4a120d45 1373static void
118fb205 1374readgram (void)
1ff442ca 1375{
f17bcd1f 1376 token_t t;
db8837cb 1377 symbol_t *lhs = NULL;
107f7dfb
AD
/* P is the node most recently created; P1 is the current tail of the
   list, to which new nodes are appended.  */
1378 symbol_list *p = NULL;
1379 symbol_list *p1 = NULL;
1ff442ca 1380
ff4a34be
AD
1381 /* Points to first symbol_list of current rule. its symbol is the
1382 lhs of the rule. */
107f7dfb 1383 symbol_list *crule = NULL;
ff4a34be 1384 /* Points to the symbol_list preceding crule. */
107f7dfb 1385 symbol_list *crule1 = NULL;
1ff442ca 1386
a70083a3 1387 t = lex ();
1ff442ca 1388
511e79b3 1389 while (t != tok_two_percents && t != tok_eof)
107f7dfb
AD
1390 if (t == tok_identifier || t == tok_bar)
1391 {
1392 int action_flag = 0;
1393 /* Number of symbols in rhs of this rule so far */
1394 int rulelength = 0;
1395 int xactions = 0; /* JF for error checking */
db8837cb 1396 symbol_t *first_rhs = 0;
107f7dfb
AD
1397
/* An identifier starts a rule and becomes its lhs; after a bar the
   lhs of the previous rule is kept.  */
1398 if (t == tok_identifier)
1399 {
1400 lhs = symval;
1401
/* Unless a start symbol was already recorded, the lhs of the first
   rule becomes the start symbol.  */
1402 if (!start_flag)
1403 {
1404 startval = lhs;
1405 start_flag = 1;
1406 }
1ff442ca 1407
107f7dfb
AD
1408 t = lex ();
1409 if (t != tok_colon)
1410 {
1411 complain (_("ill-formed rule: initial symbol not followed by colon"));
1412 unlex (t);
1413 }
1414 }
1415
1416 if (nrules == 0 && t == tok_bar)
1417 {
1418 complain (_("grammar starts with vertical bar"));
1419 lhs = symval; /* BOGUS: use a random symval */
1420 }
1421 /* start a new rule and record its lhs. */
1422
/* NRITEMS is bumped once per rule in addition to once per rhs symbol:
   packgram stores one end-of-rule item for each rule.  */
f3849179 1423 ++nrules;
5123689b 1424 ++nritems;
107f7dfb
AD
1425
1426 p = symbol_list_new (lhs);
1427
1428 crule1 = p1;
1429 if (p1)
1430 p1->next = p;
1431 else
1432 grammar = p;
1ff442ca 1433
107f7dfb
AD
1434 p1 = p;
1435 crule = p;
1ff442ca 1436
107f7dfb 1437 /* mark the rule's lhs as a nonterminal if not already so. */
1ff442ca 1438
107f7dfb
AD
1439 if (lhs->class == unknown_sym)
1440 {
1441 lhs->class = nterm_sym;
d9b739c3 1442 lhs->number = nvars;
f3849179 1443 ++nvars;
107f7dfb
AD
1444 }
1445 else if (lhs->class == token_sym)
1446 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca 1447
107f7dfb 1448 /* read the rhs of the rule. */
1ff442ca 1449
107f7dfb
AD
1450 for (;;)
1451 {
1452 t = lex ();
/* NOTE(review): the token read after %prec is not checked; whatever
   symval holds at that point becomes the rule's precedence symbol.  */
1453 if (t == tok_prec)
1454 {
1455 t = lex ();
1456 crule->ruleprec = symval;
1457 t = lex ();
1458 }
1459
1460 if (!(t == tok_identifier || t == tok_left_curly))
1461 break;
1ff442ca 1462
107f7dfb
AD
1463 /* If next token is an identifier, see if a colon follows it.
1464 If one does, exit this rule now. */
1465 if (t == tok_identifier)
1466 {
db8837cb 1467 symbol_t *ssave;
107f7dfb
AD
1468 token_t t1;
1469
/* Look one token ahead and push it back; symval is saved and restored
   around the lookahead, presumably because lex () may overwrite it.  */
1470 ssave = symval;
1471 t1 = lex ();
1472 unlex (t1);
1473 symval = ssave;
1474 if (t1 == tok_colon)
e5352bc7 1475 {
fff9bf0b 1476 warn (_("previous rule lacks an ending `;'"));
e5352bc7
AD
1477 break;
1478 }
107f7dfb
AD
1479
1480 if (!first_rhs) /* JF */
1481 first_rhs = symval;
1482 /* Not followed by colon =>
1483 process as part of this rule's rhs. */
1484 }
1485
1486 /* If we just passed an action, that action was in the middle
1487 of a rule, so make a dummy rule to reduce it to a
1488 non-terminal. */
1489 if (action_flag)
1490 {
1491 /* Since the action was written out with this rule's
1492 number, we must give the new rule this number by
1493 inserting the new rule before it. */
1494
1495 /* Make a dummy nonterminal, a gensym. */
db8837cb 1496 symbol_t *sdummy = gensym ();
107f7dfb
AD
1497
1498 /* Make a new rule, whose body is empty, before the
1499 current one, so that the action just read can
1500 belong to it. */
f3849179 1501 ++nrules;
5123689b 1502 ++nritems;
107f7dfb
AD
1503 p = symbol_list_new (sdummy);
1504 /* Attach its lineno to that of the host rule. */
1505 p->line = crule->line;
82c035a8
AD
1506 /* Move the action from the host rule to this one. */
1507 p->action = crule->action;
1508 p->action_line = crule->action_line;
1509 crule->action = NULL;
1510
/* Splice the dummy rule in between the node preceding the host rule
   (or the head of the grammar) and the host rule itself.  */
107f7dfb
AD
1511 if (crule1)
1512 crule1->next = p;
1513 else
1514 grammar = p;
1515 /* End of the rule. */
1516 crule1 = symbol_list_new (NULL);
1517 crule1->next = crule;
1518
1519 p->next = crule1;
1520
1521 /* Insert the dummy generated by that rule into this
1522 rule. */
5123689b 1523 ++nritems;
107f7dfb
AD
1524 p = symbol_list_new (sdummy);
1525 p1->next = p;
1526 p1 = p;
1527
1528 action_flag = 0;
1529 }
1530
1531 if (t == tok_identifier)
1532 {
5123689b 1533 ++nritems;
107f7dfb
AD
1534 p = symbol_list_new (symval);
1535 p1->next = p;
1536 p1 = p;
1537 }
1538 else /* handle an action. */
1539 {
14d293ac 1540 parse_action (crule, rulelength);
107f7dfb 1541 action_flag = 1;
f3849179 1542 ++xactions; /* JF */
107f7dfb 1543 }
f3849179 1544 ++rulelength;
107f7dfb
AD
1545 } /* end of read rhs of rule */
1546
1547 /* Put an empty link in the list to mark the end of this rule */
1548 p = symbol_list_new (NULL);
1549 p1->next = p;
1550 p1 = p;
1551
/* NOTE(review): the message below says `@prec' although the token
   tested is tok_prec; presumably `%prec' was meant -- confirm before
   touching the string.  */
1552 if (t == tok_prec)
1553 {
1554 complain (_("two @prec's in a row"));
1555 t = lex ();
1556 crule->ruleprec = symval;
1557 t = lex ();
1558 }
f499b062 1559
f499b062 1560 if (t == tok_left_curly)
107f7dfb
AD
1561 {
1562 /* This case never occurs -wjh */
1563 if (action_flag)
1564 complain (_("two actions at end of one rule"));
14d293ac 1565 parse_action (crule, rulelength);
107f7dfb 1566 action_flag = 1;
f3849179 1567 ++xactions; /* -wjh */
107f7dfb
AD
1568 t = lex ();
1569 }
1570 /* If $$ is being set in default way, report if any type
1571 mismatch. */
1572 else if (!xactions
1573 && first_rhs && lhs->type_name != first_rhs->type_name)
1574 {
1575 if (lhs->type_name == 0
1576 || first_rhs->type_name == 0
1577 || strcmp (lhs->type_name, first_rhs->type_name))
1578 complain (_("type clash (`%s' `%s') on default action"),
1579 lhs->type_name ? lhs->type_name : "",
1580 first_rhs->type_name ? first_rhs->type_name : "");
1581 }
1582 /* Warn if there is no default for $$ but we need one. */
1583 else if (!xactions && !first_rhs && lhs->type_name != 0)
1584 complain (_("empty rule for typed nonterminal, and no action"));
bfcf1f3a 1585 if (t == tok_two_percents || t == tok_eof)
fff9bf0b 1586 warn (_("previous rule lacks an ending `;'"));
107f7dfb 1587 if (t == tok_semicolon)
a70083a3 1588 t = lex ();
107f7dfb
AD
1589 }
1590 else
1591 {
1592 complain (_("invalid input: %s"), quote (token_buffer));
1593 t = lex ();
1594 }
943819bf 1595
b68e7744
AD
1596 /* grammar has been read. Do some checking */
1597
1598 if (nrules == 0)
1599 fatal (_("no rules in the input grammar"));
1600
1601 /* Report any undefined symbols and consider them nonterminals. */
db8837cb 1602 symbols_do (symbol_check_defined, NULL);
b68e7744 1603
ff442794
AD
1604 /* Insert the initial rule, which line is that of the first rule
1605 (not that of the start symbol):
30171f79
AD
1606
1607 axiom: %start EOF. */
1608 p = symbol_list_new (axiom);
ff442794 1609 p->line = grammar->line;
30171f79
AD
1610 p->next = symbol_list_new (startval);
1611 p->next->next = symbol_list_new (eoftoken);
1612 p->next->next->next = symbol_list_new (NULL);
1613 p->next->next->next->next = grammar;
/* The initial rule accounts for one rule and three items: its two rhs
   symbols plus its end-of-rule item.  */
1614 nrules += 1;
5123689b 1615 nritems += 3;
30171f79
AD
1616 grammar = p;
1617 startval = axiom;
1ff442ca 1618
62a3e4f0 1619 if (nsyms > SHRT_MAX)
a0f6b076 1620 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
62a3e4f0 1621 SHRT_MAX);
1ff442ca 1622
72a23c97 1623 assert (nsyms == ntokens + nvars);
1ff442ca 1624}
ff48177d
MA
1625
1626/* At the end of the grammar file, some C source code must
63c2d5de 1627 be stored. It is going to be associated to the epilogue
ff48177d
MA
1628 directive. */
1629static void
1630read_additionnal_code (void)
1631{
9101a310 1632 int c;
63c2d5de 1633 struct obstack el_obstack;
342b8b6e 1634
63c2d5de 1635 obstack_init (&el_obstack);
ff48177d 1636
710ddc4f
MA
1637 if (!no_lines_flag)
1638 {
1639 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1640 lineno, quotearg_style (c_quoting_style,
b7c49edf 1641 muscle_find ("filename")));
710ddc4f
MA
1642 }
1643
ff48177d 1644 while ((c = getc (finput)) != EOF)
2b7ed18a 1645 copy_character (&el_obstack, c);
342b8b6e 1646
63c2d5de 1647 obstack_1grow (&el_obstack, 0);
11d82f03 1648 muscle_insert ("epilogue", obstack_finish (&el_obstack));
ff48177d
MA
1649}
1650
a70083a3 1651\f
037ca2f1
AD
1652/*------------------------------------------------------------------.
1653| Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1654| number. |
1655`------------------------------------------------------------------*/
1656
1657static void
1658token_translations_init (void)
1659{
23c5a174 1660 int num_256_available_p = TRUE;
037ca2f1
AD
1661 int i;
1662
23c5a174
AD
1663 /* Find the highest user token number, and whether 256, the POSIX
1664 preferred user token number for the error token, is used. */
1665 max_user_token_number = 0;
1666 for (i = 0; i < ntokens; ++i)
1667 {
1668 symbol_t *this = symbols[i];
1669 if (this->user_token_number != SUNDEF)
1670 {
1671 if (this->user_token_number > max_user_token_number)
1672 max_user_token_number = this->user_token_number;
1673 if (this->user_token_number == 256)
1674 num_256_available_p = FALSE;
1675 }
1676 }
1677
1678 /* If 256 is not used, assign it to error, to follow POSIX. */
1679 if (num_256_available_p && errtoken->user_token_number == SUNDEF)
1680 errtoken->user_token_number = 256;
1681
1682 /* Set the missing user numbers. */
1683 if (max_user_token_number < 256)
1684 max_user_token_number = 256;
1685
72a23c97
AD
1686 for (i = 0; i < ntokens; ++i)
1687 {
db8837cb 1688 symbol_t *this = symbols[i];
72a23c97 1689 if (this->user_token_number == SUNDEF)
23c5a174 1690 this->user_token_number = ++max_user_token_number;
72a23c97
AD
1691 if (this->user_token_number > max_user_token_number)
1692 max_user_token_number = this->user_token_number;
72a23c97
AD
1693 }
1694
680e8701 1695 token_translations = XCALLOC (token_number_t, max_user_token_number + 1);
037ca2f1
AD
1696
1697 /* Initialize all entries for literal tokens to 2, the internal
1698 token number for $undefined., which represents all invalid
1699 inputs. */
18bcecb0 1700 for (i = 0; i < max_user_token_number + 1; i++)
007a50a4 1701 token_translations[i] = undeftoken->number;
db8837cb 1702 symbols_do (symbol_translation, NULL);
037ca2f1
AD
1703}
1704
1705
0e78e603
AD
1706/*----------------------------------------------------------------.
1707| Assign symbol numbers, and write definition of token names into |
1708| FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1709`----------------------------------------------------------------*/
1ff442ca 1710
4a120d45 1711static void
118fb205 1712packsymbols (void)
1ff442ca 1713{
db8837cb 1714 symbols = XCALLOC (symbol_t *, nsyms);
1ff442ca 1715
db8837cb
AD
1716 symbols_do (symbol_check_alias_consistence, NULL);
1717 symbols_do (symbol_pack, NULL);
1ff442ca 1718
037ca2f1 1719 token_translations_init ();
1ff442ca 1720
e3f1699f
AD
1721 if (startval->class == unknown_sym)
1722 fatal (_("the start symbol %s is undefined"), startval->tag);
1723 else if (startval->class == token_sym)
1724 fatal (_("the start symbol %s is a token"), startval->tag);
1725
d9b739c3 1726 start_symbol = startval->number;
e3f1699f
AD
1727}
1728
1729
a70083a3
AD
1730/*---------------------------------------------------------------.
1731| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 1732| RITEM. |
a70083a3 1733`---------------------------------------------------------------*/
1ff442ca 1734
4a120d45 1735static void
118fb205 1736packgram (void)
1ff442ca 1737{
a70083a3
AD
1738 int itemno;
1739 int ruleno;
1740 symbol_list *p;
1ff442ca 1741
62a3e4f0 1742 ritem = XCALLOC (item_number_t, nritems + 1);
1a2b5d37 1743 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
1744
1745 itemno = 0;
1746 ruleno = 1;
1747
1748 p = grammar;
1749 while (p)
1750 {
db8837cb 1751 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 1752 rules[ruleno].user_number = ruleno;
c3b407f4 1753 rules[ruleno].number = ruleno;
bba97eb2 1754 rules[ruleno].lhs = p->sym;
99013900 1755 rules[ruleno].rhs = ritem + itemno;
1a2b5d37
AD
1756 rules[ruleno].line = p->line;
1757 rules[ruleno].useful = TRUE;
1758 rules[ruleno].action = p->action;
1759 rules[ruleno].action_line = p->action_line;
1ff442ca
NF
1760
1761 p = p->next;
1762 while (p && p->sym)
1763 {
5fbb0954
AD
1764 /* item_number_t = token_number_t.
1765 But the former needs to contain more: negative rule numbers. */
1766 ritem[itemno++] = token_number_as_item_number (p->sym->number);
1ff442ca
NF
1767 /* A rule gets by default the precedence and associativity
1768 of the last token in it. */
d7020c20 1769 if (p->sym->class == token_sym)
03b31c0c 1770 rules[ruleno].prec = p->sym;
a70083a3
AD
1771 if (p)
1772 p = p->next;
1ff442ca
NF
1773 }
1774
1775 /* If this rule has a %prec,
a70083a3 1776 the specified symbol's precedence replaces the default. */
1ff442ca
NF
1777 if (ruleprec)
1778 {
03b31c0c
AD
1779 rules[ruleno].precsym = ruleprec;
1780 rules[ruleno].prec = ruleprec;
1ff442ca 1781 }
1ff442ca 1782 ritem[itemno++] = -ruleno;
f3849179 1783 ++ruleno;
1ff442ca 1784
a70083a3
AD
1785 if (p)
1786 p = p->next;
1ff442ca
NF
1787 }
1788
1789 ritem[itemno] = 0;
5123689b 1790 assert (itemno == nritems);
3067fbef
AD
1791
1792 if (trace_flag)
1793 ritem_print (stderr);
1ff442ca 1794}
a70083a3 1795\f
fdbcd8e2
AD
1796/*------------------------------------------------------------------.
| Read in the grammar specification and record it in the format     |
| described in gram.h.  The input file is opened, read in three     |
| steps (declarations, rules, trailing code) and closed; the        |
| symbols and the rules are then packed, and the intermediate       |
| symbol_list form of the grammar is freed.                         |
|                                                                   |
| The text of the actions is accumulated in ACTION_OBSTACK and      |
| attached to the rules.                                            |
`------------------------------------------------------------------*/
a70083a3
AD
1802
1803void
1804reader (void)
1805{
342b8b6e 1806 lex_init ();
a70083a3
AD
1807 lineno = 1;
1808
11d82f03
MA
1809 /* Initialize the muscle obstack. */
1810 obstack_init (&muscle_obstack);
82e236e2 1811
a70083a3 1812 /* Initialize the symbol table. */
db8837cb 1813 symbols_new ();
b6610515 1814
30171f79
AD
1815 /* Construct the axiom symbol. */
1816 axiom = getsym ("$axiom");
1817 axiom->class = nterm_sym;
d9b739c3 1818 axiom->number = nvars++;
30171f79 1819
a70083a3
AD
1820 /* Construct the error token */
1821 errtoken = getsym ("error");
d7020c20 1822 errtoken->class = token_sym;
72a23c97 1823 errtoken->number = ntokens++;
b6610515 1824
a70083a3
AD
1825 /* Construct a token that represents all undefined literal tokens.
1826 It is always token number 2. */
1827 undeftoken = getsym ("$undefined.");
d7020c20 1828 undeftoken->class = token_sym;
72a23c97 1829 undeftoken->number = ntokens++;
a70083a3 1830
331dbc1b
AD
1831 /* Initialize the obstacks. */
1832 obstack_init (&action_obstack);
331dbc1b 1833 obstack_init (&output_obstack);
0dd1580a
RA
1834 obstack_init (&pre_prologue_obstack);
1835 obstack_init (&post_prologue_obstack);
331dbc1b
AD
1836
1837 finput = xfopen (infile, "r");
1838
896fe5c1
AD
1839 /* Read the declaration section.  Copy %{ ... %} groups to
   PRE_PROLOGUE_OBSTACK or POST_PROLOGUE_OBSTACK.  Also notice any
   %token, %left, etc. found there.  */
a70083a3 1842 read_declarations ();
b7c49edf
AD
1843
1844 /* If the user did not define her EOFTOKEN, do it now. */
1845 if (!eoftoken)
1846 {
1847 eoftoken = getsym ("$");
1848 eoftoken->class = token_sym;
72a23c97 1849 eoftoken->number = 0;
b7c49edf
AD
1850 /* Value specified by POSIX. */
1851 eoftoken->user_token_number = 0;
1852 }
1853
a70083a3 1854 /* Read in the grammar, build grammar in list form.  The text of
   the actions is stored along with the rules.  */
a70083a3 1856 readgram ();
ff48177d
MA
1857 /* Some C code is given at the end of the grammar file. */
1858 read_additionnal_code ();
b0c4483e 1859
/* The whole input has been read.  */
331dbc1b
AD
1860 lex_free ();
1861 xfclose (finput);
1862
a70083a3
AD
1863 /* Pack the symbols, set up the token translations and check the
   start symbol.  */
1865 packsymbols ();
93ede233 1866
a70083a3
AD
1867 /* Convert the grammar into the format described in gram.h. */
1868 packgram ();
8419d367
AD
1869
1870 /* The grammar as a symbol_list is no longer needed. */
1871 LIST_FREE (symbol_list, grammar);
a70083a3 1872}
76514394
AD
1873
1874void
1875grammar_free (void)
1876{
1877 XFREE (ritem);
1878 free (rules + 1);
1879 /* Free the symbol table data structure. */
db8837cb 1880 symbols_free ();
76514394 1881}