]> git.saurik.com Git - bison.git/blob - src/reader.c
3375fa2a3f48d250d39401e6a436c7e777a0ff35
[bison.git] / src / reader.c
1 /* Input parser for bison
2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
3 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
37
38 typedef struct symbol_list
39 {
40 struct symbol_list *next;
41 symbol_t *sym;
42 int line;
43
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
51 symbol_t *ruleprec;
52 } symbol_list;
53
54 int lineno;
55 static symbol_list *grammar = NULL;
56 static int start_flag = 0;
57 static symbol_t *startval = NULL;
58
59 /* Nonzero if components of semantic values are used, implying
60 they must be unions. */
61 static int value_components_used;
62
63 /* Nonzero if %union has been seen. */
64 static int typed = 0;
65
66 /* Incremented for each %left, %right or %nonassoc seen */
67 static int lastprec = 0;
68
69 symbol_t *errtoken = NULL;
70 symbol_t *undeftoken = NULL;
71 symbol_t *eoftoken = NULL;
72 symbol_t *axiom = NULL;
73
74 static symbol_list *
75 symbol_list_new (symbol_t *sym)
76 {
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
81 res->action = NULL;
82 res->action_line = 0;
83 res->guard = NULL;
84 res->guard_line = 0;
85 res->ruleprec = NULL;
86 return res;
87 }
88
89 /*------------------------.
90 | Operations on symbols. |
91 `------------------------*/
92
93
94 /*-----------------------------------------------------------.
95 | If THIS is not defined, report an error, and consider it a |
96 | nonterminal. |
97 `-----------------------------------------------------------*/
98
99 static bool
100 symbol_check_defined (symbol_t *this)
101 {
102 if (this->class == unknown_sym)
103 {
104 complain
105 (_("symbol %s is used, but is not defined as a token and has no rules"),
106 this->tag);
107 this->class = nterm_sym;
108 this->number = nvars++;
109 }
110
111 return TRUE;
112 }
113
114
115 /*-------------------------------------------------------------------.
116 | Assign a symbol number, and write the definition of the token name |
117 | into FDEFINES. Put in SYMBOLS. |
118 `-------------------------------------------------------------------*/
119
120 static bool
121 symbol_make_alias (symbol_t *symbol, char *typename)
122 {
123 if (symval->alias)
124 warn (_("symbol `%s' used more than once as a literal string"),
125 symval->tag);
126 else if (symbol->alias)
127 warn (_("symbol `%s' given more than one literal string"),
128 symbol->tag);
129 else
130 {
131 symval->class = token_sym;
132 symval->type_name = typename;
133 symval->user_token_number = symbol->user_token_number;
134 symbol->user_token_number = SALIAS;
135 symval->alias = symbol;
136 symbol->alias = symval;
137 /* symbol and symval combined are only one symbol */
138 nsyms--;
139 ntokens--;
140 assert (ntokens == symbol->number || ntokens == symval->number);
141 symbol->number = symval->number =
142 (symval->number < symbol->number) ? symval->number : symbol->number;
143 }
144
145 return TRUE;
146 }
147
148 /*---------------------------------------------------------.
149 | Check that THIS, and its alias, have same precedence and |
150 | associativity. |
151 `---------------------------------------------------------*/
152
153 static bool
154 symbol_check_alias_consistence (symbol_t *this)
155 {
156 /* Check only those who _are_ the aliases. */
157 if (this->alias && this->user_token_number == SALIAS)
158 {
159 if (this->prec != this->alias->prec)
160 {
161 if (this->prec != 0 && this->alias->prec != 0)
162 complain (_("conflicting precedences for %s and %s"),
163 this->tag, this->alias->tag);
164 if (this->prec != 0)
165 this->alias->prec = this->prec;
166 else
167 this->prec = this->alias->prec;
168 }
169
170 if (this->assoc != this->alias->assoc)
171 {
172 if (this->assoc != 0 && this->alias->assoc != 0)
173 complain (_("conflicting assoc values for %s and %s"),
174 this->tag, this->alias->tag);
175 if (this->assoc != 0)
176 this->alias->assoc = this->assoc;
177 else
178 this->assoc = this->alias->assoc;
179 }
180 }
181 return TRUE;
182 }
183
184
185 /*-------------------------------------------------------------------.
186 | Assign a symbol number, and write the definition of the token name |
187 | into FDEFINES. Put in SYMBOLS. |
188 `-------------------------------------------------------------------*/
189
190 static bool
191 symbol_pack (symbol_t *this)
192 {
193 if (this->class == nterm_sym)
194 {
195 this->number += ntokens;
196 }
197 else if (this->alias)
198 {
199 /* This symbol and its alias are a single token defn.
200 Allocate a tokno, and assign to both check agreement of
201 prec and assoc fields and make both the same */
202 if (this->number == NUMBER_UNDEFINED)
203 {
204 if (this == eoftoken || this->alias == eoftoken)
205 this->number = this->alias->number = 0;
206 else
207 {
208 assert (this->alias->number != NUMBER_UNDEFINED);
209 this->number = this->alias->number;
210 }
211 }
212 /* Do not do processing below for SALIASs. */
213 if (this->user_token_number == SALIAS)
214 return TRUE;
215 }
216 else /* this->class == token_sym */
217 {
218 assert (this->number != NUMBER_UNDEFINED);
219 }
220
221 symbols[this->number] = this;
222 return TRUE;
223 }
224
225
226
227
228 /*--------------------------------------------------.
229 | Put THIS in TOKEN_TRANSLATIONS if it is a token. |
230 `--------------------------------------------------*/
231
232 static bool
233 symbol_translation (symbol_t *this)
234 {
235 /* Non-terminal? */
236 if (this->class == token_sym
237 && this->user_token_number != SALIAS)
238 {
239 /* A token which translation has already been set? */
240 if (token_translations[this->user_token_number] != undeftoken->number)
241 complain (_("tokens %s and %s both assigned number %d"),
242 symbols[token_translations[this->user_token_number]]->tag,
243 this->tag, this->user_token_number);
244
245 token_translations[this->user_token_number] = this->number;
246 }
247
248 return TRUE;
249 }
250 \f
251
252 /*===================\
253 | Low level lexing. |
254 \===================*/
255
256 static void
257 skip_to_char (int target)
258 {
259 int c;
260 if (target == '\n')
261 complain (_(" Skipping to next \\n"));
262 else
263 complain (_(" Skipping to next %c"), target);
264
265 do
266 c = skip_white_space ();
267 while (c != target && c != EOF);
268 if (c != EOF)
269 ungetc (c, finput);
270 }
271
272
/* Read a signed decimal integer from STREAM and return its value.

   The accepted syntax is an optional `-' followed by decimal digits.
   The first character that is not part of the number is pushed back
   onto STREAM.  When no digit is present the result is 0 (a leading
   `-' is consumed nevertheless).

   The magnitude saturates at INT_MAX instead of overflowing, since
   signed overflow is undefined behavior: an absurdly long number
   therefore yields INT_MAX or -INT_MAX.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would 10 * N + DIGIT exceed INT_MAX?  Once saturated, N
         stays at INT_MAX while the remaining digits are consumed.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a no-op.  */
  ungetc (c, stream);

  return sign * n;
}
300 \f
301 /*--------------------------------------------------------------.
302 | Get the data type (alternative in the union) of the value for |
303 | symbol N in rule RULE. |
304 `--------------------------------------------------------------*/
305
306 static char *
307 get_type_name (int n, symbol_list *rule)
308 {
309 int i;
310 symbol_list *rp;
311
312 if (n < 0)
313 {
314 complain (_("invalid $ value"));
315 return NULL;
316 }
317
318 rp = rule;
319 i = 0;
320
321 while (i < n)
322 {
323 rp = rp->next;
324 if (rp == NULL || rp->sym == NULL)
325 {
326 complain (_("invalid $ value"));
327 return NULL;
328 }
329 ++i;
330 }
331
332 return rp->sym->type_name;
333 }
334 \f
/* Append the character C to OOUT, writing `[' as the quadrigraph
   `@<:@' and `]' as `@:>@'.  */

static inline void
copy_character (struct obstack *oout, int c)
{
  if (c == '[')
    {
      obstack_sgrow (oout, "@<:@");
    }
  else if (c == ']')
    {
      obstack_sgrow (oout, "@:>@");
    }
  else
    {
      obstack_1grow (oout, c);
    }
}
356
357 /*------------------------------------------------------------.
358 | Dump the string from FIN to OOUT if non null. MATCH is the |
359 | delimiter of the string (either ' or "). |
360 `------------------------------------------------------------*/
361
362 static inline void
363 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
364 {
365 int c;
366
367 if (store)
368 obstack_1grow (oout, match);
369
370 c = getc (fin);
371
372 while (c != match)
373 {
374 if (c == EOF)
375 fatal (_("unterminated string at end of file"));
376 if (c == '\n')
377 {
378 complain (_("unterminated string"));
379 ungetc (c, fin);
380 c = match; /* invent terminator */
381 continue;
382 }
383
384 copy_character (oout, c);
385
386 if (c == '\\')
387 {
388 c = getc (fin);
389 if (c == EOF)
390 fatal (_("unterminated string at end of file"));
391 copy_character (oout, c);
392
393 if (c == '\n')
394 ++lineno;
395 }
396
397 c = getc (fin);
398 }
399
400 if (store)
401 obstack_1grow (oout, c);
402 }
403
/* Copy a string delimited by MATCH from FIN to OOUT, delimiters
   included: copy_string2 with STORE set.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
411
/* Copy from FIN to OOUT the longest run of alphanumeric characters
   and underscores; the character that ends the run is pushed back
   onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c = getc (fin);

  while (isalnum (c) || c == '_')
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
424
425
426 /*------------------------------------------------------------------.
427 | Dump the wannabee comment from IN to OOUT. In fact we just saw a |
428 | `/', which might or might not be a comment. In any case, copy |
429 | what we saw. |
430 `------------------------------------------------------------------*/
431
432 static inline void
433 copy_comment (FILE *fin, struct obstack *oout)
434 {
435 int cplus_comment;
436 int ended;
437 int c;
438
439 /* We read a `/', output it. */
440 obstack_1grow (oout, '/');
441
442 switch ((c = getc (fin)))
443 {
444 case '/':
445 cplus_comment = 1;
446 break;
447 case '*':
448 cplus_comment = 0;
449 break;
450 default:
451 ungetc (c, fin);
452 return;
453 }
454
455 obstack_1grow (oout, c);
456 c = getc (fin);
457
458 ended = 0;
459 while (!ended)
460 {
461 if (!cplus_comment && c == '*')
462 {
463 while (c == '*')
464 {
465 obstack_1grow (oout, c);
466 c = getc (fin);
467 }
468
469 if (c == '/')
470 {
471 obstack_1grow (oout, c);
472 ended = 1;
473 }
474 }
475 else if (c == '\n')
476 {
477 ++lineno;
478 obstack_1grow (oout, c);
479 if (cplus_comment)
480 ended = 1;
481 else
482 c = getc (fin);
483 }
484 else if (c == EOF)
485 fatal (_("unterminated comment"));
486 else
487 {
488 copy_character (oout, c);
489 c = getc (fin);
490 }
491 }
492 }
493
494
495 /*-----------------------------------------------------------------.
496 | FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
497 | reference to this location. STACK_OFFSET is the number of values |
498 | in the current rule so far, which says where to find `$0' with |
499 | respect to the top of the stack. |
500 `-----------------------------------------------------------------*/
501
502 static inline void
503 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
504 {
505 int c;
506
507 c = getc (fin);
508 if (c == '$')
509 {
510 obstack_sgrow (oout, "yyloc");
511 locations_flag = 1;
512 }
513 else if (isdigit (c) || c == '-')
514 {
515 int n;
516
517 ungetc (c, fin);
518 n = read_signed_integer (fin);
519 if (n > stack_offset)
520 complain (_("invalid value: %s%d"), "@", n);
521 else
522 {
523 /* Offset is always 0 if parser has already popped the stack
524 pointer. */
525 obstack_fgrow1 (oout, "yylsp[%d]",
526 n - (semantic_parser ? 0 : stack_offset));
527 locations_flag = 1;
528 }
529 }
530 else
531 {
532 char buf[] = "@c";
533 buf[1] = c;
534 complain (_("%s is invalid"), quote (buf));
535 }
536 }
537
538
539 /*-------------------------------------------------------------------.
540 | FIN is pointing to a wannabee semantic value (i.e., a `$'). |
541 | |
542 | Possible inputs: $[<TYPENAME>]($|integer) |
543 | |
544 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
545 | the number of values in the current rule so far, which says where |
546 | to find `$0' with respect to the top of the stack. |
547 `-------------------------------------------------------------------*/
548
549 static inline void
550 copy_dollar (FILE *fin, struct obstack *oout,
551 symbol_list *rule, int stack_offset)
552 {
553 int c = getc (fin);
554 const char *type_name = NULL;
555
556 /* Get the type name if explicit. */
557 if (c == '<')
558 {
559 read_type_name (fin);
560 type_name = token_buffer;
561 value_components_used = 1;
562 c = getc (fin);
563 }
564
565 if (c == '$')
566 {
567 obstack_sgrow (oout, "yyval");
568
569 if (!type_name)
570 type_name = get_type_name (0, rule);
571 if (type_name)
572 obstack_fgrow1 (oout, ".%s", type_name);
573 if (!type_name && typed)
574 complain (_("$$ of `%s' has no declared type"),
575 rule->sym->tag);
576 }
577 else if (isdigit (c) || c == '-')
578 {
579 int n;
580 ungetc (c, fin);
581 n = read_signed_integer (fin);
582
583 if (n > stack_offset)
584 complain (_("invalid value: %s%d"), "$", n);
585 else
586 {
587 if (!type_name && n > 0)
588 type_name = get_type_name (n, rule);
589
590 /* Offset is always 0 if parser has already popped the stack
591 pointer. */
592 obstack_fgrow1 (oout, "yyvsp[%d]",
593 n - (semantic_parser ? 0 : stack_offset));
594
595 if (type_name)
596 obstack_fgrow1 (oout, ".%s", type_name);
597 if (!type_name && typed)
598 complain (_("$%d of `%s' has no declared type"),
599 n, rule->sym->tag);
600 }
601 }
602 else
603 {
604 char buf[] = "$c";
605 buf[1] = c;
606 complain (_("%s is invalid"), quote (buf));
607 }
608 }
609 \f
610 /*-------------------------------------------------------------------.
611 | Copy the contents of a `%{ ... %}' into the definitions file. The |
612 | `%{' has already been read. Return after reading the `%}'. |
613 `-------------------------------------------------------------------*/
614
615 static void
616 copy_definition (struct obstack *oout)
617 {
618 int c;
619 /* -1 while reading a character if prev char was %. */
620 int after_percent;
621
622 if (!no_lines_flag)
623 {
624 obstack_fgrow2 (oout, muscle_find ("linef"),
625 lineno, quotearg_style (c_quoting_style,
626 muscle_find ("filename")));
627 }
628
629 after_percent = 0;
630
631 c = getc (finput);
632
633 for (;;)
634 {
635 switch (c)
636 {
637 case '\n':
638 obstack_1grow (oout, c);
639 ++lineno;
640 break;
641
642 case '%':
643 after_percent = -1;
644 break;
645
646 case '\'':
647 case '"':
648 copy_string (finput, oout, c);
649 break;
650
651 case '/':
652 copy_comment (finput, oout);
653 break;
654
655 case EOF:
656 fatal ("%s", _("unterminated `%{' definition"));
657
658 default:
659 copy_character (oout, c);
660 }
661
662 c = getc (finput);
663
664 if (after_percent)
665 {
666 if (c == '}')
667 return;
668 obstack_1grow (oout, '%');
669 }
670 after_percent = 0;
671 }
672 }
673
674
675 /*-------------------------------------------------------------------.
676 | Parse what comes after %token or %nterm. For %token, WHAT_IS is |
677 | token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
678 | are reversed. |
679 `-------------------------------------------------------------------*/
680
681 static void
682 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
683 {
684 token_t token = tok_undef;
685 char *typename = NULL;
686
687 /* The symbol being defined. */
688 symbol_t *symbol = NULL;
689
690 /* After `%token' and `%nterm', any number of symbols maybe be
691 defined. */
692 for (;;)
693 {
694 int tmp_char = ungetc (skip_white_space (), finput);
695
696 /* `%' (for instance from `%token', or from `%%' etc.) is the
697 only valid means to end this declaration. */
698 if (tmp_char == '%')
699 return;
700 if (tmp_char == EOF)
701 fatal (_("Premature EOF after %s"), token_buffer);
702
703 token = lex ();
704 if (token == tok_comma)
705 {
706 symbol = NULL;
707 continue;
708 }
709 if (token == tok_typename)
710 {
711 typename = xstrdup (token_buffer);
712 value_components_used = 1;
713 symbol = NULL;
714 }
715 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
716 {
717 symbol_make_alias (symbol, typename);
718 symbol = NULL;
719 }
720 else if (token == tok_identifier)
721 {
722 int oldclass = symval->class;
723 symbol = symval;
724
725 if (symbol->class == what_is_not)
726 complain (_("symbol %s redefined"), symbol->tag);
727 symbol->class = what_is;
728 if (what_is == nterm_sym && oldclass != nterm_sym)
729 symbol->number = nvars++;
730 if (what_is == token_sym && symbol->number == NUMBER_UNDEFINED)
731 symbol->number = ntokens++;
732
733 if (typename)
734 {
735 if (symbol->type_name == NULL)
736 symbol->type_name = typename;
737 else if (strcmp (typename, symbol->type_name) != 0)
738 complain (_("type redeclaration for %s"), symbol->tag);
739 }
740 }
741 else if (symbol && token == tok_number)
742 {
743 symbol->user_token_number = numval;
744 /* User defined EOF token? */
745 if (numval == 0)
746 {
747 eoftoken = symbol;
748 eoftoken->number = 0;
749 /* It is always mapped to 0, so it was already counted in
750 NTOKENS. */
751 --ntokens;
752 }
753 }
754 else
755 {
756 complain (_("`%s' is invalid in %s"),
757 token_buffer,
758 (what_is == token_sym) ? "%token" : "%nterm");
759 skip_to_char ('%');
760 }
761 }
762
763 }
764
765
766 /*------------------------------.
767 | Parse what comes after %start |
768 `------------------------------*/
769
770 static void
771 parse_start_decl (void)
772 {
773 if (start_flag)
774 complain (_("multiple %s declarations"), "%start");
775 if (lex () != tok_identifier)
776 complain (_("invalid %s declaration"), "%start");
777 else
778 {
779 start_flag = 1;
780 startval = symval;
781 }
782 }
783
784 /*-----------------------------------------------------------.
785 | read in a %type declaration and record its information for |
786 | get_type_name to access |
787 `-----------------------------------------------------------*/
788
789 static void
790 parse_type_decl (void)
791 {
792 char *name;
793
794 if (lex () != tok_typename)
795 {
796 complain ("%s", _("%type declaration has no <typename>"));
797 skip_to_char ('%');
798 return;
799 }
800
801 name = xstrdup (token_buffer);
802
803 for (;;)
804 {
805 token_t t;
806 int tmp_char = ungetc (skip_white_space (), finput);
807
808 if (tmp_char == '%')
809 return;
810 if (tmp_char == EOF)
811 fatal (_("Premature EOF after %s"), token_buffer);
812
813 t = lex ();
814
815 switch (t)
816 {
817
818 case tok_comma:
819 case tok_semicolon:
820 break;
821
822 case tok_identifier:
823 if (symval->type_name == NULL)
824 symval->type_name = name;
825 else if (strcmp (name, symval->type_name) != 0)
826 complain (_("type redeclaration for %s"), symval->tag);
827
828 break;
829
830 default:
831 complain (_("invalid %%type declaration due to item: %s"),
832 token_buffer);
833 skip_to_char ('%');
834 }
835 }
836 }
837
838
839
840 /*----------------------------------------------------------------.
841 | Read in a %left, %right or %nonassoc declaration and record its |
842 | information. |
843 `----------------------------------------------------------------*/
844
845 static void
846 parse_assoc_decl (associativity assoc)
847 {
848 char *name = NULL;
849 int prev = 0;
850
851 /* Assign a new precedence level, never 0. */
852 ++lastprec;
853
854 for (;;)
855 {
856 token_t t;
857 int tmp_char = ungetc (skip_white_space (), finput);
858
859 if (tmp_char == '%')
860 return;
861 if (tmp_char == EOF)
862 fatal (_("Premature EOF after %s"), token_buffer);
863
864 t = lex ();
865
866 switch (t)
867 {
868 case tok_typename:
869 name = xstrdup (token_buffer);
870 break;
871
872 case tok_comma:
873 break;
874
875 case tok_identifier:
876 if (symval->prec != 0)
877 complain (_("redefining precedence of %s"), symval->tag);
878 symval->prec = lastprec;
879 symval->assoc = assoc;
880 if (symval->class == nterm_sym)
881 complain (_("symbol %s redefined"), symval->tag);
882 if (symval->number == NUMBER_UNDEFINED)
883 {
884 symval->number = ntokens++;
885 symval->class = token_sym;
886 }
887 if (name)
888 { /* record the type, if one is specified */
889 if (symval->type_name == NULL)
890 symval->type_name = name;
891 else if (strcmp (name, symval->type_name) != 0)
892 complain (_("type redeclaration for %s"), symval->tag);
893 }
894 break;
895
896 case tok_number:
897 if (prev == tok_identifier)
898 {
899 symval->user_token_number = numval;
900 }
901 else
902 {
903 complain
904 (_("invalid text (%s) - number should be after identifier"),
905 token_buffer);
906 skip_to_char ('%');
907 }
908 break;
909
910 case tok_semicolon:
911 return;
912
913 default:
914 complain (_("unexpected item: %s"), token_buffer);
915 skip_to_char ('%');
916 }
917
918 prev = t;
919 }
920 }
921
922
923
924 /*--------------------------------------------------------------.
925 | Copy the union declaration into the stype muscle |
926 | (and fdefines), where it is made into the definition of |
927 | YYSTYPE, the type of elements of the parser value stack. |
928 `--------------------------------------------------------------*/
929
930 static void
931 parse_union_decl (void)
932 {
933 int c;
934 int count = 0;
935 bool done = FALSE;
936 struct obstack union_obstack;
937 if (typed)
938 complain (_("multiple %s declarations"), "%union");
939
940 typed = 1;
941
942 MUSCLE_INSERT_INT ("stype_line", lineno);
943 obstack_init (&union_obstack);
944 obstack_sgrow (&union_obstack, "union");
945
946 while (!done)
947 {
948 c = xgetc (finput);
949
950 /* If C contains '/', it is output by copy_comment (). */
951 if (c != '/')
952 obstack_1grow (&union_obstack, c);
953
954 switch (c)
955 {
956 case '\n':
957 ++lineno;
958 break;
959
960 case '/':
961 copy_comment (finput, &union_obstack);
962 break;
963
964 case '{':
965 ++count;
966 break;
967
968 case '}':
969 /* FIXME: Errr. How could this happen???. --akim */
970 if (count == 0)
971 complain (_("unmatched %s"), "`}'");
972 count--;
973 if (!count)
974 done = TRUE;
975 break;
976 }
977 }
978
979 /* JF don't choke on trailing semi */
980 c = skip_white_space ();
981 if (c != ';')
982 ungetc (c, finput);
983 obstack_1grow (&union_obstack, 0);
984 muscle_insert ("stype", obstack_finish (&union_obstack));
985 }
986
987
988 /*-------------------------------------------------------.
989 | Parse the declaration %expect N which says to expect N |
990 | shift-reduce conflicts. |
991 `-------------------------------------------------------*/
992
993 static void
994 parse_expect_decl (void)
995 {
996 int c = skip_white_space ();
997 ungetc (c, finput);
998
999 if (!isdigit (c))
1000 complain (_("argument of %%expect is not an integer"));
1001 else
1002 expected_conflicts = read_signed_integer (finput);
1003 }
1004
1005
1006 /*-------------------------------------------------------------------.
1007 | Parse what comes after %thong. the full syntax is |
1008 | |
1009 | %thong <type> token number literal |
1010 | |
1011 | the <type> or number may be omitted. The number specifies the |
1012 | user_token_number. |
1013 | |
1014 | Two symbols are entered in the table, one for the token symbol and |
1015 | one for the literal. Both are given the <type>, if any, from the |
1016 | declaration. The ->user_token_number of the first is SALIAS and |
1017 | the ->user_token_number of the second is set to the number, if |
1018 | any, from the declaration. The two symbols are linked via |
1019 | pointers in their ->alias fields. |
1020 | |
1021 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
1022 | only the literal string is retained it is the literal string that |
1023 | is output to yytname |
1024 `-------------------------------------------------------------------*/
1025
1026 static void
1027 parse_thong_decl (void)
1028 {
1029 token_t token;
1030 symbol_t *symbol;
1031 char *typename = 0;
1032 int usrtoknum = SUNDEF;
1033
1034 token = lex (); /* fetch typename or first token */
1035 if (token == tok_typename)
1036 {
1037 typename = xstrdup (token_buffer);
1038 value_components_used = 1;
1039 token = lex (); /* fetch first token */
1040 }
1041
1042 /* process first token */
1043
1044 if (token != tok_identifier)
1045 {
1046 complain (_("unrecognized item %s, expected an identifier"),
1047 token_buffer);
1048 skip_to_char ('%');
1049 return;
1050 }
1051 symval->class = token_sym;
1052 symval->type_name = typename;
1053 symval->user_token_number = SALIAS;
1054 symbol = symval;
1055
1056 token = lex (); /* get number or literal string */
1057
1058 if (token == tok_number)
1059 {
1060 usrtoknum = numval;
1061 token = lex (); /* okay, did number, now get literal */
1062 }
1063
1064 /* process literal string token */
1065
1066 if (token != tok_identifier || *symval->tag != '\"')
1067 {
1068 complain (_("expected string constant instead of %s"), token_buffer);
1069 skip_to_char ('%');
1070 return;
1071 }
1072 symval->class = token_sym;
1073 symval->type_name = typename;
1074 symval->user_token_number = usrtoknum;
1075
1076 symval->alias = symbol;
1077 symbol->alias = symval;
1078
1079 /* symbol and symval combined are only one symbol. */
1080 nsyms--;
1081 }
1082
1083
1084 static void
1085 parse_muscle_decl (void)
1086 {
1087 int ch = ungetc (skip_white_space (), finput);
1088 char *muscle_key;
1089 char *muscle_value;
1090
1091 /* Read key. */
1092 if (!isalpha (ch) && ch != '_')
1093 {
1094 complain (_("invalid %s declaration"), "%define");
1095 skip_to_char ('%');
1096 return;
1097 }
1098 copy_identifier (finput, &muscle_obstack);
1099 obstack_1grow (&muscle_obstack, 0);
1100 muscle_key = obstack_finish (&muscle_obstack);
1101
1102 /* Read value. */
1103 ch = skip_white_space ();
1104 if (ch != '"')
1105 {
1106 ungetc (ch, finput);
1107 if (ch != EOF)
1108 {
1109 complain (_("invalid %s declaration"), "%define");
1110 skip_to_char ('%');
1111 return;
1112 }
1113 else
1114 fatal (_("Premature EOF after %s"), "\"");
1115 }
1116 copy_string2 (finput, &muscle_obstack, '"', 0);
1117 obstack_1grow (&muscle_obstack, 0);
1118 muscle_value = obstack_finish (&muscle_obstack);
1119
1120 /* Store the (key, value) pair in the environment. */
1121 muscle_insert (muscle_key, muscle_value);
1122 }
1123
1124
1125
1126 /*---------------------------------.
1127 | Parse a double quoted parameter. |
1128 `---------------------------------*/
1129
1130 static const char *
1131 parse_dquoted_param (const char *from)
1132 {
1133 struct obstack param_obstack;
1134 const char *param = NULL;
1135 int c;
1136
1137 obstack_init (&param_obstack);
1138 c = skip_white_space ();
1139
1140 if (c != '"')
1141 {
1142 complain (_("invalid %s declaration"), from);
1143 ungetc (c, finput);
1144 skip_to_char ('%');
1145 return NULL;
1146 }
1147
1148 while ((c = literalchar ()) != '"')
1149 obstack_1grow (&param_obstack, c);
1150
1151 obstack_1grow (&param_obstack, '\0');
1152 param = obstack_finish (&param_obstack);
1153
1154 if (c != '"' || strlen (param) == 0)
1155 {
1156 complain (_("invalid %s declaration"), from);
1157 if (c != '"')
1158 ungetc (c, finput);
1159 skip_to_char ('%');
1160 return NULL;
1161 }
1162
1163 return param;
1164 }
1165
1166 /*----------------------------------.
1167 | Parse what comes after %skeleton. |
1168 `----------------------------------*/
1169
1170 static void
1171 parse_skel_decl (void)
1172 {
1173 skeleton = parse_dquoted_param ("%skeleton");
1174 }
1175
1176 /*----------------------------------------------------------------.
1177 | Read from finput until `%%' is seen. Discard the `%%'. Handle |
1178 | any `%' declarations, and copy the contents of any `%{ ... %}' |
1179 | groups to PRE_PROLOGUE_OBSTACK or POST_PROLOGUE_OBSTACK. |
1180 `----------------------------------------------------------------*/
1181
1182 static void
1183 read_declarations (void)
1184 {
1185 for (;;)
1186 {
1187 int c = skip_white_space ();
1188
1189 if (c == '%')
1190 {
1191 token_t tok = parse_percent_token ();
1192
1193 switch (tok)
1194 {
1195 case tok_two_percents:
1196 return;
1197
1198 case tok_percent_left_curly:
1199 if (!typed)
1200 copy_definition (&pre_prologue_obstack);
1201 else
1202 copy_definition (&post_prologue_obstack);
1203 break;
1204
1205 case tok_token:
1206 parse_token_decl (token_sym, nterm_sym);
1207 break;
1208
1209 case tok_nterm:
1210 parse_token_decl (nterm_sym, token_sym);
1211 break;
1212
1213 case tok_type:
1214 parse_type_decl ();
1215 break;
1216
1217 case tok_start:
1218 parse_start_decl ();
1219 break;
1220
1221 case tok_union:
1222 parse_union_decl ();
1223 break;
1224
1225 case tok_expect:
1226 parse_expect_decl ();
1227 break;
1228
1229 case tok_thong:
1230 parse_thong_decl ();
1231 break;
1232
1233 case tok_left:
1234 parse_assoc_decl (left_assoc);
1235 break;
1236
1237 case tok_right:
1238 parse_assoc_decl (right_assoc);
1239 break;
1240
1241 case tok_nonassoc:
1242 parse_assoc_decl (non_assoc);
1243 break;
1244
1245 case tok_define:
1246 parse_muscle_decl ();
1247 break;
1248
1249 case tok_skel:
1250 parse_skel_decl ();
1251 break;
1252
1253 case tok_noop:
1254 break;
1255
1256 case tok_stropt:
1257 case tok_intopt:
1258 case tok_obsolete:
1259 assert (0);
1260 break;
1261
1262 case tok_illegal:
1263 default:
1264 complain (_("unrecognized: %s"), token_buffer);
1265 skip_to_char ('%');
1266 }
1267 }
1268 else if (c == EOF)
1269 fatal (_("no input grammar"));
1270 else
1271 {
1272 char buf[] = "c";
1273 buf[0] = c;
1274 complain (_("unknown character: %s"), quote (buf));
1275 skip_to_char ('%');
1276 }
1277 }
1278 }
1279 \f
1280 /*-------------------------------------------------------------------.
1281 | Assuming that a `{' has just been seen, copy everything up to the |
1282 | matching `}' into the actions file. STACK_OFFSET is the number of |
1283 | values in the current rule so far, which says where to find `$0' |
1284 | with respect to the top of the stack. |
1285 | |
1286 | This routine is used both for actions and guards. Only |
1287 | ACTION_OBSTACK is used, but this is fine, since we use only |
1288 | pointers to relevant portions inside this obstack. |
1289 `-------------------------------------------------------------------*/
1290
1291 static void
1292 parse_braces (symbol_list *rule, int stack_offset)
1293 {
1294 int c;
1295 int count;
1296
1297 count = 1;
1298 while (count > 0)
1299 {
1300 while ((c = getc (finput)) != '}')
1301 switch (c)
1302 {
1303 case '\n':
1304 obstack_1grow (&action_obstack, c);
1305 ++lineno;
1306 break;
1307
1308 case '{':
1309 obstack_1grow (&action_obstack, c);
1310 ++count;
1311 break;
1312
1313 case '\'':
1314 case '"':
1315 copy_string (finput, &action_obstack, c);
1316 break;
1317
1318 case '/':
1319 copy_comment (finput, &action_obstack);
1320 break;
1321
1322 case '$':
1323 copy_dollar (finput, &action_obstack,
1324 rule, stack_offset);
1325 break;
1326
1327 case '@':
1328 copy_at (finput, &action_obstack,
1329 stack_offset);
1330 break;
1331
1332 case EOF:
1333 fatal (_("unmatched %s"), "`{'");
1334
1335 default:
1336 obstack_1grow (&action_obstack, c);
1337 }
1338
1339 /* Above loop exits when C is '}'. */
1340 if (--count)
1341 obstack_1grow (&action_obstack, c);
1342 }
1343
1344 obstack_1grow (&action_obstack, '\0');
1345 }
1346
1347
1348 static void
1349 parse_action (symbol_list *rule, int stack_offset)
1350 {
1351 rule->action_line = lineno;
1352 parse_braces (rule, stack_offset);
1353 rule->action = obstack_finish (&action_obstack);
1354 }
1355
1356
1357 static void
1358 parse_guard (symbol_list *rule, int stack_offset)
1359 {
1360 token_t t = lex ();
1361 if (t != tok_left_curly)
1362 complain (_("invalid %s declaration"), "%guard");
1363 rule->guard_line = lineno;
1364 parse_braces (rule, stack_offset);
1365 rule->guard = obstack_finish (&action_obstack);
1366 }
1367
1368 \f
1369
1370 /*-------------------------------------------------------------------.
1371 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1372 | with the user's names. |
1373 `-------------------------------------------------------------------*/
1374
1375 static symbol_t *
1376 gensym (void)
1377 {
1378 /* Incremented for each generated symbol */
1379 static int gensym_count = 0;
1380 static char buf[256];
1381
1382 symbol_t *sym;
1383
1384 sprintf (buf, "@%d", ++gensym_count);
1385 token_buffer = buf;
1386 sym = getsym (token_buffer);
1387 sym->class = nterm_sym;
1388 sym->number = nvars++;
1389 return sym;
1390 }
1391 \f
1392 /*-------------------------------------------------------------------.
1393 | Parse the input grammar into one symbol_list structure. Each |
1394 | rule is represented by a sequence of symbols: the left hand side |
1395 | followed by the contents of the right hand side, followed by a |
1396 | null pointer instead of a symbol to terminate the rule. The next |
1397 | symbol is the lhs of the following rule. |
1398 | |
1399 | Guards and actions are read with parse_guard and parse_action, |
1400 | which attach them to the first symbol_list of their rule. |
1401 | |
1402 | Bison used to allow some %directives in the rules sections, but |
1403 | this is no longer considered appropriate: (i) the documented grammar |
1404 | doesn't claim it, (ii), it would promote bad style, (iii), error |
1405 | recovery for %directives consists in skipping the junk until a `%' |
1406 | is seen and helps synchronizing. This scheme is definitely wrong |
1407 | in the rules section. |
1408 `-------------------------------------------------------------------*/
1409
1410 static void
1411 readgram (void)
1412 {
1413 token_t t;
     /* Lhs of the rule being read; it is kept across rules so that a
        rule introduced by tok_bar reuses the previous one. */
1414 symbol_t *lhs = NULL;
     /* Node most recently created. */
1415 symbol_list *p = NULL;
     /* Last node of the list built so far; new nodes are appended
        after it. */
1416 symbol_list *p1 = NULL;
1417
1418 /* Points to first symbol_list of current rule. its symbol is the
1419 lhs of the rule. */
1420 symbol_list *crule = NULL;
1421 /* Points to the symbol_list preceding crule. */
1422 symbol_list *crule1 = NULL;
1423
1424 t = lex ();
1425
     /* Each iteration handles one rule (or one invalid token), until
        tok_two_percents or tok_eof is read. */
1426 while (t != tok_two_percents && t != tok_eof)
1427 if (t == tok_identifier || t == tok_bar)
1428 {
     /* Nonzero while the last thing read in the rhs is an action. */
1429 int action_flag = 0;
1430 /* Number of symbols in rhs of this rule so far */
1431 int rulelength = 0;
1432 int xactions = 0; /* JF for error checking */
     /* First symbol of the rhs, used for the default-action type check. */
1433 symbol_t *first_rhs = 0;
1434
     /* An identifier starts a rule with a new lhs; tok_bar starts
        another rule for the current LHS. */
1435 if (t == tok_identifier)
1436 {
1437 lhs = symval;
1438
     /* The first lhs seen becomes the start symbol, unless
        START_FLAG is already set. */
1439 if (!start_flag)
1440 {
1441 startval = lhs;
1442 start_flag = 1;
1443 }
1444
1445 t = lex ();
1446 if (t != tok_colon)
1447 {
1448 complain (_("ill-formed rule: initial symbol not followed by colon"));
1449 unlex (t);
1450 }
1451 }
1452
     /* NOTE(review): SYMVAL here is whatever symbol was read last; if
        none has been read yet it is presumably null, and LHS is
        dereferenced below -- confirm. */
1453 if (nrules == 0 && t == tok_bar)
1454 {
1455 complain (_("grammar starts with vertical bar"));
1456 lhs = symval; /* BOGUS: use a random symval */
1457 }
1458 /* start a new rule and record its lhs. */
1459
     /* NRITEMS is incremented once per rule here, and once more for
        each rhs symbol below. */
1460 ++nrules;
1461 ++nritems;
1462
1463 p = symbol_list_new (lhs);
1464
     /* Remember the node preceding this rule (needed to insert dummy
        rules in front of it), then append the lhs node to the list. */
1465 crule1 = p1;
1466 if (p1)
1467 p1->next = p;
1468 else
1469 grammar = p;
1470
1471 p1 = p;
1472 crule = p;
1473
1474 /* mark the rule's lhs as a nonterminal if not already so. */
1475
1476 if (lhs->class == unknown_sym)
1477 {
1478 lhs->class = nterm_sym;
1479 lhs->number = nvars;
1480 ++nvars;
1481 }
1482 else if (lhs->class == token_sym)
1483 complain (_("rule given for %s, which is a token"), lhs->tag);
1484
1485 /* read the rhs of the rule. */
1486
1487 for (;;)
1488 {
1489 t = lex ();
     /* On tok_prec, the SYMVAL left by the following token becomes the
        rule's precedence symbol.  NOTE(review): the kind of that token
        is not checked here. */
1490 if (t == tok_prec)
1491 {
1492 t = lex ();
1493 crule->ruleprec = symval;
1494 t = lex ();
1495 }
1496
     /* Anything other than a symbol or an action ends the rhs. */
1497 if (!(t == tok_identifier || t == tok_left_curly))
1498 break;
1499
1500 /* If next token is an identifier, see if a colon follows it.
1501 If one does, exit this rule now. */
1502 if (t == tok_identifier)
1503 {
1504 symbol_t *ssave;
1505 token_t t1;
1506
     /* Peek at the next token and push it back; SYMVAL is saved and
        restored around the peek. */
1507 ssave = symval;
1508 t1 = lex ();
1509 unlex (t1);
1510 symval = ssave;
1511 if (t1 == tok_colon)
1512 {
1513 warn (_("previous rule lacks an ending `;'"));
1514 break;
1515 }
1516
1517 if (!first_rhs) /* JF */
1518 first_rhs = symval;
1519 /* Not followed by colon =>
1520 process as part of this rule's rhs. */
1521 }
1522
1523 /* If we just passed an action, that action was in the middle
1524 of a rule, so make a dummy rule to reduce it to a
1525 non-terminal. */
1526 if (action_flag)
1527 {
1528 /* Since the action was written out with this rule's
1529 number, we must give the new rule this number by
1530 inserting the new rule before it. */
1531
1532 /* Make a dummy nonterminal, a gensym. */
1533 symbol_t *sdummy = gensym ();
1534
1535 /* Make a new rule, whose body is empty, before the
1536 current one, so that the action just read can
1537 belong to it. */
1538 ++nrules;
1539 ++nritems;
1540 p = symbol_list_new (sdummy);
1541 /* Attach its lineno to that of the host rule. */
1542 p->line = crule->line;
1543 /* Move the action from the host rule to this one. */
1544 p->action = crule->action;
1545 p->action_line = crule->action_line;
1546 crule->action = NULL;
1547
     /* Link the dummy rule in just before the host rule. */
1548 if (crule1)
1549 crule1->next = p;
1550 else
1551 grammar = p;
1552 /* End of the rule. */
1553 crule1 = symbol_list_new (NULL);
1554 crule1->next = crule;
1555
1556 p->next = crule1;
1557
1558 /* Insert the dummy generated by that rule into this
1559 rule. */
1560 ++nritems;
1561 p = symbol_list_new (sdummy);
1562 p1->next = p;
1563 p1 = p;
1564
1565 action_flag = 0;
1566 }
1567
1568 if (t == tok_identifier)
1569 {
1570 ++nritems;
1571 p = symbol_list_new (symval);
1572 p1->next = p;
1573 p1 = p;
1574 }
1575 else /* handle an action. */
1576 {
     /* The action is stored on CRULE for now; if more rhs follows, the
        code above moves it to a dummy rule. */
1577 parse_action (crule, rulelength);
1578 action_flag = 1;
1579 ++xactions; /* JF */
1580 }
     /* Symbols and actions alike count as one rhs position. */
1581 ++rulelength;
1582 } /* end of read rhs of rule */
1583
1584 /* Put an empty link in the list to mark the end of this rule */
1585 p = symbol_list_new (NULL);
1586 p1->next = p;
1587 p1 = p;
1588
1589 if (t == tok_prec)
1590 {
1591 complain (_("two @prec's in a row"));
1592 t = lex ();
1593 crule->ruleprec = symval;
1594 t = lex ();
1595 }
1596
1597 if (t == tok_guard)
1598 {
1599 if (!semantic_parser)
1600 complain (_("%%guard present but %%semantic_parser not specified"));
1601
1602 parse_guard (crule, rulelength);
1603 t = lex ();
1604 }
1605
1606 if (t == tok_left_curly)
1607 {
1608 /* This case never occurs -wjh */
1609 if (action_flag)
1610 complain (_("two actions at end of one rule"));
1611 parse_action (crule, rulelength);
1612 action_flag = 1;
1613 ++xactions; /* -wjh */
1614 t = lex ();
1615 }
1616 /* If $$ is being set in default way, report if any type
1617 mismatch. */
1618 else if (!xactions
1619 && first_rhs && lhs->type_name != first_rhs->type_name)
1620 {
     /* The pointers differ: it is a clash unless both names are
        present and compare equal as strings. */
1621 if (lhs->type_name == 0
1622 || first_rhs->type_name == 0
1623 || strcmp (lhs->type_name, first_rhs->type_name))
1624 complain (_("type clash (`%s' `%s') on default action"),
1625 lhs->type_name ? lhs->type_name : "",
1626 first_rhs->type_name ? first_rhs->type_name : "");
1627 }
1628 /* Warn if there is no default for $$ but we need one. */
1629 else if (!xactions && !first_rhs && lhs->type_name != 0)
1630 complain (_("empty rule for typed nonterminal, and no action"));
     /* A rule is expected to end with a semicolon; it is consumed
        when present. */
1631 if (t == tok_two_percents || t == tok_eof)
1632 warn (_("previous rule lacks an ending `;'"));
1633 if (t == tok_semicolon)
1634 t = lex ();
1635 }
1636 else
1637 {
     /* Neither an identifier nor tok_bar: report the token and go on
        with the next one. */
1638 complain (_("invalid input: %s"), quote (token_buffer));
1639 t = lex ();
1640 }
1641
1642 /* grammar has been read. Do some checking */
1643
1644 if (nrules == 0)
1645 fatal (_("no rules in the input grammar"));
1646
1647 /* Report any undefined symbols and consider them nonterminals. */
1648 symbols_do (symbol_check_defined, NULL);
1649
1650 /* Insert the initial rule, which line is that of the first rule
1651 (not that of the start symbol):
1652
1653 axiom: %start EOF. */
1654 p = symbol_list_new (axiom);
1655 p->line = grammar->line;
1656 p->next = symbol_list_new (startval);
1657 p->next->next = symbol_list_new (eoftoken);
1658 p->next->next->next = symbol_list_new (NULL);
1659 p->next->next->next->next = grammar;
     /* One more rule; its rhs holds two symbols, plus the one item
        counted per rule as above. */
1660 nrules += 1;
1661 nritems += 3;
1662 grammar = p;
     /* NOTE(review): from here on STARTVAL is AXIOM, so later checks
        on STARTVAL no longer see the user's start symbol -- confirm
        this is intended. */
1663 startval = axiom;
1664
1665 if (nsyms > SHRT_MAX)
1666 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1667 SHRT_MAX);
1668
1669 assert (nsyms == ntokens + nvars);
1670 }
1671
1672 /* At the end of the grammar file, some C source code must
1673 be stored. It is going to be associated to the epilogue
1674 directive. */
1675 static void
1676 read_additionnal_code (void)
1677 {
1678 int c;
1679 struct obstack el_obstack;
1680
1681 obstack_init (&el_obstack);
1682
1683 if (!no_lines_flag)
1684 {
1685 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1686 lineno, quotearg_style (c_quoting_style,
1687 muscle_find ("filename")));
1688 }
1689
1690 while ((c = getc (finput)) != EOF)
1691 copy_character (&el_obstack, c);
1692
1693 obstack_1grow (&el_obstack, 0);
1694 muscle_insert ("epilogue", obstack_finish (&el_obstack));
1695 }
1696
1697 \f
1698 /*------------------------------------------------------------------.
1699 | Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1700 | number. |
1701 `------------------------------------------------------------------*/
1702
1703 static void
1704 token_translations_init (void)
1705 {
1706 int num_256_available_p = TRUE;
1707 int i;
1708
1709 /* Find the highest user token number, and whether 256, the POSIX
1710 preferred user token number for the error token, is used. */
1711 max_user_token_number = 0;
1712 for (i = 0; i < ntokens; ++i)
1713 {
1714 symbol_t *this = symbols[i];
1715 if (this->user_token_number != SUNDEF)
1716 {
1717 if (this->user_token_number > max_user_token_number)
1718 max_user_token_number = this->user_token_number;
1719 if (this->user_token_number == 256)
1720 num_256_available_p = FALSE;
1721 }
1722 }
1723
1724 /* If 256 is not used, assign it to error, to follow POSIX. */
1725 if (num_256_available_p && errtoken->user_token_number == SUNDEF)
1726 errtoken->user_token_number = 256;
1727
1728 /* Set the missing user numbers. */
1729 if (max_user_token_number < 256)
1730 max_user_token_number = 256;
1731
1732 for (i = 0; i < ntokens; ++i)
1733 {
1734 symbol_t *this = symbols[i];
1735 if (this->user_token_number == SUNDEF)
1736 this->user_token_number = ++max_user_token_number;
1737 if (this->user_token_number > max_user_token_number)
1738 max_user_token_number = this->user_token_number;
1739 }
1740
1741 token_translations = XCALLOC (token_number_t, max_user_token_number + 1);
1742
1743 /* Initialize all entries for literal tokens to 2, the internal
1744 token number for $undefined., which represents all invalid
1745 inputs. */
1746 for (i = 0; i < max_user_token_number + 1; i++)
1747 token_translations[i] = undeftoken->number;
1748 symbols_do (symbol_translation, NULL);
1749 }
1750
1751
1752 /*----------------------------------------------------------------.
1753 | Assign symbol numbers, and write definition of token names into |
1754 | FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1755 `----------------------------------------------------------------*/
1756
1757 static void
1758 packsymbols (void)
1759 {
1760 symbols = XCALLOC (symbol_t *, nsyms);
1761
1762 symbols_do (symbol_check_alias_consistence, NULL);
1763 symbols_do (symbol_pack, NULL);
1764
1765 token_translations_init ();
1766
1767 if (startval->class == unknown_sym)
1768 fatal (_("the start symbol %s is undefined"), startval->tag);
1769 else if (startval->class == token_sym)
1770 fatal (_("the start symbol %s is a token"), startval->tag);
1771
1772 start_symbol = startval->number;
1773 }
1774
1775
1776 /*---------------------------------------------------------------.
1777 | Convert the rules into the representation using RRHS, RLHS and |
1778 | RITEM. |
1779 `---------------------------------------------------------------*/
1780
1781 static void
1782 packgram (void)
1783 {
1784 int itemno;
1785 int ruleno;
1786 symbol_list *p;
1787
1788 ritem = XCALLOC (item_number_t, nritems + 1);
1789 rules = XCALLOC (rule_t, nrules) - 1;
1790
1791 itemno = 0;
1792 ruleno = 1;
1793
1794 p = grammar;
1795 while (p)
1796 {
1797 symbol_t *ruleprec = p->ruleprec;
1798 rules[ruleno].user_number = ruleno;
1799 rules[ruleno].number = ruleno;
1800 rules[ruleno].lhs = p->sym;
1801 rules[ruleno].rhs = ritem + itemno;
1802 rules[ruleno].line = p->line;
1803 rules[ruleno].useful = TRUE;
1804 rules[ruleno].action = p->action;
1805 rules[ruleno].action_line = p->action_line;
1806 rules[ruleno].guard = p->guard;
1807 rules[ruleno].guard_line = p->guard_line;
1808
1809 p = p->next;
1810 while (p && p->sym)
1811 {
1812 /* item_number_t = token_number_t.
1813 But the former needs to contain more: negative rule numbers. */
1814 ritem[itemno++] = token_number_as_item_number (p->sym->number);
1815 /* A rule gets by default the precedence and associativity
1816 of the last token in it. */
1817 if (p->sym->class == token_sym)
1818 rules[ruleno].prec = p->sym;
1819 if (p)
1820 p = p->next;
1821 }
1822
1823 /* If this rule has a %prec,
1824 the specified symbol's precedence replaces the default. */
1825 if (ruleprec)
1826 {
1827 rules[ruleno].precsym = ruleprec;
1828 rules[ruleno].prec = ruleprec;
1829 }
1830 ritem[itemno++] = -ruleno;
1831 ++ruleno;
1832
1833 if (p)
1834 p = p->next;
1835 }
1836
1837 ritem[itemno] = 0;
1838 assert (itemno == nritems);
1839
1840 if (trace_flag)
1841 ritem_print (stderr);
1842 }
1843 \f
1844 /*-------------------------------------------------------------------.
1845 | Read in the grammar specification and record it in the format |
1846 | described in gram.h. All guards are copied into the GUARD_OBSTACK |
1847 | and all actions into ACTION_OBSTACK, in each case forming the body |
1848 | of a C function (YYGUARD or YYACTION) which contains a switch |
1849 | statement to decide which guard or action to execute. |
1850 `-------------------------------------------------------------------*/
1851
1852 void
1853 reader (void)
1854 {
1855 lex_init ();
1856 lineno = 1;
1857
1858 /* Initialize the muscle obstack. */
1859 obstack_init (&muscle_obstack);
1860
1861 /* Initialize the symbol table. */
1862 symbols_new ();
1863
1864 /* Construct the axiom symbol. */
1865 axiom = getsym ("$axiom");
1866 axiom->class = nterm_sym;
1867 axiom->number = nvars++;
1868
1869 /* Construct the error token */
1870 errtoken = getsym ("error");
1871 errtoken->class = token_sym;
1872 errtoken->number = ntokens++;
1873
1874 /* Construct a token that represents all undefined literal tokens.
1875 It is always token number 2. */
1876 undeftoken = getsym ("$undefined.");
1877 undeftoken->class = token_sym;
1878 undeftoken->number = ntokens++;
1879
1880 /* Initialize the obstacks. */
1881 obstack_init (&action_obstack);
1882 obstack_init (&output_obstack);
1883 obstack_init (&pre_prologue_obstack);
1884 obstack_init (&post_prologue_obstack);
1885
1886 finput = xfopen (infile, "r");
1887
1888 /* Read the declaration section. Copy %{ ... %} groups to
1889 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1890 etc. found there. */
1891 read_declarations ();
1892
1893 /* If the user did not define her EOFTOKEN, do it now. */
1894 if (!eoftoken)
1895 {
1896 eoftoken = getsym ("$");
1897 eoftoken->class = token_sym;
1898 eoftoken->number = 0;
1899 /* Value specified by POSIX. */
1900 eoftoken->user_token_number = 0;
1901 }
1902
1903 /* Read in the grammar, build grammar in list form. Write out
1904 guards and actions. */
1905 readgram ();
1906 /* Some C code is given at the end of the grammar file. */
1907 read_additionnal_code ();
1908
1909 lex_free ();
1910 xfclose (finput);
1911
1912 /* Assign the symbols their symbol numbers. Write #defines for the
1913 token symbols into FDEFINES if requested. */
1914 packsymbols ();
1915
1916 /* Convert the grammar into the format described in gram.h. */
1917 packgram ();
1918
1919 /* The grammar as a symbol_list is no longer needed. */
1920 LIST_FREE (symbol_list, grammar);
1921 }
1922
1923 void
1924 grammar_free (void)
1925 {
1926 XFREE (ritem);
1927 free (rules + 1);
1928 /* Free the symbol table data structure. */
1929 symbols_free ();
1930 }