]> git.saurik.com Git - bison.git/blob - src/reader.c
0e1fc2bd9e8d0e2556111a28a51ce8810337de43
[bison.git] / src / reader.c
1 /* Input parser for bison
2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
3 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
37
/* One link of the singly linked list in which the grammar is stored
   while it is being read.  */
typedef struct symbol_list
{
  /* Following link, or NULL on a freshly created node.  */
  struct symbol_list *next;
  /* Symbol carried by this link.  get_type_name treats a NULL SYM as
     the end of the current rule.  */
  symbol_t *sym;
  /* Value of LINENO when the node was created by symbol_list_new.  */
  int line;

  /* The action is attached to the LHS of a rule. */
  const char *action;
  /* Value of LINENO when parse_action started reading ACTION.  */
  int action_line;

  /* The guard is attached to the LHS of a rule. */
  const char *guard;
  /* Value of LINENO when parse_guard started reading GUARD.  */
  int guard_line;
  /* NOTE(review): presumably the symbol named by %prec for this rule;
     it is only initialized to NULL in the code visible here.  */
  symbol_t *ruleprec;
} symbol_list;
53
/* Current input line; bumped by the copy_* and parse_* routines each
   time they consume a newline.  */
int lineno;
/* NOTE(review): presumably the head of the list of rules read so far;
   it is not touched in the part of the file visible here.  */
static symbol_list *grammar = NULL;
/* Nonzero once a valid %start declaration has been parsed.  */
static int start_flag = 0;
/* Symbol named by the %start declaration, if any.  */
static symbol_t *startval = NULL;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

/* Nonzero if %union has been seen.  */
static int typed = 0;

/* Incremented for each %left, %right or %nonassoc seen */
static int lastprec = 0;

/* Special symbols.  EOFTOKEN is set by parse_token_decl when a token
   is given the user number 0; UNDEFTOKEN's number marks the unused
   slots of TOKEN_TRANSLATIONS (see symbol_translation).
   NOTE(review): ERRTOKEN and AXIOM are not used in the visible code.  */
symbol_t *errtoken = NULL;
symbol_t *undeftoken = NULL;
symbol_t *eoftoken = NULL;
symbol_t *axiom = NULL;
73
74 static symbol_list *
75 symbol_list_new (symbol_t *sym)
76 {
77 symbol_list *res = XMALLOC (symbol_list, 1);
78 res->next = NULL;
79 res->sym = sym;
80 res->line = lineno;
81 res->action = NULL;
82 res->action_line = 0;
83 res->guard = NULL;
84 res->guard_line = 0;
85 res->ruleprec = NULL;
86 return res;
87 }
88
89 /*------------------------.
90 | Operations on symbols. |
91 `------------------------*/
92
93
94 /*-----------------------------------------------------------.
95 | If THIS is not defined, report an error, and consider it a |
96 | nonterminal. |
97 `-----------------------------------------------------------*/
98
99 static bool
100 symbol_check_defined (symbol_t *this)
101 {
102 if (this->class == unknown_sym)
103 {
104 complain
105 (_("symbol %s is used, but is not defined as a token and has no rules"),
106 this->tag);
107 this->class = nterm_sym;
108 this->number = nvars++;
109 }
110
111 return TRUE;
112 }
113
114
115 /*-------------------------------------------------------------------.
116 | Assign a symbol number, and write the definition of the token name |
117 | into FDEFINES. Put in SYMBOLS. |
118 `-------------------------------------------------------------------*/
119
120 static bool
121 symbol_make_alias (symbol_t *symbol, char *typename)
122 {
123 if (symval->alias)
124 warn (_("symbol `%s' used more than once as a literal string"),
125 symval->tag);
126 else if (symbol->alias)
127 warn (_("symbol `%s' given more than one literal string"),
128 symbol->tag);
129 else
130 {
131 symval->class = token_sym;
132 symval->type_name = typename;
133 symval->user_token_number = symbol->user_token_number;
134 symbol->user_token_number = SALIAS;
135 symval->alias = symbol;
136 symbol->alias = symval;
137 /* symbol and symval combined are only one symbol */
138 nsyms--;
139 ntokens--;
140 assert (ntokens == symbol->number || ntokens == symval->number);
141 symbol->number = symval->number =
142 (symval->number < symbol->number) ? symval->number : symbol->number;
143 }
144
145 return TRUE;
146 }
147
148 /*---------------------------------------------------------.
149 | Check that THIS, and its alias, have same precedence and |
150 | associativity. |
151 `---------------------------------------------------------*/
152
153 static bool
154 symbol_check_alias_consistence (symbol_t *this)
155 {
156 /* Check only those who _are_ the aliases. */
157 if (this->alias && this->user_token_number == SALIAS)
158 {
159 if (this->prec != this->alias->prec)
160 {
161 if (this->prec != 0 && this->alias->prec != 0)
162 complain (_("conflicting precedences for %s and %s"),
163 this->tag, this->alias->tag);
164 if (this->prec != 0)
165 this->alias->prec = this->prec;
166 else
167 this->prec = this->alias->prec;
168 }
169
170 if (this->assoc != this->alias->assoc)
171 {
172 if (this->assoc != 0 && this->alias->assoc != 0)
173 complain (_("conflicting assoc values for %s and %s"),
174 this->tag, this->alias->tag);
175 if (this->assoc != 0)
176 this->alias->assoc = this->assoc;
177 else
178 this->assoc = this->alias->assoc;
179 }
180 }
181 return TRUE;
182 }
183
184
185 /*-------------------------------------------------------------------.
186 | Assign a symbol number, and write the definition of the token name |
187 | into FDEFINES. Put in SYMBOLS. |
188 `-------------------------------------------------------------------*/
189
190 static bool
191 symbol_pack (symbol_t *this)
192 {
193 if (this->class == nterm_sym)
194 {
195 this->number += ntokens;
196 }
197 else if (this->alias)
198 {
199 /* This symbol and its alias are a single token defn.
200 Allocate a tokno, and assign to both check agreement of
201 prec and assoc fields and make both the same */
202 if (this->number == NUMBER_UNDEFINED)
203 {
204 if (this == eoftoken || this->alias == eoftoken)
205 this->number = this->alias->number = 0;
206 else
207 {
208 assert (this->alias->number != NUMBER_UNDEFINED);
209 this->number = this->alias->number;
210 }
211 }
212 /* Do not do processing below for SALIASs. */
213 if (this->user_token_number == SALIAS)
214 return TRUE;
215 }
216 else /* this->class == token_sym */
217 {
218 assert (this->number != NUMBER_UNDEFINED);
219 }
220
221 symbols[this->number] = this;
222 return TRUE;
223 }
224
225
226
227
228 /*--------------------------------------------------.
229 | Put THIS in TOKEN_TRANSLATIONS if it is a token. |
230 `--------------------------------------------------*/
231
232 static bool
233 symbol_translation (symbol_t *this)
234 {
235 /* Non-terminal? */
236 if (this->class == token_sym
237 && this->user_token_number != SALIAS)
238 {
239 /* A token which translation has already been set? */
240 if (token_translations[this->user_token_number] != undeftoken->number)
241 complain (_("tokens %s and %s both assigned number %d"),
242 symbols[token_translations[this->user_token_number]]->tag,
243 this->tag, this->user_token_number);
244
245 token_translations[this->user_token_number] = this->number;
246 }
247
248 return TRUE;
249 }
250 \f
251
252 /*===================\
253 | Low level lexing. |
254 \===================*/
255
256 static void
257 skip_to_char (int target)
258 {
259 int c;
260 if (target == '\n')
261 complain (_(" Skipping to next \\n"));
262 else
263 complain (_(" Skipping to next %c"), target);
264
265 do
266 c = skip_white_space ();
267 while (c != target && c != EOF);
268 if (c != EOF)
269 ungetc (c, finput);
270 }
271
272
/*---------------------------------------------------------.
| Read a signed integer from STREAM and return its value.  |
`---------------------------------------------------------*/

/* The accepted syntax is an optional `-' followed by decimal digits.
   The first character that is not part of the number is pushed back
   onto STREAM.  If there are no digits at all, return 0 (a lone `-'
   is nevertheless consumed).

   Magnitudes that do not fit in an int saturate at INT_MAX instead of
   overflowing, which would be undefined behavior.  Callers compare
   the result against the rule length, so a saturated value is simply
   reported as out of range.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * N + DIGIT fits iff N <= (INT_MAX - DIGIT) / 10.  Once
	 saturated, N stays at INT_MAX for the remaining digits.  */
      if (n > (INT_MAX - digit) / 10)
	n = INT_MAX;
      else
	n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a harmless no-op.  */
  ungetc (c, stream);

  return sign * n;
}
300 \f
301 /*--------------------------------------------------------------.
302 | Get the data type (alternative in the union) of the value for |
303 | symbol N in rule RULE. |
304 `--------------------------------------------------------------*/
305
306 static char *
307 get_type_name (int n, symbol_list *rule)
308 {
309 int i;
310 symbol_list *rp;
311
312 if (n < 0)
313 {
314 complain (_("invalid $ value"));
315 return NULL;
316 }
317
318 rp = rule;
319 i = 0;
320
321 while (i < n)
322 {
323 rp = rp->next;
324 if (rp == NULL || rp->sym == NULL)
325 {
326 complain (_("invalid $ value"));
327 return NULL;
328 }
329 ++i;
330 }
331
332 return rp->sym->type_name;
333 }
334 \f
/* Append the character C to OOUT.  The square brackets are replaced
   by the quadrigraphs `@<:@' and `@:>@'; any other character is
   copied as is.  */

static inline void
copy_character (struct obstack *oout, int c)
{
  if (c == '[')
    {
      obstack_sgrow (oout, "@<:@");
    }
  else if (c == ']')
    {
      obstack_sgrow (oout, "@:>@");
    }
  else
    {
      obstack_1grow (oout, c);
    }
}
356
357 /*------------------------------------------------------------.
358 | Dump the string from FIN to OOUT if non null. MATCH is the |
359 | delimiter of the string (either ' or "). |
360 `------------------------------------------------------------*/
361
362 static inline void
363 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
364 {
365 int c;
366
367 if (store)
368 obstack_1grow (oout, match);
369
370 c = getc (fin);
371
372 while (c != match)
373 {
374 if (c == EOF)
375 fatal (_("unterminated string at end of file"));
376 if (c == '\n')
377 {
378 complain (_("unterminated string"));
379 ungetc (c, fin);
380 c = match; /* invent terminator */
381 continue;
382 }
383
384 copy_character (oout, c);
385
386 if (c == '\\')
387 {
388 c = getc (fin);
389 if (c == EOF)
390 fatal (_("unterminated string at end of file"));
391 copy_character (oout, c);
392
393 if (c == '\n')
394 ++lineno;
395 }
396
397 c = getc (fin);
398 }
399
400 if (store)
401 obstack_1grow (oout, c);
402 }
403
/* Copy to OOUT the string literal being read from FIN, whose opening
   delimiter MATCH was already consumed.  Both delimiters are output
   too: this is copy_string2 with STORE set.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int store_delimiters = 1;

  copy_string2 (fin, oout, match, store_delimiters);
}
411
/* Copy from FIN to OOUT the longest run of alphanumeric characters
   and underscores.  The first character that does not belong to it
   is pushed back onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c = getc (fin);

  while (isalnum (c) || c == '_')
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
424
425
426 /*------------------------------------------------------------------.
427 | Dump the wannabee comment from IN to OOUT. In fact we just saw a |
428 | `/', which might or might not be a comment. In any case, copy |
429 | what we saw. |
430 `------------------------------------------------------------------*/
431
432 static inline void
433 copy_comment (FILE *fin, struct obstack *oout)
434 {
435 int cplus_comment;
436 int ended;
437 int c;
438
439 /* We read a `/', output it. */
440 obstack_1grow (oout, '/');
441
442 switch ((c = getc (fin)))
443 {
444 case '/':
445 cplus_comment = 1;
446 break;
447 case '*':
448 cplus_comment = 0;
449 break;
450 default:
451 ungetc (c, fin);
452 return;
453 }
454
455 obstack_1grow (oout, c);
456 c = getc (fin);
457
458 ended = 0;
459 while (!ended)
460 {
461 if (!cplus_comment && c == '*')
462 {
463 while (c == '*')
464 {
465 obstack_1grow (oout, c);
466 c = getc (fin);
467 }
468
469 if (c == '/')
470 {
471 obstack_1grow (oout, c);
472 ended = 1;
473 }
474 }
475 else if (c == '\n')
476 {
477 ++lineno;
478 obstack_1grow (oout, c);
479 if (cplus_comment)
480 ended = 1;
481 else
482 c = getc (fin);
483 }
484 else if (c == EOF)
485 fatal (_("unterminated comment"));
486 else
487 {
488 copy_character (oout, c);
489 c = getc (fin);
490 }
491 }
492 }
493
494
495 /*-------------------------------------------------------------------.
496 | FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
497 | reference to this location. RULE_LENGTH is the number of values in |
498 | the current rule so far, which says where to find `$0' with |
499 | respect to the top of the stack. |
500 `-------------------------------------------------------------------*/
501
502 static inline void
503 copy_at (FILE *fin, struct obstack *oout, int rule_length)
504 {
505 int c = getc (fin);
506 locations_flag = 1;
507
508 if (c == '$')
509 {
510 obstack_sgrow (oout, "]b4_lhs_location[");
511 }
512 else if (isdigit (c) || c == '-')
513 {
514 int n;
515
516 ungetc (c, fin);
517 n = read_signed_integer (fin);
518 if (n > rule_length)
519 complain (_("invalid value: %s%d"), "@", n);
520 else
521 obstack_fgrow2 (oout, "]b4_rhs_location([%d], [%d])[",
522 rule_length, n);
523 }
524 else
525 {
526 char buf[] = "@c";
527 buf[1] = c;
528 complain (_("%s is invalid"), quote (buf));
529 }
530 }
531
532
533 /*------------------------------------------------------------------.
534 | FIN is pointing to a wannabee semantic value (i.e., a `$'). |
535 | |
536 | Possible inputs: $[<TYPENAME>]($|integer) |
537 | |
538 | Output to OOUT a reference to this semantic value. RULE_LENGTH is |
539 | the number of values in the current rule so far, which says where |
540 | to find `$0' with respect to the top of the stack. |
541 `------------------------------------------------------------------*/
542
543 static inline void
544 copy_dollar (FILE *fin, struct obstack *oout,
545 symbol_list *rule, int rule_length)
546 {
547 int c = getc (fin);
548 const char *type_name = NULL;
549
550 /* Get the type name if explicit. */
551 if (c == '<')
552 {
553 read_type_name (fin);
554 type_name = token_buffer;
555 value_components_used = 1;
556 c = getc (fin);
557 }
558
559 if (c == '$')
560 {
561 if (!type_name)
562 type_name = get_type_name (0, rule);
563 if (!type_name && typed)
564 complain (_("$$ of `%s' has no declared type"),
565 rule->sym->tag);
566 if (!type_name)
567 type_name = "";
568 obstack_fgrow1 (oout,
569 "]b4_lhs_value([%s])[", type_name);
570 }
571 else if (isdigit (c) || c == '-')
572 {
573 int n;
574 ungetc (c, fin);
575 n = read_signed_integer (fin);
576
577 if (n > rule_length)
578 complain (_("invalid value: %s%d"), "$", n);
579 else
580 {
581 if (!type_name && n > 0)
582 type_name = get_type_name (n, rule);
583 if (!type_name && typed)
584 complain (_("$%d of `%s' has no declared type"),
585 n, rule->sym->tag);
586 if (!type_name)
587 type_name = "";
588 obstack_fgrow3 (oout, "]b4_rhs_value([%d], [%d], [%s])[",
589 rule_length, n, type_name);
590 }
591 }
592 else
593 {
594 char buf[] = "$c";
595 buf[1] = c;
596 complain (_("%s is invalid"), quote (buf));
597 }
598 }
599 \f
600 /*-------------------------------------------------------------------.
601 | Copy the contents of a `%{ ... %}' into the definitions file. The |
602 | `%{' has already been read. Return after reading the `%}'. |
603 `-------------------------------------------------------------------*/
604
605 static void
606 copy_definition (struct obstack *oout)
607 {
608 int c;
609 /* -1 while reading a character if prev char was %. */
610 int after_percent;
611
612 if (!no_lines_flag)
613 {
614 obstack_fgrow2 (oout, muscle_find ("linef"),
615 lineno, quotearg_style (c_quoting_style,
616 muscle_find ("filename")));
617 }
618
619 after_percent = 0;
620
621 c = getc (finput);
622
623 for (;;)
624 {
625 switch (c)
626 {
627 case '\n':
628 obstack_1grow (oout, c);
629 ++lineno;
630 break;
631
632 case '%':
633 after_percent = -1;
634 break;
635
636 case '\'':
637 case '"':
638 copy_string (finput, oout, c);
639 break;
640
641 case '/':
642 copy_comment (finput, oout);
643 break;
644
645 case EOF:
646 fatal ("%s", _("unterminated `%{' definition"));
647
648 default:
649 copy_character (oout, c);
650 }
651
652 c = getc (finput);
653
654 if (after_percent)
655 {
656 if (c == '}')
657 return;
658 obstack_1grow (oout, '%');
659 }
660 after_percent = 0;
661 }
662 }
663
664
665 /*-------------------------------------------------------------------.
666 | Parse what comes after %token or %nterm. For %token, WHAT_IS is |
667 | token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
668 | are reversed. |
669 `-------------------------------------------------------------------*/
670
671 static void
672 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
673 {
674 token_t token = tok_undef;
675 char *typename = NULL;
676
677 /* The symbol being defined. */
678 symbol_t *symbol = NULL;
679
680 /* After `%token' and `%nterm', any number of symbols maybe be
681 defined. */
682 for (;;)
683 {
684 int tmp_char = ungetc (skip_white_space (), finput);
685
686 /* `%' (for instance from `%token', or from `%%' etc.) is the
687 only valid means to end this declaration. */
688 if (tmp_char == '%')
689 return;
690 if (tmp_char == EOF)
691 fatal (_("Premature EOF after %s"), token_buffer);
692
693 token = lex ();
694 if (token == tok_comma)
695 {
696 symbol = NULL;
697 continue;
698 }
699 if (token == tok_typename)
700 {
701 typename = xstrdup (token_buffer);
702 value_components_used = 1;
703 symbol = NULL;
704 }
705 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
706 {
707 symbol_make_alias (symbol, typename);
708 symbol = NULL;
709 }
710 else if (token == tok_identifier)
711 {
712 int oldclass = symval->class;
713 symbol = symval;
714
715 if (symbol->class == what_is_not)
716 complain (_("symbol %s redefined"), symbol->tag);
717 symbol->class = what_is;
718 if (what_is == nterm_sym && oldclass != nterm_sym)
719 symbol->number = nvars++;
720 if (what_is == token_sym && symbol->number == NUMBER_UNDEFINED)
721 symbol->number = ntokens++;
722
723 if (typename)
724 {
725 if (symbol->type_name == NULL)
726 symbol->type_name = typename;
727 else if (strcmp (typename, symbol->type_name) != 0)
728 complain (_("type redeclaration for %s"), symbol->tag);
729 }
730 }
731 else if (symbol && token == tok_number)
732 {
733 symbol->user_token_number = numval;
734 /* User defined EOF token? */
735 if (numval == 0)
736 {
737 eoftoken = symbol;
738 eoftoken->number = 0;
739 /* It is always mapped to 0, so it was already counted in
740 NTOKENS. */
741 --ntokens;
742 }
743 }
744 else
745 {
746 complain (_("`%s' is invalid in %s"),
747 token_buffer,
748 (what_is == token_sym) ? "%token" : "%nterm");
749 skip_to_char ('%');
750 }
751 }
752
753 }
754
755
756 /*------------------------------.
757 | Parse what comes after %start |
758 `------------------------------*/
759
760 static void
761 parse_start_decl (void)
762 {
763 if (start_flag)
764 complain (_("multiple %s declarations"), "%start");
765 if (lex () != tok_identifier)
766 complain (_("invalid %s declaration"), "%start");
767 else
768 {
769 start_flag = 1;
770 startval = symval;
771 }
772 }
773
774 /*-----------------------------------------------------------.
775 | read in a %type declaration and record its information for |
776 | get_type_name to access |
777 `-----------------------------------------------------------*/
778
779 static void
780 parse_type_decl (void)
781 {
782 char *name;
783
784 if (lex () != tok_typename)
785 {
786 complain ("%s", _("%type declaration has no <typename>"));
787 skip_to_char ('%');
788 return;
789 }
790
791 name = xstrdup (token_buffer);
792
793 for (;;)
794 {
795 token_t t;
796 int tmp_char = ungetc (skip_white_space (), finput);
797
798 if (tmp_char == '%')
799 return;
800 if (tmp_char == EOF)
801 fatal (_("Premature EOF after %s"), token_buffer);
802
803 t = lex ();
804
805 switch (t)
806 {
807
808 case tok_comma:
809 case tok_semicolon:
810 break;
811
812 case tok_identifier:
813 if (symval->type_name == NULL)
814 symval->type_name = name;
815 else if (strcmp (name, symval->type_name) != 0)
816 complain (_("type redeclaration for %s"), symval->tag);
817
818 break;
819
820 default:
821 complain (_("invalid %%type declaration due to item: %s"),
822 token_buffer);
823 skip_to_char ('%');
824 }
825 }
826 }
827
828
829
830 /*----------------------------------------------------------------.
831 | Read in a %left, %right or %nonassoc declaration and record its |
832 | information. |
833 `----------------------------------------------------------------*/
834
835 static void
836 parse_assoc_decl (associativity assoc)
837 {
838 char *name = NULL;
839 int prev = 0;
840
841 /* Assign a new precedence level, never 0. */
842 ++lastprec;
843
844 for (;;)
845 {
846 token_t t;
847 int tmp_char = ungetc (skip_white_space (), finput);
848
849 if (tmp_char == '%')
850 return;
851 if (tmp_char == EOF)
852 fatal (_("Premature EOF after %s"), token_buffer);
853
854 t = lex ();
855
856 switch (t)
857 {
858 case tok_typename:
859 name = xstrdup (token_buffer);
860 break;
861
862 case tok_comma:
863 break;
864
865 case tok_identifier:
866 if (symval->prec != 0)
867 complain (_("redefining precedence of %s"), symval->tag);
868 symval->prec = lastprec;
869 symval->assoc = assoc;
870 if (symval->class == nterm_sym)
871 complain (_("symbol %s redefined"), symval->tag);
872 if (symval->number == NUMBER_UNDEFINED)
873 {
874 symval->number = ntokens++;
875 symval->class = token_sym;
876 }
877 if (name)
878 { /* record the type, if one is specified */
879 if (symval->type_name == NULL)
880 symval->type_name = name;
881 else if (strcmp (name, symval->type_name) != 0)
882 complain (_("type redeclaration for %s"), symval->tag);
883 }
884 break;
885
886 case tok_number:
887 if (prev == tok_identifier)
888 {
889 symval->user_token_number = numval;
890 }
891 else
892 {
893 complain
894 (_("invalid text (%s) - number should be after identifier"),
895 token_buffer);
896 skip_to_char ('%');
897 }
898 break;
899
900 case tok_semicolon:
901 return;
902
903 default:
904 complain (_("unexpected item: %s"), token_buffer);
905 skip_to_char ('%');
906 }
907
908 prev = t;
909 }
910 }
911
912
913
914 /*--------------------------------------------------------------.
915 | Copy the union declaration into the stype muscle |
916 | (and fdefines), where it is made into the definition of |
917 | YYSTYPE, the type of elements of the parser value stack. |
918 `--------------------------------------------------------------*/
919
920 static void
921 parse_union_decl (void)
922 {
923 int c;
924 int count = 0;
925 bool done = FALSE;
926 struct obstack union_obstack;
927 if (typed)
928 complain (_("multiple %s declarations"), "%union");
929
930 typed = 1;
931
932 MUSCLE_INSERT_INT ("stype_line", lineno);
933 obstack_init (&union_obstack);
934 obstack_sgrow (&union_obstack, "union");
935
936 while (!done)
937 {
938 c = xgetc (finput);
939
940 /* If C contains '/', it is output by copy_comment (). */
941 if (c != '/')
942 obstack_1grow (&union_obstack, c);
943
944 switch (c)
945 {
946 case '\n':
947 ++lineno;
948 break;
949
950 case '/':
951 copy_comment (finput, &union_obstack);
952 break;
953
954 case '{':
955 ++count;
956 break;
957
958 case '}':
959 /* FIXME: Errr. How could this happen???. --akim */
960 if (count == 0)
961 complain (_("unmatched %s"), "`}'");
962 count--;
963 if (!count)
964 done = TRUE;
965 break;
966 }
967 }
968
969 /* JF don't choke on trailing semi */
970 c = skip_white_space ();
971 if (c != ';')
972 ungetc (c, finput);
973 obstack_1grow (&union_obstack, 0);
974 muscle_insert ("stype", obstack_finish (&union_obstack));
975 }
976
977
978 /*-------------------------------------------------------.
979 | Parse the declaration %expect N which says to expect N |
980 | shift-reduce conflicts. |
981 `-------------------------------------------------------*/
982
983 static void
984 parse_expect_decl (void)
985 {
986 int c = skip_white_space ();
987 ungetc (c, finput);
988
989 if (!isdigit (c))
990 complain (_("argument of %%expect is not an integer"));
991 else
992 expected_conflicts = read_signed_integer (finput);
993 }
994
995
996 /*-------------------------------------------------------------------.
997 | Parse what comes after %thong. the full syntax is |
998 | |
999 | %thong <type> token number literal |
1000 | |
1001 | the <type> or number may be omitted. The number specifies the |
1002 | user_token_number. |
1003 | |
1004 | Two symbols are entered in the table, one for the token symbol and |
1005 | one for the literal. Both are given the <type>, if any, from the |
1006 | declaration. The ->user_token_number of the first is SALIAS and |
1007 | the ->user_token_number of the second is set to the number, if |
1008 | any, from the declaration. The two symbols are linked via |
1009 | pointers in their ->alias fields. |
1010 | |
1011 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
1012 | only the literal string is retained it is the literal string that |
1013 | is output to yytname |
1014 `-------------------------------------------------------------------*/
1015
1016 static void
1017 parse_thong_decl (void)
1018 {
1019 token_t token;
1020 symbol_t *symbol;
1021 char *typename = 0;
1022 int usrtoknum = SUNDEF;
1023
1024 token = lex (); /* fetch typename or first token */
1025 if (token == tok_typename)
1026 {
1027 typename = xstrdup (token_buffer);
1028 value_components_used = 1;
1029 token = lex (); /* fetch first token */
1030 }
1031
1032 /* process first token */
1033
1034 if (token != tok_identifier)
1035 {
1036 complain (_("unrecognized item %s, expected an identifier"),
1037 token_buffer);
1038 skip_to_char ('%');
1039 return;
1040 }
1041 symval->class = token_sym;
1042 symval->type_name = typename;
1043 symval->user_token_number = SALIAS;
1044 symbol = symval;
1045
1046 token = lex (); /* get number or literal string */
1047
1048 if (token == tok_number)
1049 {
1050 usrtoknum = numval;
1051 token = lex (); /* okay, did number, now get literal */
1052 }
1053
1054 /* process literal string token */
1055
1056 if (token != tok_identifier || *symval->tag != '\"')
1057 {
1058 complain (_("expected string constant instead of %s"), token_buffer);
1059 skip_to_char ('%');
1060 return;
1061 }
1062 symval->class = token_sym;
1063 symval->type_name = typename;
1064 symval->user_token_number = usrtoknum;
1065
1066 symval->alias = symbol;
1067 symbol->alias = symval;
1068
1069 /* symbol and symval combined are only one symbol. */
1070 nsyms--;
1071 }
1072
1073
1074 static void
1075 parse_muscle_decl (void)
1076 {
1077 int ch = ungetc (skip_white_space (), finput);
1078 char *muscle_key;
1079 char *muscle_value;
1080
1081 /* Read key. */
1082 if (!isalpha (ch) && ch != '_')
1083 {
1084 complain (_("invalid %s declaration"), "%define");
1085 skip_to_char ('%');
1086 return;
1087 }
1088 copy_identifier (finput, &muscle_obstack);
1089 obstack_1grow (&muscle_obstack, 0);
1090 muscle_key = obstack_finish (&muscle_obstack);
1091
1092 /* Read value. */
1093 ch = skip_white_space ();
1094 if (ch != '"')
1095 {
1096 ungetc (ch, finput);
1097 if (ch != EOF)
1098 {
1099 complain (_("invalid %s declaration"), "%define");
1100 skip_to_char ('%');
1101 return;
1102 }
1103 else
1104 fatal (_("Premature EOF after %s"), "\"");
1105 }
1106 copy_string2 (finput, &muscle_obstack, '"', 0);
1107 obstack_1grow (&muscle_obstack, 0);
1108 muscle_value = obstack_finish (&muscle_obstack);
1109
1110 /* Store the (key, value) pair in the environment. */
1111 muscle_insert (muscle_key, muscle_value);
1112 }
1113
1114
1115
1116 /*---------------------------------.
1117 | Parse a double quoted parameter. |
1118 `---------------------------------*/
1119
1120 static const char *
1121 parse_dquoted_param (const char *from)
1122 {
1123 struct obstack param_obstack;
1124 const char *param = NULL;
1125 int c;
1126
1127 obstack_init (&param_obstack);
1128 c = skip_white_space ();
1129
1130 if (c != '"')
1131 {
1132 complain (_("invalid %s declaration"), from);
1133 ungetc (c, finput);
1134 skip_to_char ('%');
1135 return NULL;
1136 }
1137
1138 while ((c = literalchar ()) != '"')
1139 obstack_1grow (&param_obstack, c);
1140
1141 obstack_1grow (&param_obstack, '\0');
1142 param = obstack_finish (&param_obstack);
1143
1144 if (c != '"' || strlen (param) == 0)
1145 {
1146 complain (_("invalid %s declaration"), from);
1147 if (c != '"')
1148 ungetc (c, finput);
1149 skip_to_char ('%');
1150 return NULL;
1151 }
1152
1153 return param;
1154 }
1155
1156 /*----------------------------------.
1157 | Parse what comes after %skeleton. |
1158 `----------------------------------*/
1159
1160 static void
1161 parse_skel_decl (void)
1162 {
1163 skeleton = parse_dquoted_param ("%skeleton");
1164 }
1165
1166 /*----------------------------------------------------------------.
1167 | Read from finput until `%%' is seen. Discard the `%%'. Handle |
1168 | any `%' declarations, and copy the contents of any `%{ ... %}' |
1169 | groups to PRE_PROLOGUE_OBSTACK or POST_PROLOGUE_OBSTACK. |
1170 `----------------------------------------------------------------*/
1171
1172 static void
1173 read_declarations (void)
1174 {
1175 for (;;)
1176 {
1177 int c = skip_white_space ();
1178
1179 if (c == '%')
1180 {
1181 token_t tok = parse_percent_token ();
1182
1183 switch (tok)
1184 {
1185 case tok_two_percents:
1186 return;
1187
1188 case tok_percent_left_curly:
1189 if (!typed)
1190 copy_definition (&pre_prologue_obstack);
1191 else
1192 copy_definition (&post_prologue_obstack);
1193 break;
1194
1195 case tok_token:
1196 parse_token_decl (token_sym, nterm_sym);
1197 break;
1198
1199 case tok_nterm:
1200 parse_token_decl (nterm_sym, token_sym);
1201 break;
1202
1203 case tok_type:
1204 parse_type_decl ();
1205 break;
1206
1207 case tok_start:
1208 parse_start_decl ();
1209 break;
1210
1211 case tok_union:
1212 parse_union_decl ();
1213 break;
1214
1215 case tok_expect:
1216 parse_expect_decl ();
1217 break;
1218
1219 case tok_thong:
1220 parse_thong_decl ();
1221 break;
1222
1223 case tok_left:
1224 parse_assoc_decl (left_assoc);
1225 break;
1226
1227 case tok_right:
1228 parse_assoc_decl (right_assoc);
1229 break;
1230
1231 case tok_nonassoc:
1232 parse_assoc_decl (non_assoc);
1233 break;
1234
1235 case tok_define:
1236 parse_muscle_decl ();
1237 break;
1238
1239 case tok_skel:
1240 parse_skel_decl ();
1241 break;
1242
1243 case tok_noop:
1244 break;
1245
1246 case tok_stropt:
1247 case tok_intopt:
1248 case tok_obsolete:
1249 assert (0);
1250 break;
1251
1252 case tok_illegal:
1253 default:
1254 complain (_("unrecognized: %s"), token_buffer);
1255 skip_to_char ('%');
1256 }
1257 }
1258 else if (c == EOF)
1259 fatal (_("no input grammar"));
1260 else
1261 {
1262 char buf[] = "c";
1263 buf[0] = c;
1264 complain (_("unknown character: %s"), quote (buf));
1265 skip_to_char ('%');
1266 }
1267 }
1268 }
1269 \f
1270 /*------------------------------------------------------------------.
1271 | Assuming that a `{' has just been seen, copy everything up to the |
1272 | matching `}' into the actions file. RULE_LENGTH is the number of |
1273 | values in the current rule so far, which says where to find `$0' |
1274 | with respect to the top of the stack. |
1275 | |
1276 | This routine is used both for actions and guards. Only |
1277 | ACTION_OBSTACK is used, but this is fine, since we use only |
1278 | pointers to relevant portions inside this obstack. |
1279 `------------------------------------------------------------------*/
1280
1281 static void
1282 parse_braces (symbol_list *rule, int rule_length)
1283 {
1284 int c;
1285 int count;
1286
1287 count = 1;
1288 while (count > 0)
1289 {
1290 while ((c = getc (finput)) != '}')
1291 switch (c)
1292 {
1293 case '\n':
1294 obstack_1grow (&action_obstack, c);
1295 ++lineno;
1296 break;
1297
1298 case '{':
1299 obstack_1grow (&action_obstack, c);
1300 ++count;
1301 break;
1302
1303 case '\'':
1304 case '"':
1305 copy_string (finput, &action_obstack, c);
1306 break;
1307
1308 case '/':
1309 copy_comment (finput, &action_obstack);
1310 break;
1311
1312 case '$':
1313 copy_dollar (finput, &action_obstack, rule, rule_length);
1314 break;
1315
1316 case '@':
1317 copy_at (finput, &action_obstack, rule_length);
1318 break;
1319
1320 case EOF:
1321 fatal (_("unmatched %s"), "`{'");
1322
1323 default:
1324 obstack_1grow (&action_obstack, c);
1325 }
1326
1327 /* Above loop exits when C is '}'. */
1328 if (--count)
1329 obstack_1grow (&action_obstack, c);
1330 }
1331
1332 obstack_1grow (&action_obstack, '\0');
1333 }
1334
1335
1336 static void
1337 parse_action (symbol_list *rule, int rule_length)
1338 {
1339 rule->action_line = lineno;
1340 parse_braces (rule, rule_length);
1341 rule->action = obstack_finish (&action_obstack);
1342 }
1343
1344
1345 static void
1346 parse_guard (symbol_list *rule, int rule_length)
1347 {
1348 token_t t = lex ();
1349 if (t != tok_left_curly)
1350 complain (_("invalid %s declaration"), "%guard");
1351 rule->guard_line = lineno;
1352 parse_braces (rule, rule_length);
1353 rule->guard = obstack_finish (&action_obstack);
1354 }
1355
1356 \f
1357
1358 /*-------------------------------------------------------------------.
1359 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1360 | with the user's names. |
1361 `-------------------------------------------------------------------*/
1362
1363 static symbol_t *
1364 gensym (void)
1365 {
1366 /* Incremented for each generated symbol */
1367 static int gensym_count = 0;
1368 static char buf[256];
1369
1370 symbol_t *sym;
1371
1372 sprintf (buf, "@%d", ++gensym_count);
1373 token_buffer = buf;
1374 sym = getsym (token_buffer);
1375 sym->class = nterm_sym;
1376 sym->number = nvars++;
1377 return sym;
1378 }
1379 \f
1380 /*-------------------------------------------------------------------.
1381 | Parse the input grammar into a one symbol_list structure. Each |
1382 | rule is represented by a sequence of symbols: the left hand side |
1383 | followed by the contents of the right hand side, followed by a |
1384 | null pointer instead of a symbol to terminate the rule. The next |
1385 | symbol is the lhs of the following rule. |
1386 | |
1387 | All guards and actions are copied out to the appropriate files, |
1388 | labelled by the rule number they apply to. |
1389 | |
1390 | Bison used to allow some %directives in the rules sections, but |
1391 | this is no longer consider appropriate: (i) the documented grammar |
1392 | doesn't claim it, (ii), it would promote bad style, (iii), error |
1393 | recovery for %directives consists in skipping the junk until a `%' |
1394 | is seen and helrp synchronizing. This scheme is definitely wrong |
1395 | in the rules section. |
1396 `-------------------------------------------------------------------*/
1397
1398 static void
1399 readgram (void)
1400 {
1401 token_t t;
1402 symbol_t *lhs = NULL;
1403 symbol_list *p = NULL;
1404 symbol_list *p1 = NULL;
1405
1406 /* Points to first symbol_list of current rule. its symbol is the
1407 lhs of the rule. */
1408 symbol_list *crule = NULL;
1409 /* Points to the symbol_list preceding crule. */
1410 symbol_list *crule1 = NULL;
1411
1412 t = lex ();
1413
1414 while (t != tok_two_percents && t != tok_eof)
1415 if (t == tok_identifier || t == tok_bar)
1416 {
1417 int action_flag = 0;
1418 /* Number of symbols in rhs of this rule so far */
1419 int rulelength = 0;
1420 int xactions = 0; /* JF for error checking */
1421 symbol_t *first_rhs = 0;
1422
1423 if (t == tok_identifier)
1424 {
1425 lhs = symval;
1426
1427 if (!start_flag)
1428 {
1429 startval = lhs;
1430 start_flag = 1;
1431 }
1432
1433 t = lex ();
1434 if (t != tok_colon)
1435 {
1436 complain (_("ill-formed rule: initial symbol not followed by colon"));
1437 unlex (t);
1438 }
1439 }
1440
1441 if (nrules == 0 && t == tok_bar)
1442 {
1443 complain (_("grammar starts with vertical bar"));
1444 lhs = symval; /* BOGUS: use a random symval */
1445 }
1446 /* start a new rule and record its lhs. */
1447
1448 ++nrules;
1449 ++nritems;
1450
1451 p = symbol_list_new (lhs);
1452
1453 crule1 = p1;
1454 if (p1)
1455 p1->next = p;
1456 else
1457 grammar = p;
1458
1459 p1 = p;
1460 crule = p;
1461
1462 /* mark the rule's lhs as a nonterminal if not already so. */
1463
1464 if (lhs->class == unknown_sym)
1465 {
1466 lhs->class = nterm_sym;
1467 lhs->number = nvars;
1468 ++nvars;
1469 }
1470 else if (lhs->class == token_sym)
1471 complain (_("rule given for %s, which is a token"), lhs->tag);
1472
1473 /* read the rhs of the rule. */
1474
1475 for (;;)
1476 {
1477 t = lex ();
1478 if (t == tok_prec)
1479 {
1480 t = lex ();
1481 crule->ruleprec = symval;
1482 t = lex ();
1483 }
1484
1485 if (!(t == tok_identifier || t == tok_left_curly))
1486 break;
1487
1488 /* If next token is an identifier, see if a colon follows it.
1489 If one does, exit this rule now. */
1490 if (t == tok_identifier)
1491 {
1492 symbol_t *ssave;
1493 token_t t1;
1494
1495 ssave = symval;
1496 t1 = lex ();
1497 unlex (t1);
1498 symval = ssave;
1499 if (t1 == tok_colon)
1500 {
1501 warn (_("previous rule lacks an ending `;'"));
1502 break;
1503 }
1504
1505 if (!first_rhs) /* JF */
1506 first_rhs = symval;
1507 /* Not followed by colon =>
1508 process as part of this rule's rhs. */
1509 }
1510
1511 /* If we just passed an action, that action was in the middle
1512 of a rule, so make a dummy rule to reduce it to a
1513 non-terminal. */
1514 if (action_flag)
1515 {
1516 /* Since the action was written out with this rule's
1517 number, we must give the new rule this number by
1518 inserting the new rule before it. */
1519
1520 /* Make a dummy nonterminal, a gensym. */
1521 symbol_t *sdummy = gensym ();
1522
1523 /* Make a new rule, whose body is empty, before the
1524 current one, so that the action just read can
1525 belong to it. */
1526 ++nrules;
1527 ++nritems;
1528 p = symbol_list_new (sdummy);
1529 /* Attach its lineno to that of the host rule. */
1530 p->line = crule->line;
1531 /* Move the action from the host rule to this one. */
1532 p->action = crule->action;
1533 p->action_line = crule->action_line;
1534 crule->action = NULL;
1535
1536 if (crule1)
1537 crule1->next = p;
1538 else
1539 grammar = p;
1540 /* End of the rule. */
1541 crule1 = symbol_list_new (NULL);
1542 crule1->next = crule;
1543
1544 p->next = crule1;
1545
1546 /* Insert the dummy generated by that rule into this
1547 rule. */
1548 ++nritems;
1549 p = symbol_list_new (sdummy);
1550 p1->next = p;
1551 p1 = p;
1552
1553 action_flag = 0;
1554 }
1555
1556 if (t == tok_identifier)
1557 {
1558 ++nritems;
1559 p = symbol_list_new (symval);
1560 p1->next = p;
1561 p1 = p;
1562 }
1563 else /* handle an action. */
1564 {
1565 parse_action (crule, rulelength);
1566 action_flag = 1;
1567 ++xactions; /* JF */
1568 }
1569 ++rulelength;
1570 } /* end of read rhs of rule */
1571
1572 /* Put an empty link in the list to mark the end of this rule */
1573 p = symbol_list_new (NULL);
1574 p1->next = p;
1575 p1 = p;
1576
1577 if (t == tok_prec)
1578 {
1579 complain (_("two @prec's in a row"));
1580 t = lex ();
1581 crule->ruleprec = symval;
1582 t = lex ();
1583 }
1584
1585 if (t == tok_guard)
1586 {
1587 if (!semantic_parser)
1588 complain (_("%%guard present but %%semantic_parser not specified"));
1589
1590 parse_guard (crule, rulelength);
1591 t = lex ();
1592 }
1593
1594 if (t == tok_left_curly)
1595 {
1596 /* This case never occurs -wjh */
1597 if (action_flag)
1598 complain (_("two actions at end of one rule"));
1599 parse_action (crule, rulelength);
1600 action_flag = 1;
1601 ++xactions; /* -wjh */
1602 t = lex ();
1603 }
1604 /* If $$ is being set in default way, report if any type
1605 mismatch. */
1606 else if (!xactions
1607 && first_rhs && lhs->type_name != first_rhs->type_name)
1608 {
1609 if (lhs->type_name == 0
1610 || first_rhs->type_name == 0
1611 || strcmp (lhs->type_name, first_rhs->type_name))
1612 complain (_("type clash (`%s' `%s') on default action"),
1613 lhs->type_name ? lhs->type_name : "",
1614 first_rhs->type_name ? first_rhs->type_name : "");
1615 }
1616 /* Warn if there is no default for $$ but we need one. */
1617 else if (!xactions && !first_rhs && lhs->type_name != 0)
1618 complain (_("empty rule for typed nonterminal, and no action"));
1619 if (t == tok_two_percents || t == tok_eof)
1620 warn (_("previous rule lacks an ending `;'"));
1621 if (t == tok_semicolon)
1622 t = lex ();
1623 }
1624 else
1625 {
1626 complain (_("invalid input: %s"), quote (token_buffer));
1627 t = lex ();
1628 }
1629
1630 /* grammar has been read. Do some checking */
1631
1632 if (nrules == 0)
1633 fatal (_("no rules in the input grammar"));
1634
1635 /* Report any undefined symbols and consider them nonterminals. */
1636 symbols_do (symbol_check_defined, NULL);
1637
1638 /* Insert the initial rule, which line is that of the first rule
1639 (not that of the start symbol):
1640
1641 axiom: %start EOF. */
1642 p = symbol_list_new (axiom);
1643 p->line = grammar->line;
1644 p->next = symbol_list_new (startval);
1645 p->next->next = symbol_list_new (eoftoken);
1646 p->next->next->next = symbol_list_new (NULL);
1647 p->next->next->next->next = grammar;
1648 nrules += 1;
1649 nritems += 3;
1650 grammar = p;
1651 startval = axiom;
1652
1653 if (nsyms > SHRT_MAX)
1654 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1655 SHRT_MAX);
1656
1657 assert (nsyms == ntokens + nvars);
1658 }
1659
1660 /* At the end of the grammar file, some C source code must
1661 be stored. It is going to be associated to the epilogue
1662 directive. */
1663 static void
1664 read_additionnal_code (void)
1665 {
1666 int c;
1667 struct obstack el_obstack;
1668
1669 obstack_init (&el_obstack);
1670
1671 if (!no_lines_flag)
1672 {
1673 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1674 lineno, quotearg_style (c_quoting_style,
1675 muscle_find ("filename")));
1676 }
1677
1678 while ((c = getc (finput)) != EOF)
1679 copy_character (&el_obstack, c);
1680
1681 obstack_1grow (&el_obstack, 0);
1682 muscle_insert ("epilogue", obstack_finish (&el_obstack));
1683 }
1684
1685 \f
1686 /*------------------------------------------------------------------.
1687 | Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1688 | number. |
1689 `------------------------------------------------------------------*/
1690
1691 static void
1692 token_translations_init (void)
1693 {
1694 int num_256_available_p = TRUE;
1695 int i;
1696
1697 /* Find the highest user token number, and whether 256, the POSIX
1698 preferred user token number for the error token, is used. */
1699 max_user_token_number = 0;
1700 for (i = 0; i < ntokens; ++i)
1701 {
1702 symbol_t *this = symbols[i];
1703 if (this->user_token_number != SUNDEF)
1704 {
1705 if (this->user_token_number > max_user_token_number)
1706 max_user_token_number = this->user_token_number;
1707 if (this->user_token_number == 256)
1708 num_256_available_p = FALSE;
1709 }
1710 }
1711
1712 /* If 256 is not used, assign it to error, to follow POSIX. */
1713 if (num_256_available_p && errtoken->user_token_number == SUNDEF)
1714 errtoken->user_token_number = 256;
1715
1716 /* Set the missing user numbers. */
1717 if (max_user_token_number < 256)
1718 max_user_token_number = 256;
1719
1720 for (i = 0; i < ntokens; ++i)
1721 {
1722 symbol_t *this = symbols[i];
1723 if (this->user_token_number == SUNDEF)
1724 this->user_token_number = ++max_user_token_number;
1725 if (this->user_token_number > max_user_token_number)
1726 max_user_token_number = this->user_token_number;
1727 }
1728
1729 token_translations = XCALLOC (token_number_t, max_user_token_number + 1);
1730
1731 /* Initialize all entries for literal tokens to 2, the internal
1732 token number for $undefined., which represents all invalid
1733 inputs. */
1734 for (i = 0; i < max_user_token_number + 1; i++)
1735 token_translations[i] = undeftoken->number;
1736 symbols_do (symbol_translation, NULL);
1737 }
1738
1739
1740 /*----------------------------------------------------------------.
1741 | Assign symbol numbers, and write definition of token names into |
1742 | FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1743 `----------------------------------------------------------------*/
1744
1745 static void
1746 packsymbols (void)
1747 {
1748 symbols = XCALLOC (symbol_t *, nsyms);
1749
1750 symbols_do (symbol_check_alias_consistence, NULL);
1751 symbols_do (symbol_pack, NULL);
1752
1753 token_translations_init ();
1754
1755 if (startval->class == unknown_sym)
1756 fatal (_("the start symbol %s is undefined"), startval->tag);
1757 else if (startval->class == token_sym)
1758 fatal (_("the start symbol %s is a token"), startval->tag);
1759
1760 start_symbol = startval->number;
1761 }
1762
1763
1764 /*---------------------------------------------------------------.
1765 | Convert the rules into the representation using RRHS, RLHS and |
1766 | RITEM. |
1767 `---------------------------------------------------------------*/
1768
1769 static void
1770 packgram (void)
1771 {
1772 int itemno;
1773 int ruleno;
1774 symbol_list *p;
1775
1776 ritem = XCALLOC (item_number_t, nritems + 1);
1777 rules = XCALLOC (rule_t, nrules) - 1;
1778
1779 itemno = 0;
1780 ruleno = 1;
1781
1782 p = grammar;
1783 while (p)
1784 {
1785 symbol_t *ruleprec = p->ruleprec;
1786 rules[ruleno].user_number = ruleno;
1787 rules[ruleno].number = ruleno;
1788 rules[ruleno].lhs = p->sym;
1789 rules[ruleno].rhs = ritem + itemno;
1790 rules[ruleno].line = p->line;
1791 rules[ruleno].useful = TRUE;
1792 rules[ruleno].action = p->action;
1793 rules[ruleno].action_line = p->action_line;
1794 rules[ruleno].guard = p->guard;
1795 rules[ruleno].guard_line = p->guard_line;
1796
1797 p = p->next;
1798 while (p && p->sym)
1799 {
1800 /* item_number_t = token_number_t.
1801 But the former needs to contain more: negative rule numbers. */
1802 ritem[itemno++] = token_number_as_item_number (p->sym->number);
1803 /* A rule gets by default the precedence and associativity
1804 of the last token in it. */
1805 if (p->sym->class == token_sym)
1806 rules[ruleno].prec = p->sym;
1807 if (p)
1808 p = p->next;
1809 }
1810
1811 /* If this rule has a %prec,
1812 the specified symbol's precedence replaces the default. */
1813 if (ruleprec)
1814 {
1815 rules[ruleno].precsym = ruleprec;
1816 rules[ruleno].prec = ruleprec;
1817 }
1818 ritem[itemno++] = -ruleno;
1819 ++ruleno;
1820
1821 if (p)
1822 p = p->next;
1823 }
1824
1825 ritem[itemno] = 0;
1826 assert (itemno == nritems);
1827
1828 if (trace_flag)
1829 ritem_print (stderr);
1830 }
1831 \f
1832 /*-------------------------------------------------------------------.
1833 | Read in the grammar specification and record it in the format |
1834 | described in gram.h. All guards are copied into the GUARD_OBSTACK |
1835 | and all actions into ACTION_OBSTACK, in each case forming the body |
1836 | of a C function (YYGUARD or YYACTION) which contains a switch |
1837 | statement to decide which guard or action to execute. |
1838 `-------------------------------------------------------------------*/
1839
1840 void
1841 reader (void)
1842 {
1843 lex_init ();
1844 lineno = 1;
1845
1846 /* Initialize the muscle obstack. */
1847 obstack_init (&muscle_obstack);
1848
1849 /* Initialize the symbol table. */
1850 symbols_new ();
1851
1852 /* Construct the axiom symbol. */
1853 axiom = getsym ("$axiom");
1854 axiom->class = nterm_sym;
1855 axiom->number = nvars++;
1856
1857 /* Construct the error token */
1858 errtoken = getsym ("error");
1859 errtoken->class = token_sym;
1860 errtoken->number = ntokens++;
1861
1862 /* Construct a token that represents all undefined literal tokens.
1863 It is always token number 2. */
1864 undeftoken = getsym ("$undefined.");
1865 undeftoken->class = token_sym;
1866 undeftoken->number = ntokens++;
1867
1868 /* Initialize the obstacks. */
1869 obstack_init (&action_obstack);
1870 obstack_init (&output_obstack);
1871 obstack_init (&pre_prologue_obstack);
1872 obstack_init (&post_prologue_obstack);
1873
1874 finput = xfopen (infile, "r");
1875
1876 /* Read the declaration section. Copy %{ ... %} groups to
1877 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1878 etc. found there. */
1879 read_declarations ();
1880
1881 /* If the user did not define her EOFTOKEN, do it now. */
1882 if (!eoftoken)
1883 {
1884 eoftoken = getsym ("$");
1885 eoftoken->class = token_sym;
1886 eoftoken->number = 0;
1887 /* Value specified by POSIX. */
1888 eoftoken->user_token_number = 0;
1889 }
1890
1891 /* Read in the grammar, build grammar in list form. Write out
1892 guards and actions. */
1893 readgram ();
1894 /* Some C code is given at the end of the grammar file. */
1895 read_additionnal_code ();
1896
1897 lex_free ();
1898 xfclose (finput);
1899
1900 /* Assign the symbols their symbol numbers. Write #defines for the
1901 token symbols into FDEFINES if requested. */
1902 packsymbols ();
1903
1904 /* Convert the grammar into the format described in gram.h. */
1905 packgram ();
1906
1907 /* The grammar as a symbol_list is no longer needed. */
1908 LIST_FREE (symbol_list, grammar);
1909 }
1910
1911 void
1912 grammar_free (void)
1913 {
1914 XFREE (ritem);
1915 free (rules + 1);
1916 /* Free the symbol table data structure. */
1917 symbols_free ();
1918 }