]> git.saurik.com Git - bison.git/blob - src/reader.c
* src/derives.c (print_derives): Be sure to use `>= 0', not `> 0',
[bison.git] / src / reader.c
1 /* Input parser for bison
2 Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
3 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22
#include "system.h"
#include <limits.h>
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
37
38 typedef struct symbol_list
39 {
40 struct symbol_list *next;
41 bucket *sym;
42 int line;
43
44 /* The action is attached to the LHS of a rule. */
45 const char *action;
46 int action_line;
47
48 /* The guard is attached to the LHS of a rule. */
49 const char *guard;
50 int guard_line;
51 bucket *ruleprec;
52 } symbol_list;
53
54 int lineno;
55 char **tags;
56 short *user_toknums;
57 static symbol_list *grammar;
58 static int start_flag;
59 static bucket *startval;
60
61 /* Nonzero if components of semantic values are used, implying
62 they must be unions. */
63 static int value_components_used;
64
65 /* Nonzero if %union has been seen. */
66 static int typed;
67
68 /* Incremented for each %left, %right or %nonassoc seen */
69 static int lastprec;
70
71 static bucket *errtoken = NULL;
72 static bucket *undeftoken = NULL;
73 static bucket *eoftoken = NULL;
74 static bucket *axiom = NULL;
75
76 static symbol_list *
77 symbol_list_new (bucket *sym)
78 {
79 symbol_list *res = XMALLOC (symbol_list, 1);
80 res->next = NULL;
81 res->sym = sym;
82 res->line = lineno;
83 res->action = NULL;
84 res->action_line = 0;
85 res->guard = NULL;
86 res->guard_line = 0;
87 res->ruleprec = NULL;
88 return res;
89 }
90
91 \f
92
93 /*===================\
94 | Low level lexing. |
95 \===================*/
96
97 static void
98 skip_to_char (int target)
99 {
100 int c;
101 if (target == '\n')
102 complain (_(" Skipping to next \\n"));
103 else
104 complain (_(" Skipping to next %c"), target);
105
106 do
107 c = skip_white_space ();
108 while (c != target && c != EOF);
109 if (c != EOF)
110 ungetc (c, finput);
111 }
112
113
/* Read a decimal integer, optionally preceded by `-', from STREAM and
   return its value.  The first character that does not belong to the
   number is pushed back onto STREAM.  When no digit is present the
   result is 0.

   A magnitude that does not fit in an int saturates at INT_MAX
   instead of overflowing, since signed overflow is undefined
   behavior.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would 10 * N + DIGIT exceed INT_MAX?  Test without
         performing the overflowing operation.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a no-op, so this is fine at end of input.  */
  ungetc (c, stream);

  return sign * n;
}
141 \f
142 /*--------------------------------------------------------------.
143 | Get the data type (alternative in the union) of the value for |
144 | symbol N in rule RULE. |
145 `--------------------------------------------------------------*/
146
147 static char *
148 get_type_name (int n, symbol_list *rule)
149 {
150 int i;
151 symbol_list *rp;
152
153 if (n < 0)
154 {
155 complain (_("invalid $ value"));
156 return NULL;
157 }
158
159 rp = rule;
160 i = 0;
161
162 while (i < n)
163 {
164 rp = rp->next;
165 if (rp == NULL || rp->sym == NULL)
166 {
167 complain (_("invalid $ value"));
168 return NULL;
169 }
170 i++;
171 }
172
173 return rp->sym->type_name;
174 }
175 \f
176 /*------------------------------------------------------------.
177 | Dump the string from FIN to OOUT if non null. MATCH is the |
178 | delimiter of the string (either ' or "). |
179 `------------------------------------------------------------*/
180
181 static inline void
182 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
183 {
184 int c;
185
186 if (store)
187 obstack_1grow (oout, match);
188
189 c = getc (fin);
190
191 while (c != match)
192 {
193 if (c == EOF)
194 fatal (_("unterminated string at end of file"));
195 if (c == '\n')
196 {
197 complain (_("unterminated string"));
198 ungetc (c, fin);
199 c = match; /* invent terminator */
200 continue;
201 }
202
203 obstack_1grow (oout, c);
204
205 if (c == '\\')
206 {
207 c = getc (fin);
208 if (c == EOF)
209 fatal (_("unterminated string at end of file"));
210 obstack_1grow (oout, c);
211
212 if (c == '\n')
213 lineno++;
214 }
215
216 c = getc (fin);
217 }
218
219 if (store)
220 obstack_1grow (oout, c);
221 }
222
/* Copy a string delimited by MATCH from FIN to OOUT, delimiters
   included.  This is copy_string2 with the delimiters kept.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
230
/* Copy the longest run of letters, digits and underscores found at
   the head of FIN to OOUT.  The character that ends the run is
   pushed back onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int c = getc (fin);

  while (c == '_' || isalnum (c))
    {
      obstack_1grow (oout, c);
      c = getc (fin);
    }

  ungetc (c, fin);
}
243
244
245 /*------------------------------------------------------------------.
246 | Dump the wannabee comment from IN to OOUT. In fact we just saw a |
247 | `/', which might or might not be a comment. In any case, copy |
248 | what we saw. |
249 `------------------------------------------------------------------*/
250
251 static inline void
252 copy_comment (FILE *fin, struct obstack *oout)
253 {
254 int cplus_comment;
255 int ended;
256 int c;
257
258 /* We read a `/', output it. */
259 obstack_1grow (oout, '/');
260
261 switch ((c = getc (fin)))
262 {
263 case '/':
264 cplus_comment = 1;
265 break;
266 case '*':
267 cplus_comment = 0;
268 break;
269 default:
270 ungetc (c, fin);
271 return;
272 }
273
274 obstack_1grow (oout, c);
275 c = getc (fin);
276
277 ended = 0;
278 while (!ended)
279 {
280 if (!cplus_comment && c == '*')
281 {
282 while (c == '*')
283 {
284 obstack_1grow (oout, c);
285 c = getc (fin);
286 }
287
288 if (c == '/')
289 {
290 obstack_1grow (oout, c);
291 ended = 1;
292 }
293 }
294 else if (c == '\n')
295 {
296 lineno++;
297 obstack_1grow (oout, c);
298 if (cplus_comment)
299 ended = 1;
300 else
301 c = getc (fin);
302 }
303 else if (c == EOF)
304 fatal (_("unterminated comment"));
305 else
306 {
307 obstack_1grow (oout, c);
308 c = getc (fin);
309 }
310 }
311 }
312
313
314 /*-----------------------------------------------------------------.
315 | FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
316 | reference to this location. STACK_OFFSET is the number of values |
317 | in the current rule so far, which says where to find `$0' with |
318 | respect to the top of the stack. |
319 `-----------------------------------------------------------------*/
320
321 static inline void
322 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
323 {
324 int c;
325
326 c = getc (fin);
327 if (c == '$')
328 {
329 obstack_sgrow (oout, "yyloc");
330 locations_flag = 1;
331 }
332 else if (isdigit (c) || c == '-')
333 {
334 int n;
335
336 ungetc (c, fin);
337 n = read_signed_integer (fin);
338 if (n > stack_offset)
339 complain (_("invalid value: %s%d"), "@", n);
340 else
341 {
342 /* Offset is always 0 if parser has already popped the stack
343 pointer. */
344 obstack_fgrow1 (oout, "yylsp[%d]",
345 n - (semantic_parser ? 0 : stack_offset));
346 locations_flag = 1;
347 }
348 }
349 else
350 {
351 char buf[] = "@c";
352 buf[1] = c;
353 complain (_("%s is invalid"), quote (buf));
354 }
355 }
356
357
358 /*-------------------------------------------------------------------.
359 | FIN is pointing to a wannabee semantic value (i.e., a `$'). |
360 | |
361 | Possible inputs: $[<TYPENAME>]($|integer) |
362 | |
363 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
364 | the number of values in the current rule so far, which says where |
365 | to find `$0' with respect to the top of the stack. |
366 `-------------------------------------------------------------------*/
367
368 static inline void
369 copy_dollar (FILE *fin, struct obstack *oout,
370 symbol_list *rule, int stack_offset)
371 {
372 int c = getc (fin);
373 const char *type_name = NULL;
374
375 /* Get the type name if explicit. */
376 if (c == '<')
377 {
378 read_type_name (fin);
379 type_name = token_buffer;
380 value_components_used = 1;
381 c = getc (fin);
382 }
383
384 if (c == '$')
385 {
386 obstack_sgrow (oout, "yyval");
387
388 if (!type_name)
389 type_name = get_type_name (0, rule);
390 if (type_name)
391 obstack_fgrow1 (oout, ".%s", type_name);
392 if (!type_name && typed)
393 complain (_("$$ of `%s' has no declared type"),
394 rule->sym->tag);
395 }
396 else if (isdigit (c) || c == '-')
397 {
398 int n;
399 ungetc (c, fin);
400 n = read_signed_integer (fin);
401
402 if (n > stack_offset)
403 complain (_("invalid value: %s%d"), "$", n);
404 else
405 {
406 if (!type_name && n > 0)
407 type_name = get_type_name (n, rule);
408
409 /* Offset is always 0 if parser has already popped the stack
410 pointer. */
411 obstack_fgrow1 (oout, "yyvsp[%d]",
412 n - (semantic_parser ? 0 : stack_offset));
413
414 if (type_name)
415 obstack_fgrow1 (oout, ".%s", type_name);
416 if (!type_name && typed)
417 complain (_("$%d of `%s' has no declared type"),
418 n, rule->sym->tag);
419 }
420 }
421 else
422 {
423 char buf[] = "$c";
424 buf[1] = c;
425 complain (_("%s is invalid"), quote (buf));
426 }
427 }
428 \f
429 /*-------------------------------------------------------------------.
430 | Copy the contents of a `%{ ... %}' into the definitions file. The |
431 | `%{' has already been read. Return after reading the `%}'. |
432 `-------------------------------------------------------------------*/
433
434 static void
435 copy_definition (void)
436 {
437 int c;
438 /* -1 while reading a character if prev char was %. */
439 int after_percent;
440
441 if (!no_lines_flag)
442 {
443 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
444 lineno, quotearg_style (c_quoting_style,
445 muscle_find ("filename")));
446 }
447
448 after_percent = 0;
449
450 c = getc (finput);
451
452 for (;;)
453 {
454 switch (c)
455 {
456 case '\n':
457 obstack_1grow (&attrs_obstack, c);
458 lineno++;
459 break;
460
461 case '%':
462 after_percent = -1;
463 break;
464
465 case '\'':
466 case '"':
467 copy_string (finput, &attrs_obstack, c);
468 break;
469
470 case '/':
471 copy_comment (finput, &attrs_obstack);
472 break;
473
474 case EOF:
475 fatal ("%s", _("unterminated `%{' definition"));
476
477 default:
478 obstack_1grow (&attrs_obstack, c);
479 }
480
481 c = getc (finput);
482
483 if (after_percent)
484 {
485 if (c == '}')
486 return;
487 obstack_1grow (&attrs_obstack, '%');
488 }
489 after_percent = 0;
490 }
491 }
492
493
494 /*-------------------------------------------------------------------.
495 | Parse what comes after %token or %nterm. For %token, WHAT_IS is |
496 | token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
497 | are reversed. |
498 `-------------------------------------------------------------------*/
499
500 static void
501 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
502 {
503 token_t token = tok_undef;
504 char *typename = NULL;
505
506 /* The symbol being defined. */
507 struct bucket *symbol = NULL;
508
509 /* After `%token' and `%nterm', any number of symbols maybe be
510 defined. */
511 for (;;)
512 {
513 int tmp_char = ungetc (skip_white_space (), finput);
514
515 /* `%' (for instance from `%token', or from `%%' etc.) is the
516 only valid means to end this declaration. */
517 if (tmp_char == '%')
518 return;
519 if (tmp_char == EOF)
520 fatal (_("Premature EOF after %s"), token_buffer);
521
522 token = lex ();
523 if (token == tok_comma)
524 {
525 symbol = NULL;
526 continue;
527 }
528 if (token == tok_typename)
529 {
530 typename = xstrdup (token_buffer);
531 value_components_used = 1;
532 symbol = NULL;
533 }
534 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
535 {
536 if (symval->alias)
537 warn (_("symbol `%s' used more than once as a literal string"),
538 symval->tag);
539 else if (symbol->alias)
540 warn (_("symbol `%s' given more than one literal string"),
541 symbol->tag);
542 else
543 {
544 symval->class = token_sym;
545 symval->type_name = typename;
546 symval->user_token_number = symbol->user_token_number;
547 symbol->user_token_number = SALIAS;
548 symval->alias = symbol;
549 symbol->alias = symval;
550 /* symbol and symval combined are only one symbol */
551 nsyms--;
552 }
553 symbol = NULL;
554 }
555 else if (token == tok_identifier)
556 {
557 int oldclass = symval->class;
558 symbol = symval;
559
560 if (symbol->class == what_is_not)
561 complain (_("symbol %s redefined"), symbol->tag);
562 symbol->class = what_is;
563 if (what_is == nterm_sym && oldclass != nterm_sym)
564 symbol->value = nvars++;
565
566 if (typename)
567 {
568 if (symbol->type_name == NULL)
569 symbol->type_name = typename;
570 else if (strcmp (typename, symbol->type_name) != 0)
571 complain (_("type redeclaration for %s"), symbol->tag);
572 }
573 }
574 else if (symbol && token == tok_number)
575 {
576 symbol->user_token_number = numval;
577 /* User defined EOF token? */
578 if (numval == 0)
579 eoftoken = symbol;
580 }
581 else
582 {
583 complain (_("`%s' is invalid in %s"),
584 token_buffer,
585 (what_is == token_sym) ? "%token" : "%nterm");
586 skip_to_char ('%');
587 }
588 }
589
590 }
591
592
593 /*------------------------------.
594 | Parse what comes after %start |
595 `------------------------------*/
596
597 static void
598 parse_start_decl (void)
599 {
600 if (start_flag)
601 complain (_("multiple %s declarations"), "%start");
602 if (lex () != tok_identifier)
603 complain (_("invalid %s declaration"), "%start");
604 else
605 {
606 start_flag = 1;
607 startval = symval;
608 }
609 }
610
611 /*-----------------------------------------------------------.
612 | read in a %type declaration and record its information for |
613 | get_type_name to access |
614 `-----------------------------------------------------------*/
615
616 static void
617 parse_type_decl (void)
618 {
619 char *name;
620
621 if (lex () != tok_typename)
622 {
623 complain ("%s", _("%type declaration has no <typename>"));
624 skip_to_char ('%');
625 return;
626 }
627
628 name = xstrdup (token_buffer);
629
630 for (;;)
631 {
632 token_t t;
633 int tmp_char = ungetc (skip_white_space (), finput);
634
635 if (tmp_char == '%')
636 return;
637 if (tmp_char == EOF)
638 fatal (_("Premature EOF after %s"), token_buffer);
639
640 t = lex ();
641
642 switch (t)
643 {
644
645 case tok_comma:
646 case tok_semicolon:
647 break;
648
649 case tok_identifier:
650 if (symval->type_name == NULL)
651 symval->type_name = name;
652 else if (strcmp (name, symval->type_name) != 0)
653 complain (_("type redeclaration for %s"), symval->tag);
654
655 break;
656
657 default:
658 complain (_("invalid %%type declaration due to item: %s"),
659 token_buffer);
660 skip_to_char ('%');
661 }
662 }
663 }
664
665
666
667 /*----------------------------------------------------------------.
668 | Read in a %left, %right or %nonassoc declaration and record its |
669 | information. |
670 `----------------------------------------------------------------*/
671
672 static void
673 parse_assoc_decl (associativity assoc)
674 {
675 char *name = NULL;
676 int prev = 0;
677
678 lastprec++; /* Assign a new precedence level, never 0. */
679
680 for (;;)
681 {
682 token_t t;
683 int tmp_char = ungetc (skip_white_space (), finput);
684
685 if (tmp_char == '%')
686 return;
687 if (tmp_char == EOF)
688 fatal (_("Premature EOF after %s"), token_buffer);
689
690 t = lex ();
691
692 switch (t)
693 {
694 case tok_typename:
695 name = xstrdup (token_buffer);
696 break;
697
698 case tok_comma:
699 break;
700
701 case tok_identifier:
702 if (symval->prec != 0)
703 complain (_("redefining precedence of %s"), symval->tag);
704 symval->prec = lastprec;
705 symval->assoc = assoc;
706 if (symval->class == nterm_sym)
707 complain (_("symbol %s redefined"), symval->tag);
708 symval->class = token_sym;
709 if (name)
710 { /* record the type, if one is specified */
711 if (symval->type_name == NULL)
712 symval->type_name = name;
713 else if (strcmp (name, symval->type_name) != 0)
714 complain (_("type redeclaration for %s"), symval->tag);
715 }
716 break;
717
718 case tok_number:
719 if (prev == tok_identifier)
720 {
721 symval->user_token_number = numval;
722 }
723 else
724 {
725 complain (_
726 ("invalid text (%s) - number should be after identifier"),
727 token_buffer);
728 skip_to_char ('%');
729 }
730 break;
731
732 case tok_semicolon:
733 return;
734
735 default:
736 complain (_("unexpected item: %s"), token_buffer);
737 skip_to_char ('%');
738 }
739
740 prev = t;
741 }
742 }
743
744
745
746 /*--------------------------------------------------------------.
747 | Copy the union declaration into the stype muscle |
748 | (and fdefines), where it is made into the definition of |
749 | YYSTYPE, the type of elements of the parser value stack. |
750 `--------------------------------------------------------------*/
751
752 static void
753 parse_union_decl (void)
754 {
755 int c;
756 int count = 0;
757 bool done = FALSE;
758 struct obstack union_obstack;
759 if (typed)
760 complain (_("multiple %s declarations"), "%union");
761
762 typed = 1;
763
764 obstack_init (&union_obstack);
765 obstack_sgrow (&union_obstack, "union");
766
767 while (!done)
768 {
769 c = xgetc (finput);
770
771 /* If C contains '/', it is output by copy_comment (). */
772 if (c != '/')
773 obstack_1grow (&union_obstack, c);
774
775 switch (c)
776 {
777 case '\n':
778 lineno++;
779 break;
780
781 case '/':
782 copy_comment (finput, &union_obstack);
783 break;
784
785 case '{':
786 count++;
787 break;
788
789 case '}':
790 /* FIXME: Errr. How could this happen???. --akim */
791 if (count == 0)
792 complain (_("unmatched %s"), "`}'");
793 count--;
794 if (!count)
795 done = TRUE;
796 break;
797 }
798 }
799
800 /* JF don't choke on trailing semi */
801 c = skip_white_space ();
802 if (c != ';')
803 ungetc (c, finput);
804 obstack_1grow (&union_obstack, 0);
805 muscle_insert ("stype", obstack_finish (&union_obstack));
806 }
807
808
809 /*-------------------------------------------------------.
810 | Parse the declaration %expect N which says to expect N |
811 | shift-reduce conflicts. |
812 `-------------------------------------------------------*/
813
814 static void
815 parse_expect_decl (void)
816 {
817 int c = skip_white_space ();
818 ungetc (c, finput);
819
820 if (!isdigit (c))
821 complain (_("argument of %%expect is not an integer"));
822 else
823 expected_conflicts = read_signed_integer (finput);
824 }
825
826
827 /*-------------------------------------------------------------------.
828 | Parse what comes after %thong. the full syntax is |
829 | |
830 | %thong <type> token number literal |
831 | |
832 | the <type> or number may be omitted. The number specifies the |
833 | user_token_number. |
834 | |
835 | Two symbols are entered in the table, one for the token symbol and |
836 | one for the literal. Both are given the <type>, if any, from the |
837 | declaration. The ->user_token_number of the first is SALIAS and |
838 | the ->user_token_number of the second is set to the number, if |
839 | any, from the declaration. The two symbols are linked via |
840 | pointers in their ->alias fields. |
841 | |
842 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
843 | only the literal string is retained it is the literal string that |
844 | is output to yytname |
845 `-------------------------------------------------------------------*/
846
847 static void
848 parse_thong_decl (void)
849 {
850 token_t token;
851 struct bucket *symbol;
852 char *typename = 0;
853 int usrtoknum = SUNDEF;
854
855 token = lex (); /* fetch typename or first token */
856 if (token == tok_typename)
857 {
858 typename = xstrdup (token_buffer);
859 value_components_used = 1;
860 token = lex (); /* fetch first token */
861 }
862
863 /* process first token */
864
865 if (token != tok_identifier)
866 {
867 complain (_("unrecognized item %s, expected an identifier"),
868 token_buffer);
869 skip_to_char ('%');
870 return;
871 }
872 symval->class = token_sym;
873 symval->type_name = typename;
874 symval->user_token_number = SALIAS;
875 symbol = symval;
876
877 token = lex (); /* get number or literal string */
878
879 if (token == tok_number)
880 {
881 usrtoknum = numval;
882 token = lex (); /* okay, did number, now get literal */
883 }
884
885 /* process literal string token */
886
887 if (token != tok_identifier || *symval->tag != '\"')
888 {
889 complain (_("expected string constant instead of %s"), token_buffer);
890 skip_to_char ('%');
891 return;
892 }
893 symval->class = token_sym;
894 symval->type_name = typename;
895 symval->user_token_number = usrtoknum;
896
897 symval->alias = symbol;
898 symbol->alias = symval;
899
900 /* symbol and symval combined are only one symbol. */
901 nsyms--;
902 }
903
904
905 static void
906 parse_muscle_decl (void)
907 {
908 int ch = ungetc (skip_white_space (), finput);
909 char *muscle_key;
910 char *muscle_value;
911
912 /* Read key. */
913 if (!isalpha (ch) && ch != '_')
914 {
915 complain (_("invalid %s declaration"), "%define");
916 skip_to_char ('%');
917 return;
918 }
919 copy_identifier (finput, &muscle_obstack);
920 obstack_1grow (&muscle_obstack, 0);
921 muscle_key = obstack_finish (&muscle_obstack);
922
923 /* Read value. */
924 ch = skip_white_space ();
925 if (ch != '"')
926 {
927 ungetc (ch, finput);
928 if (ch != EOF)
929 {
930 complain (_("invalid %s declaration"), "%define");
931 skip_to_char ('%');
932 return;
933 }
934 else
935 fatal (_("Premature EOF after %s"), "\"");
936 }
937 copy_string2 (finput, &muscle_obstack, '"', 0);
938 obstack_1grow (&muscle_obstack, 0);
939 muscle_value = obstack_finish (&muscle_obstack);
940
941 /* Store the (key, value) pair in the environment. */
942 muscle_insert (muscle_key, muscle_value);
943 }
944
945
946
947 /*---------------------------------.
948 | Parse a double quoted parameter. |
949 `---------------------------------*/
950
951 static const char *
952 parse_dquoted_param (const char *from)
953 {
954 struct obstack param_obstack;
955 const char *param = NULL;
956 int c;
957
958 obstack_init (&param_obstack);
959 c = skip_white_space ();
960
961 if (c != '"')
962 {
963 complain (_("invalid %s declaration"), from);
964 ungetc (c, finput);
965 skip_to_char ('%');
966 return NULL;
967 }
968
969 while ((c = literalchar ()) != '"')
970 obstack_1grow (&param_obstack, c);
971
972 obstack_1grow (&param_obstack, '\0');
973 param = obstack_finish (&param_obstack);
974
975 if (c != '"' || strlen (param) == 0)
976 {
977 complain (_("invalid %s declaration"), from);
978 if (c != '"')
979 ungetc (c, finput);
980 skip_to_char ('%');
981 return NULL;
982 }
983
984 return param;
985 }
986
987 /*----------------------------------.
988 | Parse what comes after %skeleton. |
989 `----------------------------------*/
990
991 static void
992 parse_skel_decl (void)
993 {
994 skeleton = parse_dquoted_param ("%skeleton");
995 }
996
997 /*----------------------------------------------------------------.
998 | Read from finput until `%%' is seen. Discard the `%%'. Handle |
999 | any `%' declarations, and copy the contents of any `%{ ... %}' |
1000 | groups to ATTRS_OBSTACK. |
1001 `----------------------------------------------------------------*/
1002
1003 static void
1004 read_declarations (void)
1005 {
1006 for (;;)
1007 {
1008 int c = skip_white_space ();
1009
1010 if (c == '%')
1011 {
1012 token_t tok = parse_percent_token ();
1013
1014 switch (tok)
1015 {
1016 case tok_two_percents:
1017 return;
1018
1019 case tok_percent_left_curly:
1020 copy_definition ();
1021 break;
1022
1023 case tok_token:
1024 parse_token_decl (token_sym, nterm_sym);
1025 break;
1026
1027 case tok_nterm:
1028 parse_token_decl (nterm_sym, token_sym);
1029 break;
1030
1031 case tok_type:
1032 parse_type_decl ();
1033 break;
1034
1035 case tok_start:
1036 parse_start_decl ();
1037 break;
1038
1039 case tok_union:
1040 parse_union_decl ();
1041 break;
1042
1043 case tok_expect:
1044 parse_expect_decl ();
1045 break;
1046
1047 case tok_thong:
1048 parse_thong_decl ();
1049 break;
1050
1051 case tok_left:
1052 parse_assoc_decl (left_assoc);
1053 break;
1054
1055 case tok_right:
1056 parse_assoc_decl (right_assoc);
1057 break;
1058
1059 case tok_nonassoc:
1060 parse_assoc_decl (non_assoc);
1061 break;
1062
1063 case tok_define:
1064 parse_muscle_decl ();
1065 break;
1066
1067 case tok_skel:
1068 parse_skel_decl ();
1069 break;
1070
1071 case tok_noop:
1072 break;
1073
1074 case tok_stropt:
1075 case tok_intopt:
1076 case tok_obsolete:
1077 abort ();
1078 break;
1079
1080 case tok_illegal:
1081 default:
1082 complain (_("unrecognized: %s"), token_buffer);
1083 skip_to_char ('%');
1084 }
1085 }
1086 else if (c == EOF)
1087 fatal (_("no input grammar"));
1088 else
1089 {
1090 char buf[] = "c";
1091 buf[0] = c;
1092 complain (_("unknown character: %s"), quote (buf));
1093 skip_to_char ('%');
1094 }
1095 }
1096 }
1097 \f
1098 /*-------------------------------------------------------------------.
1099 | Assuming that a `{' has just been seen, copy everything up to the |
1100 | matching `}' into the actions file. STACK_OFFSET is the number of |
1101 | values in the current rule so far, which says where to find `$0' |
1102 | with respect to the top of the stack. |
1103 | |
1104 | This routine is used both for actions and guards. Only |
1105 | ACTION_OBSTACK is used, but this is fine, since we use only |
1106 | pointers to relevant portions inside this obstack. |
1107 `-------------------------------------------------------------------*/
1108
1109 static void
1110 parse_braces (symbol_list *rule, int stack_offset)
1111 {
1112 int c;
1113 int count;
1114
1115 count = 1;
1116 while (count > 0)
1117 {
1118 while ((c = getc (finput)) != '}')
1119 switch (c)
1120 {
1121 case '\n':
1122 obstack_1grow (&action_obstack, c);
1123 lineno++;
1124 break;
1125
1126 case '{':
1127 obstack_1grow (&action_obstack, c);
1128 count++;
1129 break;
1130
1131 case '\'':
1132 case '"':
1133 copy_string (finput, &action_obstack, c);
1134 break;
1135
1136 case '/':
1137 copy_comment (finput, &action_obstack);
1138 break;
1139
1140 case '$':
1141 copy_dollar (finput, &action_obstack,
1142 rule, stack_offset);
1143 break;
1144
1145 case '@':
1146 copy_at (finput, &action_obstack,
1147 stack_offset);
1148 break;
1149
1150 case EOF:
1151 fatal (_("unmatched %s"), "`{'");
1152
1153 default:
1154 obstack_1grow (&action_obstack, c);
1155 }
1156
1157 /* Above loop exits when C is '}'. */
1158 if (--count)
1159 {
1160 obstack_1grow (&action_obstack, c);
1161 c = getc (finput);
1162 }
1163 }
1164
1165 obstack_1grow (&action_obstack, '\0');
1166 }
1167
1168
1169 static void
1170 parse_action (symbol_list *rule, int stack_offset)
1171 {
1172 rule->action_line = lineno;
1173 parse_braces (rule, stack_offset);
1174 rule->action = obstack_finish (&action_obstack);
1175 }
1176
1177
1178 static void
1179 parse_guard (symbol_list *rule, int stack_offset)
1180 {
1181 token_t t = lex ();
1182 if (t != tok_left_curly)
1183 complain (_("invalid %s declaration"), "%guard");
1184 rule->guard_line = lineno;
1185 parse_braces (rule, stack_offset);
1186 rule->guard = obstack_finish (&action_obstack);
1187 }
1188
1189 \f
1190
1191 /*-------------------------------------------------------------------.
1192 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1193 | with the user's names. |
1194 `-------------------------------------------------------------------*/
1195
1196 static bucket *
1197 gensym (void)
1198 {
1199 /* Incremented for each generated symbol */
1200 static int gensym_count = 0;
1201 static char buf[256];
1202
1203 bucket *sym;
1204
1205 sprintf (buf, "@%d", ++gensym_count);
1206 token_buffer = buf;
1207 sym = getsym (token_buffer);
1208 sym->class = nterm_sym;
1209 sym->value = nvars++;
1210 return sym;
1211 }
1212 \f
1213 /*-------------------------------------------------------------------.
1214 | Parse the input grammar into one symbol_list structure. Each |
1215 | rule is represented by a sequence of symbols: the left hand side |
1216 | followed by the contents of the right hand side, followed by a |
1217 | null pointer instead of a symbol to terminate the rule. The next |
1218 | symbol is the lhs of the following rule. |
1219 | |
1220 | All guards and actions are copied out to the appropriate files, |
1221 | labelled by the rule number they apply to. |
1222 | |
1223 | Bison used to allow some %directives in the rules sections, but |
1224 | this is no longer considered appropriate: (i) the documented grammar |
1225 | doesn't claim it, (ii), it would promote bad style, (iii), error |
1226 | recovery for %directives consists in skipping the junk until a `%' |
1227 | is seen and helps synchronizing. This scheme is definitely wrong |
1228 | in the rules section. |
1229 `-------------------------------------------------------------------*/
1230
1231 static void
1232 readgram (void)
1233 {
1234 token_t t;
1235 bucket *lhs = NULL;
1236 symbol_list *p = NULL;
1237 symbol_list *p1 = NULL;
1238 bucket *bp;
1239
1240 /* Points to first symbol_list of current rule. its symbol is the
1241 lhs of the rule. */
1242 symbol_list *crule = NULL;
1243 /* Points to the symbol_list preceding crule. */
1244 symbol_list *crule1 = NULL;
1245
1246 t = lex ();
1247
1248 while (t != tok_two_percents && t != tok_eof)
1249 if (t == tok_identifier || t == tok_bar)
1250 {
1251 int action_flag = 0;
1252 /* Number of symbols in rhs of this rule so far */
1253 int rulelength = 0;
1254 int xactions = 0; /* JF for error checking */
1255 bucket *first_rhs = 0;
1256
1257 if (t == tok_identifier)
1258 {
1259 lhs = symval;
1260
1261 if (!start_flag)
1262 {
1263 startval = lhs;
1264 start_flag = 1;
1265 }
1266
1267 t = lex ();
1268 if (t != tok_colon)
1269 {
1270 complain (_("ill-formed rule: initial symbol not followed by colon"));
1271 unlex (t);
1272 }
1273 }
1274
1275 if (nrules == 0 && t == tok_bar)
1276 {
1277 complain (_("grammar starts with vertical bar"));
1278 lhs = symval; /* BOGUS: use a random symval */
1279 }
1280 /* start a new rule and record its lhs. */
1281
1282 nrules++;
1283 nitems++;
1284
1285 p = symbol_list_new (lhs);
1286
1287 crule1 = p1;
1288 if (p1)
1289 p1->next = p;
1290 else
1291 grammar = p;
1292
1293 p1 = p;
1294 crule = p;
1295
1296 /* mark the rule's lhs as a nonterminal if not already so. */
1297
1298 if (lhs->class == unknown_sym)
1299 {
1300 lhs->class = nterm_sym;
1301 lhs->value = nvars;
1302 nvars++;
1303 }
1304 else if (lhs->class == token_sym)
1305 complain (_("rule given for %s, which is a token"), lhs->tag);
1306
1307 /* read the rhs of the rule. */
1308
1309 for (;;)
1310 {
1311 t = lex ();
1312 if (t == tok_prec)
1313 {
1314 t = lex ();
1315 crule->ruleprec = symval;
1316 t = lex ();
1317 }
1318
1319 if (!(t == tok_identifier || t == tok_left_curly))
1320 break;
1321
1322 /* If next token is an identifier, see if a colon follows it.
1323 If one does, exit this rule now. */
1324 if (t == tok_identifier)
1325 {
1326 bucket *ssave;
1327 token_t t1;
1328
1329 ssave = symval;
1330 t1 = lex ();
1331 unlex (t1);
1332 symval = ssave;
1333 if (t1 == tok_colon)
1334 break;
1335
1336 if (!first_rhs) /* JF */
1337 first_rhs = symval;
1338 /* Not followed by colon =>
1339 process as part of this rule's rhs. */
1340 }
1341
1342 /* If we just passed an action, that action was in the middle
1343 of a rule, so make a dummy rule to reduce it to a
1344 non-terminal. */
1345 if (action_flag)
1346 {
1347 /* Since the action was written out with this rule's
1348 number, we must give the new rule this number by
1349 inserting the new rule before it. */
1350
1351 /* Make a dummy nonterminal, a gensym. */
1352 bucket *sdummy = gensym ();
1353
1354 /* Make a new rule, whose body is empty, before the
1355 current one, so that the action just read can
1356 belong to it. */
1357 nrules++;
1358 nitems++;
1359 p = symbol_list_new (sdummy);
1360 /* Attach its lineno to that of the host rule. */
1361 p->line = crule->line;
1362 if (crule1)
1363 crule1->next = p;
1364 else
1365 grammar = p;
1366 /* End of the rule. */
1367 crule1 = symbol_list_new (NULL);
1368 crule1->next = crule;
1369
1370 p->next = crule1;
1371
1372 /* Insert the dummy generated by that rule into this
1373 rule. */
1374 nitems++;
1375 p = symbol_list_new (sdummy);
1376 p1->next = p;
1377 p1 = p;
1378
1379 action_flag = 0;
1380 }
1381
1382 if (t == tok_identifier)
1383 {
1384 nitems++;
1385 p = symbol_list_new (symval);
1386 p1->next = p;
1387 p1 = p;
1388 }
1389 else /* handle an action. */
1390 {
1391 parse_action (crule, rulelength);
1392 action_flag = 1;
1393 xactions++; /* JF */
1394 }
1395 rulelength++;
1396 } /* end of read rhs of rule */
1397
1398 /* Put an empty link in the list to mark the end of this rule */
1399 p = symbol_list_new (NULL);
1400 p1->next = p;
1401 p1 = p;
1402
1403 if (t == tok_prec)
1404 {
1405 complain (_("two @prec's in a row"));
1406 t = lex ();
1407 crule->ruleprec = symval;
1408 t = lex ();
1409 }
1410
1411 if (t == tok_guard)
1412 {
1413 if (!semantic_parser)
1414 complain (_("%%guard present but %%semantic_parser not specified"));
1415
1416 parse_guard (crule, rulelength);
1417 t = lex ();
1418 }
1419
1420 if (t == tok_left_curly)
1421 {
1422 /* This case never occurs -wjh */
1423 if (action_flag)
1424 complain (_("two actions at end of one rule"));
1425 parse_action (crule, rulelength);
1426 action_flag = 1;
1427 xactions++; /* -wjh */
1428 t = lex ();
1429 }
1430 /* If $$ is being set in default way, report if any type
1431 mismatch. */
1432 else if (!xactions
1433 && first_rhs && lhs->type_name != first_rhs->type_name)
1434 {
1435 if (lhs->type_name == 0
1436 || first_rhs->type_name == 0
1437 || strcmp (lhs->type_name, first_rhs->type_name))
1438 complain (_("type clash (`%s' `%s') on default action"),
1439 lhs->type_name ? lhs->type_name : "",
1440 first_rhs->type_name ? first_rhs->type_name : "");
1441 }
1442 /* Warn if there is no default for $$ but we need one. */
1443 else if (!xactions && !first_rhs && lhs->type_name != 0)
1444 complain (_("empty rule for typed nonterminal, and no action"));
1445 if (t == tok_semicolon)
1446 t = lex ();
1447 }
1448 else
1449 {
1450 complain (_("invalid input: %s"), quote (token_buffer));
1451 t = lex ();
1452 }
1453
1454 /* Insert the initial rule:
1455
1456 axiom: %start EOF. */
1457 p = symbol_list_new (axiom);
1458 p->next = symbol_list_new (startval);
1459 p->next->next = symbol_list_new (eoftoken);
1460 p->next->next->next = symbol_list_new (NULL);
1461 p->next->next->next->next = grammar;
1462 nrules += 1;
1463 nitems += 3;
1464 grammar = p;
1465 startval = axiom;
1466
1467 /* grammar has been read. Do some checking */
1468
1469 if (nsyms > MAXSHORT)
1470 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1471 MAXSHORT);
1472 if (nrules == 0)
1473 fatal (_("no rules in the input grammar"));
1474
1475 /* Report any undefined symbols and consider them nonterminals. */
1476
1477 for (bp = firstsymbol; bp; bp = bp->next)
1478 if (bp->class == unknown_sym)
1479 {
1480 complain (_
1481 ("symbol %s is used, but is not defined as a token and has no rules"),
1482 bp->tag);
1483 bp->class = nterm_sym;
1484 bp->value = nvars++;
1485 }
1486
1487 ntokens = nsyms - nvars;
1488 }
1489
1490 /* At the end of the grammar file, some C source code must
1491 be stored. It is going to be associated to the epilogue
1492 directive. */
1493 static void
1494 read_additionnal_code (void)
1495 {
1496 char c;
1497 struct obstack el_obstack;
1498
1499 obstack_init (&el_obstack);
1500
1501 if (!no_lines_flag)
1502 {
1503 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1504 lineno, quotearg_style (c_quoting_style,
1505 muscle_find ("filename")));
1506 }
1507
1508 while ((c = getc (finput)) != EOF)
1509 obstack_1grow (&el_obstack, c);
1510
1511 obstack_1grow (&el_obstack, 0);
1512 muscle_insert ("epilogue", obstack_finish (&el_obstack));
1513 }
1514
1515 \f
1516 /*------------------------------------------------------------------.
1517 | Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1518 | number. |
1519 `------------------------------------------------------------------*/
1520
1521 static void
1522 token_translations_init (void)
1523 {
1524 bucket *bp = NULL;
1525 int i;
1526
1527 token_translations = XCALLOC (short, max_user_token_number + 1);
1528
1529 /* Initialize all entries for literal tokens to 2, the internal
1530 token number for $undefined., which represents all invalid
1531 inputs. */
1532 for (i = 0; i <= max_user_token_number; i++)
1533 token_translations[i] = 2;
1534
1535 for (bp = firstsymbol; bp; bp = bp->next)
1536 {
1537 /* Non-terminal? */
1538 if (bp->value >= ntokens)
1539 continue;
1540 /* A token string alias? */
1541 if (bp->user_token_number == SALIAS)
1542 continue;
1543
1544 assert (bp->user_token_number != SUNDEF);
1545
1546 /* A token which translation has already been set? */
1547 if (token_translations[bp->user_token_number] != 2)
1548 complain (_("tokens %s and %s both assigned number %d"),
1549 tags[token_translations[bp->user_token_number]],
1550 bp->tag, bp->user_token_number);
1551 token_translations[bp->user_token_number] = bp->value;
1552 }
1553 }
1554
1555
1556 /*------------------------------------------------------------------.
1557 | Assign symbol numbers, and write definition of token names into |
1558 | FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
1559 | of symbols. |
1560 `------------------------------------------------------------------*/
1561
1562 static void
1563 packsymbols (void)
1564 {
1565 bucket *bp = NULL;
1566 int tokno = 1;
1567 int last_user_token_number;
1568
1569 tags = XCALLOC (char *, nsyms + 1);
1570 user_toknums = XCALLOC (short, nsyms + 1);
1571
1572 sprec = XCALLOC (short, nsyms);
1573 sassoc = XCALLOC (short, nsyms);
1574
1575 max_user_token_number = 256;
1576 last_user_token_number = 256;
1577
1578 for (bp = firstsymbol; bp; bp = bp->next)
1579 {
1580 if (bp->class == nterm_sym)
1581 {
1582 bp->value += ntokens;
1583 }
1584 else if (bp->alias)
1585 {
1586 /* This symbol and its alias are a single token defn.
1587 Allocate a tokno, and assign to both check agreement of
1588 prec and assoc fields and make both the same */
1589 if (bp->value == -1)
1590 {
1591 if (bp == eoftoken || bp->alias == eoftoken)
1592 bp->value = bp->alias->value = 0;
1593 else
1594 {
1595 bp->value = bp->alias->value = tokno++;
1596 }
1597 }
1598
1599 if (bp->prec != bp->alias->prec)
1600 {
1601 if (bp->prec != 0 && bp->alias->prec != 0
1602 && bp->user_token_number == SALIAS)
1603 complain (_("conflicting precedences for %s and %s"),
1604 bp->tag, bp->alias->tag);
1605 if (bp->prec != 0)
1606 bp->alias->prec = bp->prec;
1607 else
1608 bp->prec = bp->alias->prec;
1609 }
1610
1611 if (bp->assoc != bp->alias->assoc)
1612 {
1613 if (bp->assoc != 0 && bp->alias->assoc != 0
1614 && bp->user_token_number == SALIAS)
1615 complain (_("conflicting assoc values for %s and %s"),
1616 bp->tag, bp->alias->tag);
1617 if (bp->assoc != 0)
1618 bp->alias->assoc = bp->assoc;
1619 else
1620 bp->assoc = bp->alias->assoc;
1621 }
1622
1623 /* Do not do processing below for SALIASs. */
1624 if (bp->user_token_number == SALIAS)
1625 continue;
1626
1627 }
1628 else /* bp->class == token_sym */
1629 {
1630 if (bp == eoftoken)
1631 bp->value = 0;
1632 else
1633 bp->value = tokno++;
1634 }
1635
1636 if (bp->class == token_sym)
1637 {
1638 if (bp->user_token_number == SUNDEF)
1639 bp->user_token_number = ++last_user_token_number;
1640 if (bp->user_token_number > max_user_token_number)
1641 max_user_token_number = bp->user_token_number;
1642 }
1643
1644 tags[bp->value] = bp->tag;
1645 user_toknums[bp->value] = bp->user_token_number;
1646 sprec[bp->value] = bp->prec;
1647 sassoc[bp->value] = bp->assoc;
1648 }
1649
1650 token_translations_init ();
1651
1652 error_token_number = errtoken->value;
1653
1654 if (startval->class == unknown_sym)
1655 fatal (_("the start symbol %s is undefined"), startval->tag);
1656 else if (startval->class == token_sym)
1657 fatal (_("the start symbol %s is a token"), startval->tag);
1658
1659 start_symbol = startval->value;
1660 }
1661
1662
1663 /*---------------------------------------------------------------.
1664 | Save the definition of token names in the `TOKENDEFS' muscle. |
1665 `---------------------------------------------------------------*/
1666
1667 static void
1668 symbols_save (void)
1669 {
1670 struct obstack tokendefs;
1671 bucket *bp;
1672 obstack_init (&tokendefs);
1673
1674 for (bp = firstsymbol; bp; bp = bp->next)
1675 {
1676 char *symbol = bp->tag; /* get symbol */
1677
1678 if (bp->value >= ntokens)
1679 continue;
1680 if (bp->user_token_number == SALIAS)
1681 continue;
1682 if ('\'' == *symbol)
1683 continue; /* skip literal character */
1684 if (bp == errtoken)
1685 continue; /* skip error token */
1686 if ('\"' == *symbol)
1687 {
1688 /* use literal string only if given a symbol with an alias */
1689 if (bp->alias)
1690 symbol = bp->alias->tag;
1691 else
1692 continue;
1693 }
1694
1695 /* Don't #define nonliteral tokens whose names contain periods. */
1696 if (strchr (symbol, '.'))
1697 continue;
1698
1699 obstack_fgrow2 (&tokendefs, "# define %s\t%d\n",
1700 symbol, bp->user_token_number);
1701 if (semantic_parser)
1702 /* FIXME: This is probably wrong, and should be just as
1703 above. --akim. */
1704 obstack_fgrow2 (&tokendefs, "# define T%s\t%d\n", symbol, bp->value);
1705 }
1706
1707 obstack_1grow (&tokendefs, 0);
1708 muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1709 obstack_free (&tokendefs, NULL);
1710 }
1711
1712
1713 /*---------------------------------------------------------------.
1714 | Convert the rules into the representation using RRHS, RLHS and |
1715 | RITEMS. |
1716 `---------------------------------------------------------------*/
1717
1718 static void
1719 packgram (void)
1720 {
1721 int itemno;
1722 int ruleno;
1723 symbol_list *p;
1724
1725 /* We use short to index items. */
1726 if (nitems >= MAXSHORT)
1727 fatal (_("too many items (max %d)"), MAXSHORT);
1728
1729 ritem = XCALLOC (short, nitems + 1);
1730 rule_table = XCALLOC (rule_t, nrules) - 1;
1731
1732 itemno = 0;
1733 ruleno = 1;
1734
1735 p = grammar;
1736 while (p)
1737 {
1738 bucket *ruleprec = p->ruleprec;
1739 rule_table[ruleno].lhs = p->sym->value;
1740 rule_table[ruleno].rhs = itemno;
1741 rule_table[ruleno].line = p->line;
1742 rule_table[ruleno].useful = TRUE;
1743 rule_table[ruleno].action = p->action;
1744 rule_table[ruleno].action_line = p->action_line;
1745 rule_table[ruleno].guard = p->guard;
1746 rule_table[ruleno].guard_line = p->guard_line;
1747
1748 p = p->next;
1749 while (p && p->sym)
1750 {
1751 ritem[itemno++] = p->sym->value;
1752 /* A rule gets by default the precedence and associativity
1753 of the last token in it. */
1754 if (p->sym->class == token_sym)
1755 {
1756 rule_table[ruleno].prec = p->sym->prec;
1757 rule_table[ruleno].assoc = p->sym->assoc;
1758 }
1759 if (p)
1760 p = p->next;
1761 }
1762
1763 /* If this rule has a %prec,
1764 the specified symbol's precedence replaces the default. */
1765 if (ruleprec)
1766 {
1767 rule_table[ruleno].prec = ruleprec->prec;
1768 rule_table[ruleno].assoc = ruleprec->assoc;
1769 rule_table[ruleno].precsym = ruleprec->value;
1770 }
1771
1772 ritem[itemno++] = -ruleno;
1773 ruleno++;
1774
1775 if (p)
1776 p = p->next;
1777 }
1778
1779 ritem[itemno] = 0;
1780 nritems = itemno;
1781 assert (nritems == nitems);
1782
1783 if (trace_flag)
1784 ritem_print (stderr);
1785 }
1786 \f
1787 /*-------------------------------------------------------------------.
1788 | Read in the grammar specification and record it in the format |
1789 | described in gram.h. All guards are copied into the GUARD_OBSTACK |
1790 | and all actions into ACTION_OBSTACK, in each case forming the body |
1791 | of a C function (YYGUARD or YYACTION) which contains a switch |
1792 | statement to decide which guard or action to execute. |
1793 `-------------------------------------------------------------------*/
1794
1795 void
1796 reader (void)
1797 {
1798 start_flag = 0;
1799 startval = NULL; /* start symbol not specified yet. */
1800
1801 nsyms = 0;
1802 nvars = 0;
1803 nrules = 0;
1804 nitems = 0;
1805
1806 typed = 0;
1807 lastprec = 0;
1808
1809 semantic_parser = 0;
1810 pure_parser = 0;
1811
1812 grammar = NULL;
1813
1814 lex_init ();
1815 lineno = 1;
1816
1817 /* Initialize the muscle obstack. */
1818 obstack_init (&muscle_obstack);
1819
1820 /* Initialize the symbol table. */
1821 tabinit ();
1822
1823 /* Construct the axiom symbol. */
1824 axiom = getsym ("$axiom");
1825 axiom->class = nterm_sym;
1826 axiom->value = nvars++;
1827
1828 /* Construct the error token */
1829 errtoken = getsym ("error");
1830 errtoken->class = token_sym;
1831 errtoken->user_token_number = 256; /* Value specified by POSIX. */
1832
1833 /* Construct a token that represents all undefined literal tokens.
1834 It is always token number 2. */
1835 undeftoken = getsym ("$undefined.");
1836 undeftoken->class = token_sym;
1837 undeftoken->user_token_number = 2;
1838
1839 /* Initialize the obstacks. */
1840 obstack_init (&action_obstack);
1841 obstack_init (&attrs_obstack);
1842 obstack_init (&output_obstack);
1843
1844 finput = xfopen (infile, "r");
1845
1846 /* Read the declaration section. Copy %{ ... %} groups to
1847 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
1848 etc. found there. */
1849 read_declarations ();
1850
1851 /* If the user did not define her EOFTOKEN, do it now. */
1852 if (!eoftoken)
1853 {
1854 eoftoken = getsym ("$");
1855 eoftoken->class = token_sym;
1856 /* Value specified by POSIX. */
1857 eoftoken->user_token_number = 0;
1858 }
1859
1860 /* Read in the grammar, build grammar in list form. Write out
1861 guards and actions. */
1862 readgram ();
1863 /* Some C code is given at the end of the grammar file. */
1864 read_additionnal_code ();
1865
1866 lex_free ();
1867 xfclose (finput);
1868
1869 /* Assign the symbols their symbol numbers. Write #defines for the
1870 token symbols into FDEFINES if requested. */
1871 packsymbols ();
1872 /* Save them. */
1873 symbols_save ();
1874
1875 /* Convert the grammar into the format described in gram.h. */
1876 packgram ();
1877 }