]> git.saurik.com Git - bison.git/blob - src/reader.c
Regen.
[bison.git] / src / reader.c
1 /* Input parser for bison
2 Copyright 1984, 1986, 1989, 1992, 1998, 2000
3 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22
#include "system.h"

#include <limits.h>

#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
39
40 /* Number of slots allocated (but not necessarily used yet) in `rline' */
41 static int rline_allocated;
42
43 typedef struct symbol_list
44 {
45 struct symbol_list *next;
46 bucket *sym;
47 bucket *ruleprec;
48 }
49 symbol_list;
50
51 int lineno;
52 char **tags;
53 short *user_toknums;
54 static symbol_list *grammar;
55 static int start_flag;
56 static bucket *startval;
57
58 /* Nonzero if components of semantic values are used, implying
59 they must be unions. */
60 static int value_components_used;
61
62 /* Nonzero if %union has been seen. */
63 static int typed;
64
65 /* Incremented for each %left, %right or %nonassoc seen */
66 static int lastprec;
67
68 static bucket *errtoken;
69 static bucket *undeftoken;
70 \f
71
72 /*===================\
73 | Low level lexing. |
74 \===================*/
75
76 static void
77 skip_to_char (int target)
78 {
79 int c;
80 if (target == '\n')
81 complain (_(" Skipping to next \\n"));
82 else
83 complain (_(" Skipping to next %c"), target);
84
85 do
86 c = skip_white_space ();
87 while (c != target && c != EOF);
88 if (c != EOF)
89 ungetc (c, finput);
90 }
91
92
/* Read a signed decimal integer from STREAM and return its value.

   An optional leading `-' is accepted, followed by zero or more
   digits; with no digits at all the result is 0.  The first character
   that is not part of the number is pushed back onto STREAM.

   The magnitude saturates at INT_MAX instead of overflowing, so an
   absurdly long digit string in the input cannot trigger undefined
   behavior; every digit is still consumed.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* True exactly when 10 * n + digit would exceed INT_MAX.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Give back the terminating character (a no-op for EOF).  */
  ungetc (c, stream);

  return sign * n;
}
120 \f
121 /*--------------------------------------------------------------.
122 | Get the data type (alternative in the union) of the value for |
123 | symbol N in rule RULE. |
124 `--------------------------------------------------------------*/
125
126 static char *
127 get_type_name (int n, symbol_list * rule)
128 {
129 int i;
130 symbol_list *rp;
131
132 if (n < 0)
133 {
134 complain (_("invalid $ value"));
135 return NULL;
136 }
137
138 rp = rule;
139 i = 0;
140
141 while (i < n)
142 {
143 rp = rp->next;
144 if (rp == NULL || rp->sym == NULL)
145 {
146 complain (_("invalid $ value"));
147 return NULL;
148 }
149 i++;
150 }
151
152 return rp->sym->type_name;
153 }
154 \f
155 /*------------------------------------------------------------.
156 | Dump the string from FIN to OOUT if non null. MATCH is the |
157 | delimiter of the string (either ' or "). |
158 `------------------------------------------------------------*/
159
160 static inline void
161 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
162 {
163 int c;
164
165 if (store)
166 obstack_1grow (oout, match);
167
168 c = getc (fin);
169
170 while (c != match)
171 {
172 if (c == EOF)
173 fatal (_("unterminated string at end of file"));
174 if (c == '\n')
175 {
176 complain (_("unterminated string"));
177 ungetc (c, fin);
178 c = match; /* invent terminator */
179 continue;
180 }
181
182 obstack_1grow (oout, c);
183
184 if (c == '\\')
185 {
186 c = getc (fin);
187 if (c == EOF)
188 fatal (_("unterminated string at end of file"));
189 obstack_1grow (oout, c);
190
191 if (c == '\n')
192 lineno++;
193 }
194
195 c = getc (fin);
196 }
197
198 if (store)
199 obstack_1grow (oout, c);
200 }
201
/* Copy a quoted string from FIN to OOUT, delimiters included.  MATCH
   is the opening delimiter, which the caller has already read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
209
/* Copy the longest run of letters, digits and underscores found at
   the current position of FIN to OOUT.  The character that ends the
   run is pushed back onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (ch == '_' || isalnum (ch))
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
222
223 /*-----------------------------------------------------------------.
224 | Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
225 | NULL). In fact we just saw a `/', which might or might not be a |
226 | comment. In any case, copy what we saw. |
227 | |
228 | OUT2 might be NULL. |
229 `-----------------------------------------------------------------*/
230
231 static inline void
232 copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
233 {
234 int cplus_comment;
235 int ended;
236 int c;
237
238 /* We read a `/', output it. */
239 obstack_1grow (oout1, '/');
240 if (oout2)
241 obstack_1grow (oout2, '/');
242
243 switch ((c = getc (fin)))
244 {
245 case '/':
246 cplus_comment = 1;
247 break;
248 case '*':
249 cplus_comment = 0;
250 break;
251 default:
252 ungetc (c, fin);
253 return;
254 }
255
256 obstack_1grow (oout1, c);
257 if (oout2)
258 obstack_1grow (oout2, c);
259 c = getc (fin);
260
261 ended = 0;
262 while (!ended)
263 {
264 if (!cplus_comment && c == '*')
265 {
266 while (c == '*')
267 {
268 obstack_1grow (oout1, c);
269 if (oout2)
270 obstack_1grow (oout2, c);
271 c = getc (fin);
272 }
273
274 if (c == '/')
275 {
276 obstack_1grow (oout1, c);
277 if (oout2)
278 obstack_1grow (oout2, c);
279 ended = 1;
280 }
281 }
282 else if (c == '\n')
283 {
284 lineno++;
285 obstack_1grow (oout1, c);
286 if (oout2)
287 obstack_1grow (oout2, c);
288 if (cplus_comment)
289 ended = 1;
290 else
291 c = getc (fin);
292 }
293 else if (c == EOF)
294 fatal (_("unterminated comment"));
295 else
296 {
297 obstack_1grow (oout1, c);
298 if (oout2)
299 obstack_1grow (oout2, c);
300 c = getc (fin);
301 }
302 }
303 }
304
305
/* Copy the comment (actually, whatever starts with the `/' that was
   just read) from FIN to the single obstack OOUT.  */

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *no_second_output = NULL;

  copy_comment2 (fin, oout, no_second_output);
}
316
317
318 /*-----------------------------------------------------------------.
319 | FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
320 | reference to this location. STACK_OFFSET is the number of values |
321 | in the current rule so far, which says where to find `$0' with |
322 | respect to the top of the stack. |
323 `-----------------------------------------------------------------*/
324
325 static inline void
326 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
327 {
328 int c;
329
330 c = getc (fin);
331 if (c == '$')
332 {
333 obstack_sgrow (oout, "yyloc");
334 locations_flag = 1;
335 }
336 else if (isdigit (c) || c == '-')
337 {
338 int n;
339
340 ungetc (c, fin);
341 n = read_signed_integer (fin);
342
343 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
344 locations_flag = 1;
345 }
346 else
347 {
348 char buf[] = "@c";
349 buf[1] = c;
350 complain (_("%s is invalid"), quote (buf));
351 }
352 }
353
354
355 /*-------------------------------------------------------------------.
356 | FIN is pointing to a wannabee semantic value (i.e., a `$'). |
357 | |
358 | Possible inputs: $[<TYPENAME>]($|integer) |
359 | |
360 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
361 | the number of values in the current rule so far, which says where |
362 | to find `$0' with respect to the top of the stack. |
363 `-------------------------------------------------------------------*/
364
365 static inline void
366 copy_dollar (FILE *fin, struct obstack *oout,
367 symbol_list *rule, int stack_offset)
368 {
369 int c = getc (fin);
370 const char *type_name = NULL;
371
372 /* Get the type name if explicit. */
373 if (c == '<')
374 {
375 read_type_name (fin);
376 type_name = token_buffer;
377 value_components_used = 1;
378 c = getc (fin);
379 }
380
381 if (c == '$')
382 {
383 obstack_sgrow (oout, "yyval");
384
385 if (!type_name)
386 type_name = get_type_name (0, rule);
387 if (type_name)
388 obstack_fgrow1 (oout, ".%s", type_name);
389 if (!type_name && typed)
390 complain (_("$$ of `%s' has no declared type"),
391 rule->sym->tag);
392 }
393 else if (isdigit (c) || c == '-')
394 {
395 int n;
396 ungetc (c, fin);
397 n = read_signed_integer (fin);
398
399 if (!type_name && n > 0)
400 type_name = get_type_name (n, rule);
401
402 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
403
404 if (type_name)
405 obstack_fgrow1 (oout, ".%s", type_name);
406 if (!type_name && typed)
407 complain (_("$%d of `%s' has no declared type"),
408 n, rule->sym->tag);
409 }
410 else
411 {
412 char buf[] = "$c";
413 buf[1] = c;
414 complain (_("%s is invalid"), quote (buf));
415 }
416 }
417 \f
418 /*-------------------------------------------------------------------.
419 | Copy the contents of a `%{ ... %}' into the definitions file. The |
420 | `%{' has already been read. Return after reading the `%}'. |
421 `-------------------------------------------------------------------*/
422
423 static void
424 copy_definition (void)
425 {
426 int c;
427 /* -1 while reading a character if prev char was %. */
428 int after_percent;
429
430 #if 0
431 if (!no_lines_flag)
432 obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
433 lineno, quotearg_style (c_quoting_style, infile));
434 #endif
435
436 after_percent = 0;
437
438 c = getc (finput);
439
440 for (;;)
441 {
442 switch (c)
443 {
444 case '\n':
445 obstack_1grow (&attrs_obstack, c);
446 lineno++;
447 break;
448
449 case '%':
450 after_percent = -1;
451 break;
452
453 case '\'':
454 case '"':
455 copy_string (finput, &attrs_obstack, c);
456 break;
457
458 case '/':
459 copy_comment (finput, &attrs_obstack);
460 break;
461
462 case EOF:
463 fatal ("%s", _("unterminated `%{' definition"));
464
465 default:
466 obstack_1grow (&attrs_obstack, c);
467 }
468
469 c = getc (finput);
470
471 if (after_percent)
472 {
473 if (c == '}')
474 return;
475 obstack_1grow (&attrs_obstack, '%');
476 }
477 after_percent = 0;
478 }
479 }
480
481
482 /*-------------------------------------------------------------------.
483 | Parse what comes after %token or %nterm. For %token, WHAT_IS is |
484 | token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
485 | are reversed. |
486 `-------------------------------------------------------------------*/
487
488 static void
489 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
490 {
491 token_t token = 0;
492 char *typename = 0;
493
494 /* The symbol being defined. */
495 struct bucket *symbol = NULL;
496
497 /* After `%token' and `%nterm', any number of symbols maybe be
498 defined. */
499 for (;;)
500 {
501 int tmp_char = ungetc (skip_white_space (), finput);
502
503 /* `%' (for instance from `%token', or from `%%' etc.) is the
504 only valid means to end this declaration. */
505 if (tmp_char == '%')
506 return;
507 if (tmp_char == EOF)
508 fatal (_("Premature EOF after %s"), token_buffer);
509
510 token = lex ();
511 if (token == tok_comma)
512 {
513 symbol = NULL;
514 continue;
515 }
516 if (token == tok_typename)
517 {
518 typename = xstrdup (token_buffer);
519 value_components_used = 1;
520 symbol = NULL;
521 }
522 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
523 {
524 if (symval->alias)
525 warn (_("symbol `%s' used more than once as a literal string"),
526 symval->tag);
527 else if (symbol->alias)
528 warn (_("symbol `%s' given more than one literal string"),
529 symbol->tag);
530 else
531 {
532 symval->class = token_sym;
533 symval->type_name = typename;
534 symval->user_token_number = symbol->user_token_number;
535 symbol->user_token_number = SALIAS;
536 symval->alias = symbol;
537 symbol->alias = symval;
538 /* symbol and symval combined are only one symbol */
539 nsyms--;
540 }
541 translations = 1;
542 symbol = NULL;
543 }
544 else if (token == tok_identifier)
545 {
546 int oldclass = symval->class;
547 symbol = symval;
548
549 if (symbol->class == what_is_not)
550 complain (_("symbol %s redefined"), symbol->tag);
551 symbol->class = what_is;
552 if (what_is == nterm_sym && oldclass != nterm_sym)
553 symbol->value = nvars++;
554
555 if (typename)
556 {
557 if (symbol->type_name == NULL)
558 symbol->type_name = typename;
559 else if (strcmp (typename, symbol->type_name) != 0)
560 complain (_("type redeclaration for %s"), symbol->tag);
561 }
562 }
563 else if (symbol && token == tok_number)
564 {
565 symbol->user_token_number = numval;
566 translations = 1;
567 }
568 else
569 {
570 complain (_("`%s' is invalid in %s"),
571 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
572 skip_to_char ('%');
573 }
574 }
575
576 }
577
578
579 /*------------------------------.
580 | Parse what comes after %start |
581 `------------------------------*/
582
583 static void
584 parse_start_decl (void)
585 {
586 if (start_flag)
587 complain (_("multiple %s declarations"), "%start");
588 if (lex () != tok_identifier)
589 complain (_("invalid %s declaration"), "%start");
590 else
591 {
592 start_flag = 1;
593 startval = symval;
594 }
595 }
596
597 /*-----------------------------------------------------------.
598 | read in a %type declaration and record its information for |
599 | get_type_name to access |
600 `-----------------------------------------------------------*/
601
602 static void
603 parse_type_decl (void)
604 {
605 char *name;
606
607 if (lex () != tok_typename)
608 {
609 complain ("%s", _("%type declaration has no <typename>"));
610 skip_to_char ('%');
611 return;
612 }
613
614 name = xstrdup (token_buffer);
615
616 for (;;)
617 {
618 token_t t;
619 int tmp_char = ungetc (skip_white_space (), finput);
620
621 if (tmp_char == '%')
622 return;
623 if (tmp_char == EOF)
624 fatal (_("Premature EOF after %s"), token_buffer);
625
626 t = lex ();
627
628 switch (t)
629 {
630
631 case tok_comma:
632 case tok_semicolon:
633 break;
634
635 case tok_identifier:
636 if (symval->type_name == NULL)
637 symval->type_name = name;
638 else if (strcmp (name, symval->type_name) != 0)
639 complain (_("type redeclaration for %s"), symval->tag);
640
641 break;
642
643 default:
644 complain (_("invalid %%type declaration due to item: %s"),
645 token_buffer);
646 skip_to_char ('%');
647 }
648 }
649 }
650
651
652
653 /*----------------------------------------------------------------.
654 | Read in a %left, %right or %nonassoc declaration and record its |
655 | information. |
656 `----------------------------------------------------------------*/
657
658 static void
659 parse_assoc_decl (associativity assoc)
660 {
661 char *name = NULL;
662 int prev = 0;
663
664 lastprec++; /* Assign a new precedence level, never 0. */
665
666 for (;;)
667 {
668 token_t t;
669 int tmp_char = ungetc (skip_white_space (), finput);
670
671 if (tmp_char == '%')
672 return;
673 if (tmp_char == EOF)
674 fatal (_("Premature EOF after %s"), token_buffer);
675
676 t = lex ();
677
678 switch (t)
679 {
680 case tok_typename:
681 name = xstrdup (token_buffer);
682 break;
683
684 case tok_comma:
685 break;
686
687 case tok_identifier:
688 if (symval->prec != 0)
689 complain (_("redefining precedence of %s"), symval->tag);
690 symval->prec = lastprec;
691 symval->assoc = assoc;
692 if (symval->class == nterm_sym)
693 complain (_("symbol %s redefined"), symval->tag);
694 symval->class = token_sym;
695 if (name)
696 { /* record the type, if one is specified */
697 if (symval->type_name == NULL)
698 symval->type_name = name;
699 else if (strcmp (name, symval->type_name) != 0)
700 complain (_("type redeclaration for %s"), symval->tag);
701 }
702 break;
703
704 case tok_number:
705 if (prev == tok_identifier)
706 {
707 symval->user_token_number = numval;
708 translations = 1;
709 }
710 else
711 {
712 complain (_
713 ("invalid text (%s) - number should be after identifier"),
714 token_buffer);
715 skip_to_char ('%');
716 }
717 break;
718
719 case tok_semicolon:
720 return;
721
722 default:
723 complain (_("unexpected item: %s"), token_buffer);
724 skip_to_char ('%');
725 }
726
727 prev = t;
728
729 }
730 }
731
732
733
734 /*--------------------------------------------------------------.
735 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
736 | where it is made into the definition of YYSTYPE, the type of |
737 | elements of the parser value stack. |
738 `--------------------------------------------------------------*/
739
740 static void
741 parse_union_decl (void)
742 {
743 int c;
744 int count = 0;
745
746 if (typed)
747 complain (_("multiple %s declarations"), "%union");
748
749 typed = 1;
750
751 if (!no_lines_flag)
752 obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
753 lineno, quotearg_style (c_quoting_style,
754 muscle_find("filename")));
755 else
756 obstack_1grow (&attrs_obstack, '\n');
757
758 obstack_sgrow (&attrs_obstack, "typedef union");
759 if (defines_flag)
760 obstack_sgrow (&defines_obstack, "typedef union");
761
762 c = getc (finput);
763
764 while (c != EOF)
765 {
766 obstack_1grow (&attrs_obstack, c);
767 if (defines_flag)
768 obstack_1grow (&defines_obstack, c);
769
770 switch (c)
771 {
772 case '\n':
773 lineno++;
774 break;
775
776 case '/':
777 copy_comment2 (finput, &defines_obstack, &attrs_obstack);
778 break;
779
780 case '{':
781 count++;
782 break;
783
784 case '}':
785 if (count == 0)
786 complain (_("unmatched %s"), "`}'");
787 count--;
788 if (count <= 0)
789 {
790 obstack_sgrow (&attrs_obstack, " YYSTYPE;\n");
791 if (defines_flag)
792 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
793 /* JF don't choke on trailing semi */
794 c = skip_white_space ();
795 if (c != ';')
796 ungetc (c, finput);
797 return;
798 }
799 }
800
801 c = getc (finput);
802 }
803 }
804
805
806 /*-------------------------------------------------------.
807 | Parse the declaration %expect N which says to expect N |
808 | shift-reduce conflicts. |
809 `-------------------------------------------------------*/
810
811 static void
812 parse_expect_decl (void)
813 {
814 int c = skip_white_space ();
815 ungetc (c, finput);
816
817 if (!isdigit (c))
818 complain (_("argument of %%expect is not an integer"));
819 else
820 expected_conflicts = read_signed_integer (finput);
821 }
822
823
824 /*-------------------------------------------------------------------.
825 | Parse what comes after %thong. the full syntax is |
826 | |
827 | %thong <type> token number literal |
828 | |
829 | the <type> or number may be omitted. The number specifies the |
830 | user_token_number. |
831 | |
832 | Two symbols are entered in the table, one for the token symbol and |
833 | one for the literal. Both are given the <type>, if any, from the |
834 | declaration. The ->user_token_number of the first is SALIAS and |
835 | the ->user_token_number of the second is set to the number, if |
836 | any, from the declaration. The two symbols are linked via |
837 | pointers in their ->alias fields. |
838 | |
839 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
840 | only the literal string is retained it is the literal string that |
841 | is output to yytname |
842 `-------------------------------------------------------------------*/
843
844 static void
845 parse_thong_decl (void)
846 {
847 token_t token;
848 struct bucket *symbol;
849 char *typename = 0;
850 int usrtoknum;
851
852 translations = 1;
853 token = lex (); /* fetch typename or first token */
854 if (token == tok_typename)
855 {
856 typename = xstrdup (token_buffer);
857 value_components_used = 1;
858 token = lex (); /* fetch first token */
859 }
860
861 /* process first token */
862
863 if (token != tok_identifier)
864 {
865 complain (_("unrecognized item %s, expected an identifier"),
866 token_buffer);
867 skip_to_char ('%');
868 return;
869 }
870 symval->class = token_sym;
871 symval->type_name = typename;
872 symval->user_token_number = SALIAS;
873 symbol = symval;
874
875 token = lex (); /* get number or literal string */
876
877 if (token == tok_number)
878 {
879 usrtoknum = numval;
880 token = lex (); /* okay, did number, now get literal */
881 }
882 else
883 usrtoknum = 0;
884
885 /* process literal string token */
886
887 if (token != tok_identifier || *symval->tag != '\"')
888 {
889 complain (_("expected string constant instead of %s"), token_buffer);
890 skip_to_char ('%');
891 return;
892 }
893 symval->class = token_sym;
894 symval->type_name = typename;
895 symval->user_token_number = usrtoknum;
896
897 symval->alias = symbol;
898 symbol->alias = symval;
899
900 /* symbol and symval combined are only one symbol. */
901 nsyms--;
902 }
903
904 /* FIXME. */
905
906 static void
907 parse_muscle_decl (void)
908 {
909 int ch = ungetc (skip_white_space (), finput);
910 char* muscle_key;
911 char* muscle_value;
912
913 /* Read key. */
914 if (!isalpha (ch) && ch != '_')
915 {
916 complain (_("invalid %s declaration"), "%define");
917 skip_to_char ('%');
918 return;
919 }
920 copy_identifier (finput, &muscle_obstack);
921 obstack_1grow (&muscle_obstack, 0);
922 muscle_key = obstack_finish (&muscle_obstack);
923
924 /* Read value. */
925 ch = skip_white_space ();
926 if (ch != '"')
927 {
928 ungetc (ch, finput);
929 if (ch != EOF)
930 {
931 complain (_("invalid %s declaration"), "%define");
932 skip_to_char ('%');
933 return;
934 }
935 else
936 fatal (_("Premature EOF after %s"), "\"");
937 }
938 copy_string2 (finput, &muscle_obstack, '"', 0);
939 obstack_1grow (&muscle_obstack, 0);
940 muscle_value = obstack_finish (&muscle_obstack);
941
942 /* Store the (key, value) pair in the environment. */
943 muscle_insert (muscle_key, muscle_value);
944 }
945
946
/*----------------------------------.
| Parse what comes after %skeleton. |
`----------------------------------*/

/* Placeholder: the body is empty, so nothing is read from the input
   and nothing is recorded.  Whatever follows `%skeleton' is therefore
   left in the input for the caller.  Unlike the other declaration
   parsers in this file, this function is not declared `static'.  */

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29. */
}
956
957 /*------------------------------------------.
958 | Parse what comes after %header_extension. |
959 `------------------------------------------*/
960
961 static void
962 parse_header_extension_decl (void)
963 {
964 char buff[32];
965
966 if (header_extension)
967 complain (_("multiple %%header_extension declarations"));
968 fscanf (finput, "%s", buff);
969 header_extension = xstrdup (buff);
970 }
971
972 /*------------------------------------------.
973 | Parse what comes after %source_extension. |
974 `------------------------------------------*/
975
976 static void
977 parse_source_extension_decl (void)
978 {
979 char buff[32];
980
981 if (src_extension)
982 complain (_("multiple %%source_extension declarations"));
983 fscanf (finput, "%s", buff);
984 src_extension = xstrdup (buff);
985 }
986
987 /*----------------------------------------------------------------.
988 | Read from finput until `%%' is seen. Discard the `%%'. Handle |
989 | any `%' declarations, and copy the contents of any `%{ ... %}' |
990 | groups to ATTRS_OBSTACK. |
991 `----------------------------------------------------------------*/
992
993 static void
994 read_declarations (void)
995 {
996 int c;
997 int tok;
998
999 for (;;)
1000 {
1001 c = skip_white_space ();
1002
1003 if (c == '%')
1004 {
1005 tok = parse_percent_token ();
1006
1007 switch (tok)
1008 {
1009 case tok_two_percents:
1010 return;
1011
1012 case tok_percent_left_curly:
1013 copy_definition ();
1014 break;
1015
1016 case tok_token:
1017 parse_token_decl (token_sym, nterm_sym);
1018 break;
1019
1020 case tok_nterm:
1021 parse_token_decl (nterm_sym, token_sym);
1022 break;
1023
1024 case tok_type:
1025 parse_type_decl ();
1026 break;
1027
1028 case tok_start:
1029 parse_start_decl ();
1030 break;
1031
1032 case tok_union:
1033 parse_union_decl ();
1034 break;
1035
1036 case tok_expect:
1037 parse_expect_decl ();
1038 break;
1039
1040 case tok_thong:
1041 parse_thong_decl ();
1042 break;
1043
1044 case tok_left:
1045 parse_assoc_decl (left_assoc);
1046 break;
1047
1048 case tok_right:
1049 parse_assoc_decl (right_assoc);
1050 break;
1051
1052 case tok_nonassoc:
1053 parse_assoc_decl (non_assoc);
1054 break;
1055
1056 case tok_hdrext:
1057 parse_header_extension_decl ();
1058 break;
1059
1060 case tok_srcext:
1061 parse_source_extension_decl ();
1062 break;
1063
1064 case tok_define:
1065 parse_muscle_decl ();
1066 break;
1067
1068 case tok_skel:
1069 parse_skel_decl ();
1070 break;
1071
1072 case tok_noop:
1073 break;
1074
1075 default:
1076 complain (_("unrecognized: %s"), token_buffer);
1077 skip_to_char ('%');
1078 }
1079 }
1080 else if (c == EOF)
1081 fatal (_("no input grammar"));
1082 else
1083 {
1084 char buf[] = "c";
1085 buf[0] = c;
1086 complain (_("unknown character: %s"), quote (buf));
1087 skip_to_char ('%');
1088 }
1089 }
1090 }
1091 \f
1092 /*-------------------------------------------------------------------.
1093 | Assuming that a `{' has just been seen, copy everything up to the |
1094 | matching `}' into the actions file. STACK_OFFSET is the number of |
1095 | values in the current rule so far, which says where to find `$0' |
1096 | with respect to the top of the stack. |
1097 `-------------------------------------------------------------------*/
1098
1099 static void
1100 copy_action (symbol_list *rule, int stack_offset)
1101 {
1102 int c;
1103 int count;
1104 char buf[4096];
1105
1106 /* offset is always 0 if parser has already popped the stack pointer */
1107 if (semantic_parser)
1108 stack_offset = 0;
1109
1110 sprintf (buf, "\ncase %d:\n", nrules);
1111 obstack_grow (&action_obstack, buf, strlen (buf));
1112
1113 if (!no_lines_flag)
1114 {
1115 sprintf (buf, "#line %d %s\n",
1116 lineno, quotearg_style (c_quoting_style,
1117 muscle_find ("filename")));
1118 obstack_grow (&action_obstack, buf, strlen (buf));
1119 }
1120 obstack_1grow (&action_obstack, '{');
1121
1122 count = 1;
1123 c = getc (finput);
1124
1125 while (count > 0)
1126 {
1127 while (c != '}')
1128 {
1129 switch (c)
1130 {
1131 case '\n':
1132 obstack_1grow (&action_obstack, c);
1133 lineno++;
1134 break;
1135
1136 case '{':
1137 obstack_1grow (&action_obstack, c);
1138 count++;
1139 break;
1140
1141 case '\'':
1142 case '"':
1143 copy_string (finput, &action_obstack, c);
1144 break;
1145
1146 case '/':
1147 copy_comment (finput, &action_obstack);
1148 break;
1149
1150 case '$':
1151 copy_dollar (finput, &action_obstack,
1152 rule, stack_offset);
1153 break;
1154
1155 case '@':
1156 copy_at (finput, &action_obstack,
1157 stack_offset);
1158 break;
1159
1160 case EOF:
1161 fatal (_("unmatched %s"), "`{'");
1162
1163 default:
1164 obstack_1grow (&action_obstack, c);
1165 }
1166
1167 c = getc (finput);
1168 }
1169
1170 /* above loop exits when c is '}' */
1171
1172 if (--count)
1173 {
1174 obstack_1grow (&action_obstack, c);
1175 c = getc (finput);
1176 }
1177 }
1178
1179 obstack_sgrow (&action_obstack, ";\n break;}");
1180 }
1181 \f
1182 /*-------------------------------------------------------------------.
1183 | After `%guard' is seen in the input file, copy the actual guard |
1184 | into the guards file. If the guard is followed by an action, copy |
1185 | that into the actions file. STACK_OFFSET is the number of values |
1186 | in the current rule so far, which says where to find `$0' with |
1187 | respect to the top of the stack, for the simple parser in which |
1188 | the stack is not popped until after the guard is run. |
1189 `-------------------------------------------------------------------*/
1190
1191 static void
1192 copy_guard (symbol_list *rule, int stack_offset)
1193 {
1194 int c;
1195 int count;
1196 int brace_flag = 0;
1197
1198 /* offset is always 0 if parser has already popped the stack pointer */
1199 if (semantic_parser)
1200 stack_offset = 0;
1201
1202 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1203 if (!no_lines_flag)
1204 obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
1205 lineno, quotearg_style (c_quoting_style,
1206 muscle_find ("filename")));
1207 obstack_1grow (&guard_obstack, '{');
1208
1209 count = 0;
1210 c = getc (finput);
1211
1212 while (brace_flag ? (count > 0) : (c != ';'))
1213 {
1214 switch (c)
1215 {
1216 case '\n':
1217 obstack_1grow (&guard_obstack, c);
1218 lineno++;
1219 break;
1220
1221 case '{':
1222 obstack_1grow (&guard_obstack, c);
1223 brace_flag = 1;
1224 count++;
1225 break;
1226
1227 case '}':
1228 obstack_1grow (&guard_obstack, c);
1229 if (count > 0)
1230 count--;
1231 else
1232 {
1233 complain (_("unmatched %s"), "`}'");
1234 c = getc (finput); /* skip it */
1235 }
1236 break;
1237
1238 case '\'':
1239 case '"':
1240 copy_string (finput, &guard_obstack, c);
1241 break;
1242
1243 case '/':
1244 copy_comment (finput, &guard_obstack);
1245 break;
1246
1247 case '$':
1248 copy_dollar (finput, &guard_obstack, rule, stack_offset);
1249 break;
1250
1251 case '@':
1252 copy_at (finput, &guard_obstack, stack_offset);
1253 break;
1254
1255 case EOF:
1256 fatal ("%s", _("unterminated %guard clause"));
1257
1258 default:
1259 obstack_1grow (&guard_obstack, c);
1260 }
1261
1262 if (c != '}' || count != 0)
1263 c = getc (finput);
1264 }
1265
1266 c = skip_white_space ();
1267
1268 obstack_sgrow (&guard_obstack, ";\n break;}");
1269 if (c == '{')
1270 copy_action (rule, stack_offset);
1271 else if (c == '=')
1272 {
1273 c = getc (finput); /* why not skip_white_space -wjh */
1274 if (c == '{')
1275 copy_action (rule, stack_offset);
1276 }
1277 else
1278 ungetc (c, finput);
1279 }
1280 \f
1281
1282 static void
1283 record_rule_line (void)
1284 {
1285 /* Record each rule's source line number in rline table. */
1286
1287 if (nrules >= rline_allocated)
1288 {
1289 rline_allocated = nrules * 2;
1290 rline = XREALLOC (rline, short, rline_allocated);
1291 }
1292 rline[nrules] = lineno;
1293 }
1294
1295
1296 /*-------------------------------------------------------------------.
1297 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1298 | with the user's names. |
1299 `-------------------------------------------------------------------*/
1300
1301 static bucket *
1302 gensym (void)
1303 {
1304 /* Incremented for each generated symbol */
1305 static int gensym_count = 0;
1306 static char buf[256];
1307
1308 bucket *sym;
1309
1310 sprintf (buf, "@%d", ++gensym_count);
1311 token_buffer = buf;
1312 sym = getsym (token_buffer);
1313 sym->class = nterm_sym;
1314 sym->value = nvars++;
1315 return sym;
1316 }
1317
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  This is unused.  It is only called from
   the #if 0 part of readgram.

   Return the first token that does not belong to the declaration:
   the token following the closing `;', or the offending token when
   the declaration is malformed.  */

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the offending token back to the caller.  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1369 \f
1370 /*------------------------------------------------------------------.
1371 | Parse the input grammar into a one symbol_list structure. Each |
1372 | rule is represented by a sequence of symbols: the left hand side |
1373 | followed by the contents of the right hand side, followed by a |
1374 | null pointer instead of a symbol to terminate the rule. The next |
1375 | symbol is the lhs of the following rule. |
1376 | |
1377 | All guards and actions are copied out to the appropriate files, |
1378 | labelled by the rule number they apply to. |
1379 `------------------------------------------------------------------*/
1380
1381 static void
1382 readgram (void)
1383 {
1384 token_t t;
1385 bucket *lhs = NULL;
1386 symbol_list *p;
1387 symbol_list *p1;
1388 bucket *bp;
1389
1390 /* Points to first symbol_list of current rule. its symbol is the
1391 lhs of the rule. */
1392 symbol_list *crule;
1393 /* Points to the symbol_list preceding crule. */
1394 symbol_list *crule1;
1395
1396 p1 = NULL;
1397
1398 t = lex ();
1399
1400 while (t != tok_two_percents && t != tok_eof)
1401 {
1402 if (t == tok_identifier || t == tok_bar)
1403 {
1404 int action_flag = 0;
1405 /* Number of symbols in rhs of this rule so far */
1406 int rulelength = 0;
1407 int xactions = 0; /* JF for error checking */
1408 bucket *first_rhs = 0;
1409
1410 if (t == tok_identifier)
1411 {
1412 lhs = symval;
1413
1414 if (!start_flag)
1415 {
1416 startval = lhs;
1417 start_flag = 1;
1418 }
1419
1420 t = lex ();
1421 if (t != tok_colon)
1422 {
1423 complain (_("ill-formed rule: initial symbol not followed by colon"));
1424 unlex (t);
1425 }
1426 }
1427
1428 if (nrules == 0 && t == tok_bar)
1429 {
1430 complain (_("grammar starts with vertical bar"));
1431 lhs = symval; /* BOGUS: use a random symval */
1432 }
1433 /* start a new rule and record its lhs. */
1434
1435 nrules++;
1436 nitems++;
1437
1438 record_rule_line ();
1439
1440 p = XCALLOC (symbol_list, 1);
1441 p->sym = lhs;
1442
1443 crule1 = p1;
1444 if (p1)
1445 p1->next = p;
1446 else
1447 grammar = p;
1448
1449 p1 = p;
1450 crule = p;
1451
1452 /* mark the rule's lhs as a nonterminal if not already so. */
1453
1454 if (lhs->class == unknown_sym)
1455 {
1456 lhs->class = nterm_sym;
1457 lhs->value = nvars;
1458 nvars++;
1459 }
1460 else if (lhs->class == token_sym)
1461 complain (_("rule given for %s, which is a token"), lhs->tag);
1462
1463 /* read the rhs of the rule. */
1464
1465 for (;;)
1466 {
1467 t = lex ();
1468 if (t == tok_prec)
1469 {
1470 t = lex ();
1471 crule->ruleprec = symval;
1472 t = lex ();
1473 }
1474
1475 if (!(t == tok_identifier || t == tok_left_curly))
1476 break;
1477
1478 /* If next token is an identifier, see if a colon follows it.
1479 If one does, exit this rule now. */
1480 if (t == tok_identifier)
1481 {
1482 bucket *ssave;
1483 token_t t1;
1484
1485 ssave = symval;
1486 t1 = lex ();
1487 unlex (t1);
1488 symval = ssave;
1489 if (t1 == tok_colon)
1490 break;
1491
1492 if (!first_rhs) /* JF */
1493 first_rhs = symval;
1494 /* Not followed by colon =>
1495 process as part of this rule's rhs. */
1496 }
1497
1498 /* If we just passed an action, that action was in the middle
1499 of a rule, so make a dummy rule to reduce it to a
1500 non-terminal. */
1501 if (action_flag)
1502 {
1503 bucket *sdummy;
1504
1505 /* Since the action was written out with this rule's
1506 number, we must give the new rule this number by
1507 inserting the new rule before it. */
1508
1509 /* Make a dummy nonterminal, a gensym. */
1510 sdummy = gensym ();
1511
1512 /* Make a new rule, whose body is empty,
1513 before the current one, so that the action
1514 just read can belong to it. */
1515 nrules++;
1516 nitems++;
1517 record_rule_line ();
1518 p = XCALLOC (symbol_list, 1);
1519 if (crule1)
1520 crule1->next = p;
1521 else
1522 grammar = p;
1523 p->sym = sdummy;
1524 crule1 = XCALLOC (symbol_list, 1);
1525 p->next = crule1;
1526 crule1->next = crule;
1527
1528 /* Insert the dummy generated by that rule into this
1529 rule. */
1530 nitems++;
1531 p = XCALLOC (symbol_list, 1);
1532 p->sym = sdummy;
1533 p1->next = p;
1534 p1 = p;
1535
1536 action_flag = 0;
1537 }
1538
1539 if (t == tok_identifier)
1540 {
1541 nitems++;
1542 p = XCALLOC (symbol_list, 1);
1543 p->sym = symval;
1544 p1->next = p;
1545 p1 = p;
1546 }
1547 else /* handle an action. */
1548 {
1549 copy_action (crule, rulelength);
1550 action_flag = 1;
1551 xactions++; /* JF */
1552 }
1553 rulelength++;
1554 } /* end of read rhs of rule */
1555
1556 /* Put an empty link in the list to mark the end of this rule */
1557 p = XCALLOC (symbol_list, 1);
1558 p1->next = p;
1559 p1 = p;
1560
1561 if (t == tok_prec)
1562 {
1563 complain (_("two @prec's in a row"));
1564 t = lex ();
1565 crule->ruleprec = symval;
1566 t = lex ();
1567 }
1568 if (t == tok_guard)
1569 {
1570 if (!semantic_parser)
1571 complain (_("%%guard present but %%semantic_parser not specified"));
1572
1573 copy_guard (crule, rulelength);
1574 t = lex ();
1575 }
1576 else if (t == tok_left_curly)
1577 {
1578 /* This case never occurs -wjh */
1579 if (action_flag)
1580 complain (_("two actions at end of one rule"));
1581 copy_action (crule, rulelength);
1582 action_flag = 1;
1583 xactions++; /* -wjh */
1584 t = lex ();
1585 }
1586 /* If $$ is being set in default way, report if any type
1587 mismatch. */
1588 else if (!xactions
1589 && first_rhs && lhs->type_name != first_rhs->type_name)
1590 {
1591 if (lhs->type_name == 0
1592 || first_rhs->type_name == 0
1593 || strcmp (lhs->type_name, first_rhs->type_name))
1594 complain (_("type clash (`%s' `%s') on default action"),
1595 lhs->type_name ? lhs->type_name : "",
1596 first_rhs->type_name ? first_rhs->type_name : "");
1597 }
1598 /* Warn if there is no default for $$ but we need one. */
1599 else if (!xactions && !first_rhs && lhs->type_name != 0)
1600 complain (_("empty rule for typed nonterminal, and no action"));
1601 if (t == tok_semicolon)
1602 t = lex ();
1603 }
1604 #if 0
1605 /* these things can appear as alternatives to rules. */
1606 /* NO, they cannot.
1607 a) none of the documentation allows them
1608 b) most of them scan forward until finding a next %
1609 thus they may swallow lots of intervening rules
1610 */
1611 else if (t == tok_token)
1612 {
1613 parse_token_decl (token_sym, nterm_sym);
1614 t = lex ();
1615 }
1616 else if (t == tok_nterm)
1617 {
1618 parse_token_decl (nterm_sym, token_sym);
1619 t = lex ();
1620 }
1621 else if (t == tok_type)
1622 {
1623 t = get_type ();
1624 }
1625 else if (t == tok_union)
1626 {
1627 parse_union_decl ();
1628 t = lex ();
1629 }
1630 else if (t == tok_expect)
1631 {
1632 parse_expect_decl ();
1633 t = lex ();
1634 }
1635 else if (t == tok_start)
1636 {
1637 parse_start_decl ();
1638 t = lex ();
1639 }
1640 #endif
1641
1642 else
1643 {
1644 complain (_("invalid input: %s"), quote (token_buffer));
1645 t = lex ();
1646 }
1647 }
1648
1649 /* grammar has been read. Do some checking */
1650
1651 if (nsyms > MAXSHORT)
1652 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1653 MAXSHORT);
1654 if (nrules == 0)
1655 fatal (_("no rules in the input grammar"));
1656
1657 /* Report any undefined symbols and consider them nonterminals. */
1658
1659 for (bp = firstsymbol; bp; bp = bp->next)
1660 if (bp->class == unknown_sym)
1661 {
1662 complain (_
1663 ("symbol %s is used, but is not defined as a token and has no rules"),
1664 bp->tag);
1665 bp->class = nterm_sym;
1666 bp->value = nvars++;
1667 }
1668
1669 ntokens = nsyms - nvars;
1670 }
1671
1672 /* At the end of the grammar file, some C source code must
1673 be stored. It is going to be associated to the epilogue
1674 directive. */
1675 static void
1676 read_additionnal_code (void)
1677 {
1678 char c;
1679 struct obstack el_obstack;
1680
1681 obstack_init (&el_obstack);
1682
1683 while ((c = getc (finput)) != EOF)
1684 obstack_1grow (&el_obstack, c);
1685
1686 obstack_1grow (&el_obstack, 0);
1687 muscle_insert ("epilogue", obstack_finish (&el_obstack));
1688 }
1689
1690 \f
1691 /*--------------------------------------------------------------.
1692 | For named tokens, but not literal ones, define the name. The |
1693 | value is the user token number. |
1694 `--------------------------------------------------------------*/
1695
1696 static void
1697 output_token_defines (struct obstack *oout)
1698 {
1699 bucket *bp;
1700 char *cp, *symbol;
1701 char c;
1702
1703 for (bp = firstsymbol; bp; bp = bp->next)
1704 {
1705 symbol = bp->tag; /* get symbol */
1706
1707 if (bp->value >= ntokens)
1708 continue;
1709 if (bp->user_token_number == SALIAS)
1710 continue;
1711 if ('\'' == *symbol)
1712 continue; /* skip literal character */
1713 if (bp == errtoken)
1714 continue; /* skip error token */
1715 if ('\"' == *symbol)
1716 {
1717 /* use literal string only if given a symbol with an alias */
1718 if (bp->alias)
1719 symbol = bp->alias->tag;
1720 else
1721 continue;
1722 }
1723
1724 /* Don't #define nonliteral tokens whose names contain periods. */
1725 cp = symbol;
1726 while ((c = *cp++) && c != '.');
1727 if (c != '\0')
1728 continue;
1729
1730 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
1731 symbol,
1732 (translations ? bp->user_token_number : bp->value));
1733 if (semantic_parser)
1734 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1735 }
1736 }
1737
1738
1739 /*------------------------------------------------------------------.
1740 | Assign symbol numbers, and write definition of token names into |
1741 | FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
1742 | of symbols. |
1743 `------------------------------------------------------------------*/
1744
1745 static void
1746 packsymbols (void)
1747 {
1748 bucket *bp;
1749 int tokno = 1;
1750 int i;
1751 int last_user_token_number;
1752 static char DOLLAR[] = "$";
1753
1754 tags = XCALLOC (char *, nsyms + 1);
1755 tags[0] = DOLLAR;
1756 user_toknums = XCALLOC (short, nsyms + 1);
1757 user_toknums[0] = 0;
1758
1759 sprec = XCALLOC (short, nsyms);
1760 sassoc = XCALLOC (short, nsyms);
1761
1762 max_user_token_number = 256;
1763 last_user_token_number = 256;
1764
1765 for (bp = firstsymbol; bp; bp = bp->next)
1766 {
1767 if (bp->class == nterm_sym)
1768 {
1769 bp->value += ntokens;
1770 }
1771 else if (bp->alias)
1772 {
1773 /* this symbol and its alias are a single token defn.
1774 allocate a tokno, and assign to both check agreement of
1775 ->prec and ->assoc fields and make both the same */
1776 if (bp->value == 0)
1777 bp->value = bp->alias->value = tokno++;
1778
1779 if (bp->prec != bp->alias->prec)
1780 {
1781 if (bp->prec != 0 && bp->alias->prec != 0
1782 && bp->user_token_number == SALIAS)
1783 complain (_("conflicting precedences for %s and %s"),
1784 bp->tag, bp->alias->tag);
1785 if (bp->prec != 0)
1786 bp->alias->prec = bp->prec;
1787 else
1788 bp->prec = bp->alias->prec;
1789 }
1790
1791 if (bp->assoc != bp->alias->assoc)
1792 {
1793 if (bp->assoc != 0 && bp->alias->assoc != 0
1794 && bp->user_token_number == SALIAS)
1795 complain (_("conflicting assoc values for %s and %s"),
1796 bp->tag, bp->alias->tag);
1797 if (bp->assoc != 0)
1798 bp->alias->assoc = bp->assoc;
1799 else
1800 bp->assoc = bp->alias->assoc;
1801 }
1802
1803 if (bp->user_token_number == SALIAS)
1804 continue; /* do not do processing below for SALIASs */
1805
1806 }
1807 else /* bp->class == token_sym */
1808 {
1809 bp->value = tokno++;
1810 }
1811
1812 if (bp->class == token_sym)
1813 {
1814 if (translations && !(bp->user_token_number))
1815 bp->user_token_number = ++last_user_token_number;
1816 if (bp->user_token_number > max_user_token_number)
1817 max_user_token_number = bp->user_token_number;
1818 }
1819
1820 tags[bp->value] = bp->tag;
1821 user_toknums[bp->value] = bp->user_token_number;
1822 sprec[bp->value] = bp->prec;
1823 sassoc[bp->value] = bp->assoc;
1824
1825 }
1826
1827 if (translations)
1828 {
1829 int j;
1830
1831 token_translations = XCALLOC (short, max_user_token_number + 1);
1832
1833 /* initialize all entries for literal tokens to 2, the internal
1834 token number for $undefined., which represents all invalid
1835 inputs. */
1836 for (j = 0; j <= max_user_token_number; j++)
1837 token_translations[j] = 2;
1838
1839 for (bp = firstsymbol; bp; bp = bp->next)
1840 {
1841 if (bp->value >= ntokens)
1842 continue; /* non-terminal */
1843 if (bp->user_token_number == SALIAS)
1844 continue;
1845 if (token_translations[bp->user_token_number] != 2)
1846 complain (_("tokens %s and %s both assigned number %d"),
1847 tags[token_translations[bp->user_token_number]],
1848 bp->tag, bp->user_token_number);
1849 token_translations[bp->user_token_number] = bp->value;
1850 }
1851 }
1852
1853 error_token_number = errtoken->value;
1854
1855 output_token_defines (&output_obstack);
1856 obstack_1grow (&output_obstack, 0);
1857 muscle_insert ("tokendef", obstack_finish (&output_obstack));
1858
1859 #if 0
1860 if (!no_parser_flag)
1861 output_token_defines (&table_obstack);
1862 #endif
1863
1864 if (startval->class == unknown_sym)
1865 fatal (_("the start symbol %s is undefined"), startval->tag);
1866 else if (startval->class == token_sym)
1867 fatal (_("the start symbol %s is a token"), startval->tag);
1868
1869 start_symbol = startval->value;
1870
1871 if (defines_flag)
1872 {
1873 output_token_defines (&defines_obstack);
1874
1875 if (!pure_parser)
1876 {
1877 if (spec_name_prefix)
1878 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1879 spec_name_prefix);
1880 else
1881 obstack_sgrow (&defines_obstack,
1882 "\nextern YYSTYPE yylval;\n");
1883 }
1884
1885 if (semantic_parser)
1886 for (i = ntokens; i < nsyms; i++)
1887 {
1888 /* don't make these for dummy nonterminals made by gensym. */
1889 if (*tags[i] != '@')
1890 obstack_fgrow2 (&defines_obstack,
1891 "# define\tNT%s\t%d\n", tags[i], i);
1892 }
1893 #if 0
1894 /* `fdefines' is now a temporary file, so we need to copy its
1895 contents in `done', so we can't close it here. */
1896 fclose (fdefines);
1897 fdefines = NULL;
1898 #endif
1899 }
1900 }
1901
1902
1903 /*---------------------------------------------------------------.
1904 | Convert the rules into the representation using RRHS, RLHS and |
1905 | RITEMS. |
1906 `---------------------------------------------------------------*/
1907
1908 static void
1909 packgram (void)
1910 {
1911 int itemno;
1912 int ruleno;
1913 symbol_list *p;
1914
1915 bucket *ruleprec;
1916
1917 ritem = XCALLOC (short, nitems + 1);
1918 rlhs = XCALLOC (short, nrules) - 1;
1919 rrhs = XCALLOC (short, nrules) - 1;
1920 rprec = XCALLOC (short, nrules) - 1;
1921 rprecsym = XCALLOC (short, nrules) - 1;
1922 rassoc = XCALLOC (short, nrules) - 1;
1923
1924 itemno = 0;
1925 ruleno = 1;
1926
1927 p = grammar;
1928 while (p)
1929 {
1930 rlhs[ruleno] = p->sym->value;
1931 rrhs[ruleno] = itemno;
1932 ruleprec = p->ruleprec;
1933
1934 p = p->next;
1935 while (p && p->sym)
1936 {
1937 ritem[itemno++] = p->sym->value;
1938 /* A rule gets by default the precedence and associativity
1939 of the last token in it. */
1940 if (p->sym->class == token_sym)
1941 {
1942 rprec[ruleno] = p->sym->prec;
1943 rassoc[ruleno] = p->sym->assoc;
1944 }
1945 if (p)
1946 p = p->next;
1947 }
1948
1949 /* If this rule has a %prec,
1950 the specified symbol's precedence replaces the default. */
1951 if (ruleprec)
1952 {
1953 rprec[ruleno] = ruleprec->prec;
1954 rassoc[ruleno] = ruleprec->assoc;
1955 rprecsym[ruleno] = ruleprec->value;
1956 }
1957
1958 ritem[itemno++] = -ruleno;
1959 ruleno++;
1960
1961 if (p)
1962 p = p->next;
1963 }
1964
1965 ritem[itemno] = 0;
1966 }
1967 \f
1968 /*-------------------------------------------------------------------.
1969 | Read in the grammar specification and record it in the format |
1970 | described in gram.h. All guards are copied into the GUARD_OBSTACK |
1971 | and all actions into ACTION_OBSTACK, in each case forming the body |
1972 | of a C function (YYGUARD or YYACTION) which contains a switch |
1973 | statement to decide which guard or action to execute. |
1974 `-------------------------------------------------------------------*/
1975
1976 void
1977 reader (void)
1978 {
1979 start_flag = 0;
1980 startval = NULL; /* start symbol not specified yet. */
1981
1982 #if 0
1983 /* initially assume token number translation not needed. */
1984 translations = 0;
1985 #endif
1986 /* Nowadays translations is always set to 1, since we give `error' a
1987 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1988 */
1989 translations = 1;
1990
1991 nsyms = 1;
1992 nvars = 0;
1993 nrules = 0;
1994 nitems = 0;
1995 rline_allocated = 10;
1996 rline = XCALLOC (short, rline_allocated);
1997
1998 typed = 0;
1999 lastprec = 0;
2000
2001 semantic_parser = 0;
2002 pure_parser = 0;
2003
2004 grammar = NULL;
2005
2006 init_lex ();
2007 lineno = 1;
2008
2009 /* Initialize the muscle obstack. */
2010 obstack_init (&muscle_obstack);
2011
2012 /* Initialize the symbol table. */
2013 tabinit ();
2014
2015 /* Construct the error token */
2016 errtoken = getsym ("error");
2017 errtoken->class = token_sym;
2018 errtoken->user_token_number = 256; /* Value specified by POSIX. */
2019
2020 /* Construct a token that represents all undefined literal tokens.
2021 It is always token number 2. */
2022 undeftoken = getsym ("$undefined.");
2023 undeftoken->class = token_sym;
2024 undeftoken->user_token_number = 2;
2025
2026 /* Read the declaration section. Copy %{ ... %} groups to
2027 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2028 etc. found there. */
2029 read_declarations ();
2030 /* Read in the grammar, build grammar in list form. Write out
2031 guards and actions. */
2032 readgram ();
2033 /* Some C code is given at the end of the grammar file. */
2034 read_additionnal_code ();
2035
2036 /* Now we know whether we need the line-number stack. If we do,
2037 write its type into the .tab.h file.
2038 This is no longer need with header skeleton. */
2039
2040 /* Assign the symbols their symbol numbers. Write #defines for the
2041 token symbols into FDEFINES if requested. */
2042 packsymbols ();
2043 /* Convert the grammar into the format described in gram.h. */
2044 packgram ();
2045 /* Free the symbol table data structure since symbols are now all
2046 referred to by symbol number. */
2047 free_symtab ();
2048 }