]> git.saurik.com Git - bison.git/blob - src/reader.c
35e08501a04b9a018aa9a4dcb6c5cc8cc38c4105
[bison.git] / src / reader.c
1 /* Input parser for bison
2 Copyright 1984, 1986, 1989, 1992, 1998, 2000
3 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
39
40 /* Number of slots allocated (but not necessarily used yet) in `rline'; record_rule_line enlarges the table when `nrules' reaches this value. */
41 static int rline_allocated;
42
43 typedef struct symbol_list /* One node of the singly linked list of symbols that holds the grammar rules. */
44 {
45 struct symbol_list *next; /* Following node, or NULL at the end of the list. */
46 bucket *sym; /* Symbol at this position; per readgram's comment a null pointer here terminates a rule. */
47 bucket *ruleprec; /* NOTE(review): presumably the symbol named by %prec for this rule -- not used in the visible code, confirm. */
48 }
49 symbol_list;
50
51 int lineno; /* Current line number in the input; incremented by the routines below as they consume newlines. */
52 char **tags; /* NOTE(review): not used in the visible part of the file; presumably symbol names indexed by symbol number -- confirm. */
53 short *user_toknums; /* NOTE(review): presumably the user token number of each symbol -- confirm. */
54 static symbol_list *grammar; /* NOTE(review): presumably the head of the rule list built by readgram -- confirm. */
55 static int start_flag; /* Nonzero once a start symbol has been stored in `startval'. */
56 static bucket *startval; /* The start symbol: set by %start, otherwise by readgram from the first rule's lhs. */
57
58 /* Nonzero if components of semantic values are used, implying
59 they must be unions. */
60 static int value_components_used;
61
62 /* Nonzero if %union has been seen. */
63 static int typed;
64
65 /* Incremented for each %left, %right or %nonassoc seen */
66 static int lastprec;
67
68 static bucket *errtoken; /* NOTE(review): presumably the predefined error token; not assigned in the visible code -- confirm. */
69 static bucket *undeftoken; /* NOTE(review): presumably the token standing for undefined input; not assigned in the visible code -- confirm. */
70 \f
71
72 /*===================\
73 | Low level lexing. |
74 \===================*/
75
76 static void
77 skip_to_char (int target)
78 {
79 int c;
80 if (target == '\n')
81 complain (_(" Skipping to next \\n"));
82 else
83 complain (_(" Skipping to next %c"), target);
84
85 do
86 c = skip_white_space ();
87 while (c != target && c != EOF);
88 if (c != EOF)
89 ungetc (c, finput);
90 }
91
92
/* Read a signed decimal integer from STREAM and return its value.

   An optional leading `-' is accepted, followed by any number of
   digits (none at all yields 0).  The first character that is not
   part of the number is pushed back onto STREAM.

   The magnitude saturates at INT_MAX instead of overflowing, so an
   absurdly long run of digits in the grammar file cannot trigger
   undefined behavior; every digit is still consumed.  */

static inline int
read_signed_integer (FILE *stream)
{
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Would 10 * n + digit exceed INT_MAX?  Once saturated, this
         test stays true for every further digit.  */
      if (n > (INT_MAX - digit) / 10)
        n = INT_MAX;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a harmless no-op.  */
  ungetc (c, stream);

  return sign * n;
}
120 \f
121 /*--------------------------------------------------------------.
122 | Get the data type (alternative in the union) of the value for |
123 | symbol N in rule RULE. |
124 `--------------------------------------------------------------*/
125
126 static char *
127 get_type_name (int n, symbol_list * rule)
128 {
129 int i;
130 symbol_list *rp;
131
132 if (n < 0)
133 {
134 complain (_("invalid $ value"));
135 return NULL;
136 }
137
138 rp = rule;
139 i = 0;
140
141 while (i < n)
142 {
143 rp = rp->next;
144 if (rp == NULL || rp->sym == NULL)
145 {
146 complain (_("invalid $ value"));
147 return NULL;
148 }
149 i++;
150 }
151
152 return rp->sym->type_name;
153 }
154 \f
155 /*------------------------------------------------------------.
156 | Dump the string from FIN to OOUT if non null. MATCH is the |
157 | delimiter of the string (either ' or "). |
158 `------------------------------------------------------------*/
159
160 static inline void
161 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
162 {
163 int c;
164
165 if (store)
166 obstack_1grow (oout, match);
167
168 c = getc (fin);
169
170 while (c != match)
171 {
172 if (c == EOF)
173 fatal (_("unterminated string at end of file"));
174 if (c == '\n')
175 {
176 complain (_("unterminated string"));
177 ungetc (c, fin);
178 c = match; /* invent terminator */
179 continue;
180 }
181
182 obstack_1grow (oout, c);
183
184 if (c == '\\')
185 {
186 c = getc (fin);
187 if (c == EOF)
188 fatal (_("unterminated string at end of file"));
189 obstack_1grow (oout, c);
190
191 if (c == '\n')
192 lineno++;
193 }
194
195 c = getc (fin);
196 }
197
198 if (store)
199 obstack_1grow (oout, c);
200 }
201
/* Copy a string or character literal, delimiters included, from FIN
   to OOUT.  MATCH is the opening delimiter, which the caller has
   already read.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  const int keep_delimiters = 1;

  copy_string2 (fin, oout, match, keep_delimiters);
}
209
/* Copy the longest run of letters, digits and underscores found at
   the current position of FIN to OOUT.  The character that stops the
   run is pushed back onto FIN.  */

static inline void
copy_identifier (FILE *fin, struct obstack *oout)
{
  int ch = getc (fin);

  while (ch == '_' || isalnum (ch))
    {
      obstack_1grow (oout, ch);
      ch = getc (fin);
    }

  ungetc (ch, fin);
}
222
223 /*-----------------------------------------------------------------.
224 | Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
225 | NULL). In fact we just saw a `/', which might or might not be a |
226 | comment. In any case, copy what we saw. |
227 | |
228 | OUT2 might be NULL. |
229 `-----------------------------------------------------------------*/
230
231 static inline void
232 copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
233 {
234 int cplus_comment;
235 int ended;
236 int c;
237
238 /* We read a `/', output it. */
239 obstack_1grow (oout1, '/');
240 if (oout2)
241 obstack_1grow (oout2, '/');
242
243 switch ((c = getc (fin)))
244 {
245 case '/':
246 cplus_comment = 1;
247 break;
248 case '*':
249 cplus_comment = 0;
250 break;
251 default:
252 ungetc (c, fin);
253 return;
254 }
255
256 obstack_1grow (oout1, c);
257 if (oout2)
258 obstack_1grow (oout2, c);
259 c = getc (fin);
260
261 ended = 0;
262 while (!ended)
263 {
264 if (!cplus_comment && c == '*')
265 {
266 while (c == '*')
267 {
268 obstack_1grow (oout1, c);
269 if (oout2)
270 obstack_1grow (oout2, c);
271 c = getc (fin);
272 }
273
274 if (c == '/')
275 {
276 obstack_1grow (oout1, c);
277 if (oout2)
278 obstack_1grow (oout2, c);
279 ended = 1;
280 }
281 }
282 else if (c == '\n')
283 {
284 lineno++;
285 obstack_1grow (oout1, c);
286 if (oout2)
287 obstack_1grow (oout2, c);
288 if (cplus_comment)
289 ended = 1;
290 else
291 c = getc (fin);
292 }
293 else if (c == EOF)
294 fatal (_("unterminated comment"));
295 else
296 {
297 obstack_1grow (oout1, c);
298 if (oout2)
299 obstack_1grow (oout2, c);
300 c = getc (fin);
301 }
302 }
303 }
304
305
/* Copy the possible comment whose leading `/' has just been read
   from FIN to the single obstack OOUT.  */

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  struct obstack *no_second_output = NULL;

  copy_comment2 (fin, oout, no_second_output);
}
316
317
318 /*-----------------------------------------------------------------.
319 | FIN is pointing to a location (i.e., a `@'). Output to OOUT a |
320 | reference to this location. STACK_OFFSET is the number of values |
321 | in the current rule so far, which says where to find `$0' with |
322 | respect to the top of the stack. |
323 `-----------------------------------------------------------------*/
324
325 static inline void
326 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
327 {
328 int c;
329
330 c = getc (fin);
331 if (c == '$')
332 {
333 obstack_sgrow (oout, "yyloc");
334 locations_flag = 1;
335 }
336 else if (isdigit (c) || c == '-')
337 {
338 int n;
339
340 ungetc (c, fin);
341 n = read_signed_integer (fin);
342
343 obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
344 locations_flag = 1;
345 }
346 else
347 {
348 char buf[] = "@c";
349 buf[1] = c;
350 complain (_("%s is invalid"), quote (buf));
351 }
352 }
353
354
355 /*-------------------------------------------------------------------.
356 | FIN is pointing to a wannabee semantic value (i.e., a `$'). |
357 | |
358 | Possible inputs: $[<TYPENAME>]($|integer) |
359 | |
360 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
361 | the number of values in the current rule so far, which says where |
362 | to find `$0' with respect to the top of the stack. |
363 `-------------------------------------------------------------------*/
364
365 static inline void
366 copy_dollar (FILE *fin, struct obstack *oout,
367 symbol_list *rule, int stack_offset)
368 {
369 int c = getc (fin);
370 const char *type_name = NULL;
371
372 /* Get the type name if explicit. */
373 if (c == '<')
374 {
375 read_type_name (fin);
376 type_name = token_buffer;
377 value_components_used = 1;
378 c = getc (fin);
379 }
380
381 if (c == '$')
382 {
383 obstack_sgrow (oout, "yyval");
384
385 if (!type_name)
386 type_name = get_type_name (0, rule);
387 if (type_name)
388 obstack_fgrow1 (oout, ".%s", type_name);
389 if (!type_name && typed)
390 complain (_("$$ of `%s' has no declared type"),
391 rule->sym->tag);
392 }
393 else if (isdigit (c) || c == '-')
394 {
395 int n;
396 ungetc (c, fin);
397 n = read_signed_integer (fin);
398
399 if (!type_name && n > 0)
400 type_name = get_type_name (n, rule);
401
402 obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
403
404 if (type_name)
405 obstack_fgrow1 (oout, ".%s", type_name);
406 if (!type_name && typed)
407 complain (_("$%d of `%s' has no declared type"),
408 n, rule->sym->tag);
409 }
410 else
411 {
412 char buf[] = "$c";
413 buf[1] = c;
414 complain (_("%s is invalid"), quote (buf));
415 }
416 }
417 \f
418 /*-------------------------------------------------------------------.
419 | Copy the contents of a `%{ ... %}' into the definitions file. The |
420 | `%{' has already been read. Return after reading the `%}'. |
421 `-------------------------------------------------------------------*/
422
423 static void
424 copy_definition (void)
425 {
426 int c;
427 /* -1 while reading a character if prev char was %. */
428 int after_percent;
429
430 #if 0
431 if (!no_lines_flag)
432 {
433 obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
434 lineno, quotearg_style (c_quoting_style,
435 muscle_find("filename")));
436 }
437 #endif
438
439 after_percent = 0;
440
441 c = getc (finput);
442
443 for (;;)
444 {
445 switch (c)
446 {
447 case '\n':
448 obstack_1grow (&attrs_obstack, c);
449 lineno++;
450 break;
451
452 case '%':
453 after_percent = -1;
454 break;
455
456 case '\'':
457 case '"':
458 copy_string (finput, &attrs_obstack, c);
459 break;
460
461 case '/':
462 copy_comment (finput, &attrs_obstack);
463 break;
464
465 case EOF:
466 fatal ("%s", _("unterminated `%{' definition"));
467
468 default:
469 obstack_1grow (&attrs_obstack, c);
470 }
471
472 c = getc (finput);
473
474 if (after_percent)
475 {
476 if (c == '}')
477 return;
478 obstack_1grow (&attrs_obstack, '%');
479 }
480 after_percent = 0;
481 }
482 }
483
484
485 /*-------------------------------------------------------------------.
486 | Parse what comes after %token or %nterm. For %token, WHAT_IS is |
487 | token_sym and WHAT_IS_NOT is nterm_sym. For %nterm, the arguments |
488 | are reversed. |
489 `-------------------------------------------------------------------*/
490
491 static void
492 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
493 {
494 token_t token = 0;
495 char *typename = 0;
496
497 /* The symbol being defined. */
498 struct bucket *symbol = NULL;
499
500 /* After `%token' and `%nterm', any number of symbols maybe be
501 defined. */
502 for (;;)
503 {
504 int tmp_char = ungetc (skip_white_space (), finput);
505
506 /* `%' (for instance from `%token', or from `%%' etc.) is the
507 only valid means to end this declaration. */
508 if (tmp_char == '%')
509 return;
510 if (tmp_char == EOF)
511 fatal (_("Premature EOF after %s"), token_buffer);
512
513 token = lex ();
514 if (token == tok_comma)
515 {
516 symbol = NULL;
517 continue;
518 }
519 if (token == tok_typename)
520 {
521 typename = xstrdup (token_buffer);
522 value_components_used = 1;
523 symbol = NULL;
524 }
525 else if (token == tok_identifier && *symval->tag == '\"' && symbol)
526 {
527 if (symval->alias)
528 warn (_("symbol `%s' used more than once as a literal string"),
529 symval->tag);
530 else if (symbol->alias)
531 warn (_("symbol `%s' given more than one literal string"),
532 symbol->tag);
533 else
534 {
535 symval->class = token_sym;
536 symval->type_name = typename;
537 symval->user_token_number = symbol->user_token_number;
538 symbol->user_token_number = SALIAS;
539 symval->alias = symbol;
540 symbol->alias = symval;
541 /* symbol and symval combined are only one symbol */
542 nsyms--;
543 }
544 translations = 1;
545 symbol = NULL;
546 }
547 else if (token == tok_identifier)
548 {
549 int oldclass = symval->class;
550 symbol = symval;
551
552 if (symbol->class == what_is_not)
553 complain (_("symbol %s redefined"), symbol->tag);
554 symbol->class = what_is;
555 if (what_is == nterm_sym && oldclass != nterm_sym)
556 symbol->value = nvars++;
557
558 if (typename)
559 {
560 if (symbol->type_name == NULL)
561 symbol->type_name = typename;
562 else if (strcmp (typename, symbol->type_name) != 0)
563 complain (_("type redeclaration for %s"), symbol->tag);
564 }
565 }
566 else if (symbol && token == tok_number)
567 {
568 symbol->user_token_number = numval;
569 translations = 1;
570 }
571 else
572 {
573 complain (_("`%s' is invalid in %s"),
574 token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
575 skip_to_char ('%');
576 }
577 }
578
579 }
580
581
582 /*------------------------------.
583 | Parse what comes after %start |
584 `------------------------------*/
585
586 static void
587 parse_start_decl (void)
588 {
589 if (start_flag)
590 complain (_("multiple %s declarations"), "%start");
591 if (lex () != tok_identifier)
592 complain (_("invalid %s declaration"), "%start");
593 else
594 {
595 start_flag = 1;
596 startval = symval;
597 }
598 }
599
600 /*-----------------------------------------------------------.
601 | read in a %type declaration and record its information for |
602 | get_type_name to access |
603 `-----------------------------------------------------------*/
604
605 static void
606 parse_type_decl (void)
607 {
608 char *name;
609
610 if (lex () != tok_typename)
611 {
612 complain ("%s", _("%type declaration has no <typename>"));
613 skip_to_char ('%');
614 return;
615 }
616
617 name = xstrdup (token_buffer);
618
619 for (;;)
620 {
621 token_t t;
622 int tmp_char = ungetc (skip_white_space (), finput);
623
624 if (tmp_char == '%')
625 return;
626 if (tmp_char == EOF)
627 fatal (_("Premature EOF after %s"), token_buffer);
628
629 t = lex ();
630
631 switch (t)
632 {
633
634 case tok_comma:
635 case tok_semicolon:
636 break;
637
638 case tok_identifier:
639 if (symval->type_name == NULL)
640 symval->type_name = name;
641 else if (strcmp (name, symval->type_name) != 0)
642 complain (_("type redeclaration for %s"), symval->tag);
643
644 break;
645
646 default:
647 complain (_("invalid %%type declaration due to item: %s"),
648 token_buffer);
649 skip_to_char ('%');
650 }
651 }
652 }
653
654
655
656 /*----------------------------------------------------------------.
657 | Read in a %left, %right or %nonassoc declaration and record its |
658 | information. |
659 `----------------------------------------------------------------*/
660
661 static void
662 parse_assoc_decl (associativity assoc)
663 {
664 char *name = NULL;
665 int prev = 0;
666
667 lastprec++; /* Assign a new precedence level, never 0. */
668
669 for (;;)
670 {
671 token_t t;
672 int tmp_char = ungetc (skip_white_space (), finput);
673
674 if (tmp_char == '%')
675 return;
676 if (tmp_char == EOF)
677 fatal (_("Premature EOF after %s"), token_buffer);
678
679 t = lex ();
680
681 switch (t)
682 {
683 case tok_typename:
684 name = xstrdup (token_buffer);
685 break;
686
687 case tok_comma:
688 break;
689
690 case tok_identifier:
691 if (symval->prec != 0)
692 complain (_("redefining precedence of %s"), symval->tag);
693 symval->prec = lastprec;
694 symval->assoc = assoc;
695 if (symval->class == nterm_sym)
696 complain (_("symbol %s redefined"), symval->tag);
697 symval->class = token_sym;
698 if (name)
699 { /* record the type, if one is specified */
700 if (symval->type_name == NULL)
701 symval->type_name = name;
702 else if (strcmp (name, symval->type_name) != 0)
703 complain (_("type redeclaration for %s"), symval->tag);
704 }
705 break;
706
707 case tok_number:
708 if (prev == tok_identifier)
709 {
710 symval->user_token_number = numval;
711 translations = 1;
712 }
713 else
714 {
715 complain (_
716 ("invalid text (%s) - number should be after identifier"),
717 token_buffer);
718 skip_to_char ('%');
719 }
720 break;
721
722 case tok_semicolon:
723 return;
724
725 default:
726 complain (_("unexpected item: %s"), token_buffer);
727 skip_to_char ('%');
728 }
729
730 prev = t;
731
732 }
733 }
734
735
736
737 /*--------------------------------------------------------------.
738 | Copy the union declaration into the stype muscle |
739 | (and fdefines), where it is made into the definition of |
740 | YYSTYPE, the type of elements of the parser value stack. |
741 `--------------------------------------------------------------*/
742
743 static void
744 parse_union_decl (void)
745 {
746 int c;
747 int count = 0;
748 struct obstack union_obstack;
749
750 if (typed)
751 complain (_("multiple %s declarations"), "%union");
752
753 typed = 1;
754
755 if (no_lines_flag)
756 obstack_1grow (&attrs_obstack, '\n');
757
758 obstack_init (&union_obstack);
759 obstack_sgrow (&union_obstack, "union");
760 if (defines_flag)
761 obstack_sgrow (&defines_obstack, "typedef union");
762
763 c = getc (finput);
764
765 while (c != EOF)
766 {
767 obstack_1grow (&union_obstack, c);
768 if (defines_flag)
769 obstack_1grow (&defines_obstack, c);
770
771 switch (c)
772 {
773 case '\n':
774 lineno++;
775 break;
776
777 case '/':
778 copy_comment2 (finput, &defines_obstack, &union_obstack);
779 break;
780
781 case '{':
782 count++;
783 break;
784
785 case '}':
786 if (count == 0)
787 complain (_("unmatched %s"), "`}'");
788 count--;
789 if (count <= 0)
790 {
791 if (defines_flag)
792 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
793 /* JF don't choke on trailing semi */
794 c = skip_white_space ();
795 if (c != ';')
796 ungetc (c, finput);
797 obstack_1grow (&union_obstack, 0);
798 muscle_insert ("stype", obstack_finish (&union_obstack));
799 return;
800 }
801 }
802
803 c = getc (finput);
804 }
805
806 }
807
808
809 /*-------------------------------------------------------.
810 | Parse the declaration %expect N which says to expect N |
811 | shift-reduce conflicts. |
812 `-------------------------------------------------------*/
813
814 static void
815 parse_expect_decl (void)
816 {
817 int c = skip_white_space ();
818 ungetc (c, finput);
819
820 if (!isdigit (c))
821 complain (_("argument of %%expect is not an integer"));
822 else
823 expected_conflicts = read_signed_integer (finput);
824 }
825
826
827 /*-------------------------------------------------------------------.
828 | Parse what comes after %thong. the full syntax is |
829 | |
830 | %thong <type> token number literal |
831 | |
832 | the <type> or number may be omitted. The number specifies the |
833 | user_token_number. |
834 | |
835 | Two symbols are entered in the table, one for the token symbol and |
836 | one for the literal. Both are given the <type>, if any, from the |
837 | declaration. The ->user_token_number of the first is SALIAS and |
838 | the ->user_token_number of the second is set to the number, if |
839 | any, from the declaration. The two symbols are linked via |
840 | pointers in their ->alias fields. |
841 | |
842 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter, |
843 | only the literal string is retained it is the literal string that |
844 | is output to yytname |
845 `-------------------------------------------------------------------*/
846
847 static void
848 parse_thong_decl (void)
849 {
850 token_t token;
851 struct bucket *symbol;
852 char *typename = 0;
853 int usrtoknum;
854
855 translations = 1;
856 token = lex (); /* fetch typename or first token */
857 if (token == tok_typename)
858 {
859 typename = xstrdup (token_buffer);
860 value_components_used = 1;
861 token = lex (); /* fetch first token */
862 }
863
864 /* process first token */
865
866 if (token != tok_identifier)
867 {
868 complain (_("unrecognized item %s, expected an identifier"),
869 token_buffer);
870 skip_to_char ('%');
871 return;
872 }
873 symval->class = token_sym;
874 symval->type_name = typename;
875 symval->user_token_number = SALIAS;
876 symbol = symval;
877
878 token = lex (); /* get number or literal string */
879
880 if (token == tok_number)
881 {
882 usrtoknum = numval;
883 token = lex (); /* okay, did number, now get literal */
884 }
885 else
886 usrtoknum = 0;
887
888 /* process literal string token */
889
890 if (token != tok_identifier || *symval->tag != '\"')
891 {
892 complain (_("expected string constant instead of %s"), token_buffer);
893 skip_to_char ('%');
894 return;
895 }
896 symval->class = token_sym;
897 symval->type_name = typename;
898 symval->user_token_number = usrtoknum;
899
900 symval->alias = symbol;
901 symbol->alias = symval;
902
903 /* symbol and symval combined are only one symbol. */
904 nsyms--;
905 }
906
907 /* FIXME. */
908
909 static void
910 parse_muscle_decl (void)
911 {
912 int ch = ungetc (skip_white_space (), finput);
913 char* muscle_key;
914 char* muscle_value;
915
916 /* Read key. */
917 if (!isalpha (ch) && ch != '_')
918 {
919 complain (_("invalid %s declaration"), "%define");
920 skip_to_char ('%');
921 return;
922 }
923 copy_identifier (finput, &muscle_obstack);
924 obstack_1grow (&muscle_obstack, 0);
925 muscle_key = obstack_finish (&muscle_obstack);
926
927 /* Read value. */
928 ch = skip_white_space ();
929 if (ch != '"')
930 {
931 ungetc (ch, finput);
932 if (ch != EOF)
933 {
934 complain (_("invalid %s declaration"), "%define");
935 skip_to_char ('%');
936 return;
937 }
938 else
939 fatal (_("Premature EOF after %s"), "\"");
940 }
941 copy_string2 (finput, &muscle_obstack, '"', 0);
942 obstack_1grow (&muscle_obstack, 0);
943 muscle_value = obstack_finish (&muscle_obstack);
944
945 /* Store the (key, value) pair in the environment. */
946 muscle_insert (muscle_key, muscle_value);
947 }
948
949
950 /*----------------------------------.
951 | Parse what comes after %skeleton. |
952 `----------------------------------*/
953
954 void
955 parse_skel_decl (void)
956 {
957 /* Placeholder: nothing is read from the input here, so the argument of %skeleton is not consumed by this function. Complete with parse_dquoted_param () on the CVS branch 1.29. */
958 }
959
960 /*------------------------------------------.
961 | Parse what comes after %header_extension. |
962 `------------------------------------------*/
963
964 static void
965 parse_header_extension_decl (void)
966 {
967 char buff[32];
968
969 if (header_extension)
970 complain (_("multiple %%header_extension declarations"));
971 fscanf (finput, "%s", buff);
972 header_extension = xstrdup (buff);
973 }
974
975 /*------------------------------------------.
976 | Parse what comes after %source_extension. |
977 `------------------------------------------*/
978
979 static void
980 parse_source_extension_decl (void)
981 {
982 char buff[32];
983
984 if (src_extension)
985 complain (_("multiple %%source_extension declarations"));
986 fscanf (finput, "%s", buff);
987 src_extension = xstrdup (buff);
988 }
989
990 /*----------------------------------------------------------------.
991 | Read from finput until `%%' is seen. Discard the `%%'. Handle |
992 | any `%' declarations, and copy the contents of any `%{ ... %}' |
993 | groups to ATTRS_OBSTACK. |
994 `----------------------------------------------------------------*/
995
996 static void
997 read_declarations (void)
998 {
999 int c;
1000 int tok;
1001
1002 for (;;)
1003 {
1004 c = skip_white_space ();
1005
1006 if (c == '%')
1007 {
1008 tok = parse_percent_token ();
1009
1010 switch (tok)
1011 {
1012 case tok_two_percents:
1013 return;
1014
1015 case tok_percent_left_curly:
1016 copy_definition ();
1017 break;
1018
1019 case tok_token:
1020 parse_token_decl (token_sym, nterm_sym);
1021 break;
1022
1023 case tok_nterm:
1024 parse_token_decl (nterm_sym, token_sym);
1025 break;
1026
1027 case tok_type:
1028 parse_type_decl ();
1029 break;
1030
1031 case tok_start:
1032 parse_start_decl ();
1033 break;
1034
1035 case tok_union:
1036 parse_union_decl ();
1037 break;
1038
1039 case tok_expect:
1040 parse_expect_decl ();
1041 break;
1042
1043 case tok_thong:
1044 parse_thong_decl ();
1045 break;
1046
1047 case tok_left:
1048 parse_assoc_decl (left_assoc);
1049 break;
1050
1051 case tok_right:
1052 parse_assoc_decl (right_assoc);
1053 break;
1054
1055 case tok_nonassoc:
1056 parse_assoc_decl (non_assoc);
1057 break;
1058
1059 case tok_hdrext:
1060 parse_header_extension_decl ();
1061 break;
1062
1063 case tok_srcext:
1064 parse_source_extension_decl ();
1065 break;
1066
1067 case tok_define:
1068 parse_muscle_decl ();
1069 break;
1070
1071 case tok_skel:
1072 parse_skel_decl ();
1073 break;
1074
1075 case tok_noop:
1076 break;
1077
1078 default:
1079 complain (_("unrecognized: %s"), token_buffer);
1080 skip_to_char ('%');
1081 }
1082 }
1083 else if (c == EOF)
1084 fatal (_("no input grammar"));
1085 else
1086 {
1087 char buf[] = "c";
1088 buf[0] = c;
1089 complain (_("unknown character: %s"), quote (buf));
1090 skip_to_char ('%');
1091 }
1092 }
1093 }
1094 \f
1095 /*-------------------------------------------------------------------.
1096 | Assuming that a `{' has just been seen, copy everything up to the |
1097 | matching `}' into the actions file. STACK_OFFSET is the number of |
1098 | values in the current rule so far, which says where to find `$0' |
1099 | with respect to the top of the stack. |
1100 `-------------------------------------------------------------------*/
1101
1102 static void
1103 copy_action (symbol_list *rule, int stack_offset)
1104 {
1105 int c;
1106 int count;
1107 char buf[4096];
1108
1109 /* offset is always 0 if parser has already popped the stack pointer */
1110 if (semantic_parser)
1111 stack_offset = 0;
1112
1113 obstack_fgrow1 (&action_obstack, "\ncase %d:\n", nrules);
1114
1115 if (!no_lines_flag)
1116 {
1117 obstack_fgrow2 (&action_obstack, muscle_find ("linef"),
1118 lineno, quotearg_style (c_quoting_style,
1119 muscle_find ("filename")));
1120 }
1121 obstack_1grow (&action_obstack, '{');
1122
1123 count = 1;
1124 c = getc (finput);
1125
1126 while (count > 0)
1127 {
1128 while (c != '}')
1129 {
1130 switch (c)
1131 {
1132 case '\n':
1133 obstack_1grow (&action_obstack, c);
1134 lineno++;
1135 break;
1136
1137 case '{':
1138 obstack_1grow (&action_obstack, c);
1139 count++;
1140 break;
1141
1142 case '\'':
1143 case '"':
1144 copy_string (finput, &action_obstack, c);
1145 break;
1146
1147 case '/':
1148 copy_comment (finput, &action_obstack);
1149 break;
1150
1151 case '$':
1152 copy_dollar (finput, &action_obstack,
1153 rule, stack_offset);
1154 break;
1155
1156 case '@':
1157 copy_at (finput, &action_obstack,
1158 stack_offset);
1159 break;
1160
1161 case EOF:
1162 fatal (_("unmatched %s"), "`{'");
1163
1164 default:
1165 obstack_1grow (&action_obstack, c);
1166 }
1167
1168 c = getc (finput);
1169 }
1170
1171 /* above loop exits when c is '}' */
1172
1173 if (--count)
1174 {
1175 obstack_1grow (&action_obstack, c);
1176 c = getc (finput);
1177 }
1178 }
1179
1180 obstack_sgrow (&action_obstack, ";\n break;}");
1181 }
1182 \f
1183 /*-------------------------------------------------------------------.
1184 | After `%guard' is seen in the input file, copy the actual guard |
1185 | into the guards file. If the guard is followed by an action, copy |
1186 | that into the actions file. STACK_OFFSET is the number of values |
1187 | in the current rule so far, which says where to find `$0' with |
1188 | respect to the top of the stack, for the simple parser in which |
1189 | the stack is not popped until after the guard is run. |
1190 `-------------------------------------------------------------------*/
1191
1192 static void
1193 copy_guard (symbol_list *rule, int stack_offset)
1194 { /* Output goes to guard_obstack as a `case NRULES:' entry; RULE gives copy_dollar the symbol types for `$$' and `$N'. */
1195 int c;
1196 int count; /* Depth of currently open `{'. */
1197 int brace_flag = 0; /* Nonzero once a `{' has been seen in the guard. */
1198
1199 /* offset is always 0 if parser has already popped the stack pointer */
1200 if (semantic_parser)
1201 stack_offset = 0;
1202
1203 obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1204 if (!no_lines_flag)
1205 obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
1206 lineno, quotearg_style (c_quoting_style,
1207 muscle_find ("filename")));
1208 obstack_1grow (&guard_obstack, '{');
1209
1210 count = 0;
1211 c = getc (finput);
1212
1213 while (brace_flag ? (count > 0) : (c != ';')) /* A braced guard ends when its braces balance; an unbraced one ends at the first `;', which is not copied. */
1214 {
1215 switch (c)
1216 {
1217 case '\n':
1218 obstack_1grow (&guard_obstack, c);
1219 lineno++;
1220 break;
1221
1222 case '{':
1223 obstack_1grow (&guard_obstack, c);
1224 brace_flag = 1;
1225 count++;
1226 break;
1227
1228 case '}':
1229 obstack_1grow (&guard_obstack, c);
1230 if (count > 0)
1231 count--;
1232 else
1233 {
1234 complain (_("unmatched %s"), "`}'");
1235 c = getc (finput); /* skip it */ /* NOTE(review): the character read here is overwritten by the getc at the bottom of the loop (unless it is `}'), so it looks like one input character is dropped without being copied -- confirm. */
1236 }
1237 break;
1238
1239 case '\'':
1240 case '"':
1241 copy_string (finput, &guard_obstack, c);
1242 break;
1243
1244 case '/':
1245 copy_comment (finput, &guard_obstack);
1246 break;
1247
1248 case '$':
1249 copy_dollar (finput, &guard_obstack, rule, stack_offset);
1250 break;
1251
1252 case '@':
1253 copy_at (finput, &guard_obstack, stack_offset);
1254 break;
1255
1256 case EOF:
1257 fatal ("%s", _("unterminated %guard clause"));
1258
1259 default:
1260 obstack_1grow (&guard_obstack, c);
1261 }
1262
1263 if (c != '}' || count != 0) /* Keep the closing `}' of a balanced guard in C so that the loop test sees count == 0 and stops without reading further. */
1264 c = getc (finput);
1265 }
1266
1267 c = skip_white_space ();
1268
1269 obstack_sgrow (&guard_obstack, ";\n break;}");
1270 if (c == '{') /* An action may follow the guard, either directly or after `='. */
1271 copy_action (rule, stack_offset);
1272 else if (c == '=')
1273 {
1274 c = getc (finput); /* why not skip_white_space -wjh */
1275 if (c == '{') /* NOTE(review): when this is not `{', the character just read is neither copied nor pushed back -- confirm that is intended. */
1276 copy_action (rule, stack_offset);
1277 }
1278 else
1279 ungetc (c, finput);
1280 }
1281 \f
1282
1283 static void
1284 record_rule_line (void)
1285 {
1286 /* Record each rule's source line number in rline table. */
1287
1288 if (nrules >= rline_allocated)
1289 {
1290 rline_allocated = nrules * 2;
1291 rline = XREALLOC (rline, short, rline_allocated);
1292 }
1293 rline[nrules] = lineno;
1294 }
1295
1296
1297 /*-------------------------------------------------------------------.
1298 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1299 | with the user's names. |
1300 `-------------------------------------------------------------------*/
1301
1302 static bucket *
1303 gensym (void)
1304 {
1305 /* Incremented for each generated symbol */
1306 static int gensym_count = 0;
1307 static char buf[256];
1308
1309 bucket *sym;
1310
1311 sprintf (buf, "@%d", ++gensym_count);
1312 token_buffer = buf;
1313 sym = getsym (token_buffer);
1314 sym->class = nterm_sym;
1315 sym->value = nvars++;
1316 return sym;
1317 }
1318
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  Return the first token that is not part
   of the declaration: the offending token itself when the <typename>
   is missing, the token following the terminating semicolon, or the
   first token that is neither a comma nor an identifier.

   This is unused.  It is only called from the #if 0 part of
   readgram.  */

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1370 \f
1371 /*------------------------------------------------------------------.
1372 | Parse the input grammar into a one symbol_list structure. Each |
1373 | rule is represented by a sequence of symbols: the left hand side |
1374 | followed by the contents of the right hand side, followed by a |
1375 | null pointer instead of a symbol to terminate the rule. The next |
1376 | symbol is the lhs of the following rule. |
1377 | |
1378 | All guards and actions are copied out to the appropriate files, |
1379 | labelled by the rule number they apply to. |
1380 `------------------------------------------------------------------*/
1381
1382 static void
1383 readgram (void)
1384 {
1385 token_t t;
1386 bucket *lhs = NULL;
1387 symbol_list *p;
1388 symbol_list *p1;
1389 bucket *bp;
1390
1391 /* Points to first symbol_list of current rule. its symbol is the
1392 lhs of the rule. */
1393 symbol_list *crule;
1394 /* Points to the symbol_list preceding crule. */
1395 symbol_list *crule1;
1396
1397 p1 = NULL;
1398
1399 t = lex ();
1400
1401 while (t != tok_two_percents && t != tok_eof)
1402 {
1403 if (t == tok_identifier || t == tok_bar)
1404 {
1405 int action_flag = 0;
1406 /* Number of symbols in rhs of this rule so far */
1407 int rulelength = 0;
1408 int xactions = 0; /* JF for error checking */
1409 bucket *first_rhs = 0;
1410
1411 if (t == tok_identifier)
1412 {
1413 lhs = symval;
1414
1415 if (!start_flag)
1416 {
1417 startval = lhs;
1418 start_flag = 1;
1419 }
1420
1421 t = lex ();
1422 if (t != tok_colon)
1423 {
1424 complain (_("ill-formed rule: initial symbol not followed by colon"));
1425 unlex (t);
1426 }
1427 }
1428
1429 if (nrules == 0 && t == tok_bar)
1430 {
1431 complain (_("grammar starts with vertical bar"));
1432 lhs = symval; /* BOGUS: use a random symval */
1433 }
1434 /* start a new rule and record its lhs. */
1435
1436 nrules++;
1437 nitems++;
1438
1439 record_rule_line ();
1440
1441 p = XCALLOC (symbol_list, 1);
1442 p->sym = lhs;
1443
1444 crule1 = p1;
1445 if (p1)
1446 p1->next = p;
1447 else
1448 grammar = p;
1449
1450 p1 = p;
1451 crule = p;
1452
1453 /* mark the rule's lhs as a nonterminal if not already so. */
1454
1455 if (lhs->class == unknown_sym)
1456 {
1457 lhs->class = nterm_sym;
1458 lhs->value = nvars;
1459 nvars++;
1460 }
1461 else if (lhs->class == token_sym)
1462 complain (_("rule given for %s, which is a token"), lhs->tag);
1463
1464 /* read the rhs of the rule. */
1465
1466 for (;;)
1467 {
1468 t = lex ();
1469 if (t == tok_prec)
1470 {
1471 t = lex ();
1472 crule->ruleprec = symval;
1473 t = lex ();
1474 }
1475
1476 if (!(t == tok_identifier || t == tok_left_curly))
1477 break;
1478
1479 /* If next token is an identifier, see if a colon follows it.
1480 If one does, exit this rule now. */
1481 if (t == tok_identifier)
1482 {
1483 bucket *ssave;
1484 token_t t1;
1485
1486 ssave = symval;
1487 t1 = lex ();
1488 unlex (t1);
1489 symval = ssave;
1490 if (t1 == tok_colon)
1491 break;
1492
1493 if (!first_rhs) /* JF */
1494 first_rhs = symval;
1495 /* Not followed by colon =>
1496 process as part of this rule's rhs. */
1497 }
1498
1499 /* If we just passed an action, that action was in the middle
1500 of a rule, so make a dummy rule to reduce it to a
1501 non-terminal. */
1502 if (action_flag)
1503 {
1504 bucket *sdummy;
1505
1506 /* Since the action was written out with this rule's
1507 number, we must give the new rule this number by
1508 inserting the new rule before it. */
1509
1510 /* Make a dummy nonterminal, a gensym. */
1511 sdummy = gensym ();
1512
1513 /* Make a new rule, whose body is empty,
1514 before the current one, so that the action
1515 just read can belong to it. */
1516 nrules++;
1517 nitems++;
1518 record_rule_line ();
1519 p = XCALLOC (symbol_list, 1);
1520 if (crule1)
1521 crule1->next = p;
1522 else
1523 grammar = p;
1524 p->sym = sdummy;
1525 crule1 = XCALLOC (symbol_list, 1);
1526 p->next = crule1;
1527 crule1->next = crule;
1528
1529 /* Insert the dummy generated by that rule into this
1530 rule. */
1531 nitems++;
1532 p = XCALLOC (symbol_list, 1);
1533 p->sym = sdummy;
1534 p1->next = p;
1535 p1 = p;
1536
1537 action_flag = 0;
1538 }
1539
1540 if (t == tok_identifier)
1541 {
1542 nitems++;
1543 p = XCALLOC (symbol_list, 1);
1544 p->sym = symval;
1545 p1->next = p;
1546 p1 = p;
1547 }
1548 else /* handle an action. */
1549 {
1550 copy_action (crule, rulelength);
1551 action_flag = 1;
1552 xactions++; /* JF */
1553 }
1554 rulelength++;
1555 } /* end of read rhs of rule */
1556
1557 /* Put an empty link in the list to mark the end of this rule */
1558 p = XCALLOC (symbol_list, 1);
1559 p1->next = p;
1560 p1 = p;
1561
1562 if (t == tok_prec)
1563 {
1564 complain (_("two @prec's in a row"));
1565 t = lex ();
1566 crule->ruleprec = symval;
1567 t = lex ();
1568 }
1569 if (t == tok_guard)
1570 {
1571 if (!semantic_parser)
1572 complain (_("%%guard present but %%semantic_parser not specified"));
1573
1574 copy_guard (crule, rulelength);
1575 t = lex ();
1576 }
1577 else if (t == tok_left_curly)
1578 {
1579 /* This case never occurs -wjh */
1580 if (action_flag)
1581 complain (_("two actions at end of one rule"));
1582 copy_action (crule, rulelength);
1583 action_flag = 1;
1584 xactions++; /* -wjh */
1585 t = lex ();
1586 }
1587 /* If $$ is being set in default way, report if any type
1588 mismatch. */
1589 else if (!xactions
1590 && first_rhs && lhs->type_name != first_rhs->type_name)
1591 {
1592 if (lhs->type_name == 0
1593 || first_rhs->type_name == 0
1594 || strcmp (lhs->type_name, first_rhs->type_name))
1595 complain (_("type clash (`%s' `%s') on default action"),
1596 lhs->type_name ? lhs->type_name : "",
1597 first_rhs->type_name ? first_rhs->type_name : "");
1598 }
1599 /* Warn if there is no default for $$ but we need one. */
1600 else if (!xactions && !first_rhs && lhs->type_name != 0)
1601 complain (_("empty rule for typed nonterminal, and no action"));
1602 if (t == tok_semicolon)
1603 t = lex ();
1604 }
1605 #if 0
1606 /* these things can appear as alternatives to rules. */
1607 /* NO, they cannot.
1608 a) none of the documentation allows them
1609 b) most of them scan forward until finding a next %
1610 thus they may swallow lots of intervening rules
1611 */
1612 else if (t == tok_token)
1613 {
1614 parse_token_decl (token_sym, nterm_sym);
1615 t = lex ();
1616 }
1617 else if (t == tok_nterm)
1618 {
1619 parse_token_decl (nterm_sym, token_sym);
1620 t = lex ();
1621 }
1622 else if (t == tok_type)
1623 {
1624 t = get_type ();
1625 }
1626 else if (t == tok_union)
1627 {
1628 parse_union_decl ();
1629 t = lex ();
1630 }
1631 else if (t == tok_expect)
1632 {
1633 parse_expect_decl ();
1634 t = lex ();
1635 }
1636 else if (t == tok_start)
1637 {
1638 parse_start_decl ();
1639 t = lex ();
1640 }
1641 #endif
1642
1643 else
1644 {
1645 complain (_("invalid input: %s"), quote (token_buffer));
1646 t = lex ();
1647 }
1648 }
1649
1650 /* grammar has been read. Do some checking */
1651
1652 if (nsyms > MAXSHORT)
1653 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1654 MAXSHORT);
1655 if (nrules == 0)
1656 fatal (_("no rules in the input grammar"));
1657
1658 /* Report any undefined symbols and consider them nonterminals. */
1659
1660 for (bp = firstsymbol; bp; bp = bp->next)
1661 if (bp->class == unknown_sym)
1662 {
1663 complain (_
1664 ("symbol %s is used, but is not defined as a token and has no rules"),
1665 bp->tag);
1666 bp->class = nterm_sym;
1667 bp->value = nvars++;
1668 }
1669
1670 ntokens = nsyms - nvars;
1671 }
1672
1673 /* At the end of the grammar file, some C source code must
1674 be stored. It is going to be associated to the epilogue
1675 directive. */
1676 static void
1677 read_additionnal_code (void)
1678 {
1679 char c;
1680 struct obstack el_obstack;
1681
1682 obstack_init (&el_obstack);
1683
1684 while ((c = getc (finput)) != EOF)
1685 obstack_1grow (&el_obstack, c);
1686
1687 obstack_1grow (&el_obstack, 0);
1688 muscle_insert ("epilogue", obstack_finish (&el_obstack));
1689 }
1690
1691 \f
1692 /*--------------------------------------------------------------.
1693 | For named tokens, but not literal ones, define the name. The |
1694 | value is the user token number. |
1695 `--------------------------------------------------------------*/
1696
1697 static void
1698 output_token_defines (struct obstack *oout)
1699 {
1700 bucket *bp;
1701 char *cp, *symbol;
1702 char c;
1703
1704 for (bp = firstsymbol; bp; bp = bp->next)
1705 {
1706 symbol = bp->tag; /* get symbol */
1707
1708 if (bp->value >= ntokens)
1709 continue;
1710 if (bp->user_token_number == SALIAS)
1711 continue;
1712 if ('\'' == *symbol)
1713 continue; /* skip literal character */
1714 if (bp == errtoken)
1715 continue; /* skip error token */
1716 if ('\"' == *symbol)
1717 {
1718 /* use literal string only if given a symbol with an alias */
1719 if (bp->alias)
1720 symbol = bp->alias->tag;
1721 else
1722 continue;
1723 }
1724
1725 /* Don't #define nonliteral tokens whose names contain periods. */
1726 cp = symbol;
1727 while ((c = *cp++) && c != '.');
1728 if (c != '\0')
1729 continue;
1730
1731 obstack_fgrow2 (oout, "# define\t%s\t%d\n",
1732 symbol,
1733 (translations ? bp->user_token_number : bp->value));
1734 if (semantic_parser)
1735 obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1736 }
1737 }
1738
1739
1740 /*------------------------------------------------------------------.
1741 | Assign symbol numbers, and write definition of token names into |
1742 | FDEFINES. Set up vectors TAGS and SPREC of names and precedences |
1743 | of symbols. |
1744 `------------------------------------------------------------------*/
1745
1746 static void
1747 packsymbols (void)
1748 {
1749 bucket *bp;
1750 int tokno = 1;
1751 int i;
1752 int last_user_token_number;
1753 static char DOLLAR[] = "$";
1754
1755 tags = XCALLOC (char *, nsyms + 1);
1756 tags[0] = DOLLAR;
1757 user_toknums = XCALLOC (short, nsyms + 1);
1758 user_toknums[0] = 0;
1759
1760 sprec = XCALLOC (short, nsyms);
1761 sassoc = XCALLOC (short, nsyms);
1762
1763 max_user_token_number = 256;
1764 last_user_token_number = 256;
1765
1766 for (bp = firstsymbol; bp; bp = bp->next)
1767 {
1768 if (bp->class == nterm_sym)
1769 {
1770 bp->value += ntokens;
1771 }
1772 else if (bp->alias)
1773 {
1774 /* this symbol and its alias are a single token defn.
1775 allocate a tokno, and assign to both check agreement of
1776 ->prec and ->assoc fields and make both the same */
1777 if (bp->value == 0)
1778 bp->value = bp->alias->value = tokno++;
1779
1780 if (bp->prec != bp->alias->prec)
1781 {
1782 if (bp->prec != 0 && bp->alias->prec != 0
1783 && bp->user_token_number == SALIAS)
1784 complain (_("conflicting precedences for %s and %s"),
1785 bp->tag, bp->alias->tag);
1786 if (bp->prec != 0)
1787 bp->alias->prec = bp->prec;
1788 else
1789 bp->prec = bp->alias->prec;
1790 }
1791
1792 if (bp->assoc != bp->alias->assoc)
1793 {
1794 if (bp->assoc != 0 && bp->alias->assoc != 0
1795 && bp->user_token_number == SALIAS)
1796 complain (_("conflicting assoc values for %s and %s"),
1797 bp->tag, bp->alias->tag);
1798 if (bp->assoc != 0)
1799 bp->alias->assoc = bp->assoc;
1800 else
1801 bp->assoc = bp->alias->assoc;
1802 }
1803
1804 if (bp->user_token_number == SALIAS)
1805 continue; /* do not do processing below for SALIASs */
1806
1807 }
1808 else /* bp->class == token_sym */
1809 {
1810 bp->value = tokno++;
1811 }
1812
1813 if (bp->class == token_sym)
1814 {
1815 if (translations && !(bp->user_token_number))
1816 bp->user_token_number = ++last_user_token_number;
1817 if (bp->user_token_number > max_user_token_number)
1818 max_user_token_number = bp->user_token_number;
1819 }
1820
1821 tags[bp->value] = bp->tag;
1822 user_toknums[bp->value] = bp->user_token_number;
1823 sprec[bp->value] = bp->prec;
1824 sassoc[bp->value] = bp->assoc;
1825
1826 }
1827
1828 if (translations)
1829 {
1830 int j;
1831
1832 token_translations = XCALLOC (short, max_user_token_number + 1);
1833
1834 /* initialize all entries for literal tokens to 2, the internal
1835 token number for $undefined., which represents all invalid
1836 inputs. */
1837 for (j = 0; j <= max_user_token_number; j++)
1838 token_translations[j] = 2;
1839
1840 for (bp = firstsymbol; bp; bp = bp->next)
1841 {
1842 if (bp->value >= ntokens)
1843 continue; /* non-terminal */
1844 if (bp->user_token_number == SALIAS)
1845 continue;
1846 if (token_translations[bp->user_token_number] != 2)
1847 complain (_("tokens %s and %s both assigned number %d"),
1848 tags[token_translations[bp->user_token_number]],
1849 bp->tag, bp->user_token_number);
1850 token_translations[bp->user_token_number] = bp->value;
1851 }
1852 }
1853
1854 error_token_number = errtoken->value;
1855
1856 output_token_defines (&output_obstack);
1857 obstack_1grow (&output_obstack, 0);
1858 muscle_insert ("tokendef", obstack_finish (&output_obstack));
1859
1860 #if 0
1861 if (!no_parser_flag)
1862 output_token_defines (&table_obstack);
1863 #endif
1864
1865 if (startval->class == unknown_sym)
1866 fatal (_("the start symbol %s is undefined"), startval->tag);
1867 else if (startval->class == token_sym)
1868 fatal (_("the start symbol %s is a token"), startval->tag);
1869
1870 start_symbol = startval->value;
1871
1872 if (defines_flag)
1873 {
1874 output_token_defines (&defines_obstack);
1875
1876 if (!pure_parser)
1877 {
1878 if (spec_name_prefix)
1879 obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1880 spec_name_prefix);
1881 else
1882 obstack_sgrow (&defines_obstack,
1883 "\nextern YYSTYPE yylval;\n");
1884 }
1885
1886 if (semantic_parser)
1887 for (i = ntokens; i < nsyms; i++)
1888 {
1889 /* don't make these for dummy nonterminals made by gensym. */
1890 if (*tags[i] != '@')
1891 obstack_fgrow2 (&defines_obstack,
1892 "# define\tNT%s\t%d\n", tags[i], i);
1893 }
1894 #if 0
1895 /* `fdefines' is now a temporary file, so we need to copy its
1896 contents in `done', so we can't close it here. */
1897 fclose (fdefines);
1898 fdefines = NULL;
1899 #endif
1900 }
1901 }
1902
1903
1904 /*---------------------------------------------------------------.
1905 | Convert the rules into the representation using RRHS, RLHS and |
1906 | RITEMS. |
1907 `---------------------------------------------------------------*/
1908
1909 static void
1910 packgram (void)
1911 {
1912 int itemno;
1913 int ruleno;
1914 symbol_list *p;
1915
1916 bucket *ruleprec;
1917
1918 ritem = XCALLOC (short, nitems + 1);
1919 rlhs = XCALLOC (short, nrules) - 1;
1920 rrhs = XCALLOC (short, nrules) - 1;
1921 rprec = XCALLOC (short, nrules) - 1;
1922 rprecsym = XCALLOC (short, nrules) - 1;
1923 rassoc = XCALLOC (short, nrules) - 1;
1924
1925 itemno = 0;
1926 ruleno = 1;
1927
1928 p = grammar;
1929 while (p)
1930 {
1931 rlhs[ruleno] = p->sym->value;
1932 rrhs[ruleno] = itemno;
1933 ruleprec = p->ruleprec;
1934
1935 p = p->next;
1936 while (p && p->sym)
1937 {
1938 ritem[itemno++] = p->sym->value;
1939 /* A rule gets by default the precedence and associativity
1940 of the last token in it. */
1941 if (p->sym->class == token_sym)
1942 {
1943 rprec[ruleno] = p->sym->prec;
1944 rassoc[ruleno] = p->sym->assoc;
1945 }
1946 if (p)
1947 p = p->next;
1948 }
1949
1950 /* If this rule has a %prec,
1951 the specified symbol's precedence replaces the default. */
1952 if (ruleprec)
1953 {
1954 rprec[ruleno] = ruleprec->prec;
1955 rassoc[ruleno] = ruleprec->assoc;
1956 rprecsym[ruleno] = ruleprec->value;
1957 }
1958
1959 ritem[itemno++] = -ruleno;
1960 ruleno++;
1961
1962 if (p)
1963 p = p->next;
1964 }
1965
1966 ritem[itemno] = 0;
1967 }
1968 \f
1969 /*-------------------------------------------------------------------.
1970 | Read in the grammar specification and record it in the format |
1971 | described in gram.h. All guards are copied into the GUARD_OBSTACK |
1972 | and all actions into ACTION_OBSTACK, in each case forming the body |
1973 | of a C function (YYGUARD or YYACTION) which contains a switch |
1974 | statement to decide which guard or action to execute. |
1975 `-------------------------------------------------------------------*/
1976
1977 void
1978 reader (void)
1979 {
1980 start_flag = 0;
1981 startval = NULL; /* start symbol not specified yet. */
1982
1983 #if 0
1984 /* initially assume token number translation not needed. */
1985 translations = 0;
1986 #endif
1987 /* Nowadays translations is always set to 1, since we give `error' a
1988 user-token-number to satisfy the Posix demand for YYERRCODE==256.
1989 */
1990 translations = 1;
1991
1992 nsyms = 1;
1993 nvars = 0;
1994 nrules = 0;
1995 nitems = 0;
1996 rline_allocated = 10;
1997 rline = XCALLOC (short, rline_allocated);
1998
1999 typed = 0;
2000 lastprec = 0;
2001
2002 semantic_parser = 0;
2003 pure_parser = 0;
2004
2005 grammar = NULL;
2006
2007 init_lex ();
2008 lineno = 1;
2009
2010 /* Initialize the muscle obstack. */
2011 obstack_init (&muscle_obstack);
2012
2013 /* Initialize the symbol table. */
2014 tabinit ();
2015
2016 /* Construct the error token */
2017 errtoken = getsym ("error");
2018 errtoken->class = token_sym;
2019 errtoken->user_token_number = 256; /* Value specified by POSIX. */
2020
2021 /* Construct a token that represents all undefined literal tokens.
2022 It is always token number 2. */
2023 undeftoken = getsym ("$undefined.");
2024 undeftoken->class = token_sym;
2025 undeftoken->user_token_number = 2;
2026
2027 /* Read the declaration section. Copy %{ ... %} groups to
2028 TABLE_OBSTACK and FDEFINES file. Also notice any %token, %left,
2029 etc. found there. */
2030 read_declarations ();
2031 /* Read in the grammar, build grammar in list form. Write out
2032 guards and actions. */
2033 readgram ();
2034 /* Some C code is given at the end of the grammar file. */
2035 read_additionnal_code ();
2036
2037 /* Now we know whether we need the line-number stack. If we do,
2038 write its type into the .tab.h file.
2039 This is no longer need with header skeleton. */
2040
2041 /* Assign the symbols their symbol numbers. Write #defines for the
2042 token symbols into FDEFINES if requested. */
2043 packsymbols ();
2044 /* Convert the grammar into the format described in gram.h. */
2045 packgram ();
2046 /* Free the symbol table data structure since symbols are now all
2047 referred to by symbol number. */
2048 free_symtab ();
2049 }