]> git.saurik.com Git - bison.git/blob - src/reader.c
Document help-bison list.
[bison.git] / src / reader.c
1 /* Input parser for bison
2 Copyright (C) 1984, 1986, 1989, 1992, 1998 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 /* read in the grammar specification and record it in the format described in gram.h.
23 All guards are copied into the fguard file and all actions into faction,
24 in each case forming the body of a C function (yyguard or yyaction)
25 which contains a switch statement to decide which guard or action to execute.
26
27 The entry point is reader(). */
28
29 #include <stdio.h>
30 #include "system.h"
31 #include "files.h"
32 #include "alloc.h"
33 #include "symtab.h"
34 #include "lex.h"
35 #include "gram.h"
36 #include "machine.h"
37
/* Text of the fallback YYLTYPE declaration; reader_output_yylsp writes it
   out unchanged.  Spelled as adjacent literals, one per emitted line.  */
#define LTYPESTR \
  "\n" \
  "#ifndef YYLTYPE\n" \
  "typedef\n" \
  " struct yyltype\n" \
  "{\n" \
  " int timestamp;\n" \
  " int first_line;\n" \
  " int first_column;\n" \
  " int last_line;\n" \
  " int last_column;\n" \
  " char *text;\n" \
  " }\n" \
  "yyltype;\n" \
  "\n" \
  "#define YYLTYPE yyltype\n" \
  "#endif\n" \
  "\n"
42
43 /* Number of slots allocated (but not necessarily used yet) in `rline' */
44 int rline_allocated;
45
46 extern char *program_name;
47 extern int definesflag;
48 extern int nolinesflag;
49 extern int noparserflag;
50 extern int rawtoknumflag;
51 extern bucket *symval;
52 extern int numval;
53 extern int expected_conflicts;
54 extern char *token_buffer;
55 extern int maxtoken;
56
57 extern void init_lex PARAMS((void));
58 extern char *grow_token_buffer PARAMS((char *));
59 extern void tabinit PARAMS((void));
60 extern void output_headers PARAMS((void));
61 extern void output_trailers PARAMS((void));
62 extern void free_symtab PARAMS((void));
63 extern void open_extra_files PARAMS((void));
64 extern char *int_to_string PARAMS((int));
65 extern char *printable_version PARAMS((int));
66 extern void fatal PARAMS((char *));
67 extern void fatals PARAMS((char *, char *));
68 extern void warn PARAMS((char *));
69 extern void warni PARAMS((char *, int));
70 extern void warns PARAMS((char *, char *));
71 extern void warnss PARAMS((char *, char *, char *));
72 extern void warnsss PARAMS((char *, char *, char *, char *));
73 extern void unlex PARAMS((int));
74 extern void done PARAMS((int));
75
76 extern int skip_white_space PARAMS((void));
77 extern int parse_percent_token PARAMS((void));
78 extern int lex PARAMS((void));
79
80 typedef
81 struct symbol_list
82 {
83 struct symbol_list *next;
84 bucket *sym;
85 bucket *ruleprec;
86 }
87 symbol_list;
88
89
90 void reader PARAMS((void));
91 void reader_output_yylsp PARAMS((FILE *));
92 void read_declarations PARAMS((void));
93 void copy_definition PARAMS((void));
94 void parse_token_decl PARAMS((int, int));
95 void parse_start_decl PARAMS((void));
96 void parse_type_decl PARAMS((void));
97 void parse_assoc_decl PARAMS((int));
98 void parse_union_decl PARAMS((void));
99 void parse_expect_decl PARAMS((void));
100 char *get_type_name PARAMS((int, symbol_list *));
101 void copy_guard PARAMS((symbol_list *, int));
102 void parse_thong_decl PARAMS((void));
103 void copy_action PARAMS((symbol_list *, int));
104 bucket *gensym PARAMS((void));
105 void readgram PARAMS((void));
106 void record_rule_line PARAMS((void));
107 void packsymbols PARAMS((void));
108 void output_token_defines PARAMS((FILE *));
109 void packgram PARAMS((void));
110 int read_signed_integer PARAMS((FILE *));
111
112 #if 0
113 static int get_type PARAMS((void));
114 #endif
115
116 int lineno;
117 symbol_list *grammar;
118 int start_flag;
119 bucket *startval;
120 char **tags;
121 int *user_toknums;
122
123 /* Nonzero if components of semantic values are used, implying
124 they must be unions. */
125 static int value_components_used;
126
127 static int typed; /* nonzero if %union has been seen. */
128
129 static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
130
131 static int gensym_count; /* incremented for each generated symbol */
132
133 static bucket *errtoken;
134 static bucket *undeftoken;
135
136 /* Nonzero if any action or guard uses the @n construct. */
137 static int yylsp_needed;
138
139 extern char *version_string;
140
141
142 static void
143 skip_to_char (int target)
144 {
145 int c;
146 if (target == '\n')
147 warn(_(" Skipping to next \\n"));
148 else
149 warni(_(" Skipping to next %c"), target);
150
151 do
152 c = skip_white_space();
153 while (c != target && c != EOF);
154 if (c != EOF)
155 ungetc(c, finput);
156 }
157
158
159 void
160 reader (void)
161 {
162 start_flag = 0;
163 startval = NULL; /* start symbol not specified yet. */
164
165 #if 0
166 translations = 0; /* initially assume token number translation not needed. */
167 #endif
168 /* Nowadays translations is always set to 1,
169 since we give `error' a user-token-number
170 to satisfy the Posix demand for YYERRCODE==256. */
171 translations = 1;
172
173 nsyms = 1;
174 nvars = 0;
175 nrules = 0;
176 nitems = 0;
177 rline_allocated = 10;
178 rline = NEW2(rline_allocated, short);
179
180 typed = 0;
181 lastprec = 0;
182
183 gensym_count = 0;
184
185 semantic_parser = 0;
186 pure_parser = 0;
187 yylsp_needed = 0;
188
189 grammar = NULL;
190
191 init_lex();
192 lineno = 1;
193
194 /* initialize the symbol table. */
195 tabinit();
196 /* construct the error token */
197 errtoken = getsym("error");
198 errtoken->class = STOKEN;
199 errtoken->user_token_number = 256; /* Value specified by posix. */
200 /* construct a token that represents all undefined literal tokens. */
201 /* it is always token number 2. */
202 undeftoken = getsym("$undefined.");
203 undeftoken->class = STOKEN;
204 undeftoken->user_token_number = 2;
205 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
206 Also notice any %token, %left, etc. found there. */
207 if (noparserflag)
208 fprintf(ftable, "\n/* Bison-generated parse tables, made from %s\n",
209 infile);
210 else
211 fprintf(ftable, "\n/* A Bison parser, made from %s\n", infile);
212 fprintf(ftable, " by %s */\n\n", version_string);
213 fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
214 read_declarations();
215 /* start writing the guard and action files, if they are needed. */
216 output_headers();
217 /* read in the grammar, build grammar in list form. write out guards and actions. */
218 readgram();
219 /* Now we know whether we need the line-number stack.
220 If we do, write its type into the .tab.h file. */
221 if (fdefines)
222 reader_output_yylsp(fdefines);
223 /* write closing delimiters for actions and guards. */
224 output_trailers();
225 if (yylsp_needed)
226 fprintf(ftable, "#define YYLSP_NEEDED\n\n");
227 /* assign the symbols their symbol numbers.
228 Write #defines for the token symbols into fdefines if requested. */
229 packsymbols();
230 /* convert the grammar into the format described in gram.h. */
231 packgram();
232 /* free the symbol table data structure
233 since symbols are now all referred to by symbol number. */
234 free_symtab();
235 }
236
237 void
238 reader_output_yylsp (FILE *f)
239 {
240 if (yylsp_needed)
241 fprintf(f, LTYPESTR);
242 }
243
244 /* read from finput until %% is seen. Discard the %%.
245 Handle any % declarations,
246 and copy the contents of any %{ ... %} groups to fattrs. */
247
248 void
249 read_declarations (void)
250 {
251 register int c;
252 register int tok;
253
254 for (;;)
255 {
256 c = skip_white_space();
257
258 if (c == '%')
259 {
260 tok = parse_percent_token();
261
262 switch (tok)
263 {
264 case TWO_PERCENTS:
265 return;
266
267 case PERCENT_LEFT_CURLY:
268 copy_definition();
269 break;
270
271 case TOKEN:
272 parse_token_decl (STOKEN, SNTERM);
273 break;
274
275 case NTERM:
276 parse_token_decl (SNTERM, STOKEN);
277 break;
278
279 case TYPE:
280 parse_type_decl();
281 break;
282
283 case START:
284 parse_start_decl();
285 break;
286
287 case UNION:
288 parse_union_decl();
289 break;
290
291 case EXPECT:
292 parse_expect_decl();
293 break;
294 case THONG:
295 parse_thong_decl();
296 break;
297 case LEFT:
298 parse_assoc_decl(LEFT_ASSOC);
299 break;
300
301 case RIGHT:
302 parse_assoc_decl(RIGHT_ASSOC);
303 break;
304
305 case NONASSOC:
306 parse_assoc_decl(NON_ASSOC);
307 break;
308
309 case SEMANTIC_PARSER:
310 if (semantic_parser == 0)
311 {
312 semantic_parser = 1;
313 open_extra_files();
314 }
315 break;
316
317 case PURE_PARSER:
318 pure_parser = 1;
319 break;
320
321 case NOOP:
322 break;
323
324 default:
325 warns(_("unrecognized: %s"), token_buffer);
326 skip_to_char('%');
327 }
328 }
329 else if (c == EOF)
330 fatal(_("no input grammar"));
331 else
332 {
333 char buff[100];
334 sprintf(buff, _("unknown character: %s"), printable_version(c));
335 warn(buff);
336 skip_to_char('%');
337 }
338 }
339 }
340
341
342 /* copy the contents of a %{ ... %} into the definitions file.
343 The %{ has already been read. Return after reading the %}. */
344
345 void
346 copy_definition (void)
347 {
348 register int c;
349 register int match;
350 register int ended;
351 register int after_percent; /* -1 while reading a character if prev char was % */
352 int cplus_comment;
353
354 if (!nolinesflag)
355 fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
356
357 after_percent = 0;
358
359 c = getc(finput);
360
361 for (;;)
362 {
363 switch (c)
364 {
365 case '\n':
366 putc(c, fattrs);
367 lineno++;
368 break;
369
370 case '%':
371 after_percent = -1;
372 break;
373
374 case '\'':
375 case '"':
376 match = c;
377 putc(c, fattrs);
378 c = getc(finput);
379
380 while (c != match)
381 {
382 if (c == EOF)
383 fatal(_("unterminated string at end of file"));
384 if (c == '\n')
385 {
386 warn(_("unterminated string"));
387 ungetc(c, finput);
388 c = match;
389 continue;
390 }
391
392 putc(c, fattrs);
393
394 if (c == '\\')
395 {
396 c = getc(finput);
397 if (c == EOF)
398 fatal(_("unterminated string at end of file"));
399 putc(c, fattrs);
400 if (c == '\n')
401 lineno++;
402 }
403
404 c = getc(finput);
405 }
406
407 putc(c, fattrs);
408 break;
409
410 case '/':
411 putc(c, fattrs);
412 c = getc(finput);
413 if (c != '*' && c != '/')
414 continue;
415
416 cplus_comment = (c == '/');
417 putc(c, fattrs);
418 c = getc(finput);
419
420 ended = 0;
421 while (!ended)
422 {
423 if (!cplus_comment && c == '*')
424 {
425 while (c == '*')
426 {
427 putc(c, fattrs);
428 c = getc(finput);
429 }
430
431 if (c == '/')
432 {
433 putc(c, fattrs);
434 ended = 1;
435 }
436 }
437 else if (c == '\n')
438 {
439 lineno++;
440 putc(c, fattrs);
441 if (cplus_comment)
442 ended = 1;
443 else
444 c = getc(finput);
445 }
446 else if (c == EOF)
447 fatal(_("unterminated comment in `%{' definition"));
448 else
449 {
450 putc(c, fattrs);
451 c = getc(finput);
452 }
453 }
454
455 break;
456
457 case EOF:
458 fatal(_("unterminated `%{' definition"));
459
460 default:
461 putc(c, fattrs);
462 }
463
464 c = getc(finput);
465
466 if (after_percent)
467 {
468 if (c == '}')
469 return;
470 putc('%', fattrs);
471 }
472 after_percent = 0;
473
474 }
475
476 }
477
478
479
480 /* parse what comes after %token or %nterm.
481 For %token, what_is is STOKEN and what_is_not is SNTERM.
482 For %nterm, the arguments are reversed. */
483
484 void
485 parse_token_decl (int what_is, int what_is_not)
486 {
487 register int token = 0;
488 register char *typename = 0;
489 register struct bucket *symbol = NULL; /* pts to symbol being defined */
490 int k;
491
492 for (;;)
493 {
494 int tmp_char = ungetc (skip_white_space (), finput);
495
496 if (tmp_char == '%')
497 return;
498 if (tmp_char == EOF)
499 fatals ("Premature EOF after %s", token_buffer);
500
501 token = lex();
502 if (token == COMMA)
503 {
504 symbol = NULL;
505 continue;
506 }
507 if (token == TYPENAME)
508 {
509 k = strlen(token_buffer);
510 typename = NEW2(k + 1, char);
511 strcpy(typename, token_buffer);
512 value_components_used = 1;
513 symbol = NULL;
514 }
515 else if (token == IDENTIFIER && *symval->tag == '\"'
516 && symbol)
517 {
518 translations = 1;
519 symval->class = STOKEN;
520 symval->type_name = typename;
521 symval->user_token_number = symbol->user_token_number;
522 symbol->user_token_number = SALIAS;
523
524 symval->alias = symbol;
525 symbol->alias = symval;
526 symbol = NULL;
527
528 nsyms--; /* symbol and symval combined are only one symbol */
529 }
530 else if (token == IDENTIFIER)
531 {
532 int oldclass = symval->class;
533 symbol = symval;
534
535 if (symbol->class == what_is_not)
536 warns(_("symbol %s redefined"), symbol->tag);
537 symbol->class = what_is;
538 if (what_is == SNTERM && oldclass != SNTERM)
539 symbol->value = nvars++;
540
541 if (typename)
542 {
543 if (symbol->type_name == NULL)
544 symbol->type_name = typename;
545 else if (strcmp(typename, symbol->type_name) != 0)
546 warns(_("type redeclaration for %s"), symbol->tag);
547 }
548 }
549 else if (symbol && token == NUMBER)
550 {
551 symbol->user_token_number = numval;
552 translations = 1;
553 }
554 else
555 {
556 warnss(_("`%s' is invalid in %s"),
557 token_buffer,
558 (what_is == STOKEN) ? "%token" : "%nterm");
559 skip_to_char('%');
560 }
561 }
562
563 }
564
565 /* parse what comes after %thong
566 the full syntax is
567 %thong <type> token number literal
568 the <type> or number may be omitted. The number specifies the
569 user_token_number.
570
571 Two symbols are entered in the table, one for the token symbol and
572 one for the literal. Both are given the <type>, if any, from the declaration.
573 The ->user_token_number of the first is SALIAS and the ->user_token_number
574 of the second is set to the number, if any, from the declaration.
575 The two symbols are linked via pointers in their ->alias fields.
576
577 during output_defines_table, the symbol is reported
578 thereafter, only the literal string is retained
579 it is the literal string that is output to yytname
580 */
581
582 void
583 parse_thong_decl (void)
584 {
585 register int token;
586 register struct bucket *symbol;
587 register char *typename = 0;
588 int k, usrtoknum;
589
590 translations = 1;
591 token = lex(); /* fetch typename or first token */
592 if (token == TYPENAME) {
593 k = strlen(token_buffer);
594 typename = NEW2(k + 1, char);
595 strcpy(typename, token_buffer);
596 value_components_used = 1;
597 token = lex(); /* fetch first token */
598 }
599
600 /* process first token */
601
602 if (token != IDENTIFIER)
603 {
604 warns(_("unrecognized item %s, expected an identifier"),
605 token_buffer);
606 skip_to_char('%');
607 return;
608 }
609 symval->class = STOKEN;
610 symval->type_name = typename;
611 symval->user_token_number = SALIAS;
612 symbol = symval;
613
614 token = lex(); /* get number or literal string */
615
616 if (token == NUMBER) {
617 usrtoknum = numval;
618 token = lex(); /* okay, did number, now get literal */
619 }
620 else usrtoknum = 0;
621
622 /* process literal string token */
623
624 if (token != IDENTIFIER || *symval->tag != '\"')
625 {
626 warns(_("expected string constant instead of %s"),
627 token_buffer);
628 skip_to_char('%');
629 return;
630 }
631 symval->class = STOKEN;
632 symval->type_name = typename;
633 symval->user_token_number = usrtoknum;
634
635 symval->alias = symbol;
636 symbol->alias = symval;
637
638 nsyms--; /* symbol and symval combined are only one symbol */
639 }
640
641
642 /* parse what comes after %start */
643
644 void
645 parse_start_decl (void)
646 {
647 if (start_flag)
648 warn(_("multiple %start declarations"));
649 if (lex() != IDENTIFIER)
650 warn(_("invalid %start declaration"));
651 else
652 {
653 start_flag = 1;
654 startval = symval;
655 }
656 }
657
658
659
660 /* read in a %type declaration and record its information for get_type_name to access */
661
662 void
663 parse_type_decl (void)
664 {
665 register int k;
666 register char *name;
667
668 if (lex() != TYPENAME)
669 {
670 warn(_("%type declaration has no <typename>"));
671 skip_to_char('%');
672 return;
673 }
674
675 k = strlen(token_buffer);
676 name = NEW2(k + 1, char);
677 strcpy(name, token_buffer);
678
679 for (;;)
680 {
681 register int t;
682 int tmp_char = ungetc (skip_white_space (), finput);
683
684 if (tmp_char == '%')
685 return;
686 if (tmp_char == EOF)
687 fatals ("Premature EOF after %s", token_buffer);
688
689 t = lex();
690
691 switch (t)
692 {
693
694 case COMMA:
695 case SEMICOLON:
696 break;
697
698 case IDENTIFIER:
699 if (symval->type_name == NULL)
700 symval->type_name = name;
701 else if (strcmp(name, symval->type_name) != 0)
702 warns(_("type redeclaration for %s"), symval->tag);
703
704 break;
705
706 default:
707 warns(_("invalid %%type declaration due to item: `%s'"), token_buffer);
708 skip_to_char('%');
709 }
710 }
711 }
712
713
714
715 /* read in a %left, %right or %nonassoc declaration and record its information. */
716 /* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
717
718 void
719 parse_assoc_decl (int assoc)
720 {
721 register int k;
722 register char *name = NULL;
723 register int prev = 0;
724
725 lastprec++; /* Assign a new precedence level, never 0. */
726
727 for (;;)
728 {
729 register int t;
730 int tmp_char = ungetc (skip_white_space (), finput);
731
732 if (tmp_char == '%')
733 return;
734 if (tmp_char == EOF)
735 fatals ("Premature EOF after %s", token_buffer);
736
737 t = lex();
738
739 switch (t)
740 {
741
742 case TYPENAME:
743 k = strlen(token_buffer);
744 name = NEW2(k + 1, char);
745 strcpy(name, token_buffer);
746 break;
747
748 case COMMA:
749 break;
750
751 case IDENTIFIER:
752 if (symval->prec != 0)
753 warns(_("redefining precedence of %s"), symval->tag);
754 symval->prec = lastprec;
755 symval->assoc = assoc;
756 if (symval->class == SNTERM)
757 warns(_("symbol %s redefined"), symval->tag);
758 symval->class = STOKEN;
759 if (name)
760 { /* record the type, if one is specified */
761 if (symval->type_name == NULL)
762 symval->type_name = name;
763 else if (strcmp(name, symval->type_name) != 0)
764 warns(_("type redeclaration for %s"), symval->tag);
765 }
766 break;
767
768 case NUMBER:
769 if (prev == IDENTIFIER)
770 {
771 symval->user_token_number = numval;
772 translations = 1;
773 }
774 else
775 {
776 warns(_("invalid text (%s) - number should be after identifier"),
777 token_buffer);
778 skip_to_char('%');
779 }
780 break;
781
782 case SEMICOLON:
783 return;
784
785 default:
786 warns(_("unexpected item: %s"), token_buffer);
787 skip_to_char('%');
788 }
789
790 prev = t;
791
792 }
793 }
794
795
796
797 /* copy the union declaration into fattrs (and fdefines),
798 where it is made into the
799 definition of YYSTYPE, the type of elements of the parser value stack. */
800
801 void
802 parse_union_decl (void)
803 {
804 register int c;
805 register int count;
806 register int in_comment;
807 int cplus_comment;
808
809 if (typed)
810 warn(_("multiple %union declarations"));
811
812 typed = 1;
813
814 if (!nolinesflag)
815 fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
816 else
817 fprintf(fattrs, "\n");
818
819 fprintf(fattrs, "typedef union");
820 if (fdefines)
821 fprintf(fdefines, "typedef union");
822
823 count = 0;
824 in_comment = 0;
825
826 c = getc(finput);
827
828 while (c != EOF)
829 {
830 putc(c, fattrs);
831 if (fdefines)
832 putc(c, fdefines);
833
834 switch (c)
835 {
836 case '\n':
837 lineno++;
838 break;
839
840 case '/':
841 c = getc(finput);
842 if (c != '*' && c != '/')
843 ungetc(c, finput);
844 else
845 {
846 putc(c, fattrs);
847 if (fdefines)
848 putc(c, fdefines);
849 cplus_comment = (c == '/');
850 in_comment = 1;
851 c = getc(finput);
852 while (in_comment)
853 {
854 putc(c, fattrs);
855 if (fdefines)
856 putc(c, fdefines);
857
858 if (c == '\n')
859 {
860 lineno++;
861 if (cplus_comment)
862 {
863 in_comment = 0;
864 break;
865 }
866 }
867 if (c == EOF)
868 fatal(_("unterminated comment at end of file"));
869
870 if (!cplus_comment && c == '*')
871 {
872 c = getc(finput);
873 if (c == '/')
874 {
875 putc('/', fattrs);
876 if (fdefines)
877 putc('/', fdefines);
878 in_comment = 0;
879 }
880 }
881 else
882 c = getc(finput);
883 }
884 }
885 break;
886
887
888 case '{':
889 count++;
890 break;
891
892 case '}':
893 if (count == 0)
894 warn (_("unmatched close-brace (`}')"));
895 count--;
896 if (count <= 0)
897 {
898 fprintf(fattrs, " YYSTYPE;\n");
899 if (fdefines)
900 fprintf(fdefines, " YYSTYPE;\n");
901 /* JF don't choke on trailing semi */
902 c=skip_white_space();
903 if(c!=';') ungetc(c,finput);
904 return;
905 }
906 }
907
908 c = getc(finput);
909 }
910 }
911
912 /* parse the declaration %expect N which says to expect N
913 shift-reduce conflicts. */
914
915 void
916 parse_expect_decl (void)
917 {
918 register int c;
919 register int count;
920 char buffer[20];
921
922 c = getc(finput);
923 while (c == ' ' || c == '\t')
924 c = getc(finput);
925
926 count = 0;
927 while (c >= '0' && c <= '9')
928 {
929 if (count < 20)
930 buffer[count++] = c;
931 c = getc(finput);
932 }
933 buffer[count] = 0;
934
935 ungetc (c, finput);
936
937 if (count <= 0 || count > 10)
938 warn(_("argument of %expect is not an integer"));
939 expected_conflicts = atoi (buffer);
940 }
941
942 /* that's all of parsing the declaration section */
943 \f
944 /* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
945
946 char *
947 get_type_name (int n, symbol_list *rule)
948 {
949 static char *msg = N_("invalid $ value");
950
951 register int i;
952 register symbol_list *rp;
953
954 if (n < 0)
955 {
956 warn(_(msg));
957 return NULL;
958 }
959
960 rp = rule;
961 i = 0;
962
963 while (i < n)
964 {
965 rp = rp->next;
966 if (rp == NULL || rp->sym == NULL)
967 {
968 warn(_(msg));
969 return NULL;
970 }
971 i++;
972 }
973
974 return (rp->sym->type_name);
975 }
976
977
978 /* after %guard is seen in the input file,
979 copy the actual guard into the guards file.
980 If the guard is followed by an action, copy that into the actions file.
981 stack_offset is the number of values in the current rule so far,
982 which says where to find $0 with respect to the top of the stack,
983 for the simple parser in which the stack is not popped until after the guard is run. */
984
985 void
986 copy_guard (symbol_list *rule, int stack_offset)
987 {
988 register int c;
989 register int n;
990 register int count;
991 register int match;
992 register int ended;
993 register char *type_name;
994 int brace_flag = 0;
995 int cplus_comment;
996
997 /* offset is always 0 if parser has already popped the stack pointer */
998 if (semantic_parser) stack_offset = 0;
999
1000 fprintf(fguard, "\ncase %d:\n", nrules);
1001 if (!nolinesflag)
1002 fprintf(fguard, "#line %d \"%s\"\n", lineno, infile);
1003 putc('{', fguard);
1004
1005 count = 0;
1006 c = getc(finput);
1007
1008 while (brace_flag ? (count > 0) : (c != ';'))
1009 {
1010 switch (c)
1011 {
1012 case '\n':
1013 putc(c, fguard);
1014 lineno++;
1015 break;
1016
1017 case '{':
1018 putc(c, fguard);
1019 brace_flag = 1;
1020 count++;
1021 break;
1022
1023 case '}':
1024 putc(c, fguard);
1025 if (count > 0)
1026 count--;
1027 else
1028 {
1029 warn(_("unmatched right brace (`}')"));
1030 c = getc(finput); /* skip it */
1031 }
1032 break;
1033
1034 case '\'':
1035 case '"':
1036 match = c;
1037 putc(c, fguard);
1038 c = getc(finput);
1039
1040 while (c != match)
1041 {
1042 if (c == EOF)
1043 fatal(_("unterminated string at end of file"));
1044 if (c == '\n')
1045 {
1046 warn(_("unterminated string"));
1047 ungetc(c, finput);
1048 c = match; /* invent terminator */
1049 continue;
1050 }
1051
1052 putc(c, fguard);
1053
1054 if (c == '\\')
1055 {
1056 c = getc(finput);
1057 if (c == EOF)
1058 fatal(_("unterminated string"));
1059 putc(c, fguard);
1060 if (c == '\n')
1061 lineno++;
1062 }
1063
1064 c = getc(finput);
1065 }
1066
1067 putc(c, fguard);
1068 break;
1069
1070 case '/':
1071 putc(c, fguard);
1072 c = getc(finput);
1073 if (c != '*' && c != '/')
1074 continue;
1075
1076 cplus_comment = (c == '/');
1077 putc(c, fguard);
1078 c = getc(finput);
1079
1080 ended = 0;
1081 while (!ended)
1082 {
1083 if (!cplus_comment && c == '*')
1084 {
1085 while (c == '*')
1086 {
1087 putc(c, fguard);
1088 c = getc(finput);
1089 }
1090
1091 if (c == '/')
1092 {
1093 putc(c, fguard);
1094 ended = 1;
1095 }
1096 }
1097 else if (c == '\n')
1098 {
1099 lineno++;
1100 putc(c, fguard);
1101 if (cplus_comment)
1102 ended = 1;
1103 else
1104 c = getc(finput);
1105 }
1106 else if (c == EOF)
1107 fatal(_("unterminated comment"));
1108 else
1109 {
1110 putc(c, fguard);
1111 c = getc(finput);
1112 }
1113 }
1114
1115 break;
1116
1117 case '$':
1118 c = getc(finput);
1119 type_name = NULL;
1120
1121 if (c == '<')
1122 {
1123 register char *cp = token_buffer;
1124
1125 while ((c = getc(finput)) != '>' && c > 0)
1126 {
1127 if (cp == token_buffer + maxtoken)
1128 cp = grow_token_buffer(cp);
1129
1130 *cp++ = c;
1131 }
1132 *cp = 0;
1133 type_name = token_buffer;
1134
1135 c = getc(finput);
1136 }
1137
1138 if (c == '$')
1139 {
1140 fprintf(fguard, "yyval");
1141 if (!type_name) type_name = rule->sym->type_name;
1142 if (type_name)
1143 fprintf(fguard, ".%s", type_name);
1144 if(!type_name && typed)
1145 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1146 }
1147
1148 else if (isdigit(c) || c == '-')
1149 {
1150 ungetc (c, finput);
1151 n = read_signed_integer(finput);
1152 c = getc(finput);
1153
1154 if (!type_name && n > 0)
1155 type_name = get_type_name(n, rule);
1156
1157 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1158 if (type_name)
1159 fprintf(fguard, ".%s", type_name);
1160 if(!type_name && typed)
1161 warnss(_("$%s of `%s' has no declared type"), int_to_string(n), rule->sym->tag);
1162 continue;
1163 }
1164 else
1165 warns(_("$%s is invalid"), printable_version(c));
1166
1167 break;
1168
1169 case '@':
1170 c = getc(finput);
1171 if (isdigit(c) || c == '-')
1172 {
1173 ungetc (c, finput);
1174 n = read_signed_integer(finput);
1175 c = getc(finput);
1176 }
1177 else
1178 {
1179 warns(_("@%s is invalid"), printable_version(c));
1180 n = 1;
1181 }
1182
1183 fprintf(fguard, "yylsp[%d]", n - stack_offset);
1184 yylsp_needed = 1;
1185
1186 continue;
1187
1188 case EOF:
1189 fatal(_("unterminated %%guard clause"));
1190
1191 default:
1192 putc(c, fguard);
1193 }
1194
1195 if (c != '}' || count != 0)
1196 c = getc(finput);
1197 }
1198
1199 c = skip_white_space();
1200
1201 fprintf(fguard, ";\n break;}");
1202 if (c == '{')
1203 copy_action(rule, stack_offset);
1204 else if (c == '=')
1205 {
1206 c = getc(finput); /* why not skip_white_space -wjh */
1207 if (c == '{')
1208 copy_action(rule, stack_offset);
1209 }
1210 else
1211 ungetc(c, finput);
1212 }
1213
1214
1215
1216 /* Assuming that a { has just been seen, copy everything up to the matching }
1217 into the actions file.
1218 stack_offset is the number of values in the current rule so far,
1219 which says where to find $0 with respect to the top of the stack. */
1220
1221 void
1222 copy_action (symbol_list *rule, int stack_offset)
1223 {
1224 register int c;
1225 register int n;
1226 register int count;
1227 register int match;
1228 register int ended;
1229 register char *type_name;
1230 int cplus_comment;
1231
1232 /* offset is always 0 if parser has already popped the stack pointer */
1233 if (semantic_parser) stack_offset = 0;
1234
1235 fprintf(faction, "\ncase %d:\n", nrules);
1236 if (!nolinesflag)
1237 fprintf(faction, "#line %d \"%s\"\n", lineno, infile);
1238 putc('{', faction);
1239
1240 count = 1;
1241 c = getc(finput);
1242
1243 while (count > 0)
1244 {
1245 while (c != '}')
1246 {
1247 switch (c)
1248 {
1249 case '\n':
1250 putc(c, faction);
1251 lineno++;
1252 break;
1253
1254 case '{':
1255 putc(c, faction);
1256 count++;
1257 break;
1258
1259 case '\'':
1260 case '"':
1261 match = c;
1262 putc(c, faction);
1263 c = getc(finput);
1264
1265 while (c != match)
1266 {
1267 if (c == '\n')
1268 {
1269 warn(_("unterminated string"));
1270 ungetc(c, finput);
1271 c = match;
1272 continue;
1273 }
1274 else if (c == EOF)
1275 fatal(_("unterminated string at end of file"));
1276
1277 putc(c, faction);
1278
1279 if (c == '\\')
1280 {
1281 c = getc(finput);
1282 if (c == EOF)
1283 fatal(_("unterminated string"));
1284 putc(c, faction);
1285 if (c == '\n')
1286 lineno++;
1287 }
1288
1289 c = getc(finput);
1290 }
1291
1292 putc(c, faction);
1293 break;
1294
1295 case '/':
1296 putc(c, faction);
1297 c = getc(finput);
1298 if (c != '*' && c != '/')
1299 continue;
1300
1301 cplus_comment = (c == '/');
1302 putc(c, faction);
1303 c = getc(finput);
1304
1305 ended = 0;
1306 while (!ended)
1307 {
1308 if (!cplus_comment && c == '*')
1309 {
1310 while (c == '*')
1311 {
1312 putc(c, faction);
1313 c = getc(finput);
1314 }
1315
1316 if (c == '/')
1317 {
1318 putc(c, faction);
1319 ended = 1;
1320 }
1321 }
1322 else if (c == '\n')
1323 {
1324 lineno++;
1325 putc(c, faction);
1326 if (cplus_comment)
1327 ended = 1;
1328 else
1329 c = getc(finput);
1330 }
1331 else if (c == EOF)
1332 fatal(_("unterminated comment"));
1333 else
1334 {
1335 putc(c, faction);
1336 c = getc(finput);
1337 }
1338 }
1339
1340 break;
1341
1342 case '$':
1343 c = getc(finput);
1344 type_name = NULL;
1345
1346 if (c == '<')
1347 {
1348 register char *cp = token_buffer;
1349
1350 while ((c = getc(finput)) != '>' && c > 0)
1351 {
1352 if (cp == token_buffer + maxtoken)
1353 cp = grow_token_buffer(cp);
1354
1355 *cp++ = c;
1356 }
1357 *cp = 0;
1358 type_name = token_buffer;
1359 value_components_used = 1;
1360
1361 c = getc(finput);
1362 }
1363 if (c == '$')
1364 {
1365 fprintf(faction, "yyval");
1366 if (!type_name) type_name = get_type_name(0, rule);
1367 if (type_name)
1368 fprintf(faction, ".%s", type_name);
1369 if(!type_name && typed)
1370 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1371 }
1372 else if (isdigit(c) || c == '-')
1373 {
1374 ungetc (c, finput);
1375 n = read_signed_integer(finput);
1376 c = getc(finput);
1377
1378 if (!type_name && n > 0)
1379 type_name = get_type_name(n, rule);
1380
1381 fprintf(faction, "yyvsp[%d]", n - stack_offset);
1382 if (type_name)
1383 fprintf(faction, ".%s", type_name);
1384 if(!type_name && typed)
1385 warnss(_("$%s of `%s' has no declared type"),
1386 int_to_string(n), rule->sym->tag);
1387 continue;
1388 }
1389 else
1390 warns(_("$%s is invalid"), printable_version(c));
1391
1392 break;
1393
1394 case '@':
1395 c = getc(finput);
1396 if (isdigit(c) || c == '-')
1397 {
1398 ungetc (c, finput);
1399 n = read_signed_integer(finput);
1400 c = getc(finput);
1401 }
1402 else
1403 {
1404 warn(_("invalid @-construct"));
1405 n = 1;
1406 }
1407
1408 fprintf(faction, "yylsp[%d]", n - stack_offset);
1409 yylsp_needed = 1;
1410
1411 continue;
1412
1413 case EOF:
1414 fatal(_("unmatched `{'"));
1415
1416 default:
1417 putc(c, faction);
1418 }
1419
1420 c = getc(finput);
1421 }
1422
1423 /* above loop exits when c is '}' */
1424
1425 if (--count)
1426 {
1427 putc(c, faction);
1428 c = getc(finput);
1429 }
1430 }
1431
1432 fprintf(faction, ";\n break;}");
1433 }
1434
1435
1436
1437 /* generate a dummy symbol, a nonterminal,
1438 whose name cannot conflict with the user's names. */
1439
1440 bucket *
1441 gensym (void)
1442 {
1443 register bucket *sym;
1444
1445 sprintf (token_buffer, "@%d", ++gensym_count);
1446 sym = getsym(token_buffer);
1447 sym->class = SNTERM;
1448 sym->value = nvars++;
1449 return (sym);
1450 }
1451
1452 /* Parse the input grammar into a one symbol_list structure.
1453 Each rule is represented by a sequence of symbols: the left hand side
1454 followed by the contents of the right hand side, followed by a null pointer
1455 instead of a symbol to terminate the rule.
1456 The next symbol is the lhs of the following rule.
1457
1458 All guards and actions are copied out to the appropriate files,
1459 labelled by the rule number they apply to. */
1460
1461 void
1462 readgram (void)
1463 {
1464 register int t;
1465 register bucket *lhs = NULL;
1466 register symbol_list *p;
1467 register symbol_list *p1;
1468 register bucket *bp;
1469
1470 symbol_list *crule; /* points to first symbol_list of current rule. */
1471 /* its symbol is the lhs of the rule. */
1472 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1473
1474 p1 = NULL;
1475
1476 t = lex();
1477
1478 while (t != TWO_PERCENTS && t != ENDFILE)
1479 {
1480 if (t == IDENTIFIER || t == BAR)
1481 {
1482 register int actionflag = 0;
1483 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1484 int xactions = 0; /* JF for error checking */
1485 bucket *first_rhs = 0;
1486
1487 if (t == IDENTIFIER)
1488 {
1489 lhs = symval;
1490
1491 if (!start_flag)
1492 {
1493 startval = lhs;
1494 start_flag = 1;
1495 }
1496
1497 t = lex();
1498 if (t != COLON)
1499 {
1500 warn(_("ill-formed rule: initial symbol not followed by colon"));
1501 unlex(t);
1502 }
1503 }
1504
1505 if (nrules == 0 && t == BAR)
1506 {
1507 warn(_("grammar starts with vertical bar"));
1508 lhs = symval; /* BOGUS: use a random symval */
1509 }
1510 /* start a new rule and record its lhs. */
1511
1512 nrules++;
1513 nitems++;
1514
1515 record_rule_line ();
1516
1517 p = NEW(symbol_list);
1518 p->sym = lhs;
1519
1520 crule1 = p1;
1521 if (p1)
1522 p1->next = p;
1523 else
1524 grammar = p;
1525
1526 p1 = p;
1527 crule = p;
1528
1529 /* mark the rule's lhs as a nonterminal if not already so. */
1530
1531 if (lhs->class == SUNKNOWN)
1532 {
1533 lhs->class = SNTERM;
1534 lhs->value = nvars;
1535 nvars++;
1536 }
1537 else if (lhs->class == STOKEN)
1538 warns(_("rule given for %s, which is a token"), lhs->tag);
1539
1540 /* read the rhs of the rule. */
1541
1542 for (;;)
1543 {
1544 t = lex();
1545 if (t == PREC)
1546 {
1547 t = lex();
1548 crule->ruleprec = symval;
1549 t = lex();
1550 }
1551
1552 if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1553
1554 /* If next token is an identifier, see if a colon follows it.
1555 If one does, exit this rule now. */
1556 if (t == IDENTIFIER)
1557 {
1558 register bucket *ssave;
1559 register int t1;
1560
1561 ssave = symval;
1562 t1 = lex();
1563 unlex(t1);
1564 symval = ssave;
1565 if (t1 == COLON) break;
1566
1567 if(!first_rhs) /* JF */
1568 first_rhs = symval;
1569 /* Not followed by colon =>
1570 process as part of this rule's rhs. */
1571 }
1572
1573 /* If we just passed an action, that action was in the middle
1574 of a rule, so make a dummy rule to reduce it to a
1575 non-terminal. */
1576 if (actionflag)
1577 {
1578 register bucket *sdummy;
1579
1580 /* Since the action was written out with this rule's */
1581 /* number, we must give the new rule this number */
1582 /* by inserting the new rule before it. */
1583
1584 /* Make a dummy nonterminal, a gensym. */
1585 sdummy = gensym();
1586
1587 /* Make a new rule, whose body is empty,
1588 before the current one, so that the action
1589 just read can belong to it. */
1590 nrules++;
1591 nitems++;
1592 record_rule_line ();
1593 p = NEW(symbol_list);
1594 if (crule1)
1595 crule1->next = p;
1596 else grammar = p;
1597 p->sym = sdummy;
1598 crule1 = NEW(symbol_list);
1599 p->next = crule1;
1600 crule1->next = crule;
1601
1602 /* insert the dummy generated by that rule into this rule. */
1603 nitems++;
1604 p = NEW(symbol_list);
1605 p->sym = sdummy;
1606 p1->next = p;
1607 p1 = p;
1608
1609 actionflag = 0;
1610 }
1611
1612 if (t == IDENTIFIER)
1613 {
1614 nitems++;
1615 p = NEW(symbol_list);
1616 p->sym = symval;
1617 p1->next = p;
1618 p1 = p;
1619 }
1620 else /* handle an action. */
1621 {
1622 copy_action(crule, rulelength);
1623 actionflag = 1;
1624 xactions++; /* JF */
1625 }
1626 rulelength++;
1627 } /* end of read rhs of rule */
1628
1629 /* Put an empty link in the list to mark the end of this rule */
1630 p = NEW(symbol_list);
1631 p1->next = p;
1632 p1 = p;
1633
1634 if (t == PREC)
1635 {
1636 warn(_("two @prec's in a row"));
1637 t = lex();
1638 crule->ruleprec = symval;
1639 t = lex();
1640 }
1641 if (t == GUARD)
1642 {
1643 if (! semantic_parser)
1644 warn(_("%%guard present but %%semantic_parser not specified"));
1645
1646 copy_guard(crule, rulelength);
1647 t = lex();
1648 }
1649 else if (t == LEFT_CURLY)
1650 {
1651 /* This case never occurs -wjh */
1652 if (actionflag) warn(_("two actions at end of one rule"));
1653 copy_action(crule, rulelength);
1654 actionflag = 1;
1655 xactions++; /* -wjh */
1656 t = lex();
1657 }
1658 /* If $$ is being set in default way,
1659 warn if any type mismatch. */
1660 else if (!xactions && first_rhs && lhs->type_name != first_rhs->type_name)
1661 {
1662 if (lhs->type_name == 0 || first_rhs->type_name == 0
1663 || strcmp(lhs->type_name,first_rhs->type_name))
1664 warnss(_("type clash (`%s' `%s') on default action"),
1665 lhs->type_name ? lhs->type_name : "",
1666 first_rhs->type_name ? first_rhs->type_name : "");
1667 }
1668 /* Warn if there is no default for $$ but we need one. */
1669 else if (!xactions && !first_rhs && lhs->type_name != 0)
1670 warn(_("empty rule for typed nonterminal, and no action"));
1671 if (t == SEMICOLON)
1672 t = lex();
1673 }
1674 #if 0
1675 /* these things can appear as alternatives to rules. */
1676 /* NO, they cannot.
1677 a) none of the documentation allows them
1678 b) most of them scan forward until finding a next %
1679 thus they may swallow lots of intervening rules
1680 */
1681 else if (t == TOKEN)
1682 {
1683 parse_token_decl(STOKEN, SNTERM);
1684 t = lex();
1685 }
1686 else if (t == NTERM)
1687 {
1688 parse_token_decl(SNTERM, STOKEN);
1689 t = lex();
1690 }
1691 else if (t == TYPE)
1692 {
1693 t = get_type();
1694 }
1695 else if (t == UNION)
1696 {
1697 parse_union_decl();
1698 t = lex();
1699 }
1700 else if (t == EXPECT)
1701 {
1702 parse_expect_decl();
1703 t = lex();
1704 }
1705 else if (t == START)
1706 {
1707 parse_start_decl();
1708 t = lex();
1709 }
1710 #endif
1711
1712 else
1713 {
1714 warns(_("invalid input: %s"), token_buffer);
1715 t = lex();
1716 }
1717 }
1718
1719 /* grammar has been read. Do some checking */
1720
1721 if (nsyms > MAXSHORT)
1722 fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
1723 int_to_string(MAXSHORT));
1724 if (nrules == 0)
1725 fatal(_("no rules in the input grammar"));
1726
1727 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1728 && !value_components_used)
1729 {
1730 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1731 but it seems better to be consistent.
1732 Most programs should declare their own type anyway. */
1733 fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1734 if (fdefines)
1735 fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1736 }
1737
1738 /* Report any undefined symbols and consider them nonterminals. */
1739
1740 for (bp = firstsymbol; bp; bp = bp->next)
1741 if (bp->class == SUNKNOWN)
1742 {
1743 warns(_("symbol %s is used, but is not defined as a token and has no rules"),
1744 bp->tag);
1745 bp->class = SNTERM;
1746 bp->value = nvars++;
1747 }
1748
1749 ntokens = nsyms - nvars;
1750 }
1751
1752
1753 void
1754 record_rule_line (void)
1755 {
1756 /* Record each rule's source line number in rline table. */
1757
1758 if (nrules >= rline_allocated)
1759 {
1760 rline_allocated = nrules * 2;
1761 rline = (short *) xrealloc ((char *) rline,
1762 rline_allocated * sizeof (short));
1763 }
1764 rline[nrules] = lineno;
1765 }
1766
1767
#if 0
/* Read a %type declaration and record the type name on every symbol
   it lists, for get_type_name to access.  Returns the first token that
   is not part of the declaration.
   This is unused: it is only called from the #if 0 part of readgram.  */
static int
get_type (void)
{
  register int len;
  register int tok;
  register char *type;

  tok = lex ();
  if (tok != TYPENAME)
    {
      warn (_("ill-formed %type declaration"));
      return tok;
    }

  /* Keep a private copy of the type name; token_buffer is reused by
     the following calls to lex.  */
  len = strlen (token_buffer);
  type = NEW2 (len + 1, char);
  strcpy (type, token_buffer);

  for (;;)
    {
      tok = lex ();

      /* A semicolon ends the declaration: hand back the token after it.  */
      if (tok == SEMICOLON)
        return (lex ());

      /* Commas between the symbols are simply skipped.  */
      if (tok == COMMA)
        continue;

      /* Anything else that is not a symbol ends the declaration.  */
      if (tok != IDENTIFIER)
        return (tok);

      if (symval->type_name == NULL)
        symval->type_name = type;
      else if (strcmp (type, symval->type_name) != 0)
        warns (_("type redeclaration for %s"), symval->tag);
    }
}
#endif
1816
1817
1818 /* assign symbol numbers, and write definition of token names into fdefines.
1819 Set up vectors tags and sprec of names and precedences of symbols. */
1820
1821 void
1822 packsymbols (void)
1823 {
1824 register bucket *bp;
1825 register int tokno = 1;
1826 register int i;
1827 register int last_user_token_number;
1828
1829 /* int lossage = 0; JF set but not used */
1830
1831 tags = NEW2(nsyms + 1, char *);
1832 tags[0] = "$";
1833 user_toknums = NEW2(nsyms + 1, int);
1834 user_toknums[0] = 0;
1835
1836 sprec = NEW2(nsyms, short);
1837 sassoc = NEW2(nsyms, short);
1838
1839 max_user_token_number = 256;
1840 last_user_token_number = 256;
1841
1842 for (bp = firstsymbol; bp; bp = bp->next)
1843 {
1844 if (bp->class == SNTERM)
1845 {
1846 bp->value += ntokens;
1847 }
1848 else if (bp->alias)
1849 {
1850 /* this symbol and its alias are a single token defn.
1851 allocate a tokno, and assign to both
1852 check agreement of ->prec and ->assoc fields
1853 and make both the same
1854 */
1855 if (bp->value == 0)
1856 bp->value = bp->alias->value = tokno++;
1857
1858 if (bp->prec != bp->alias->prec) {
1859 if (bp->prec != 0 && bp->alias->prec != 0
1860 && bp->user_token_number == SALIAS)
1861 warnss(_("conflicting precedences for %s and %s"),
1862 bp->tag, bp->alias->tag);
1863 if (bp->prec != 0) bp->alias->prec = bp->prec;
1864 else bp->prec = bp->alias->prec;
1865 }
1866
1867 if (bp->assoc != bp->alias->assoc) {
1868 if (bp->assoc != 0 && bp->alias->assoc != 0
1869 && bp->user_token_number == SALIAS)
1870 warnss(_("conflicting assoc values for %s and %s"),
1871 bp->tag, bp->alias->tag);
1872 if (bp->assoc != 0) bp->alias->assoc = bp->assoc;
1873 else bp->assoc = bp->alias->assoc;
1874 }
1875
1876 if (bp->user_token_number == SALIAS)
1877 continue; /* do not do processing below for SALIASs */
1878
1879 }
1880 else /* bp->class == STOKEN */
1881 {
1882 bp->value = tokno++;
1883 }
1884
1885 if (bp->class == STOKEN)
1886 {
1887 if (translations && !(bp->user_token_number))
1888 bp->user_token_number = ++last_user_token_number;
1889 if (bp->user_token_number > max_user_token_number)
1890 max_user_token_number = bp->user_token_number;
1891 }
1892
1893 tags[bp->value] = bp->tag;
1894 user_toknums[bp->value] = bp->user_token_number;
1895 sprec[bp->value] = bp->prec;
1896 sassoc[bp->value] = bp->assoc;
1897
1898 }
1899
1900 if (translations)
1901 {
1902 register int i;
1903
1904 token_translations = NEW2(max_user_token_number+1, short);
1905
1906 /* initialize all entries for literal tokens to 2,
1907 the internal token number for $undefined.,
1908 which represents all invalid inputs. */
1909 for (i = 0; i <= max_user_token_number; i++)
1910 token_translations[i] = 2;
1911
1912 for (bp = firstsymbol; bp; bp = bp->next)
1913 {
1914 if (bp->value >= ntokens) continue; /* non-terminal */
1915 if (bp->user_token_number == SALIAS) continue;
1916 if (token_translations[bp->user_token_number] != 2)
1917 warnsss(_("tokens %s and %s both assigned number %s"),
1918 tags[token_translations[bp->user_token_number]],
1919 bp->tag,
1920 int_to_string(bp->user_token_number));
1921 token_translations[bp->user_token_number] = bp->value;
1922 }
1923 }
1924
1925 error_token_number = errtoken->value;
1926
1927 if (! noparserflag)
1928 output_token_defines(ftable);
1929
1930 if (startval->class == SUNKNOWN)
1931 fatals(_("the start symbol %s is undefined"), startval->tag);
1932 else if (startval->class == STOKEN)
1933 fatals(_("the start symbol %s is a token"), startval->tag);
1934
1935 start_symbol = startval->value;
1936
1937 if (definesflag)
1938 {
1939 output_token_defines(fdefines);
1940
1941 if (!pure_parser)
1942 {
1943 if (spec_name_prefix)
1944 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1945 else
1946 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1947 }
1948
1949 if (semantic_parser)
1950 for (i = ntokens; i < nsyms; i++)
1951 {
1952 /* don't make these for dummy nonterminals made by gensym. */
1953 if (*tags[i] != '@')
1954 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1955 }
1956 #if 0
1957 /* `fdefines' is now a temporary file, so we need to copy its
1958 contents in `done', so we can't close it here. */
1959 fclose(fdefines);
1960 fdefines = NULL;
1961 #endif
1962 }
1963 }
1964
1965 /* For named tokens, but not literal ones, define the name.
1966 The value is the user token number.
1967 */
1968 void
1969 output_token_defines (FILE *file)
1970 {
1971 bucket *bp;
1972 register char *cp, *symbol;
1973 register char c;
1974
1975 for (bp = firstsymbol; bp; bp = bp->next)
1976 {
1977 symbol = bp->tag; /* get symbol */
1978
1979 if (bp->value >= ntokens) continue;
1980 if (bp->user_token_number == SALIAS) continue;
1981 if ('\'' == *symbol) continue; /* skip literal character */
1982 if (bp == errtoken) continue; /* skip error token */
1983 if ('\"' == *symbol)
1984 {
1985 /* use literal string only if given a symbol with an alias */
1986 if (bp->alias)
1987 symbol = bp->alias->tag;
1988 else
1989 continue;
1990 }
1991
1992 /* Don't #define nonliteral tokens whose names contain periods. */
1993 cp = symbol;
1994 while ((c = *cp++) && c != '.');
1995 if (c != '\0') continue;
1996
1997 fprintf(file, "#define\t%s\t%d\n", symbol,
1998 ((translations && ! rawtoknumflag)
1999 ? bp->user_token_number
2000 : bp->value));
2001 if (semantic_parser)
2002 fprintf(file, "#define\tT%s\t%d\n", symbol, bp->value);
2003 }
2004
2005 putc('\n', file);
2006 }
2007
2008
2009
2010 /* convert the rules into the representation using rrhs, rlhs and ritems. */
2011
2012 void
2013 packgram (void)
2014 {
2015 register int itemno;
2016 register int ruleno;
2017 register symbol_list *p;
2018 /* register bucket *bp; JF unused */
2019
2020 bucket *ruleprec;
2021
2022 ritem = NEW2(nitems + 1, short);
2023 rlhs = NEW2(nrules, short) - 1;
2024 rrhs = NEW2(nrules, short) - 1;
2025 rprec = NEW2(nrules, short) - 1;
2026 rprecsym = NEW2(nrules, short) - 1;
2027 rassoc = NEW2(nrules, short) - 1;
2028
2029 itemno = 0;
2030 ruleno = 1;
2031
2032 p = grammar;
2033 while (p)
2034 {
2035 rlhs[ruleno] = p->sym->value;
2036 rrhs[ruleno] = itemno;
2037 ruleprec = p->ruleprec;
2038
2039 p = p->next;
2040 while (p && p->sym)
2041 {
2042 ritem[itemno++] = p->sym->value;
2043 /* A rule gets by default the precedence and associativity
2044 of the last token in it. */
2045 if (p->sym->class == STOKEN)
2046 {
2047 rprec[ruleno] = p->sym->prec;
2048 rassoc[ruleno] = p->sym->assoc;
2049 }
2050 if (p) p = p->next;
2051 }
2052
2053 /* If this rule has a %prec,
2054 the specified symbol's precedence replaces the default. */
2055 if (ruleprec)
2056 {
2057 rprec[ruleno] = ruleprec->prec;
2058 rassoc[ruleno] = ruleprec->assoc;
2059 rprecsym[ruleno] = ruleprec->value;
2060 }
2061
2062 ritem[itemno++] = -ruleno;
2063 ruleno++;
2064
2065 if (p) p = p->next;
2066 }
2067
2068 ritem[itemno] = 0;
2069 }
2070 \f
/* Read a signed integer from STREAM and return its value.
   An optional leading `-' is accepted, followed by decimal digits;
   when no digit is present the result is 0.  The first character that
   is not part of the number is pushed back onto STREAM.  */

int
read_signed_integer (FILE *stream)
{
  int ch = getc (stream);
  int negative = 0;
  int magnitude = 0;

  if (ch == '-')
    {
      negative = 1;
      ch = getc (stream);
    }

  for (; isdigit (ch); ch = getc (stream))
    magnitude = magnitude * 10 + (ch - '0');

  /* CH ended the number; give it back to the caller.  */
  ungetc (ch, stream);

  return negative ? -magnitude : magnitude;
}