]> git.saurik.com Git - bison.git/blob - src/reader.c
3f7cb0e347098e384b80b423a835196511c50eae
[bison.git] / src / reader.c
1 /* Input parser for bison
2 Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
/* Read in the grammar specification and record it in the format described in gram.h.
   All guards are copied into the fguard file and all actions into faction,
   in each case forming the body of a C function (yyguard or yyaction)
   which contains a switch statement to decide which guard or action to execute.

   The entry point is reader().  */
27
28 #include <stdio.h>
29 #include <ctype.h>
30 #include "system.h"
31 #include "files.h"
32 #include "new.h"
33 #include "symtab.h"
34 #include "lex.h"
35 #include "gram.h"
36 #include "machine.h"
37
38 #define LTYPESTR "\n#ifndef YYLTYPE\ntypedef\n struct yyltype\n\
39 {\n int timestamp;\n int first_line;\n int first_column;\
40 \n int last_line;\n int last_column;\n char *text;\n }\n\
41 yyltype;\n\n#define YYLTYPE yyltype\n#endif\n\n"
42
43 /* Number of slots allocated (but not necessarily used yet) in `rline' */
44 int rline_allocated;
45
46 extern char *program_name;
47 extern int definesflag;
48 extern int nolinesflag;
49 extern bucket *symval;
50 extern int numval;
51 extern int failure;
52 extern int expected_conflicts;
53 extern char *token_buffer;
54
55 extern int atoi ();
56
57 extern void init_lex();
58 extern void tabinit();
59 extern void output_headers();
60 extern void output_trailers();
61 extern void free_symtab();
62 extern void open_extra_files();
63 extern void fatal();
64 extern void fatals();
65 extern void unlex();
66 extern void done();
67
68 extern int skip_white_space();
69 extern int parse_percent_token();
70 extern int lex();
71
72 void read_declarations();
73 void copy_definition();
74 void parse_token_decl();
75 void parse_start_decl();
76 void parse_type_decl();
77 void parse_assoc_decl();
78 void parse_union_decl();
79 void parse_expect_decl();
80 void copy_action();
81 void readgram();
82 void record_rule_line();
83 void packsymbols();
84 void output_token_defines();
85 void packgram();
86 int read_signed_integer();
87 int get_type();
88
/* One link of the grammar as it is read in.  readgram strings all the
   rules together into a single chain of these links.  */
typedef
  struct symbol_list
    {
      struct symbol_list *next;   /* following link in the chain */
      bucket *sym;                /* symbol at this position; per readgram's
                                     description the link that ends a rule
                                     has a null pointer here */
      bucket *ruleprec;           /* stored by readgram on a rule's first link
                                     when the rule carries %prec */
    }
  symbol_list;
97
98
99
100 int lineno;
101 symbol_list *grammar;
102 int start_flag;
103 bucket *startval;
104 char **tags;
105
106 /* Nonzero if components of semantic values are used, implying
107 they must be unions. */
108 static int value_components_used;
109
110 static int typed; /* nonzero if %union has been seen. */
111
112 static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
113
114 static int gensym_count; /* incremented for each generated symbol */
115
116 static bucket *errtoken;
117
118 /* Nonzero if any action or guard uses the @n construct. */
119 static int yylsp_needed;
120
121 extern char *version_string;
122
123 void
124 reader()
125 {
126 start_flag = 0;
127 startval = NULL; /* start symbol not specified yet. */
128
129 #if 0
130 translations = 0; /* initially assume token number translation not needed. */
131 #endif
132 /* Nowadays translations is always set to 1,
133 since we give `error' a user-token-number
134 to satisfy the Posix demand for YYERRCODE==256. */
135 translations = 1;
136
137 nsyms = 1;
138 nvars = 0;
139 nrules = 0;
140 nitems = 0;
141 rline_allocated = 10;
142 rline = NEW2(rline_allocated, short);
143
144 typed = 0;
145 lastprec = 0;
146
147 gensym_count = 0;
148
149 semantic_parser = 0;
150 pure_parser = 0;
151 yylsp_needed = 0;
152
153 grammar = NULL;
154
155 init_lex();
156 lineno = 1;
157
158 /* initialize the symbol table. */
159 tabinit();
160 /* construct the error token */
161 errtoken = getsym("error");
162 errtoken->class = STOKEN;
163 errtoken->user_token_number = 256; /* Value specified by posix. */
164 /* construct a token that represents all undefined literal tokens. */
165 /* it is always token number 2. */
166 getsym("$illegal.")->class = STOKEN;
167 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
168 Also notice any %token, %left, etc. found there. */
169 fprintf(ftable, "\n/* A Bison parser, made from %s", infile);
170 fprintf(ftable, " with Bison version %s */\n\n", version_string);
171 fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
172 read_declarations();
173 /* output the definition of YYLTYPE into the fattrs and fdefines files. */
174 /* fattrs winds up in the .tab.c file, before bison.simple. */
175 fprintf(fattrs, LTYPESTR);
176 /* start writing the guard and action files, if they are needed. */
177 output_headers();
178 /* read in the grammar, build grammar in list form. write out guards and actions. */
179 readgram();
180 /* Now we know whether we need the line-number stack.
181 If we do, write its type into the .tab.h file. */
182 if (yylsp_needed)
183 {
184 if (fdefines)
185 fprintf(fdefines, LTYPESTR);
186 }
187 /* write closing delimiters for actions and guards. */
188 output_trailers();
189 if (yylsp_needed)
190 fprintf(ftable, "#define YYLSP_NEEDED\n\n");
191 /* assign the symbols their symbol numbers.
192 Write #defines for the token symbols into fdefines if requested. */
193 packsymbols();
194 /* convert the grammar into the format described in gram.h. */
195 packgram();
196 /* free the symbol table data structure
197 since symbols are now all referred to by symbol number. */
198 free_symtab();
199 }
200
201
202
203 /* read from finput until %% is seen. Discard the %%.
204 Handle any % declarations,
205 and copy the contents of any %{ ... %} groups to fattrs. */
206
207 void
208 read_declarations ()
209 {
210 register int c;
211 register int tok;
212
213 for (;;)
214 {
215 c = skip_white_space();
216
217 if (c == '%')
218 {
219 tok = parse_percent_token();
220
221 switch (tok)
222 {
223 case TWO_PERCENTS:
224 return;
225
226 case PERCENT_LEFT_CURLY:
227 copy_definition();
228 break;
229
230 case TOKEN:
231 parse_token_decl (STOKEN, SNTERM);
232 break;
233
234 case NTERM:
235 parse_token_decl (SNTERM, STOKEN);
236 break;
237
238 case TYPE:
239 parse_type_decl();
240 break;
241
242 case START:
243 parse_start_decl();
244 break;
245
246 case UNION:
247 parse_union_decl();
248 break;
249
250 case EXPECT:
251 parse_expect_decl();
252 break;
253
254 case LEFT:
255 parse_assoc_decl(LEFT_ASSOC);
256 break;
257
258 case RIGHT:
259 parse_assoc_decl(RIGHT_ASSOC);
260 break;
261
262 case NONASSOC:
263 parse_assoc_decl(NON_ASSOC);
264 break;
265
266 case SEMANTIC_PARSER:
267 if (semantic_parser == 0)
268 {
269 semantic_parser = 1;
270 open_extra_files();
271 }
272 break;
273
274 case PURE_PARSER:
275 pure_parser = 1;
276 break;
277
278 default:
279 fatal("junk after `%%' in definition section");
280 }
281 }
282 else if (c == EOF)
283 fatal("no input grammar");
284 else if (c >= 040 && c <= 0177)
285 fatals ("unknown character `%c' in declaration section", c);
286 else
287 fatals ("unknown character with code 0x%x in declaration section", c);
288 }
289 }
290
291
/* Copy the contents of a %{ ... %} group from finput to fattrs.
   The `%{' has already been read.  Return after reading the `%}',
   which is not copied.

   String and character literals and comments are copied by inner loops
   that do not look for `%', so a `%}' inside one of them does not end
   the group.  lineno is advanced for every newline that is passed.  */

void
copy_definition ()
{
  register int c;
  register int match;
  register int ended;
  register int after_percent;  /* -1 while reading a character if prev char was % */
  int cplus_comment;

  /* Unless suppressed, emit a #line directive naming the grammar file
     and the current line ahead of the copied code.  */
  if (!nolinesflag)
    fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);

  after_percent = 0;

  c = getc(finput);

  /* Each pass dispatches on c, which has been read but not yet copied.  */
  for (;;)
    {
      switch (c)
        {
        case '\n':
          putc(c, fattrs);
          lineno++;
          break;

        case '%':
          /* Hold the `%' back: whether it is copied depends on the next
             character, examined at the bottom of the loop.  */
          after_percent = -1;
          break;

        case '\'':
        case '"':
          /* Copy a quoted literal through its closing quote.  */
          match = c;
          putc(c, fattrs);
          c = getc(finput);

          while (c != match)
            {
              if (c == EOF || c == '\n')
                fatal("unterminated string");

              putc(c, fattrs);

              /* The character after a backslash is copied blindly, so an
                 escaped quote does not end the literal and an escaped
                 newline is allowed (and counted).  */
              if (c == '\\')
                {
                  c = getc(finput);
                  if (c == EOF)
                    fatal("unterminated string");
                  putc(c, fattrs);
                  if (c == '\n')
                    lineno++;
                }

              c = getc(finput);
            }

          putc(c, fattrs);
          break;

        case '/':
          putc(c, fattrs);
          c = getc(finput);
          /* Not a comment: c now holds a character that has not been
             handled, so go straight back to the dispatch without
             reading another one.  */
          if (c != '*' && c != '/')
            continue;

          cplus_comment = (c == '/');
          putc(c, fattrs);
          c = getc(finput);

          /* Copy the comment body.  A `//' comment ends at a newline;
             a C comment ends at a run of stars followed by `/'.  */
          ended = 0;
          while (!ended)
            {
              if (!cplus_comment && c == '*')
                {
                  while (c == '*')
                    {
                      putc(c, fattrs);
                      c = getc(finput);
                    }

                  /* If the stars were not followed by `/', c is left
                     for the next pass of this loop to handle.  */
                  if (c == '/')
                    {
                      putc(c, fattrs);
                      ended = 1;
                    }
                }
              else if (c == '\n')
                {
                  lineno++;
                  putc(c, fattrs);
                  if (cplus_comment)
                    ended = 1;
                  else
                    c = getc(finput);
                }
              else if (c == EOF)
                fatal("unterminated comment in `%{' definition");
              else
                {
                  putc(c, fattrs);
                  c = getc(finput);
                }
            }

          break;

        case EOF:
          /* NOTE(review): there is no break here; this relies on fatal
             not returning -- confirm against its definition.  */
          fatal("unterminated `%{' definition");

        default:
          putc(c, fattrs);
        }

      c = getc(finput);

      /* Resolve a held-back `%': `%}' ends the group, anything else
         means the `%' was ordinary text and is copied now.  */
      if (after_percent)
        {
          if (c == '}')
            return;
          putc('%', fattrs);
        }
      after_percent = 0;

    }

}
420
421
422
423 /* parse what comes after %token or %nterm.
424 For %token, what_is is STOKEN and what_is_not is SNTERM.
425 For %nterm, the arguments are reversed. */
426
427 void
428 parse_token_decl (what_is, what_is_not)
429 int what_is, what_is_not;
430 {
431 /* register int start_lineno; JF */
432 register int token = 0;
433 register int prev;
434 register char *typename = 0;
435 int k;
436
437 /* start_lineno = lineno; JF */
438
439 for (;;)
440 {
441 if(ungetc(skip_white_space(), finput) == '%')
442 return;
443
444 /* if (lineno != start_lineno)
445 return; JF */
446
447 /* we have not passed a newline, so the token now starting is in this declaration */
448 prev = token;
449
450 token = lex();
451 if (token == COMMA)
452 continue;
453 if (token == TYPENAME)
454 {
455 k = strlen(token_buffer);
456 typename = NEW2(k + 1, char);
457 strcpy(typename, token_buffer);
458 value_components_used = 1;
459 }
460 else if (token == IDENTIFIER)
461 {
462 int oldclass = symval->class;
463
464 if (symval->class == what_is_not)
465 fatals("symbol %s redefined", symval->tag);
466 symval->class = what_is;
467 if (what_is == SNTERM && oldclass != SNTERM)
468 symval->value = nvars++;
469
470 if (typename)
471 {
472 if (symval->type_name == NULL)
473 symval->type_name = typename;
474 else
475 fatals("type redeclaration for %s", symval->tag);
476 }
477 }
478 else if (prev == IDENTIFIER && token == NUMBER)
479 {
480 symval->user_token_number = numval;
481 translations = 1;
482 }
483 else
484 fatal("invalid text in %token or %nterm declaration");
485 }
486
487 }
488
489
490
491 /* parse what comes after %start */
492
493 void
494 parse_start_decl ()
495 {
496 if (start_flag)
497 fatal("multiple %start declarations");
498 start_flag = 1;
499 if (lex() != IDENTIFIER)
500 fatal("invalid %start declaration");
501 startval = symval;
502 }
503
504
505
506 /* read in a %type declaration and record its information for get_type_name to access */
507
508 void
509 parse_type_decl ()
510 {
511 register int k;
512 register char *name;
513 /* register int start_lineno; JF */
514
515 if (lex() != TYPENAME)
516 fatal("ill-formed %type declaration");
517
518 k = strlen(token_buffer);
519 name = NEW2(k + 1, char);
520 strcpy(name, token_buffer);
521
522 /* start_lineno = lineno; */
523
524 for (;;)
525 {
526 register int t;
527
528 if(ungetc(skip_white_space(), finput) == '%')
529 return;
530
531 /* if (lineno != start_lineno)
532 return; JF */
533
534 /* we have not passed a newline, so the token now starting is in this declaration */
535
536 t = lex();
537
538 switch (t)
539 {
540
541 case COMMA:
542 case SEMICOLON:
543 break;
544
545 case IDENTIFIER:
546 if (symval->type_name == NULL)
547 symval->type_name = name;
548 else
549 fatals("type redeclaration for %s", symval->tag);
550
551 break;
552
553 default:
554 fatal("invalid %type declaration");
555 }
556 }
557 }
558
559
560
561 /* read in a %left, %right or %nonassoc declaration and record its information. */
562 /* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
563
564 void
565 parse_assoc_decl (assoc)
566 int assoc;
567 {
568 register int k;
569 register char *name = NULL;
570 /* register int start_lineno; JF */
571 register int prev = 0; /* JF added = 0 to keep lint happy */
572
573 lastprec++; /* Assign a new precedence level, never 0. */
574
575 /* start_lineno = lineno; */
576
577 for (;;)
578 {
579 register int t;
580
581 if(ungetc(skip_white_space(), finput) == '%')
582 return;
583
584 /* if (lineno != start_lineno)
585 return; JF */
586
587 /* we have not passed a newline, so the token now starting is in this declaration */
588
589 t = lex();
590
591 switch (t)
592 {
593
594 case TYPENAME:
595 k = strlen(token_buffer);
596 name = NEW2(k + 1, char);
597 strcpy(name, token_buffer);
598 break;
599
600 case COMMA:
601 break;
602
603 case IDENTIFIER:
604 if (symval->prec != 0)
605 fatals("redefining precedence of %s", symval->tag);
606 symval->prec = lastprec;
607 symval->assoc = assoc;
608 if (symval->class == SNTERM)
609 fatals("symbol %s redefined", symval->tag);
610 symval->class = STOKEN;
611 if (name)
612 { /* record the type, if one is specified */
613 if (symval->type_name == NULL)
614 symval->type_name = name;
615 else
616 fatals("type redeclaration for %s", symval->tag);
617 }
618 break;
619
620 case NUMBER:
621 if (prev == IDENTIFIER)
622 {
623 symval->user_token_number = numval;
624 translations = 1;
625 }
626 else
627 fatal("invalid text in association declaration");
628 break;
629
630 case SEMICOLON:
631 return;
632
633 default:
634 fatal("malformatted association declaration");
635 }
636
637 prev = t;
638
639 }
640 }
641
642
643
/* Copy the body of a %union declaration from finput into fattrs (and
   into fdefines when that file is open), wrapped as
   `typedef union ... YYSTYPE;', the type of elements of the parser
   value stack.

   The end of the declaration is found by counting braces.  Braces
   inside comments are not counted; there is no such special handling
   for string or character literals.  One `;' after the closing brace
   is swallowed if present.  */

void
parse_union_decl()
{
  register int c;
  register int count;        /* current brace nesting depth */
  register int in_comment;
  int cplus_comment;

  if (typed)
    fatal("multiple %union declarations");

  typed = 1;

  if (!nolinesflag)
    fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
  else
    fprintf(fattrs, "\n");

  fprintf(fattrs, "typedef union");
  if (fdefines)
    fprintf(fdefines, "typedef union");

  count = 0;
  in_comment = 0;

  c = getc(finput);

  /* NOTE(review): if end of file is reached before the closing brace,
     this loop simply ends and the function returns with no diagnostic.  */
  while (c != EOF)
    {
      /* Every character is copied first and interpreted afterwards.  */
      putc(c, fattrs);
      if (fdefines)
        putc(c, fdefines);

      switch (c)
        {
        case '\n':
          lineno++;
          break;

        case '/':
          c = getc(finput);
          /* Not a comment: push the character back so that the getc at
             the bottom of the loop reads it again.  */
          if (c != '*' && c != '/')
            ungetc(c, finput);
          else
            {
              putc(c, fattrs);
              if (fdefines)
                putc(c, fdefines);
              cplus_comment = (c == '/');
              in_comment = 1;
              c = getc(finput);
              /* Copy the comment body; each pass copies c before
                 examining it.  */
              while (in_comment)
                {
                  putc(c, fattrs);
                  if (fdefines)
                    putc(c, fdefines);

                  if (c == '\n')
                    {
                      lineno++;
                      /* A `//' comment ends with the line.  */
                      if (cplus_comment)
                        {
                          in_comment = 0;
                          break;
                        }
                    }
                  if (c == EOF)
                    fatal("unterminated comment");

                  /* After a `*', look at the next character: `/' ends
                     the comment; anything else is left in c to be
                     copied by the next pass.  */
                  if (!cplus_comment && c == '*')
                    {
                      c = getc(finput);
                      if (c == '/')
                        {
                          putc('/', fattrs);
                          if (fdefines)
                            putc('/', fdefines);
                          in_comment = 0;
                        }
                    }
                  else
                    c = getc(finput);
                }
            }
          break;


        case '{':
          count++;
          break;

        case '}':
          if (count == 0)
            fatal ("unmatched close-brace (`}')");
          count--;
          /* The brace matching the first `{' ends the union.  */
          if (count == 0)
            {
              fprintf(fattrs, " YYSTYPE;\n");
              if (fdefines)
                fprintf(fdefines, " YYSTYPE;\n");
              /* JF don't choke on trailing semi */
              c=skip_white_space();
              if(c!=';') ungetc(c,finput);
              return;
            }
        }

      c = getc(finput);
    }
}
758
759 /* parse the declaration %expect N which says to expect N
760 shift-reduce conflicts. */
761
762 void
763 parse_expect_decl()
764 {
765 register int c;
766 register int count;
767 char buffer[20];
768
769 c = getc(finput);
770 while (c == ' ' || c == '\t')
771 c = getc(finput);
772
773 count = 0;
774 while (c >= '0' && c <= '9')
775 {
776 if (count < 20)
777 buffer[count++] = c;
778 c = getc(finput);
779 }
780 buffer[count] = 0;
781
782 ungetc (c, finput);
783
784 expected_conflicts = atoi (buffer);
785 }
786
787 /* that's all of parsing the declaration section */
788 \f
789 /* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
790
791 char *
792 get_type_name(n, rule)
793 int n;
794 symbol_list *rule;
795 {
796 static char *msg = "invalid $ value";
797
798 register int i;
799 register symbol_list *rp;
800
801 if (n < 0)
802 fatal(msg);
803
804 rp = rule;
805 i = 0;
806
807 while (i < n)
808 {
809 rp = rp->next;
810 if (rp == NULL || rp->sym == NULL)
811 fatal(msg);
812 i++;
813 }
814
815 return (rp->sym->type_name);
816 }
817
818
819
820 /* after %guard is seen in the input file,
821 copy the actual guard into the guards file.
822 If the guard is followed by an action, copy that into the actions file.
823 stack_offset is the number of values in the current rule so far,
824 which says where to find $0 with respect to the top of the stack,
825 for the simple parser in which the stack is not popped until after the guard is run. */
826
827 void
828 copy_guard(rule, stack_offset)
829 symbol_list *rule;
830 int stack_offset;
831 {
832 register int c;
833 register int n;
834 register int count;
835 register int match;
836 register int ended;
837 register char *type_name;
838 int brace_flag = 0;
839 int cplus_comment;
840
841 /* offset is always 0 if parser has already popped the stack pointer */
842 if (semantic_parser) stack_offset = 0;
843
844 fprintf(fguard, "\ncase %d:\n", nrules);
845 if (!nolinesflag)
846 fprintf(fguard, "#line %d \"%s\"\n", lineno, infile);
847 putc('{', fguard);
848
849 count = 0;
850 c = getc(finput);
851
852 while (brace_flag ? (count > 0) : (c != ';'))
853 {
854 switch (c)
855 {
856 case '\n':
857 putc(c, fguard);
858 lineno++;
859 break;
860
861 case '{':
862 putc(c, fguard);
863 brace_flag = 1;
864 count++;
865 break;
866
867 case '}':
868 putc(c, fguard);
869 if (count > 0)
870 count--;
871 else
872 fatal("unmatched right brace ('}')");
873 break;
874
875 case '\'':
876 case '"':
877 match = c;
878 putc(c, fguard);
879 c = getc(finput);
880
881 while (c != match)
882 {
883 if (c == EOF || c == '\n')
884 fatal("unterminated string");
885
886 putc(c, fguard);
887
888 if (c == '\\')
889 {
890 c = getc(finput);
891 if (c == EOF)
892 fatal("unterminated string");
893 putc(c, fguard);
894 if (c == '\n')
895 lineno++;
896 }
897
898 c = getc(finput);
899 }
900
901 putc(c, fguard);
902 break;
903
904 case '/':
905 putc(c, fguard);
906 c = getc(finput);
907 if (c != '*' && c != '/')
908 continue;
909
910 cplus_comment = (c == '/');
911 putc(c, fguard);
912 c = getc(finput);
913
914 ended = 0;
915 while (!ended)
916 {
917 if (!cplus_comment && c == '*')
918 {
919 while (c == '*')
920 {
921 putc(c, fguard);
922 c = getc(finput);
923 }
924
925 if (c == '/')
926 {
927 putc(c, fguard);
928 ended = 1;
929 }
930 }
931 else if (c == '\n')
932 {
933 lineno++;
934 putc(c, fguard);
935 if (cplus_comment)
936 ended = 1;
937 else
938 c = getc(finput);
939 }
940 else if (c == EOF)
941 fatal("unterminated comment");
942 else
943 {
944 putc(c, fguard);
945 c = getc(finput);
946 }
947 }
948
949 break;
950
951 case '$':
952 c = getc(finput);
953 type_name = NULL;
954
955 if (c == '<')
956 {
957 register char *cp = token_buffer;
958
959 while ((c = getc(finput)) != '>' && c > 0)
960 *cp++ = c;
961 *cp = 0;
962 type_name = token_buffer;
963
964 c = getc(finput);
965 }
966
967 if (c == '$')
968 {
969 fprintf(fguard, "yyval");
970 if (!type_name) type_name = rule->sym->type_name;
971 if (type_name)
972 fprintf(fguard, ".%s", type_name);
973 if(!type_name && typed) /* JF */
974 fprintf(stderr,"%s:%d: warning: $$ of '%s' has no declared type.\n",infile,lineno,rule->sym->tag);
975 }
976
977 else if (isdigit(c) || c == '-')
978 {
979 ungetc (c, finput);
980 n = read_signed_integer(finput);
981 c = getc(finput);
982
983 if (!type_name && n > 0)
984 type_name = get_type_name(n, rule);
985
986 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
987 if (type_name)
988 fprintf(fguard, ".%s", type_name);
989 if(!type_name && typed) /* JF */
990 fprintf(stderr,"%s:%d: warning: $%d of '%s' has no declared type.\n",infile,lineno,n,rule->sym->tag);
991 continue;
992 }
993 else
994 fatals("$%c is invalid",c); /* JF changed style */
995
996 break;
997
998 case '@':
999 c = getc(finput);
1000 if (isdigit(c) || c == '-')
1001 {
1002 ungetc (c, finput);
1003 n = read_signed_integer(finput);
1004 c = getc(finput);
1005 }
1006 else
1007 fatals("@%c is invalid",c); /* JF changed style */
1008
1009 fprintf(fguard, "yylsp[%d]", n - stack_offset);
1010 yylsp_needed = 1;
1011
1012 continue;
1013
1014 case EOF:
1015 fatal("unterminated %guard clause");
1016
1017 default:
1018 putc(c, fguard);
1019 }
1020
1021 if (c != '}' || count != 0)
1022 c = getc(finput);
1023 }
1024
1025 c = skip_white_space();
1026
1027 fprintf(fguard, ";\n break;}");
1028 if (c == '{')
1029 copy_action(rule, stack_offset);
1030 else if (c == '=')
1031 {
1032 c = getc(finput);
1033 if (c == '{')
1034 copy_action(rule, stack_offset);
1035 }
1036 else
1037 ungetc(c, finput);
1038 }
1039
1040
1041
/* Assuming that a { has just been seen, copy everything up to the
   matching } into the actions file as the body of `case nrules:'.

   RULE is the first link of the current rule.
   STACK_OFFSET is the number of values in the current rule so far,
   which says where to find $0 with respect to the top of the stack.
   It is forced to 0 for a semantic parser.

   While copying, `$$' becomes yyval, `$N' becomes
   yyvsp[N - stack_offset] and `@N' becomes yylsp[N - stack_offset]; a
   type given as `$<type>' or declared for the symbol is appended as a
   member access.  String literals and comments are copied unchanged,
   so braces inside them are not counted.  */

void
copy_action(rule, stack_offset)
     symbol_list *rule;
     int stack_offset;
{
  register int c;
  register int n;
  register int count;       /* current brace nesting depth */
  register int match;
  register int ended;
  register char *type_name;
  int cplus_comment;

  /* offset is always 0 if parser has already popped the stack pointer */
  if (semantic_parser) stack_offset = 0;

  fprintf(faction, "\ncase %d:\n", nrules);
  if (!nolinesflag)
    fprintf(faction, "#line %d \"%s\"\n", lineno, infile);
  putc('{', faction);

  /* The caller has already consumed the opening brace.  */
  count = 1;
  c = getc(finput);

  while (count > 0)
    {
      /* Copy up to the next `}'; each pass dispatches on c, which has
         been read but not yet copied.  */
      while (c != '}')
        {
          switch (c)
            {
            case '\n':
              putc(c, faction);
              lineno++;
              break;

            case '{':
              putc(c, faction);
              count++;
              break;

            case '\'':
            case '"':
              /* Copy a quoted literal through its closing quote.  */
              match = c;
              putc(c, faction);
              c = getc(finput);

              while (c != match)
                {
                  if (c == EOF || c == '\n')
                    fatal("unterminated string");

                  putc(c, faction);

                  /* The character after a backslash is copied blindly.  */
                  if (c == '\\')
                    {
                      c = getc(finput);
                      if (c == EOF)
                        fatal("unterminated string");
                      putc(c, faction);
                      if (c == '\n')
                        lineno++;
                    }

                  c = getc(finput);
                }

              putc(c, faction);
              break;

            case '/':
              putc(c, faction);
              c = getc(finput);
              /* Not a comment: c is still unhandled, so dispatch on it
                 again without reading another character.  */
              if (c != '*' && c != '/')
                continue;

              cplus_comment = (c == '/');
              putc(c, faction);
              c = getc(finput);

              /* Copy the comment body.  A `//' comment ends at a
                 newline; a C comment ends at a run of stars followed
                 by `/'.  */
              ended = 0;
              while (!ended)
                {
                  if (!cplus_comment && c == '*')
                    {
                      while (c == '*')
                        {
                          putc(c, faction);
                          c = getc(finput);
                        }

                      if (c == '/')
                        {
                          putc(c, faction);
                          ended = 1;
                        }
                    }
                  else if (c == '\n')
                    {
                      lineno++;
                      putc(c, faction);
                      if (cplus_comment)
                        ended = 1;
                      else
                        c = getc(finput);
                    }
                  else if (c == EOF)
                    fatal("unterminated comment");
                  else
                    {
                      putc(c, faction);
                      c = getc(finput);
                    }
                }

              break;

            case '$':
              c = getc(finput);
              type_name = NULL;

              /* An explicit `<type>' overrides the declared type.  */
              if (c == '<')
                {
                  register char *cp = token_buffer;

                  /* NOTE(review): nothing here bounds the write into
                     token_buffer; its capacity is not visible from
                     this function.  */
                  while ((c = getc(finput)) != '>' && c > 0)
                    *cp++ = c;
                  *cp = 0;
                  type_name = token_buffer;
                  /* Checked by readgram before it falls back to
                     defining YYSTYPE as int.  */
                  value_components_used = 1;

                  c = getc(finput);
                }
              if (c == '$')
                {
                  fprintf(faction, "yyval");
                  /* Position 0 of the rule is its left hand side.  */
                  if (!type_name) type_name = get_type_name(0, rule);
                  if (type_name)
                    fprintf(faction, ".%s", type_name);
                  if(!type_name && typed)       /* JF */
                    fprintf(stderr,"%s:%d: warning: $$ of '%s' has no declared type.\n",infile,lineno,rule->sym->tag);
                }
              else if (isdigit(c) || c == '-')
                {
                  ungetc (c, finput);
                  n = read_signed_integer(finput);
                  c = getc(finput);

                  /* A declared type is looked up only for positive N.  */
                  if (!type_name && n > 0)
                    type_name = get_type_name(n, rule);

                  fprintf(faction, "yyvsp[%d]", n - stack_offset);
                  if (type_name)
                    fprintf(faction, ".%s", type_name);
                  if(!type_name && typed)       /* JF */
                    fprintf(stderr,"%s:%d: warning: $%d of '%s' has no declared type.\n",infile,lineno,n,rule->sym->tag);
                  /* c already holds the character after the number.  */
                  continue;
                }
              else
                fatals("$%c is invalid",c);     /* JF changed format */

              break;

            case '@':
              c = getc(finput);
              if (isdigit(c) || c == '-')
                {
                  ungetc (c, finput);
                  n = read_signed_integer(finput);
                  c = getc(finput);
                }
              else
                /* NOTE(review): n is used below without having been
                   set if fatal returns; presumably it does not --
                   confirm.  */
                fatal("invalid @-construct");

              fprintf(faction, "yylsp[%d]", n - stack_offset);
              yylsp_needed = 1;

              /* c already holds the character after the number.  */
              continue;

            case EOF:
              /* NOTE(review): no break; relies on fatal not returning.  */
              fatal("unmatched '{'");

            default:
              putc(c, faction);
            }

          c = getc(finput);
        }

      /* above loop exits when c is '}' */

      /* An inner `}' is copied and scanning goes on.  The outermost
         one is not copied here; the text written below supplies the
         closing brace.  */
      if (--count)
        {
          putc(c, faction);
          c = getc(finput);
        }
    }

  fprintf(faction, ";\n break;}");
}
1246
1247
1248
1249 /* generate a dummy symbol, a nonterminal,
1250 whose name cannot conflict with the user's names. */
1251
1252 bucket *
1253 gensym()
1254 {
1255 register bucket *sym;
1256
1257 sprintf (token_buffer, "@%d", ++gensym_count);
1258 sym = getsym(token_buffer);
1259 sym->class = SNTERM;
1260 sym->value = nvars++;
1261 return (sym);
1262 }
1263
/* Parse the input grammar into a single symbol_list chain, whose head
   is stored in `grammar'.
   Each rule is represented by a sequence of symbols: the left hand side
   followed by the contents of the right hand side, followed by a null pointer
   instead of a symbol to terminate the rule.
   The next symbol is the lhs of the following rule.

   All guards and actions are copied out to the appropriate files,
   labelled by the rule number they apply to.

   Reading stops at `%%' or at end of file.  Afterwards the default
   YYSTYPE is written out if the grammar never mentioned a value type,
   symbols that were used but never defined are reported and treated as
   nonterminals, and ntokens is computed.  */

void
readgram()
{
  register int t;               /* current token */
  register bucket *lhs;         /* lhs of the current rule; a rule that
                                   starts with `|' keeps the previous one */
  register symbol_list *p;      /* link being created */
  register symbol_list *p1;     /* last link of the chain so far */
  register bucket *bp;

  symbol_list *crule;   /* points to first symbol_list of current rule.  */
                        /* its symbol is the lhs of the rule.   */
  symbol_list *crule1;  /* points to the symbol_list preceding crule.  */

  p1 = NULL;

  t = lex();

  while (t != TWO_PERCENTS && t != ENDFILE)
    {
      if (t == IDENTIFIER || t == BAR)
        {
          register int actionflag = 0;  /* nonzero if the last rhs element
                                           read was an action */
          int rulelength = 0;  /* number of symbols in rhs of this rule so far  */
          int xactions = 0;     /* JF for error checking */
          bucket *first_rhs = 0;        /* first symbol of the rhs, if any */

          if (t == IDENTIFIER)
            {
              lhs = symval;

              t = lex();
              if (t != COLON)
                fatal("ill-formed rule");
            }

          if (nrules == 0)
            {
              if (t == BAR)
                fatal("grammar starts with vertical bar");

              /* Without %start, the lhs of the first rule is the
                 start symbol.  */
              if (!start_flag)
                startval = lhs;
            }

          /* start a new rule and record its lhs.  */

          nrules++;
          nitems++;

          record_rule_line ();

          p = NEW(symbol_list);
          p->sym = lhs;

          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == SUNKNOWN)
            {
              lhs->class = SNTERM;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == STOKEN)
            fatals("rule given for %s, which is a token", lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex();

              if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == IDENTIFIER)
                {
                  register bucket *ssave;
                  register int t1;

                  /* Look one token ahead, then put it back.  symval is
                     saved and restored around the lookahead.  */
                  ssave = symval;
                  t1 = lex();
                  unlex(t1);
                  symval = ssave;
                  if (t1 == COLON) break;

                  if(!first_rhs)        /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (actionflag)
                {
                  register bucket *sdummy;

                  /* Since the action was written out with this rule's */
                  /* number, we must give the new rule this number */
                  /* by inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  sdummy = gensym();

                  /* Make a new rule, whose body is empty,
                     before the current one, so that the action
                     just read can belong to it.  */
                  nrules++;
                  nitems++;
                  record_rule_line ();
                  p = NEW(symbol_list);
                  if (crule1)
                    crule1->next = p;
                  else grammar = p;
                  p->sym = sdummy;
                  /* This second new link is the dummy rule's end
                     marker; it becomes the link preceding crule.  */
                  crule1 = NEW(symbol_list);
                  p->next = crule1;
                  crule1->next = crule;

                  /* insert the dummy generated by that rule into this rule.  */
                  nitems++;
                  p = NEW(symbol_list);
                  p->sym = sdummy;
                  p1->next = p;
                  p1 = p;

                  actionflag = 0;
                }

              if (t == IDENTIFIER)
                {
                  nitems++;
                  p = NEW(symbol_list);
                  p->sym = symval;
                  p1->next = p;
                  p1 = p;
                }
              else /* handle an action.  */
                {
                  copy_action(crule, rulelength);
                  actionflag = 1;
                  xactions++;   /* JF */
                }
              rulelength++;
            }

          /* Put an empty link in the list to mark the end of this rule  */
          p = NEW(symbol_list);
          p1->next = p;
          p1 = p;

          if (t == PREC)
            {
              /* NOTE(review): the token after %prec is not checked;
                 whatever symval holds after this lex call is stored.  */
              t = lex();
              crule->ruleprec = symval;
              t = lex();
            }
          if (t == GUARD)
            {
              if (! semantic_parser)
                fatal("%guard present but %semantic_parser not specified");

              copy_guard(crule, rulelength);
              t = lex();
            }
          else if (t == LEFT_CURLY)
            {
              /* Reached only when the token after %prec is `{': an
                 action met while reading the rhs is handled in the
                 loop above.  */
              if (actionflag) fatal("two actions at end of one rule");
              copy_action(crule, rulelength);
              t = lex();
            }
          /* If $$ is being set in default way,
             warn if any type mismatch.  */
          else if (!xactions && first_rhs && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ; it is a real clash unless both
                 types exist and have the same spelling.  */
              if (lhs->type_name == 0 || first_rhs->type_name == 0
                  || strcmp(lhs->type_name,first_rhs->type_name))
                fprintf(stderr, "%s:%d: warning: type clash ('%s' '%s') on default action\n",
                        infile,
                        lineno,
                        lhs->type_name ? lhs->type_name : "",
                        first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            fprintf(stderr,
                    "%s:%d: warning: empty rule for typed nonterminal, and no action\n",
                    infile,
                    lineno);
          if (t == SEMICOLON)
            t = lex();
        }
      /* these things can appear as alternatives to rules.  */
      else if (t == TOKEN)
        {
          parse_token_decl(STOKEN, SNTERM);
          t = lex();
        }
      else if (t == NTERM)
        {
          parse_token_decl(SNTERM, STOKEN);
          t = lex();
        }
      else if (t == TYPE)
        {
          /* Unlike the other declaration parsers, get_type's return
             value is used as the next token.  */
          t = get_type();
        }
      else if (t == UNION)
        {
          parse_union_decl();
          t = lex();
        }
      else if (t == EXPECT)
        {
          parse_expect_decl();
          t = lex();
        }
      else if (t == START)
        {
          parse_start_decl();
          t = lex();
        }
      else
        fatal("invalid input");
    }

  if (nsyms > MAXSHORT)
    fatals("too many symbols (tokens plus nonterminals); maximum %d",
           MAXSHORT);
  if (nrules == 0)
    fatal("no input grammar");

  if (typed == 0        /* JF put out same default YYSTYPE as YACC does */
      && !value_components_used)
    {
      /* We used to use `unsigned long' as YYSTYPE on MSDOS,
         but it seems better to be consistent.
         Most programs should declare their own type anyway.  */
      fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
      if (fdefines)
        fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
    }

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == SUNKNOWN)
      {
        fprintf(stderr, "symbol %s used, not defined as token, and no rules for it\n",
                bp->tag);
        failure = 1;
        bp->class = SNTERM;
        bp->value = nvars++;
      }

  /* Every symbol that is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
1541
1542
1543 void
1544 record_rule_line ()
1545 {
1546 /* Record each rule's source line number in rline table. */
1547
1548 if (nrules >= rline_allocated)
1549 {
1550 rline_allocated = nrules * 2;
1551 rline = (short *) xrealloc (rline,
1552 rline_allocated * sizeof (short));
1553 }
1554 rline[nrules] = lineno;
1555 }
1556
1557
1558 /* read in a %type declaration and record its information for get_type_name to access */
1559
1560 int
1561 get_type()
1562 {
1563 register int k;
1564 register int t;
1565 register char *name;
1566
1567 t = lex();
1568
1569 if (t != TYPENAME)
1570 fatal("ill-formed %type declaration");
1571
1572 k = strlen(token_buffer);
1573 name = NEW2(k + 1, char);
1574 strcpy(name, token_buffer);
1575
1576 for (;;)
1577 {
1578 t = lex();
1579
1580 switch (t)
1581 {
1582 case SEMICOLON:
1583 return (lex());
1584
1585 case COMMA:
1586 break;
1587
1588 case IDENTIFIER:
1589 if (symval->type_name == NULL)
1590 symval->type_name = name;
1591 else
1592 fatals("type redeclaration for %s", symval->tag);
1593
1594 break;
1595
1596 default:
1597 return (t);
1598 }
1599 }
1600 }
1601
1602
1603
1604 /* assign symbol numbers, and write definition of token names into fdefines.
1605 Set up vectors tags and sprec of names and precedences of symbols. */
1606
1607 void
1608 packsymbols()
1609 {
1610 register bucket *bp;
1611 register int tokno = 1;
1612 register int i;
1613 register int last_user_token_number;
1614
1615 /* int lossage = 0; JF set but not used */
1616
1617 tags = NEW2(nsyms + 1, char *);
1618 tags[0] = "$";
1619
1620 sprec = NEW2(nsyms, short);
1621 sassoc = NEW2(nsyms, short);
1622
1623 max_user_token_number = 256;
1624 last_user_token_number = 256;
1625
1626 for (bp = firstsymbol; bp; bp = bp->next)
1627 {
1628 if (bp->class == SNTERM)
1629 {
1630 bp->value += ntokens;
1631 }
1632 else
1633 {
1634 if (translations && !(bp->user_token_number))
1635 bp->user_token_number = ++last_user_token_number;
1636 if (bp->user_token_number > max_user_token_number)
1637 max_user_token_number = bp->user_token_number;
1638 bp->value = tokno++;
1639 }
1640
1641 tags[bp->value] = bp->tag;
1642 sprec[bp->value] = bp->prec;
1643 sassoc[bp->value] = bp->assoc;
1644
1645 }
1646
1647 if (translations)
1648 {
1649 register int i;
1650
1651 token_translations = NEW2(max_user_token_number+1, short);
1652
1653 /* initialize all entries for literal tokens to 2,
1654 the internal token number for $illegal., which represents all invalid inputs. */
1655 for (i = 0; i <= max_user_token_number; i++)
1656 token_translations[i] = 2;
1657 }
1658
1659 for (bp = firstsymbol; bp; bp = bp->next)
1660 {
1661 if (bp->value >= ntokens) continue;
1662 if (translations)
1663 {
1664 if (token_translations[bp->user_token_number] != 2)
1665 {
1666 /* JF made this a call to fatals() */
1667 fatals( "tokens %s and %s both assigned number %d",
1668 tags[token_translations[bp->user_token_number]],
1669 bp->tag,
1670 bp->user_token_number);
1671 }
1672 token_translations[bp->user_token_number] = bp->value;
1673 }
1674 }
1675
1676 error_token_number = errtoken->value;
1677
1678 output_token_defines(ftable);
1679
1680 if (startval->class == SUNKNOWN)
1681 fatals("the start symbol %s is undefined", startval->tag);
1682 else if (startval->class == STOKEN)
1683 fatals("the start symbol %s is a token", startval->tag);
1684
1685 start_symbol = startval->value;
1686
1687 if (definesflag)
1688 {
1689 output_token_defines(fdefines);
1690
1691 if (!pure_parser)
1692 {
1693 if (spec_name_prefix)
1694 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1695 else
1696 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1697 }
1698
1699 if (semantic_parser)
1700 for (i = ntokens; i < nsyms; i++)
1701 {
1702 /* don't make these for dummy nonterminals made by gensym. */
1703 if (*tags[i] != '@')
1704 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1705 }
1706 #if 0
1707 /* `fdefines' is now a temporary file, so we need to copy its
1708 contents in `done', so we can't close it here. */
1709 fclose(fdefines);
1710 fdefines = NULL;
1711 #endif
1712 }
1713 }
1714
1715
1716 void
1717 output_token_defines(file)
1718 FILE *file;
1719 {
1720 bucket *bp;
1721
1722 for (bp = firstsymbol; bp; bp = bp->next)
1723 {
1724 if (bp->value >= ntokens) continue;
1725
1726 /* For named tokens, but not literal ones, define the name. */
1727 /* The value is the user token number. */
1728
1729 if ('\'' != *tags[bp->value] && bp != errtoken)
1730 {
1731 register char *cp = tags[bp->value];
1732 register char c;
1733
1734 /* Don't #define nonliteral tokens whose names contain periods. */
1735
1736 while ((c = *cp++) && c != '.');
1737 if (!c)
1738 {
1739 fprintf(file, "#define\t%s\t%d\n", tags[bp->value],
1740 (translations ? bp->user_token_number : bp->value));
1741 if (semantic_parser)
1742 fprintf(file, "#define\tT%s\t%d\n", tags[bp->value],
1743 bp->value);
1744 }
1745 }
1746 }
1747
1748 putc('\n', file);
1749 }
1750
1751
1752
1753 /* convert the rules into the representation using rrhs, rlhs and ritems. */
1754
1755 void
1756 packgram()
1757 {
1758 register int itemno;
1759 register int ruleno;
1760 register symbol_list *p;
1761 /* register bucket *bp; JF unused */
1762
1763 bucket *ruleprec;
1764
1765 ritem = NEW2(nitems + 1, short);
1766 rlhs = NEW2(nrules, short) - 1;
1767 rrhs = NEW2(nrules, short) - 1;
1768 rprec = NEW2(nrules, short) - 1;
1769 rprecsym = NEW2(nrules, short) - 1;
1770 rassoc = NEW2(nrules, short) - 1;
1771
1772 itemno = 0;
1773 ruleno = 1;
1774
1775 p = grammar;
1776 while (p)
1777 {
1778 rlhs[ruleno] = p->sym->value;
1779 rrhs[ruleno] = itemno;
1780 ruleprec = p->ruleprec;
1781
1782 p = p->next;
1783 while (p && p->sym)
1784 {
1785 ritem[itemno++] = p->sym->value;
1786 /* A rule gets by default the precedence and associativity
1787 of the last token in it. */
1788 if (p->sym->class == STOKEN)
1789 {
1790 rprec[ruleno] = p->sym->prec;
1791 rassoc[ruleno] = p->sym->assoc;
1792 }
1793 if (p) p = p->next;
1794 }
1795
1796 /* If this rule has a %prec,
1797 the specified symbol's precedence replaces the default. */
1798 if (ruleprec)
1799 {
1800 rprec[ruleno] = ruleprec->prec;
1801 rassoc[ruleno] = ruleprec->assoc;
1802 rprecsym[ruleno] = ruleprec->value;
1803 }
1804
1805 ritem[itemno++] = -ruleno;
1806 ruleno++;
1807
1808 if (p) p = p->next;
1809 }
1810
1811 ritem[itemno] = 0;
1812 }
1813 \f
/* Read an optionally negative decimal integer from STREAM and return
   its value.

   A single leading '-' is consumed and negates the result; the digits
   that follow are accumulated in base 10.  The first character that is
   not a digit is pushed back onto STREAM with ungetc.  When no digit
   is present the result is 0 (a lone '-' is still consumed).

   A number too large for an `int' is clamped to the largest magnitude
   this function can represent rather than being allowed to overflow,
   because signed overflow is undefined behavior.  All of the digits
   are consumed even then.  */

int
read_signed_integer (stream)
     FILE *stream;
{
  /* Clamp value; it never exceeds the largest `int'.  It is derived
     from the unsigned range so that no additional header is needed.  */
  int limit = (int) (((unsigned) ~0) >> 1);
  int negative = 0;
  int value = 0;
  int digit;
  int c;

  c = getc(stream);
  if (c == '-')
    {
      negative = 1;
      c = getc(stream);
    }

  while (isdigit(c))
    {
      digit = c - '0';

      /* 10 * value + digit fits only if value <= (limit - digit) / 10.  */
      if (value > (limit - digit) / 10)
        value = limit;
      else
        value = 10 * value + digit;

      c = getc(stream);
    }

  /* Return the terminating character to the stream for the caller.  */
  ungetc(c, stream);

  return negative ? -value : value;
}