]> git.saurik.com Git - bison.git/blame - src/reader.c
Initial revision
[bison.git] / src / reader.c
CommitLineData
1ff442ca
NF
1/* Input parser for bison
2 Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
3
4This file is part of Bison, the GNU Compiler Compiler.
5
6Bison is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11Bison is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with Bison; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21/* read in the grammar specification and record it in the format described in gram.h.
22 All guards are copied into the fguard file and all actions into faction,
23 in each case forming the body of a C function (yyguard or yyaction)
24 which contains a switch statement to decide which guard or action to execute.
25
26The entry point is reader(). */
27
28#include <stdio.h>
29#include <ctype.h>
30#include "system.h"
31#include "files.h"
32#include "new.h"
33#include "symtab.h"
34#include "lex.h"
35#include "gram.h"
36#include "machine.h"
37
/* Source text of the default YYLTYPE declaration, guarded by
   #ifndef YYLTYPE.  reader() writes it to fattrs, and also to fdefines
   when some action or guard used the @n construct and fdefines is open.  */
38#define LTYPESTR "\n#ifndef YYLTYPE\ntypedef\n struct yyltype\n\
39 {\n int timestamp;\n int first_line;\n int first_column;\
40\n int last_line;\n int last_column;\n char *text;\n }\n\
41 yyltype;\n\n#define YYLTYPE yyltype\n#endif\n\n"
42
/* record_rule_line grows `rline' whenever nrules reaches this count.  */
43/* Number of slots allocated (but not necessarily used yet) in `rline' */
44int rline_allocated;
45
46extern char *program_name;
47extern int definesflag;
48extern int nolinesflag;
49extern bucket *symval;
50extern int numval;
51extern int failure;
52extern int expected_conflicts;
53extern char *token_buffer;
54
1ff442ca
NF
55extern void init_lex();
56extern void tabinit();
57extern void output_headers();
58extern void output_trailers();
59extern void free_symtab();
60extern void open_extra_files();
61extern void fatal();
62extern void fatals();
63extern void unlex();
64extern void done();
65
66extern int skip_white_space();
67extern int parse_percent_token();
68extern int lex();
69
70void read_declarations();
71void copy_definition();
72void parse_token_decl();
73void parse_start_decl();
74void parse_type_decl();
75void parse_assoc_decl();
76void parse_union_decl();
77void parse_expect_decl();
78void copy_action();
79void readgram();
80void record_rule_line();
81void packsymbols();
82void output_token_defines();
83void packgram();
84int read_signed_integer();
85int get_type();
86
87typedef
88 struct symbol_list
89 {
90 struct symbol_list *next;
91 bucket *sym;
92 bucket *ruleprec;
93 }
94 symbol_list;
95
96
97
98int lineno;
99symbol_list *grammar;
100int start_flag;
101bucket *startval;
102char **tags;
103
104/* Nonzero if components of semantic values are used, implying
105 they must be unions. */
106static int value_components_used;
107
108static int typed; /* nonzero if %union has been seen. */
109
110static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
111
112static int gensym_count; /* incremented for each generated symbol */
113
114static bucket *errtoken;
115
116/* Nonzero if any action or guard uses the @n construct. */
117static int yylsp_needed;
118
119extern char *version_string;
120
121void
122reader()
123{
124 start_flag = 0;
125 startval = NULL; /* start symbol not specified yet. */
126
127#if 0
128 translations = 0; /* initially assume token number translation not needed. */
129#endif
130 /* Nowadays translations is always set to 1,
131 since we give `error' a user-token-number
132 to satisfy the Posix demand for YYERRCODE==256. */
133 translations = 1;
134
135 nsyms = 1;
136 nvars = 0;
137 nrules = 0;
138 nitems = 0;
139 rline_allocated = 10;
140 rline = NEW2(rline_allocated, short);
141
142 typed = 0;
143 lastprec = 0;
144
145 gensym_count = 0;
146
147 semantic_parser = 0;
148 pure_parser = 0;
149 yylsp_needed = 0;
150
151 grammar = NULL;
152
153 init_lex();
154 lineno = 1;
155
156 /* initialize the symbol table. */
157 tabinit();
158 /* construct the error token */
159 errtoken = getsym("error");
160 errtoken->class = STOKEN;
161 errtoken->user_token_number = 256; /* Value specified by posix. */
162 /* construct a token that represents all undefined literal tokens. */
163 /* it is always token number 2. */
164 getsym("$illegal.")->class = STOKEN;
165 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
166 Also notice any %token, %left, etc. found there. */
167 fprintf(ftable, "\n/* A Bison parser, made from %s", infile);
168 fprintf(ftable, " with Bison version %s */\n\n", version_string);
169 fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
170 read_declarations();
171 /* output the definition of YYLTYPE into the fattrs and fdefines files. */
172 /* fattrs winds up in the .tab.c file, before bison.simple. */
173 fprintf(fattrs, LTYPESTR);
174 /* start writing the guard and action files, if they are needed. */
175 output_headers();
176 /* read in the grammar, build grammar in list form. write out guards and actions. */
177 readgram();
178 /* Now we know whether we need the line-number stack.
179 If we do, write its type into the .tab.h file. */
180 if (yylsp_needed)
181 {
182 if (fdefines)
183 fprintf(fdefines, LTYPESTR);
184 }
185 /* write closing delimiters for actions and guards. */
186 output_trailers();
187 if (yylsp_needed)
188 fprintf(ftable, "#define YYLSP_NEEDED\n\n");
189 /* assign the symbols their symbol numbers.
190 Write #defines for the token symbols into fdefines if requested. */
191 packsymbols();
192 /* convert the grammar into the format described in gram.h. */
193 packgram();
194 /* free the symbol table data structure
195 since symbols are now all referred to by symbol number. */
196 free_symtab();
197}
198
199
200
201/* read from finput until %% is seen. Discard the %%.
202Handle any % declarations,
203and copy the contents of any %{ ... %} groups to fattrs. */
204
205void
206read_declarations ()
207{
208 register int c;
209 register int tok;
210
211 for (;;)
212 {
213 c = skip_white_space();
214
215 if (c == '%')
216 {
217 tok = parse_percent_token();
218
219 switch (tok)
220 {
221 case TWO_PERCENTS:
222 return;
223
224 case PERCENT_LEFT_CURLY:
225 copy_definition();
226 break;
227
228 case TOKEN:
229 parse_token_decl (STOKEN, SNTERM);
230 break;
231
232 case NTERM:
233 parse_token_decl (SNTERM, STOKEN);
234 break;
235
236 case TYPE:
237 parse_type_decl();
238 break;
239
240 case START:
241 parse_start_decl();
242 break;
243
244 case UNION:
245 parse_union_decl();
246 break;
247
248 case EXPECT:
249 parse_expect_decl();
250 break;
251
252 case LEFT:
253 parse_assoc_decl(LEFT_ASSOC);
254 break;
255
256 case RIGHT:
257 parse_assoc_decl(RIGHT_ASSOC);
258 break;
259
260 case NONASSOC:
261 parse_assoc_decl(NON_ASSOC);
262 break;
263
264 case SEMANTIC_PARSER:
265 if (semantic_parser == 0)
266 {
267 semantic_parser = 1;
268 open_extra_files();
269 }
270 break;
271
272 case PURE_PARSER:
273 pure_parser = 1;
274 break;
275
276 default:
277 fatal("junk after `%%' in definition section");
278 }
279 }
280 else if (c == EOF)
281 fatal("no input grammar");
282 else if (c >= 040 && c <= 0177)
283 fatals ("unknown character `%c' in declaration section", c);
284 else
285 fatals ("unknown character with code 0x%x in declaration section", c);
286 }
287}
288
289
290/* copy the contents of a %{ ... %} into the definitions file.
291The %{ has already been read. Return after reading the %}. */
292
293void
294copy_definition ()
295{
296 register int c;
297 register int match;
298 register int ended;
299 register int after_percent; /* -1 while reading a character if prev char was % */
300 int cplus_comment;
301
302 if (!nolinesflag)
303 fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
304
305 after_percent = 0;
306
307 c = getc(finput);
308
309 for (;;)
310 {
311 switch (c)
312 {
313 case '\n':
314 putc(c, fattrs);
315 lineno++;
316 break;
317
318 case '%':
319 after_percent = -1;
320 break;
321
322 case '\'':
323 case '"':
324 match = c;
325 putc(c, fattrs);
326 c = getc(finput);
327
328 while (c != match)
329 {
330 if (c == EOF || c == '\n')
331 fatal("unterminated string");
332
333 putc(c, fattrs);
334
335 if (c == '\\')
336 {
337 c = getc(finput);
338 if (c == EOF)
339 fatal("unterminated string");
340 putc(c, fattrs);
341 if (c == '\n')
342 lineno++;
343 }
344
345 c = getc(finput);
346 }
347
348 putc(c, fattrs);
349 break;
350
351 case '/':
352 putc(c, fattrs);
353 c = getc(finput);
354 if (c != '*' && c != '/')
355 continue;
356
357 cplus_comment = (c == '/');
358 putc(c, fattrs);
359 c = getc(finput);
360
361 ended = 0;
362 while (!ended)
363 {
364 if (!cplus_comment && c == '*')
365 {
366 while (c == '*')
367 {
368 putc(c, fattrs);
369 c = getc(finput);
370 }
371
372 if (c == '/')
373 {
374 putc(c, fattrs);
375 ended = 1;
376 }
377 }
378 else if (c == '\n')
379 {
380 lineno++;
381 putc(c, fattrs);
382 if (cplus_comment)
383 ended = 1;
384 else
385 c = getc(finput);
386 }
387 else if (c == EOF)
388 fatal("unterminated comment in `%{' definition");
389 else
390 {
391 putc(c, fattrs);
392 c = getc(finput);
393 }
394 }
395
396 break;
397
398 case EOF:
399 fatal("unterminated `%{' definition");
400
401 default:
402 putc(c, fattrs);
403 }
404
405 c = getc(finput);
406
407 if (after_percent)
408 {
409 if (c == '}')
410 return;
411 putc('%', fattrs);
412 }
413 after_percent = 0;
414
415 }
416
417}
418
419
420
421/* parse what comes after %token or %nterm.
422For %token, what_is is STOKEN and what_is_not is SNTERM.
423For %nterm, the arguments are reversed. */
424
425void
426parse_token_decl (what_is, what_is_not)
427 int what_is, what_is_not;
428{
429/* register int start_lineno; JF */
430 register int token = 0;
431 register int prev;
432 register char *typename = 0;
433 int k;
434
435/* start_lineno = lineno; JF */
436
437 for (;;)
438 {
439 if(ungetc(skip_white_space(), finput) == '%')
440 return;
441
442/* if (lineno != start_lineno)
443 return; JF */
444
445 /* we have not passed a newline, so the token now starting is in this declaration */
446 prev = token;
447
448 token = lex();
449 if (token == COMMA)
450 continue;
451 if (token == TYPENAME)
452 {
453 k = strlen(token_buffer);
454 typename = NEW2(k + 1, char);
455 strcpy(typename, token_buffer);
456 value_components_used = 1;
457 }
458 else if (token == IDENTIFIER)
459 {
460 int oldclass = symval->class;
461
462 if (symval->class == what_is_not)
463 fatals("symbol %s redefined", symval->tag);
464 symval->class = what_is;
465 if (what_is == SNTERM && oldclass != SNTERM)
466 symval->value = nvars++;
467
468 if (typename)
469 {
470 if (symval->type_name == NULL)
471 symval->type_name = typename;
472 else
473 fatals("type redeclaration for %s", symval->tag);
474 }
475 }
476 else if (prev == IDENTIFIER && token == NUMBER)
477 {
478 symval->user_token_number = numval;
479 translations = 1;
480 }
481 else
482 fatal("invalid text in %token or %nterm declaration");
483 }
484
485}
486
487
488
489/* parse what comes after %start */
490
491void
492parse_start_decl ()
493{
494 if (start_flag)
495 fatal("multiple %start declarations");
496 start_flag = 1;
497 if (lex() != IDENTIFIER)
498 fatal("invalid %start declaration");
499 startval = symval;
500}
501
502
503
504/* read in a %type declaration and record its information for get_type_name to access */
505
506void
507parse_type_decl ()
508{
509 register int k;
510 register char *name;
511/* register int start_lineno; JF */
512
513 if (lex() != TYPENAME)
514 fatal("ill-formed %type declaration");
515
516 k = strlen(token_buffer);
517 name = NEW2(k + 1, char);
518 strcpy(name, token_buffer);
519
520/* start_lineno = lineno; */
521
522 for (;;)
523 {
524 register int t;
525
526 if(ungetc(skip_white_space(), finput) == '%')
527 return;
528
529/* if (lineno != start_lineno)
530 return; JF */
531
532 /* we have not passed a newline, so the token now starting is in this declaration */
533
534 t = lex();
535
536 switch (t)
537 {
538
539 case COMMA:
540 case SEMICOLON:
541 break;
542
543 case IDENTIFIER:
544 if (symval->type_name == NULL)
545 symval->type_name = name;
546 else
547 fatals("type redeclaration for %s", symval->tag);
548
549 break;
550
551 default:
552 fatal("invalid %type declaration");
553 }
554 }
555}
556
557
558
559/* read in a %left, %right or %nonassoc declaration and record its information. */
560/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
561
562void
563parse_assoc_decl (assoc)
564int assoc;
565{
566 register int k;
567 register char *name = NULL;
568/* register int start_lineno; JF */
569 register int prev = 0; /* JF added = 0 to keep lint happy */
570
571 lastprec++; /* Assign a new precedence level, never 0. */
572
573/* start_lineno = lineno; */
574
575 for (;;)
576 {
577 register int t;
578
579 if(ungetc(skip_white_space(), finput) == '%')
580 return;
581
582 /* if (lineno != start_lineno)
583 return; JF */
584
585 /* we have not passed a newline, so the token now starting is in this declaration */
586
587 t = lex();
588
589 switch (t)
590 {
591
592 case TYPENAME:
593 k = strlen(token_buffer);
594 name = NEW2(k + 1, char);
595 strcpy(name, token_buffer);
596 break;
597
598 case COMMA:
599 break;
600
601 case IDENTIFIER:
602 if (symval->prec != 0)
603 fatals("redefining precedence of %s", symval->tag);
604 symval->prec = lastprec;
605 symval->assoc = assoc;
606 if (symval->class == SNTERM)
607 fatals("symbol %s redefined", symval->tag);
608 symval->class = STOKEN;
609 if (name)
610 { /* record the type, if one is specified */
611 if (symval->type_name == NULL)
612 symval->type_name = name;
613 else
614 fatals("type redeclaration for %s", symval->tag);
615 }
616 break;
617
618 case NUMBER:
619 if (prev == IDENTIFIER)
620 {
621 symval->user_token_number = numval;
622 translations = 1;
623 }
624 else
625 fatal("invalid text in association declaration");
626 break;
627
628 case SEMICOLON:
629 return;
630
631 default:
632 fatal("malformatted association declaration");
633 }
634
635 prev = t;
636
637 }
638}
639
640
641
642/* copy the union declaration into fattrs (and fdefines),
643 where it is made into the
644 definition of YYSTYPE, the type of elements of the parser value stack. */
645
646void
647parse_union_decl()
648{
649 register int c;
650 register int count;
651 register int in_comment;
652 int cplus_comment;
653
654 if (typed)
655 fatal("multiple %union declarations");
656
657 typed = 1;
658
659 if (!nolinesflag)
660 fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
661 else
662 fprintf(fattrs, "\n");
663
664 fprintf(fattrs, "typedef union");
665 if (fdefines)
666 fprintf(fdefines, "typedef union");
667
668 count = 0;
669 in_comment = 0;
670
671 c = getc(finput);
672
673 while (c != EOF)
674 {
675 putc(c, fattrs);
676 if (fdefines)
677 putc(c, fdefines);
678
679 switch (c)
680 {
681 case '\n':
682 lineno++;
683 break;
684
685 case '/':
686 c = getc(finput);
687 if (c != '*' && c != '/')
688 ungetc(c, finput);
689 else
690 {
691 putc(c, fattrs);
692 if (fdefines)
693 putc(c, fdefines);
694 cplus_comment = (c == '/');
695 in_comment = 1;
696 c = getc(finput);
697 while (in_comment)
698 {
699 putc(c, fattrs);
700 if (fdefines)
701 putc(c, fdefines);
702
703 if (c == '\n')
704 {
705 lineno++;
706 if (cplus_comment)
707 {
708 in_comment = 0;
709 break;
710 }
711 }
712 if (c == EOF)
713 fatal("unterminated comment");
714
715 if (!cplus_comment && c == '*')
716 {
717 c = getc(finput);
718 if (c == '/')
719 {
720 putc('/', fattrs);
721 if (fdefines)
722 putc('/', fdefines);
723 in_comment = 0;
724 }
725 }
726 else
727 c = getc(finput);
728 }
729 }
730 break;
731
732
733 case '{':
734 count++;
735 break;
736
737 case '}':
738 if (count == 0)
739 fatal ("unmatched close-brace (`}')");
740 count--;
741 if (count == 0)
742 {
743 fprintf(fattrs, " YYSTYPE;\n");
744 if (fdefines)
745 fprintf(fdefines, " YYSTYPE;\n");
746 /* JF don't choke on trailing semi */
747 c=skip_white_space();
748 if(c!=';') ungetc(c,finput);
749 return;
750 }
751 }
752
753 c = getc(finput);
754 }
755}
756
757/* parse the declaration %expect N which says to expect N
758 shift-reduce conflicts. */
759
760void
761parse_expect_decl()
762{
763 register int c;
764 register int count;
765 char buffer[20];
766
767 c = getc(finput);
768 while (c == ' ' || c == '\t')
769 c = getc(finput);
770
771 count = 0;
772 while (c >= '0' && c <= '9')
773 {
774 if (count < 20)
775 buffer[count++] = c;
776 c = getc(finput);
777 }
778 buffer[count] = 0;
779
780 ungetc (c, finput);
781
782 expected_conflicts = atoi (buffer);
783}
784
785/* that's all of parsing the declaration section */
786\f
787/* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
788
789char *
790get_type_name(n, rule)
791int n;
792symbol_list *rule;
793{
794 static char *msg = "invalid $ value";
795
796 register int i;
797 register symbol_list *rp;
798
799 if (n < 0)
800 fatal(msg);
801
802 rp = rule;
803 i = 0;
804
805 while (i < n)
806 {
807 rp = rp->next;
808 if (rp == NULL || rp->sym == NULL)
809 fatal(msg);
810 i++;
811 }
812
813 return (rp->sym->type_name);
814}
815
816
817
818/* after %guard is seen in the input file,
819copy the actual guard into the guards file.
820If the guard is followed by an action, copy that into the actions file.
821stack_offset is the number of values in the current rule so far,
822which says where to find $0 with respect to the top of the stack,
823for the simple parser in which the stack is not popped until after the guard is run. */
824
825void
826copy_guard(rule, stack_offset)
827symbol_list *rule;
828int stack_offset;
829{
830 register int c;
831 register int n;
832 register int count;
833 register int match;
834 register int ended;
835 register char *type_name;
836 int brace_flag = 0;
837 int cplus_comment;
838
839 /* offset is always 0 if parser has already popped the stack pointer */
840 if (semantic_parser) stack_offset = 0;
841
842 fprintf(fguard, "\ncase %d:\n", nrules);
843 if (!nolinesflag)
844 fprintf(fguard, "#line %d \"%s\"\n", lineno, infile);
845 putc('{', fguard);
846
847 count = 0;
848 c = getc(finput);
849
850 while (brace_flag ? (count > 0) : (c != ';'))
851 {
852 switch (c)
853 {
854 case '\n':
855 putc(c, fguard);
856 lineno++;
857 break;
858
859 case '{':
860 putc(c, fguard);
861 brace_flag = 1;
862 count++;
863 break;
864
865 case '}':
866 putc(c, fguard);
867 if (count > 0)
868 count--;
869 else
870 fatal("unmatched right brace ('}')");
871 break;
872
873 case '\'':
874 case '"':
875 match = c;
876 putc(c, fguard);
877 c = getc(finput);
878
879 while (c != match)
880 {
881 if (c == EOF || c == '\n')
882 fatal("unterminated string");
883
884 putc(c, fguard);
885
886 if (c == '\\')
887 {
888 c = getc(finput);
889 if (c == EOF)
890 fatal("unterminated string");
891 putc(c, fguard);
892 if (c == '\n')
893 lineno++;
894 }
895
896 c = getc(finput);
897 }
898
899 putc(c, fguard);
900 break;
901
902 case '/':
903 putc(c, fguard);
904 c = getc(finput);
905 if (c != '*' && c != '/')
906 continue;
907
908 cplus_comment = (c == '/');
909 putc(c, fguard);
910 c = getc(finput);
911
912 ended = 0;
913 while (!ended)
914 {
915 if (!cplus_comment && c == '*')
916 {
917 while (c == '*')
918 {
919 putc(c, fguard);
920 c = getc(finput);
921 }
922
923 if (c == '/')
924 {
925 putc(c, fguard);
926 ended = 1;
927 }
928 }
929 else if (c == '\n')
930 {
931 lineno++;
932 putc(c, fguard);
933 if (cplus_comment)
934 ended = 1;
935 else
936 c = getc(finput);
937 }
938 else if (c == EOF)
939 fatal("unterminated comment");
940 else
941 {
942 putc(c, fguard);
943 c = getc(finput);
944 }
945 }
946
947 break;
948
949 case '$':
950 c = getc(finput);
951 type_name = NULL;
952
953 if (c == '<')
954 {
955 register char *cp = token_buffer;
956
957 while ((c = getc(finput)) != '>' && c > 0)
958 *cp++ = c;
959 *cp = 0;
960 type_name = token_buffer;
961
962 c = getc(finput);
963 }
964
965 if (c == '$')
966 {
967 fprintf(fguard, "yyval");
968 if (!type_name) type_name = rule->sym->type_name;
969 if (type_name)
970 fprintf(fguard, ".%s", type_name);
971 if(!type_name && typed) /* JF */
972 fprintf(stderr,"%s:%d: warning: $$ of '%s' has no declared type.\n",infile,lineno,rule->sym->tag);
973 }
974
975 else if (isdigit(c) || c == '-')
976 {
977 ungetc (c, finput);
978 n = read_signed_integer(finput);
979 c = getc(finput);
980
981 if (!type_name && n > 0)
982 type_name = get_type_name(n, rule);
983
984 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
985 if (type_name)
986 fprintf(fguard, ".%s", type_name);
987 if(!type_name && typed) /* JF */
988 fprintf(stderr,"%s:%d: warning: $%d of '%s' has no declared type.\n",infile,lineno,n,rule->sym->tag);
989 continue;
990 }
991 else
992 fatals("$%c is invalid",c); /* JF changed style */
993
994 break;
995
996 case '@':
997 c = getc(finput);
998 if (isdigit(c) || c == '-')
999 {
1000 ungetc (c, finput);
1001 n = read_signed_integer(finput);
1002 c = getc(finput);
1003 }
1004 else
1005 fatals("@%c is invalid",c); /* JF changed style */
1006
1007 fprintf(fguard, "yylsp[%d]", n - stack_offset);
1008 yylsp_needed = 1;
1009
1010 continue;
1011
1012 case EOF:
1013 fatal("unterminated %guard clause");
1014
1015 default:
1016 putc(c, fguard);
1017 }
1018
1019 if (c != '}' || count != 0)
1020 c = getc(finput);
1021 }
1022
1023 c = skip_white_space();
1024
1025 fprintf(fguard, ";\n break;}");
1026 if (c == '{')
1027 copy_action(rule, stack_offset);
1028 else if (c == '=')
1029 {
1030 c = getc(finput);
1031 if (c == '{')
1032 copy_action(rule, stack_offset);
1033 }
1034 else
1035 ungetc(c, finput);
1036}
1037
1038
1039
1040/* Assuming that a { has just been seen, copy everything up to the matching }
1041into the actions file.
1042stack_offset is the number of values in the current rule so far,
1043which says where to find $0 with respect to the top of the stack. */
1044
1045void
1046copy_action(rule, stack_offset)
1047symbol_list *rule;
1048int stack_offset;
1049{
1050 register int c;
1051 register int n;
1052 register int count;
1053 register int match;
1054 register int ended;
1055 register char *type_name;
1056 int cplus_comment;
1057
1058 /* offset is always 0 if parser has already popped the stack pointer */
1059 if (semantic_parser) stack_offset = 0;
1060
1061 fprintf(faction, "\ncase %d:\n", nrules);
1062 if (!nolinesflag)
1063 fprintf(faction, "#line %d \"%s\"\n", lineno, infile);
1064 putc('{', faction);
1065
1066 count = 1;
1067 c = getc(finput);
1068
1069 while (count > 0)
1070 {
1071 while (c != '}')
1072 {
1073 switch (c)
1074 {
1075 case '\n':
1076 putc(c, faction);
1077 lineno++;
1078 break;
1079
1080 case '{':
1081 putc(c, faction);
1082 count++;
1083 break;
1084
1085 case '\'':
1086 case '"':
1087 match = c;
1088 putc(c, faction);
1089 c = getc(finput);
1090
1091 while (c != match)
1092 {
1093 if (c == EOF || c == '\n')
1094 fatal("unterminated string");
1095
1096 putc(c, faction);
1097
1098 if (c == '\\')
1099 {
1100 c = getc(finput);
1101 if (c == EOF)
1102 fatal("unterminated string");
1103 putc(c, faction);
1104 if (c == '\n')
1105 lineno++;
1106 }
1107
1108 c = getc(finput);
1109 }
1110
1111 putc(c, faction);
1112 break;
1113
1114 case '/':
1115 putc(c, faction);
1116 c = getc(finput);
1117 if (c != '*' && c != '/')
1118 continue;
1119
1120 cplus_comment = (c == '/');
1121 putc(c, faction);
1122 c = getc(finput);
1123
1124 ended = 0;
1125 while (!ended)
1126 {
1127 if (!cplus_comment && c == '*')
1128 {
1129 while (c == '*')
1130 {
1131 putc(c, faction);
1132 c = getc(finput);
1133 }
1134
1135 if (c == '/')
1136 {
1137 putc(c, faction);
1138 ended = 1;
1139 }
1140 }
1141 else if (c == '\n')
1142 {
1143 lineno++;
1144 putc(c, faction);
1145 if (cplus_comment)
1146 ended = 1;
1147 else
1148 c = getc(finput);
1149 }
1150 else if (c == EOF)
1151 fatal("unterminated comment");
1152 else
1153 {
1154 putc(c, faction);
1155 c = getc(finput);
1156 }
1157 }
1158
1159 break;
1160
1161 case '$':
1162 c = getc(finput);
1163 type_name = NULL;
1164
1165 if (c == '<')
1166 {
1167 register char *cp = token_buffer;
1168
1169 while ((c = getc(finput)) != '>' && c > 0)
1170 *cp++ = c;
1171 *cp = 0;
1172 type_name = token_buffer;
1173 value_components_used = 1;
1174
1175 c = getc(finput);
1176 }
1177 if (c == '$')
1178 {
1179 fprintf(faction, "yyval");
1180 if (!type_name) type_name = get_type_name(0, rule);
1181 if (type_name)
1182 fprintf(faction, ".%s", type_name);
1183 if(!type_name && typed) /* JF */
1184 fprintf(stderr,"%s:%d: warning: $$ of '%s' has no declared type.\n",infile,lineno,rule->sym->tag);
1185 }
1186 else if (isdigit(c) || c == '-')
1187 {
1188 ungetc (c, finput);
1189 n = read_signed_integer(finput);
1190 c = getc(finput);
1191
1192 if (!type_name && n > 0)
1193 type_name = get_type_name(n, rule);
1194
1195 fprintf(faction, "yyvsp[%d]", n - stack_offset);
1196 if (type_name)
1197 fprintf(faction, ".%s", type_name);
1198 if(!type_name && typed) /* JF */
1199 fprintf(stderr,"%s:%d: warning: $%d of '%s' has no declared type.\n",infile,lineno,n,rule->sym->tag);
1200 continue;
1201 }
1202 else
1203 fatals("$%c is invalid",c); /* JF changed format */
1204
1205 break;
1206
1207 case '@':
1208 c = getc(finput);
1209 if (isdigit(c) || c == '-')
1210 {
1211 ungetc (c, finput);
1212 n = read_signed_integer(finput);
1213 c = getc(finput);
1214 }
1215 else
1216 fatal("invalid @-construct");
1217
1218 fprintf(faction, "yylsp[%d]", n - stack_offset);
1219 yylsp_needed = 1;
1220
1221 continue;
1222
1223 case EOF:
1224 fatal("unmatched '{'");
1225
1226 default:
1227 putc(c, faction);
1228 }
1229
1230 c = getc(finput);
1231 }
1232
1233 /* above loop exits when c is '}' */
1234
1235 if (--count)
1236 {
1237 putc(c, faction);
1238 c = getc(finput);
1239 }
1240 }
1241
1242 fprintf(faction, ";\n break;}");
1243}
1244
1245
1246
1247/* generate a dummy symbol, a nonterminal,
1248whose name cannot conflict with the user's names. */
1249
1250bucket *
1251gensym()
1252{
1253 register bucket *sym;
1254
1255 sprintf (token_buffer, "@%d", ++gensym_count);
1256 sym = getsym(token_buffer);
1257 sym->class = SNTERM;
1258 sym->value = nvars++;
1259 return (sym);
1260}
1261
1262/* Parse the input grammar into a one symbol_list structure.
1263Each rule is represented by a sequence of symbols: the left hand side
1264followed by the contents of the right hand side, followed by a null pointer
1265instead of a symbol to terminate the rule.
1266The next symbol is the lhs of the following rule.
1267
1268All guards and actions are copied out to the appropriate files,
1269labelled by the rule number they apply to. */
1270
1271void
1272readgram()
1273{
1274 register int t;
1275 register bucket *lhs;
1276 register symbol_list *p;
1277 register symbol_list *p1;
1278 register bucket *bp;
1279
1280 symbol_list *crule; /* points to first symbol_list of current rule. */
1281 /* its symbol is the lhs of the rule. */
1282 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1283
1284 p1 = NULL;
1285
1286 t = lex();
1287
1288 while (t != TWO_PERCENTS && t != ENDFILE)
1289 {
1290 if (t == IDENTIFIER || t == BAR)
1291 {
1292 register int actionflag = 0;
1293 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1294 int xactions = 0; /* JF for error checking */
1295 bucket *first_rhs = 0;
1296
1297 if (t == IDENTIFIER)
1298 {
1299 lhs = symval;
1300
1301 t = lex();
1302 if (t != COLON)
1303 fatal("ill-formed rule");
1304 }
1305
1306 if (nrules == 0)
1307 {
1308 if (t == BAR)
1309 fatal("grammar starts with vertical bar");
1310
1311 if (!start_flag)
1312 startval = lhs;
1313 }
1314
1315 /* start a new rule and record its lhs. */
1316
1317 nrules++;
1318 nitems++;
1319
1320 record_rule_line ();
1321
1322 p = NEW(symbol_list);
1323 p->sym = lhs;
1324
1325 crule1 = p1;
1326 if (p1)
1327 p1->next = p;
1328 else
1329 grammar = p;
1330
1331 p1 = p;
1332 crule = p;
1333
1334 /* mark the rule's lhs as a nonterminal if not already so. */
1335
1336 if (lhs->class == SUNKNOWN)
1337 {
1338 lhs->class = SNTERM;
1339 lhs->value = nvars;
1340 nvars++;
1341 }
1342 else if (lhs->class == STOKEN)
1343 fatals("rule given for %s, which is a token", lhs->tag);
1344
1345 /* read the rhs of the rule. */
1346
1347 for (;;)
1348 {
1349 t = lex();
1350
1351 if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1352
1353 /* If next token is an identifier, see if a colon follows it.
1354 If one does, exit this rule now. */
1355 if (t == IDENTIFIER)
1356 {
1357 register bucket *ssave;
1358 register int t1;
1359
1360 ssave = symval;
1361 t1 = lex();
1362 unlex(t1);
1363 symval = ssave;
1364 if (t1 == COLON) break;
1365
1366 if(!first_rhs) /* JF */
1367 first_rhs = symval;
1368 /* Not followed by colon =>
1369 process as part of this rule's rhs. */
1370 }
1371
1372 /* If we just passed an action, that action was in the middle
1373 of a rule, so make a dummy rule to reduce it to a
1374 non-terminal. */
1375 if (actionflag)
1376 {
1377 register bucket *sdummy;
1378
1379 /* Since the action was written out with this rule's */
1380 /* number, we must write give the new rule this number */
1381 /* by inserting the new rule before it. */
1382
1383 /* Make a dummy nonterminal, a gensym. */
1384 sdummy = gensym();
1385
1386 /* Make a new rule, whose body is empty,
1387 before the current one, so that the action
1388 just read can belong to it. */
1389 nrules++;
1390 nitems++;
1391 record_rule_line ();
1392 p = NEW(symbol_list);
1393 if (crule1)
1394 crule1->next = p;
1395 else grammar = p;
1396 p->sym = sdummy;
1397 crule1 = NEW(symbol_list);
1398 p->next = crule1;
1399 crule1->next = crule;
1400
1401 /* insert the dummy generated by that rule into this rule. */
1402 nitems++;
1403 p = NEW(symbol_list);
1404 p->sym = sdummy;
1405 p1->next = p;
1406 p1 = p;
1407
1408 actionflag = 0;
1409 }
1410
1411 if (t == IDENTIFIER)
1412 {
1413 nitems++;
1414 p = NEW(symbol_list);
1415 p->sym = symval;
1416 p1->next = p;
1417 p1 = p;
1418 }
1419 else /* handle an action. */
1420 {
1421 copy_action(crule, rulelength);
1422 actionflag = 1;
1423 xactions++; /* JF */
1424 }
1425 rulelength++;
1426 }
1427
1428 /* Put an empty link in the list to mark the end of this rule */
1429 p = NEW(symbol_list);
1430 p1->next = p;
1431 p1 = p;
1432
1433 if (t == PREC)
1434 {
1435 t = lex();
1436 crule->ruleprec = symval;
1437 t = lex();
1438 }
1439 if (t == GUARD)
1440 {
1441 if (! semantic_parser)
1442 fatal("%guard present but %semantic_parser not specified");
1443
1444 copy_guard(crule, rulelength);
1445 t = lex();
1446 }
1447 else if (t == LEFT_CURLY)
1448 {
1449 if (actionflag) fatal("two actions at end of one rule");
1450 copy_action(crule, rulelength);
1451 t = lex();
1452 }
1453 /* If $$ is being set in default way,
1454 warn if any type mismatch. */
1455 else if (!xactions && first_rhs && lhs->type_name != first_rhs->type_name)
1456 {
1457 if (lhs->type_name == 0 || first_rhs->type_name == 0
1458 || strcmp(lhs->type_name,first_rhs->type_name))
1459 fprintf(stderr, "%s:%d: warning: type clash ('%s' '%s') on default action\n",
1460 infile,
1461 lineno,
1462 lhs->type_name ? lhs->type_name : "",
1463 first_rhs->type_name ? first_rhs->type_name : "");
1464 }
1465 /* Warn if there is no default for $$ but we need one. */
1466 else if (!xactions && !first_rhs && lhs->type_name != 0)
1467 fprintf(stderr,
1468 "%s:%d: warning: empty rule for typed nonterminal, and no action\n",
1469 infile,
1470 lineno);
1471 if (t == SEMICOLON)
1472 t = lex();
1473 }
1474 /* these things can appear as alternatives to rules. */
1475 else if (t == TOKEN)
1476 {
1477 parse_token_decl(STOKEN, SNTERM);
1478 t = lex();
1479 }
1480 else if (t == NTERM)
1481 {
1482 parse_token_decl(SNTERM, STOKEN);
1483 t = lex();
1484 }
1485 else if (t == TYPE)
1486 {
1487 t = get_type();
1488 }
1489 else if (t == UNION)
1490 {
1491 parse_union_decl();
1492 t = lex();
1493 }
1494 else if (t == EXPECT)
1495 {
1496 parse_expect_decl();
1497 t = lex();
1498 }
1499 else if (t == START)
1500 {
1501 parse_start_decl();
1502 t = lex();
1503 }
1504 else
1505 fatal("invalid input");
1506 }
1507
1508 if (nsyms > MAXSHORT)
1509 fatals("too many symbols (tokens plus nonterminals); maximum %d",
1510 MAXSHORT);
1511 if (nrules == 0)
1512 fatal("no input grammar");
1513
1514 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1515 && !value_components_used)
1516 {
1517 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1518 but it seems better to be consistent.
1519 Most programs should declare their own type anyway. */
1520 fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1521 if (fdefines)
1522 fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1523 }
1524
1525 /* Report any undefined symbols and consider them nonterminals. */
1526
1527 for (bp = firstsymbol; bp; bp = bp->next)
1528 if (bp->class == SUNKNOWN)
1529 {
1530 fprintf(stderr, "symbol %s used, not defined as token, and no rules for it\n",
1531 bp->tag);
1532 failure = 1;
1533 bp->class = SNTERM;
1534 bp->value = nvars++;
1535 }
1536
1537 ntokens = nsyms - nvars;
1538}
1539
1540
1541void
1542record_rule_line ()
1543{
1544 /* Record each rule's source line number in rline table. */
1545
1546 if (nrules >= rline_allocated)
1547 {
1548 rline_allocated = nrules * 2;
9a1af7e6
DM
1549 rline = (short *) xrealloc (rline,
1550 rline_allocated * sizeof (short));
1ff442ca
NF
1551 }
1552 rline[nrules] = lineno;
1553}
1554
1555
1556/* read in a %type declaration and record its information for get_type_name to access */
1557
1558int
1559get_type()
1560{
1561 register int k;
1562 register int t;
1563 register char *name;
1564
1565 t = lex();
1566
1567 if (t != TYPENAME)
1568 fatal("ill-formed %type declaration");
1569
1570 k = strlen(token_buffer);
1571 name = NEW2(k + 1, char);
1572 strcpy(name, token_buffer);
1573
1574 for (;;)
1575 {
1576 t = lex();
1577
1578 switch (t)
1579 {
1580 case SEMICOLON:
1581 return (lex());
1582
1583 case COMMA:
1584 break;
1585
1586 case IDENTIFIER:
1587 if (symval->type_name == NULL)
1588 symval->type_name = name;
1589 else
1590 fatals("type redeclaration for %s", symval->tag);
1591
1592 break;
1593
1594 default:
1595 return (t);
1596 }
1597 }
1598}
1599
1600
1601
1602/* assign symbol numbers, and write definition of token names into fdefines.
1603Set up vectors tags and sprec of names and precedences of symbols. */
1604
1605void
1606packsymbols()
1607{
1608 register bucket *bp;
1609 register int tokno = 1;
1610 register int i;
1611 register int last_user_token_number;
1612
1613 /* int lossage = 0; JF set but not used */
1614
1615 tags = NEW2(nsyms + 1, char *);
1616 tags[0] = "$";
1617
1618 sprec = NEW2(nsyms, short);
1619 sassoc = NEW2(nsyms, short);
1620
1621 max_user_token_number = 256;
1622 last_user_token_number = 256;
1623
1624 for (bp = firstsymbol; bp; bp = bp->next)
1625 {
1626 if (bp->class == SNTERM)
1627 {
1628 bp->value += ntokens;
1629 }
1630 else
1631 {
1632 if (translations && !(bp->user_token_number))
1633 bp->user_token_number = ++last_user_token_number;
1634 if (bp->user_token_number > max_user_token_number)
1635 max_user_token_number = bp->user_token_number;
1636 bp->value = tokno++;
1637 }
1638
1639 tags[bp->value] = bp->tag;
1640 sprec[bp->value] = bp->prec;
1641 sassoc[bp->value] = bp->assoc;
1642
1643 }
1644
1645 if (translations)
1646 {
1647 register int i;
1648
1649 token_translations = NEW2(max_user_token_number+1, short);
1650
1651 /* initialize all entries for literal tokens to 2,
1652 the internal token number for $illegal., which represents all invalid inputs. */
1653 for (i = 0; i <= max_user_token_number; i++)
1654 token_translations[i] = 2;
1655 }
1656
1657 for (bp = firstsymbol; bp; bp = bp->next)
1658 {
1659 if (bp->value >= ntokens) continue;
1660 if (translations)
1661 {
1662 if (token_translations[bp->user_token_number] != 2)
1663 {
1664 /* JF made this a call to fatals() */
1665 fatals( "tokens %s and %s both assigned number %d",
1666 tags[token_translations[bp->user_token_number]],
1667 bp->tag,
1668 bp->user_token_number);
1669 }
1670 token_translations[bp->user_token_number] = bp->value;
1671 }
1672 }
1673
1674 error_token_number = errtoken->value;
1675
1676 output_token_defines(ftable);
1677
1678 if (startval->class == SUNKNOWN)
1679 fatals("the start symbol %s is undefined", startval->tag);
1680 else if (startval->class == STOKEN)
1681 fatals("the start symbol %s is a token", startval->tag);
1682
1683 start_symbol = startval->value;
1684
1685 if (definesflag)
1686 {
1687 output_token_defines(fdefines);
1688
1689 if (!pure_parser)
1690 {
1691 if (spec_name_prefix)
1692 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1693 else
1694 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1695 }
1696
1697 if (semantic_parser)
1698 for (i = ntokens; i < nsyms; i++)
1699 {
1700 /* don't make these for dummy nonterminals made by gensym. */
1701 if (*tags[i] != '@')
1702 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1703 }
1704#if 0
1705 /* `fdefines' is now a temporary file, so we need to copy its
1706 contents in `done', so we can't close it here. */
1707 fclose(fdefines);
1708 fdefines = NULL;
1709#endif
1710 }
1711}
1712
1713
1714void
1715output_token_defines(file)
1716FILE *file;
1717{
1718 bucket *bp;
1719
1720 for (bp = firstsymbol; bp; bp = bp->next)
1721 {
1722 if (bp->value >= ntokens) continue;
1723
1724 /* For named tokens, but not literal ones, define the name. */
1725 /* The value is the user token number. */
1726
1727 if ('\'' != *tags[bp->value] && bp != errtoken)
1728 {
1729 register char *cp = tags[bp->value];
1730 register char c;
1731
1732 /* Don't #define nonliteral tokens whose names contain periods. */
1733
1734 while ((c = *cp++) && c != '.');
1735 if (!c)
1736 {
1737 fprintf(file, "#define\t%s\t%d\n", tags[bp->value],
1738 (translations ? bp->user_token_number : bp->value));
1739 if (semantic_parser)
1740 fprintf(file, "#define\tT%s\t%d\n", tags[bp->value],
1741 bp->value);
1742 }
1743 }
1744 }
1745
1746 putc('\n', file);
1747}
1748
1749
1750
1751/* convert the rules into the representation using rrhs, rlhs and ritems. */
1752
1753void
1754packgram()
1755{
1756 register int itemno;
1757 register int ruleno;
1758 register symbol_list *p;
1759/* register bucket *bp; JF unused */
1760
1761 bucket *ruleprec;
1762
1763 ritem = NEW2(nitems + 1, short);
1764 rlhs = NEW2(nrules, short) - 1;
1765 rrhs = NEW2(nrules, short) - 1;
1766 rprec = NEW2(nrules, short) - 1;
1767 rprecsym = NEW2(nrules, short) - 1;
1768 rassoc = NEW2(nrules, short) - 1;
1769
1770 itemno = 0;
1771 ruleno = 1;
1772
1773 p = grammar;
1774 while (p)
1775 {
1776 rlhs[ruleno] = p->sym->value;
1777 rrhs[ruleno] = itemno;
1778 ruleprec = p->ruleprec;
1779
1780 p = p->next;
1781 while (p && p->sym)
1782 {
1783 ritem[itemno++] = p->sym->value;
1784 /* A rule gets by default the precedence and associativity
1785 of the last token in it. */
1786 if (p->sym->class == STOKEN)
1787 {
1788 rprec[ruleno] = p->sym->prec;
1789 rassoc[ruleno] = p->sym->assoc;
1790 }
1791 if (p) p = p->next;
1792 }
1793
1794 /* If this rule has a %prec,
1795 the specified symbol's precedence replaces the default. */
1796 if (ruleprec)
1797 {
1798 rprec[ruleno] = ruleprec->prec;
1799 rassoc[ruleno] = ruleprec->assoc;
1800 rprecsym[ruleno] = ruleprec->value;
1801 }
1802
1803 ritem[itemno++] = -ruleno;
1804 ruleno++;
1805
1806 if (p) p = p->next;
1807 }
1808
1809 ritem[itemno] = 0;
1810}
1811\f
/* Read a signed decimal integer from STREAM and return its value.

   An optional leading '-' is accepted, followed by zero or more digits.
   The first character that is not part of the number is pushed back onto
   STREAM.  When no digits are present the result is 0; a leading '-',
   if there was one, has still been consumed.

   A magnitude too large for an `int' is clamped to the largest `int'
   rather than overflowing; the remaining digits are still consumed, so
   the stream position is the same either way.  */

int
read_signed_integer (stream)
     FILE *stream;
{
  /* Largest positive int, computed here so that no extra header is
     needed.  */
  int limit = (int) (((unsigned) -1) >> 1);
  int c = getc(stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc(stream);
      sign = -1;
    }

  while (isdigit(c))
    {
      int digit = c - '0';

      /* True exactly when 10*n + digit would exceed LIMIT.  */
      if (n > (limit - digit) / 10)
        n = limit;
      else
        n = 10 * n + digit;

      c = getc(stream);
    }

  ungetc(c, stream);

  return n * sign;
}