]>
git.saurik.com Git - bison.git/blob - src/reader.c
0aea31594cecedc97ae01cc3d237f31f7d7b9475
1 /* Input parser for bison
2 Copyright (C) 1984, 86, 89, 92, 98, 2000 Free Software Foundation, Inc.
4 This file is part of Bison, the GNU Compiler Compiler.
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
22 /* Read in the grammar specification and record it in the format
23 described in gram.h. All guards are copied into the fguard file
24 and all actions into faction, in each case forming the body of a C
25 function (yyguard or yyaction) which contains a switch statement to
26 decide which guard or action to execute.
28 The entry point is reader (). */
57 #define YYLTYPE yyltype\n\
61 /* Number of slots allocated (but not necessarily used yet) in `rline' */
64 extern char *program_name
;
65 extern int definesflag
;
66 extern int nolinesflag
;
67 extern int noparserflag
;
68 extern int rawtoknumflag
;
69 extern bucket
*symval
;
71 extern int expected_conflicts
;
72 extern char *token_buffer
;
75 extern void init_lex
PARAMS((void));
76 extern char *grow_token_buffer
PARAMS((char *));
77 extern void tabinit
PARAMS((void));
78 extern void output_headers
PARAMS((void));
79 extern void output_trailers
PARAMS((void));
80 extern void free_symtab
PARAMS((void));
81 extern void open_extra_files
PARAMS((void));
82 extern char *int_to_string
PARAMS((int));
83 extern char *printable_version
PARAMS((int));
84 extern void fatal
PARAMS((char *));
85 extern void fatals
PARAMS((char *, char *));
86 extern void warn
PARAMS((char *));
87 extern void warni
PARAMS((char *, int));
88 extern void warns
PARAMS((char *, char *));
89 extern void warnss
PARAMS((char *, char *, char *));
90 extern void warnsss
PARAMS((char *, char *, char *, char *));
91 extern void unlex
PARAMS((int));
92 extern void done
PARAMS((int));
94 extern int skip_white_space
PARAMS((void));
95 extern int parse_percent_token
PARAMS((void));
96 extern int lex
PARAMS((void));
101 struct symbol_list
*next
;
108 void reader
PARAMS((void));
109 void reader_output_yylsp
PARAMS((FILE *));
110 void read_declarations
PARAMS((void));
111 void copy_definition
PARAMS((void));
112 void parse_token_decl
PARAMS((int, int));
113 void parse_start_decl
PARAMS((void));
114 void parse_type_decl
PARAMS((void));
115 void parse_assoc_decl
PARAMS((int));
116 void parse_union_decl
PARAMS((void));
117 void parse_expect_decl
PARAMS((void));
118 char *get_type_name
PARAMS((int, symbol_list
*));
119 void copy_guard
PARAMS((symbol_list
*, int));
120 void parse_thong_decl
PARAMS((void));
121 void copy_action
PARAMS((symbol_list
*, int));
122 bucket
*gensym
PARAMS((void));
123 void readgram
PARAMS((void));
124 void record_rule_line
PARAMS((void));
125 void packsymbols
PARAMS((void));
126 void output_token_defines
PARAMS((FILE *));
127 void packgram
PARAMS((void));
128 int read_signed_integer
PARAMS((FILE *));
131 static int get_type
PARAMS((void));
135 symbol_list
*grammar
;
141 /* Nonzero if components of semantic values are used, implying
142 they must be unions. */
143 static int value_components_used
;
145 static int typed
; /* nonzero if %union has been seen. */
147 static int lastprec
; /* incremented for each %left, %right or %nonassoc seen */
149 static int gensym_count
; /* incremented for each generated symbol */
151 static bucket
*errtoken
;
152 static bucket
*undeftoken
;
154 /* Nonzero if any action or guard uses the @n construct. */
155 static int yylsp_needed
;
159 skip_to_char (int target
)
163 warn(_(" Skipping to next \\n"));
165 warni(_(" Skipping to next %c"), target
);
168 c
= skip_white_space();
169 while (c
!= target
&& c
!= EOF
);
179 startval
= NULL
; /* start symbol not specified yet. */
182 translations
= 0; /* initially assume token number translation not needed. */
184 /* Nowadays translations is always set to 1,
185 since we give `error' a user-token-number
186 to satisfy the Posix demand for YYERRCODE==256. */
193 rline_allocated
= 10;
194 rline
= NEW2(rline_allocated
, short);
210 /* initialize the symbol table. */
212 /* construct the error token */
213 errtoken
= getsym("error");
214 errtoken
->class = STOKEN
;
215 errtoken
->user_token_number
= 256; /* Value specified by posix. */
216 /* construct a token that represents all undefined literal tokens. */
217 /* it is always token number 2. */
218 undeftoken
= getsym("$undefined.");
219 undeftoken
->class = STOKEN
;
220 undeftoken
->user_token_number
= 2;
221 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
222 Also notice any %token, %left, etc. found there. */
224 fprintf(ftable
, "\n/* Bison-generated parse tables, made from %s\n",
227 fprintf(ftable
, "\n/* A Bison parser, made from %s\n", infile
);
228 fprintf(ftable
, " by %s */\n\n", VERSION_STRING
);
229 fprintf(ftable
, "#define YYBISON 1 /* Identify Bison output. */\n\n");
231 /* start writing the guard and action files, if they are needed. */
233 /* read in the grammar, build grammar in list form. write out guards and actions. */
235 /* Now we know whether we need the line-number stack.
236 If we do, write its type into the .tab.h file. */
238 reader_output_yylsp(fdefines
);
239 /* write closing delimiters for actions and guards. */
242 fprintf(ftable
, "#define YYLSP_NEEDED\n\n");
243 /* assign the symbols their symbol numbers.
244 Write #defines for the token symbols into fdefines if requested. */
246 /* convert the grammar into the format described in gram.h. */
248 /* free the symbol table data structure
249 since symbols are now all referred to by symbol number. */
254 reader_output_yylsp (FILE *f
)
257 fprintf(f
, LTYPESTR
);
260 /* Read from finput until `%%' is seen. Discard the `%%'. Handle any
261 `%' declarations, and copy the contents of any `%{ ... %}' groups
265 read_declarations (void)
272 c
= skip_white_space();
276 tok
= parse_percent_token();
283 case PERCENT_LEFT_CURLY
:
288 parse_token_decl (STOKEN
, SNTERM
);
292 parse_token_decl (SNTERM
, STOKEN
);
314 parse_assoc_decl(LEFT_ASSOC
);
318 parse_assoc_decl(RIGHT_ASSOC
);
322 parse_assoc_decl(NON_ASSOC
);
325 case SEMANTIC_PARSER
:
326 if (semantic_parser
== 0)
341 warns(_("unrecognized: %s"), token_buffer
);
346 fatal(_("no input grammar"));
349 warns (_("unknown character: %s"), printable_version(c
));
356 /* copy the contents of a %{ ... %} into the definitions file.
357 The %{ has already been read. Return after reading the %}. */
360 copy_definition (void)
365 register int after_percent
; /* -1 while reading a character if prev char was % */
369 fprintf(fattrs
, "#line %d \"%s\"\n", lineno
, infile
);
397 fatal(_("unterminated string at end of file"));
400 warn(_("unterminated string"));
412 fatal(_("unterminated string at end of file"));
427 if (c
!= '*' && c
!= '/')
430 cplus_comment
= (c
== '/');
437 if (!cplus_comment
&& c
== '*')
461 fatal(_("unterminated comment in `%{' definition"));
472 fatal(_("unterminated `%{' definition"));
494 /* parse what comes after %token or %nterm.
495 For %token, what_is is STOKEN and what_is_not is SNTERM.
496 For %nterm, the arguments are reversed. */
499 parse_token_decl (int what_is
, int what_is_not
)
501 register int token
= 0;
502 register char *typename
= 0;
503 register struct bucket
*symbol
= NULL
; /* pts to symbol being defined */
508 int tmp_char
= ungetc (skip_white_space (), finput
);
513 fatals ("Premature EOF after %s", token_buffer
);
521 if (token
== TYPENAME
)
523 k
= strlen(token_buffer
);
524 typename
= NEW2(k
+ 1, char);
525 strcpy(typename
, token_buffer
);
526 value_components_used
= 1;
529 else if (token
== IDENTIFIER
&& *symval
->tag
== '\"'
533 symval
->class = STOKEN
;
534 symval
->type_name
= typename
;
535 symval
->user_token_number
= symbol
->user_token_number
;
536 symbol
->user_token_number
= SALIAS
;
538 symval
->alias
= symbol
;
539 symbol
->alias
= symval
;
542 nsyms
--; /* symbol and symval combined are only one symbol */
544 else if (token
== IDENTIFIER
)
546 int oldclass
= symval
->class;
549 if (symbol
->class == what_is_not
)
550 warns(_("symbol %s redefined"), symbol
->tag
);
551 symbol
->class = what_is
;
552 if (what_is
== SNTERM
&& oldclass
!= SNTERM
)
553 symbol
->value
= nvars
++;
557 if (symbol
->type_name
== NULL
)
558 symbol
->type_name
= typename
;
559 else if (strcmp(typename
, symbol
->type_name
) != 0)
560 warns(_("type redeclaration for %s"), symbol
->tag
);
563 else if (symbol
&& token
== NUMBER
)
565 symbol
->user_token_number
= numval
;
570 warnss(_("`%s' is invalid in %s"),
572 (what_is
== STOKEN
) ? "%token" : "%nterm");
579 /* parse what comes after %thong
581 %thong <type> token number literal
582 the <type> or number may be omitted. The number specifies the
585 Two symbols are entered in the table, one for the token symbol and
586 one for the literal. Both are given the <type>, if any, from the declaration.
587 The ->user_token_number of the first is SALIAS and the ->user_token_number
588 of the second is set to the number, if any, from the declaration.
589 The two symbols are linked via pointers in their ->alias fields.
591 During output_defines_table, the symbol is reported;
592 thereafter, only the literal string is retained.
593 It is the literal string that is output to yytname.
597 parse_thong_decl (void)
600 register struct bucket
*symbol
;
601 register char *typename
= 0;
605 token
= lex(); /* fetch typename or first token */
606 if (token
== TYPENAME
) {
607 k
= strlen(token_buffer
);
608 typename
= NEW2(k
+ 1, char);
609 strcpy(typename
, token_buffer
);
610 value_components_used
= 1;
611 token
= lex(); /* fetch first token */
614 /* process first token */
616 if (token
!= IDENTIFIER
)
618 warns(_("unrecognized item %s, expected an identifier"),
623 symval
->class = STOKEN
;
624 symval
->type_name
= typename
;
625 symval
->user_token_number
= SALIAS
;
628 token
= lex(); /* get number or literal string */
630 if (token
== NUMBER
) {
632 token
= lex(); /* okay, did number, now get literal */
636 /* process literal string token */
638 if (token
!= IDENTIFIER
|| *symval
->tag
!= '\"')
640 warns(_("expected string constant instead of %s"),
645 symval
->class = STOKEN
;
646 symval
->type_name
= typename
;
647 symval
->user_token_number
= usrtoknum
;
649 symval
->alias
= symbol
;
650 symbol
->alias
= symval
;
652 nsyms
--; /* symbol and symval combined are only one symbol */
656 /* parse what comes after %start */
659 parse_start_decl (void)
662 warn(_("multiple %start declarations"));
663 if (lex() != IDENTIFIER
)
664 warn(_("invalid %start declaration"));
674 /* read in a %type declaration and record its information for get_type_name to access */
677 parse_type_decl (void)
682 if (lex() != TYPENAME
)
684 warn(_("%type declaration has no <typename>"));
689 k
= strlen(token_buffer
);
690 name
= NEW2(k
+ 1, char);
691 strcpy(name
, token_buffer
);
696 int tmp_char
= ungetc (skip_white_space (), finput
);
701 fatals ("Premature EOF after %s", token_buffer
);
713 if (symval
->type_name
== NULL
)
714 symval
->type_name
= name
;
715 else if (strcmp(name
, symval
->type_name
) != 0)
716 warns(_("type redeclaration for %s"), symval
->tag
);
721 warns(_("invalid %%type declaration due to item: `%s'"), token_buffer
);
729 /* read in a %left, %right or %nonassoc declaration and record its information. */
730 /* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
733 parse_assoc_decl (int assoc
)
736 register char *name
= NULL
;
737 register int prev
= 0;
739 lastprec
++; /* Assign a new precedence level, never 0. */
744 int tmp_char
= ungetc (skip_white_space (), finput
);
749 fatals ("Premature EOF after %s", token_buffer
);
757 k
= strlen(token_buffer
);
758 name
= NEW2(k
+ 1, char);
759 strcpy(name
, token_buffer
);
766 if (symval
->prec
!= 0)
767 warns(_("redefining precedence of %s"), symval
->tag
);
768 symval
->prec
= lastprec
;
769 symval
->assoc
= assoc
;
770 if (symval
->class == SNTERM
)
771 warns(_("symbol %s redefined"), symval
->tag
);
772 symval
->class = STOKEN
;
774 { /* record the type, if one is specified */
775 if (symval
->type_name
== NULL
)
776 symval
->type_name
= name
;
777 else if (strcmp(name
, symval
->type_name
) != 0)
778 warns(_("type redeclaration for %s"), symval
->tag
);
783 if (prev
== IDENTIFIER
)
785 symval
->user_token_number
= numval
;
790 warns(_("invalid text (%s) - number should be after identifier"),
800 warns(_("unexpected item: %s"), token_buffer
);
811 /* copy the union declaration into fattrs (and fdefines),
812 where it is made into the
813 definition of YYSTYPE, the type of elements of the parser value stack. */
816 parse_union_decl (void)
820 register int in_comment
;
824 warn(_("multiple %union declarations"));
829 fprintf(fattrs
, "\n#line %d \"%s\"\n", lineno
, infile
);
831 fprintf(fattrs
, "\n");
833 fprintf(fattrs
, "typedef union");
835 fprintf(fdefines
, "typedef union");
856 if (c
!= '*' && c
!= '/')
863 cplus_comment
= (c
== '/');
882 fatal(_("unterminated comment at end of file"));
884 if (!cplus_comment
&& c
== '*')
908 warn (_("unmatched close-brace (`}')"));
912 fprintf(fattrs
, " YYSTYPE;\n");
914 fprintf(fdefines
, " YYSTYPE;\n");
915 /* JF don't choke on trailing semi */
916 c
=skip_white_space();
917 if(c
!=';') ungetc(c
,finput
);
926 /* parse the declaration %expect N which says to expect N
927 shift-reduce conflicts. */
930 parse_expect_decl (void)
937 while (c
== ' ' || c
== '\t')
941 while (c
>= '0' && c
<= '9')
951 if (count
<= 0 || count
> 10)
952 warn(_("argument of %expect is not an integer"));
953 expected_conflicts
= atoi (buffer
);
956 /* that's all of parsing the declaration section */
958 /* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
961 get_type_name (int n
, symbol_list
*rule
)
963 static char *msg
= N_("invalid $ value");
966 register symbol_list
*rp
;
980 if (rp
== NULL
|| rp
->sym
== NULL
)
988 return (rp
->sym
->type_name
);
992 /* Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of
993 the string (either ' or "). */
996 copy_string (FILE *finput
, FILE *foutput
, int match
)
1000 putc (match
, foutput
);
1006 fatal(_("unterminated string at end of file"));
1009 warn (_("unterminated string"));
1011 c
= match
; /* invent terminator */
1021 fatal (_("unterminated string"));
1034 /* Dump the comment from FINPUT to FOUTPUT. C is either `*' or `/',
1035 depending upon the type of comments used. */
1038 copy_comment (FILE *finput
, FILE *foutput
, int c
)
1044 cplus_comment
= (c
== '/');
1051 if (!cplus_comment
&& c
== '*')
1075 fatal (_("unterminated comment"));
1084 /* After `%guard' is seen in the input file, copy the actual guard
1085 into the guards file. If the guard is followed by an action, copy
1086 that into the actions file. STACK_OFFSET is the number of values
1087 in the current rule so far, which says where to find `$0' with
1088 respect to the top of the stack, for the simple parser in which the
1089 stack is not popped until after the guard is run. */
1092 copy_guard (symbol_list
*rule
, int stack_offset
)
1097 register char *type_name
;
1100 /* offset is always 0 if parser has already popped the stack pointer */
1101 if (semantic_parser
) stack_offset
= 0;
1103 fprintf(fguard
, "\ncase %d:\n", nrules
);
1105 fprintf (fguard
, "#line %d \"%s\"\n", lineno
, infile
);
1111 while (brace_flag
? (count
> 0) : (c
!= ';'))
1132 warn(_("unmatched right brace (`}')"));
1133 c
= getc(finput
); /* skip it */
1139 copy_string (finput
, fguard
, c
);
1145 if (c
!= '*' && c
!= '/')
1147 copy_comment (finput
, fguard
, c
);
1156 register char *cp
= token_buffer
;
1158 while ((c
= getc(finput
)) != '>' && c
> 0)
1160 if (cp
== token_buffer
+ maxtoken
)
1161 cp
= grow_token_buffer(cp
);
1166 type_name
= token_buffer
;
1173 fprintf(fguard
, "yyval");
1175 type_name
= rule
->sym
->type_name
;
1177 fprintf(fguard
, ".%s", type_name
);
1178 if(!type_name
&& typed
)
1179 warns(_("$$ of `%s' has no declared type"), rule
->sym
->tag
);
1181 else if (isdigit(c
) || c
== '-')
1184 n
= read_signed_integer (finput
);
1187 if (!type_name
&& n
> 0)
1188 type_name
= get_type_name(n
, rule
);
1190 fprintf(fguard
, "yyvsp[%d]", n
- stack_offset
);
1192 fprintf(fguard
, ".%s", type_name
);
1193 if (!type_name
&& typed
)
1194 warnss (_("$%s of `%s' has no declared type"),
1195 int_to_string(n
), rule
->sym
->tag
);
1199 warns(_("$%s is invalid"), printable_version(c
));
1206 fprintf (fguard
, "yyloc");
1209 else if (isdigit(c
) || c
== '-')
1212 n
= read_signed_integer (finput
);
1214 fprintf (fguard
, "yylsp[%d]", n
- stack_offset
);
1220 warns (_("@%s is invalid"), printable_version (c
));
1226 fatal (_("unterminated %%guard clause"));
1232 if (c
!= '}' || count
!= 0)
1236 c
= skip_white_space();
1238 fprintf(fguard
, ";\n break;}");
1240 copy_action(rule
, stack_offset
);
1243 c
= getc(finput
); /* why not skip_white_space -wjh */
1245 copy_action(rule
, stack_offset
);
1253 /* Assuming that a `{' has just been seen, copy everything up to the
1254 matching `}' into the actions file. STACK_OFFSET is the number of
1255 values in the current rule so far, which says where to find `$0'
1256 with respect to the top of the stack. */
1259 copy_action (symbol_list
*rule
, int stack_offset
)
1264 register char *type_name
;
1266 /* offset is always 0 if parser has already popped the stack pointer */
1267 if (semantic_parser
)
1270 fprintf (faction
, "\ncase %d:\n", nrules
);
1272 fprintf (faction
, "#line %d \"%s\"\n", lineno
, infile
);
1273 putc ('{', faction
);
1296 copy_string (finput
, faction
, c
);
1302 if (c
!= '*' && c
!= '/')
1304 copy_comment (finput
, faction
, c
);
1313 register char *cp
= token_buffer
;
1315 while ((c
= getc(finput
)) != '>' && c
> 0)
1317 if (cp
== token_buffer
+ maxtoken
)
1318 cp
= grow_token_buffer(cp
);
1323 type_name
= token_buffer
;
1324 value_components_used
= 1;
1330 fprintf(faction
, "yyval");
1332 type_name
= get_type_name(0, rule
);
1334 fprintf(faction
, ".%s", type_name
);
1335 if(!type_name
&& typed
)
1336 warns(_("$$ of `%s' has no declared type"),
1339 else if (isdigit(c
) || c
== '-')
1342 n
= read_signed_integer(finput
);
1345 if (!type_name
&& n
> 0)
1346 type_name
= get_type_name(n
, rule
);
1348 fprintf(faction
, "yyvsp[%d]", n
- stack_offset
);
1350 fprintf(faction
, ".%s", type_name
);
1351 if(!type_name
&& typed
)
1352 warnss(_("$%s of `%s' has no declared type"),
1353 int_to_string(n
), rule
->sym
->tag
);
1357 warns(_("$%s is invalid"), printable_version(c
));
1365 fprintf (faction
, "yyloc");
1368 else if (isdigit(c
) || c
== '-')
1371 n
= read_signed_integer (finput
);
1373 fprintf (faction
, "yylsp[%d]", n
- stack_offset
);
1379 warns (_("@%s is invalid"), printable_version (c
));
1385 fatal(_("unmatched `{'"));
1394 /* above loop exits when c is '}' */
1403 fprintf(faction
, ";\n break;}");
1408 /* generate a dummy symbol, a nonterminal,
1409 whose name cannot conflict with the user's names. */
1414 register bucket
*sym
;
1416 sprintf (token_buffer
, "@%d", ++gensym_count
);
1417 sym
= getsym(token_buffer
);
1418 sym
->class = SNTERM
;
1419 sym
->value
= nvars
++;
1423 /* Parse the input grammar into a single symbol_list structure.
1424 Each rule is represented by a sequence of symbols: the left hand side
1425 followed by the contents of the right hand side, followed by a null pointer
1426 instead of a symbol to terminate the rule.
1427 The next symbol is the lhs of the following rule.
1429 All guards and actions are copied out to the appropriate files,
1430 labelled by the rule number they apply to. */
1436 register bucket
*lhs
= NULL
;
1437 register symbol_list
*p
;
1438 register symbol_list
*p1
;
1439 register bucket
*bp
;
1441 symbol_list
*crule
; /* points to first symbol_list of current rule. */
1442 /* its symbol is the lhs of the rule. */
1443 symbol_list
*crule1
; /* points to the symbol_list preceding crule. */
1449 while (t
!= TWO_PERCENTS
&& t
!= ENDFILE
)
1451 if (t
== IDENTIFIER
|| t
== BAR
)
1453 register int actionflag
= 0;
1454 int rulelength
= 0; /* number of symbols in rhs of this rule so far */
1455 int xactions
= 0; /* JF for error checking */
1456 bucket
*first_rhs
= 0;
1458 if (t
== IDENTIFIER
)
1471 warn(_("ill-formed rule: initial symbol not followed by colon"));
1476 if (nrules
== 0 && t
== BAR
)
1478 warn(_("grammar starts with vertical bar"));
1479 lhs
= symval
; /* BOGUS: use a random symval */
1481 /* start a new rule and record its lhs. */
1486 record_rule_line ();
1488 p
= NEW(symbol_list
);
1500 /* mark the rule's lhs as a nonterminal if not already so. */
1502 if (lhs
->class == SUNKNOWN
)
1504 lhs
->class = SNTERM
;
1508 else if (lhs
->class == STOKEN
)
1509 warns(_("rule given for %s, which is a token"), lhs
->tag
);
1511 /* read the rhs of the rule. */
1519 crule
->ruleprec
= symval
;
1523 if (! (t
== IDENTIFIER
|| t
== LEFT_CURLY
)) break;
1525 /* If next token is an identifier, see if a colon follows it.
1526 If one does, exit this rule now. */
1527 if (t
== IDENTIFIER
)
1529 register bucket
*ssave
;
1536 if (t1
== COLON
) break;
1538 if(!first_rhs
) /* JF */
1540 /* Not followed by colon =>
1541 process as part of this rule's rhs. */
1544 /* If we just passed an action, that action was in the middle
1545 of a rule, so make a dummy rule to reduce it to a
1549 register bucket
*sdummy
;
1551 /* Since the action was written out with this rule's */
1552 /* number, we must give the new rule this number */
1553 /* by inserting the new rule before it. */
1555 /* Make a dummy nonterminal, a gensym. */
1558 /* Make a new rule, whose body is empty,
1559 before the current one, so that the action
1560 just read can belong to it. */
1563 record_rule_line ();
1564 p
= NEW(symbol_list
);
1569 crule1
= NEW(symbol_list
);
1571 crule1
->next
= crule
;
1573 /* insert the dummy generated by that rule into this rule. */
1575 p
= NEW(symbol_list
);
1583 if (t
== IDENTIFIER
)
1586 p
= NEW(symbol_list
);
1591 else /* handle an action. */
1593 copy_action(crule
, rulelength
);
1595 xactions
++; /* JF */
1598 } /* end of read rhs of rule */
1600 /* Put an empty link in the list to mark the end of this rule */
1601 p
= NEW(symbol_list
);
1607 warn(_("two @prec's in a row"));
1609 crule
->ruleprec
= symval
;
1614 if (! semantic_parser
)
1615 warn(_("%%guard present but %%semantic_parser not specified"));
1617 copy_guard(crule
, rulelength
);
1620 else if (t
== LEFT_CURLY
)
1622 /* This case never occurs -wjh */
1624 warn(_("two actions at end of one rule"));
1625 copy_action(crule
, rulelength
);
1627 xactions
++; /* -wjh */
1630 /* If $$ is being set in default way, warn if any type
1634 && lhs
->type_name
!= first_rhs
->type_name
)
1636 if (lhs
->type_name
== 0
1637 || first_rhs
->type_name
== 0
1638 || strcmp(lhs
->type_name
,first_rhs
->type_name
))
1639 warnss(_("type clash (`%s' `%s') on default action"),
1640 lhs
->type_name
? lhs
->type_name
: "",
1641 first_rhs
->type_name
? first_rhs
->type_name
: "");
1643 /* Warn if there is no default for $$ but we need one. */
1644 else if (!xactions
&& !first_rhs
&& lhs
->type_name
!= 0)
1645 warn(_("empty rule for typed nonterminal, and no action"));
1650 /* these things can appear as alternatives to rules. */
1652 a) none of the documentation allows them
1653 b) most of them scan forward until finding a next %
1654 thus they may swallow lots of intervening rules
1656 else if (t
== TOKEN
)
1658 parse_token_decl(STOKEN
, SNTERM
);
1661 else if (t
== NTERM
)
1663 parse_token_decl(SNTERM
, STOKEN
);
1670 else if (t
== UNION
)
1675 else if (t
== EXPECT
)
1677 parse_expect_decl();
1680 else if (t
== START
)
1689 warns(_("invalid input: %s"), token_buffer
);
1694 /* grammar has been read. Do some checking */
1696 if (nsyms
> MAXSHORT
)
1697 fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
1698 int_to_string(MAXSHORT
));
1700 fatal(_("no rules in the input grammar"));
1702 if (typed
== 0 /* JF put out same default YYSTYPE as YACC does */
1703 && !value_components_used
)
1705 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1706 but it seems better to be consistent.
1707 Most programs should declare their own type anyway. */
1708 fprintf(fattrs
, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1710 fprintf(fdefines
, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1713 /* Report any undefined symbols and consider them nonterminals. */
1715 for (bp
= firstsymbol
; bp
; bp
= bp
->next
)
1716 if (bp
->class == SUNKNOWN
)
1718 warns(_("symbol %s is used, but is not defined as a token and has no rules"),
1721 bp
->value
= nvars
++;
1724 ntokens
= nsyms
- nvars
;
1729 record_rule_line (void)
1731 /* Record each rule's source line number in rline table. */
1733 if (nrules
>= rline_allocated
)
1735 rline_allocated
= nrules
* 2;
1736 rline
= (short *) xrealloc ((char *) rline
,
1737 rline_allocated
* sizeof (short));
1739 rline
[nrules
] = lineno
;
1744 /* read in a %type declaration and record its information for get_type_name to access */
1745 /* this is unused. it is only called from the #if 0 part of readgram */
1751 register char *name
;
1757 warn(_("ill-formed %type declaration"));
1761 k
= strlen(token_buffer
);
1762 name
= NEW2(k
+ 1, char);
1763 strcpy(name
, token_buffer
);
1778 if (symval
->type_name
== NULL
)
1779 symval
->type_name
= name
;
1780 else if (strcmp(name
, symval
->type_name
) != 0)
1781 warns(_("type redeclaration for %s"), symval
->tag
);
1793 /* assign symbol numbers, and write definition of token names into fdefines.
1794 Set up vectors tags and sprec of names and precedences of symbols. */
1799 register bucket
*bp
;
1800 register int tokno
= 1;
1802 register int last_user_token_number
;
1804 /* int lossage = 0; JF set but not used */
1806 tags
= NEW2(nsyms
+ 1, char *);
1808 user_toknums
= NEW2(nsyms
+ 1, int);
1809 user_toknums
[0] = 0;
1811 sprec
= NEW2(nsyms
, short);
1812 sassoc
= NEW2(nsyms
, short);
1814 max_user_token_number
= 256;
1815 last_user_token_number
= 256;
1817 for (bp
= firstsymbol
; bp
; bp
= bp
->next
)
1819 if (bp
->class == SNTERM
)
1821 bp
->value
+= ntokens
;
1825 /* this symbol and its alias are a single token defn.
1826 allocate a tokno, and assign to both
1827 check agreement of ->prec and ->assoc fields
1828 and make both the same
1831 bp
->value
= bp
->alias
->value
= tokno
++;
1833 if (bp
->prec
!= bp
->alias
->prec
) {
1834 if (bp
->prec
!= 0 && bp
->alias
->prec
!= 0
1835 && bp
->user_token_number
== SALIAS
)
1836 warnss(_("conflicting precedences for %s and %s"),
1837 bp
->tag
, bp
->alias
->tag
);
1838 if (bp
->prec
!= 0) bp
->alias
->prec
= bp
->prec
;
1839 else bp
->prec
= bp
->alias
->prec
;
1842 if (bp
->assoc
!= bp
->alias
->assoc
) {
1843 if (bp
->assoc
!= 0 && bp
->alias
->assoc
!= 0
1844 && bp
->user_token_number
== SALIAS
)
1845 warnss(_("conflicting assoc values for %s and %s"),
1846 bp
->tag
, bp
->alias
->tag
);
1847 if (bp
->assoc
!= 0) bp
->alias
->assoc
= bp
->assoc
;
1848 else bp
->assoc
= bp
->alias
->assoc
;
1851 if (bp
->user_token_number
== SALIAS
)
1852 continue; /* do not do processing below for SALIASs */
1855 else /* bp->class == STOKEN */
1857 bp
->value
= tokno
++;
1860 if (bp
->class == STOKEN
)
1862 if (translations
&& !(bp
->user_token_number
))
1863 bp
->user_token_number
= ++last_user_token_number
;
1864 if (bp
->user_token_number
> max_user_token_number
)
1865 max_user_token_number
= bp
->user_token_number
;
1868 tags
[bp
->value
] = bp
->tag
;
1869 user_toknums
[bp
->value
] = bp
->user_token_number
;
1870 sprec
[bp
->value
] = bp
->prec
;
1871 sassoc
[bp
->value
] = bp
->assoc
;
1879 token_translations
= NEW2(max_user_token_number
+1, short);
1881 /* initialize all entries for literal tokens to 2,
1882 the internal token number for $undefined.,
1883 which represents all invalid inputs. */
1884 for (i
= 0; i
<= max_user_token_number
; i
++)
1885 token_translations
[i
] = 2;
1887 for (bp
= firstsymbol
; bp
; bp
= bp
->next
)
1889 if (bp
->value
>= ntokens
) continue; /* non-terminal */
1890 if (bp
->user_token_number
== SALIAS
) continue;
1891 if (token_translations
[bp
->user_token_number
] != 2)
1892 warnsss(_("tokens %s and %s both assigned number %s"),
1893 tags
[token_translations
[bp
->user_token_number
]],
1895 int_to_string(bp
->user_token_number
));
1896 token_translations
[bp
->user_token_number
] = bp
->value
;
1900 error_token_number
= errtoken
->value
;
1903 output_token_defines(ftable
);
1905 if (startval
->class == SUNKNOWN
)
1906 fatals(_("the start symbol %s is undefined"), startval
->tag
);
1907 else if (startval
->class == STOKEN
)
1908 fatals(_("the start symbol %s is a token"), startval
->tag
);
1910 start_symbol
= startval
->value
;
1914 output_token_defines(fdefines
);
1918 if (spec_name_prefix
)
1919 fprintf(fdefines
, "\nextern YYSTYPE %slval;\n", spec_name_prefix
);
1921 fprintf(fdefines
, "\nextern YYSTYPE yylval;\n");
1924 if (semantic_parser
)
1925 for (i
= ntokens
; i
< nsyms
; i
++)
1927 /* don't make these for dummy nonterminals made by gensym. */
1928 if (*tags
[i
] != '@')
1929 fprintf(fdefines
, "#define\tNT%s\t%d\n", tags
[i
], i
);
1932 /* `fdefines' is now a temporary file, so we need to copy its
1933 contents in `done', so we can't close it here. */
1940 /* For named tokens, but not literal ones, define the name.
1941 The value is the user token number.
1944 output_token_defines (FILE *file
)
1947 register char *cp
, *symbol
;
1950 for (bp
= firstsymbol
; bp
; bp
= bp
->next
)
1952 symbol
= bp
->tag
; /* get symbol */
1954 if (bp
->value
>= ntokens
) continue;
1955 if (bp
->user_token_number
== SALIAS
) continue;
1956 if ('\'' == *symbol
) continue; /* skip literal character */
1957 if (bp
== errtoken
) continue; /* skip error token */
1958 if ('\"' == *symbol
)
1960 /* use literal string only if given a symbol with an alias */
1962 symbol
= bp
->alias
->tag
;
1967 /* Don't #define nonliteral tokens whose names contain periods. */
1969 while ((c
= *cp
++) && c
!= '.');
1970 if (c
!= '\0') continue;
1972 fprintf(file
, "#define\t%s\t%d\n", symbol
,
1973 ((translations
&& ! rawtoknumflag
)
1974 ? bp
->user_token_number
1976 if (semantic_parser
)
1977 fprintf(file
, "#define\tT%s\t%d\n", symbol
, bp
->value
);
1985 /* convert the rules into the representation using rrhs, rlhs and ritems. */
1990 register int itemno
;
1991 register int ruleno
;
1992 register symbol_list
*p
;
1993 /* register bucket *bp; JF unused */
1997 ritem
= NEW2(nitems
+ 1, short);
1998 rlhs
= NEW2(nrules
, short) - 1;
1999 rrhs
= NEW2(nrules
, short) - 1;
2000 rprec
= NEW2(nrules
, short) - 1;
2001 rprecsym
= NEW2(nrules
, short) - 1;
2002 rassoc
= NEW2(nrules
, short) - 1;
2010 rlhs
[ruleno
] = p
->sym
->value
;
2011 rrhs
[ruleno
] = itemno
;
2012 ruleprec
= p
->ruleprec
;
2017 ritem
[itemno
++] = p
->sym
->value
;
2018 /* A rule gets by default the precedence and associativity
2019 of the last token in it. */
2020 if (p
->sym
->class == STOKEN
)
2022 rprec
[ruleno
] = p
->sym
->prec
;
2023 rassoc
[ruleno
] = p
->sym
->assoc
;
2028 /* If this rule has a %prec,
2029 the specified symbol's precedence replaces the default. */
2032 rprec
[ruleno
] = ruleprec
->prec
;
2033 rassoc
[ruleno
] = ruleprec
->assoc
;
2034 rprecsym
[ruleno
] = ruleprec
->value
;
2037 ritem
[itemno
++] = -ruleno
;
2046 /* Read a signed integer from STREAM and return its value. */
2049 read_signed_integer (FILE *stream
)
2051 register int c
= getc(stream
);
2052 register int sign
= 1;
2063 n
= 10*n
+ (c
- '0');