Commit | Line | Data |
---|---|---|
e46cedb9 PE |
1 | /* Data definitions for internal representation of Bison's input. |
2 | ||
34136e65 | 3 | Copyright (C) 1984, 1986, 1989, 1992, 2001-2007, 2009-2012 Free |
575619af | 4 | Software Foundation, Inc. |
f7d4d87a | 5 | |
b2ca4022 | 6 | This file is part of Bison, the GNU Compiler Compiler. |
f7d4d87a | 7 | |
f16b0819 | 8 | This program is free software: you can redistribute it and/or modify |
b2ca4022 | 9 | it under the terms of the GNU General Public License as published by |
f16b0819 PE |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. | |
f7d4d87a | 12 | |
f16b0819 | 13 | This program is distributed in the hope that it will be useful, |
b2ca4022 AD |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
f7d4d87a | 17 | |
b2ca4022 | 18 | You should have received a copy of the GNU General Public License |
f16b0819 | 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
f7d4d87a | 20 | |
b2ca4022 AD |
21 | #ifndef GRAM_H_ |
22 | # define GRAM_H_ | |
f7d4d87a | 23 | |
aea13e97 | 24 | /* Representation of the grammar rules: |
f7d4d87a | 25 | |
aea13e97 AD |
26 | NTOKENS is the number of tokens, and NVARS is the number of |
27 | variables (nonterminals). NSYMS is the total number, ntokens + | |
b2ca4022 | 28 | nvars. |
f7d4d87a | 29 | |
b2ca4022 | 30 | Each symbol (either token or variable) receives a symbol number. |
aea13e97 AD |
31 | Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 |
32 | are for variables. Symbol number zero is the end-of-input token. | |
33 | This token is counted in ntokens. The true number of token values | |
34 | assigned is NTOKENS reduced by one for each alias declaration. | |
35 | ||
36 | The rules receive rule numbers 1 to NRULES in the order they are | |
37 | written. More precisely Bison augments the grammar with the | |
88bce5a2 AD |
38 | initial rule, `$accept: START-SYMBOL $end', which is numbered 1, |
39 | all the user rules are 2, 3 etc. Each time a rule number is | |
40 | presented to the user, we subtract 1, so *displayed* rule numbers | |
41 | are 0, 1, 2... | |
aea13e97 AD |
42 | |
43 | Internally, we cannot use the number 0 for a rule because for | |
44 | instance RITEM stores both symbol (the RHS) and rule numbers: the | |
45 | symbols are shorts >= 0, and rule number are stored negative. | |
46 | Therefore 0 cannot be used, since it would be both the rule number | |
88bce5a2 | 47 | 0, and the token $end). |
aea13e97 | 48 | |
fdbcd8e2 | 49 | Actions are accessed via the rule number. |
f7d4d87a | 50 | |
b2ed6e58 | 51 | The rules themselves are described by several arrays: amongst which |
1a2b5d37 | 52 | RITEM, and RULES. |
b2ed6e58 | 53 | |
7478c462 | 54 | RULES is an array of rules, whose members are: |
b2ed6e58 | 55 | |
03b31c0c | 56 | RULES[R].lhs -- the symbol of the left hand side of rule R. |
b2ed6e58 | 57 | |
aea13e97 AD |
58 | RULES[R].rhs -- the index in RITEM of the beginning of the portion |
59 | for rule R. | |
f7d4d87a | 60 | |
03b31c0c | 61 | RULES[R].prec -- the symbol providing the precedence level of R. |
652a871c | 62 | |
03b31c0c AD |
63 | RULES[R].precsym -- the symbol attached (via %prec) to give its |
64 | precedence to R. Of course, if set, it is equal to `prec', but we | |
65 | need to distinguish one from the other when reducing: a symbol used | |
66 | in a %prec is not useless. | |
652a871c | 67 | |
1a2b5d37 | 68 | RULES[R].assoc -- the associativity of R. |
e41dc700 | 69 | |
88bce5a2 AD |
70 | RULES[R].dprec -- the dynamic precedence level of R (for GLR |
71 | parsing). | |
676385e2 | 72 | |
88bce5a2 AD |
73 | RULES[R].merger -- index of merging function for R (for GLR |
74 | parsing). | |
676385e2 | 75 | |
1a2b5d37 | 76 | RULES[R].line -- the line where R was defined. |
652a871c | 77 | |
738c69de | 78 | RULES[R].useful -- true iff the rule is used (i.e., false if thrown |
03b31c0c | 79 | away by reduce). |
68f1e3ed | 80 | |
b2ed6e58 AD |
81 | The right hand side is stored as symbol numbers in a portion of |
82 | RITEM. | |
f7d4d87a | 83 | |
b2ca4022 AD |
84 | The length of the portion is one greater than the number of symbols |
85 | in the rule's right hand side. The last element in the portion | |
86 | contains minus R, which identifies it as the end of a portion and | |
87 | says which rule it is for. | |
f7d4d87a | 88 | |
a900a624 AD |
89 | The portions of RITEM come in order of increasing rule number. |
90 | NRITEMS is the total length of RITEM. Each element of RITEM is | |
91 | called an "item" and its index in RITEM is an item number. | |
f7d4d87a | 92 | |
b2ca4022 AD |
93 | Item numbers are used in the finite state machine to represent |
94 | places that parsing can get to. | |
f7d4d87a | 95 | |
aea13e97 | 96 | SYMBOLS[I]->prec records the precedence level of each symbol. |
f7d4d87a | 97 | |
b2ca4022 AD |
98 | Precedence levels are assigned in increasing order starting with 1 |
99 | so that numerically higher precedence values mean tighter binding | |
100 | as they ought to. Zero as a symbol or rule's precedence means none | |
101 | is assigned. | |
f7d4d87a | 102 | |
aea13e97 | 103 | Associativities are recorded similarly in SYMBOLS[I]->assoc. */ |
f7d4d87a | 104 | |
8efe435c AD |
105 | # include "location.h" |
106 | # include "symtab.h" | |
f7d4d87a | 107 | |
e9690142 JD |
108 | # define ISTOKEN(i) ((i) < ntokens) |
109 | # define ISVAR(i) ((i) >= ntokens) | |
f7d4d87a | 110 | |
f7d4d87a DM |
111 | extern int nsyms; |
112 | extern int ntokens; | |
113 | extern int nvars; | |
114 | ||
7478c462 | 115 | typedef int item_number; |
e9071366 | 116 | #define ITEM_NUMBER_MAX INT_MAX |
7478c462 | 117 | extern item_number *ritem; |
0c2d3f4c | 118 | extern unsigned int nritems; |
b2ed6e58 | 119 | |
7478c462 PE |
120 | /* There is weird relationship between OT1H item_number and OTOH |
121 | symbol_number and rule_number: we store the latter in | |
122 | item_number. symbol_number values are stored as-is, while | |
123 | the negation of (rule_number + 1) is stored. | |
5fbb0954 | 124 | |
7478c462 | 125 | Therefore, a symbol_number must be a valid item_number, and we |
5fbb0954 | 126 | sometimes have to perform the converse transformation. */ |
e46cedb9 PE |
127 | |
128 | static inline item_number | |
ba24760f | 129 | symbol_number_as_item_number (symbol_number sym) |
e46cedb9 | 130 | { |
ba24760f | 131 | return sym; |
e46cedb9 PE |
132 | } |
133 | ||
134 | static inline symbol_number | |
135 | item_number_as_symbol_number (item_number i) | |
136 | { | |
137 | return i; | |
138 | } | |
5fbb0954 | 139 | |
36b5e963 AD |
140 | static inline bool |
141 | item_number_is_symbol_number (item_number i) | |
142 | { | |
143 | return i >= 0; | |
144 | } | |
145 | ||
7478c462 | 146 | /* Rule numbers. */ |
f6fbd3da | 147 | typedef int rule_number; |
e9071366 | 148 | #define RULE_NUMBER_MAX INT_MAX |
7478c462 | 149 | extern rule_number nrules; |
e46cedb9 PE |
150 | |
151 | static inline item_number | |
152 | rule_number_as_item_number (rule_number r) | |
153 | { | |
154 | return -1 - r; | |
155 | } | |
156 | ||
157 | static inline rule_number | |
158 | item_number_as_rule_number (item_number i) | |
159 | { | |
160 | return -1 - i; | |
161 | } | |
9222837b | 162 | |
36b5e963 AD |
163 | static inline bool |
164 | item_number_is_rule_number (item_number i) | |
165 | { | |
166 | return i < 0; | |
167 | } | |
9222837b AD |
168 | |
169 | /*--------. | |
170 | | Rules. | | |
171 | `--------*/ | |
62a3e4f0 | 172 | |
7478c462 | 173 | typedef struct |
652a871c | 174 | { |
c3b407f4 AD |
175 | /* The number of the rule in the source. It is usually the index in |
176 | RULES too, except if there are useless rules. */ | |
7478c462 | 177 | rule_number user_number; |
d7e1f00c AD |
178 | |
179 | /* The index in RULES. Usually the rule number in the source, | |
180 | except if some rules are useless. */ | |
7478c462 | 181 | rule_number number; |
c3b407f4 | 182 | |
7478c462 PE |
183 | symbol *lhs; |
184 | item_number *rhs; | |
03b31c0c AD |
185 | |
186 | /* This symbol provides both the associativity, and the precedence. */ | |
7478c462 | 187 | symbol *prec; |
03b31c0c | 188 | |
f6fbd3da PE |
189 | int dprec; |
190 | int merger; | |
676385e2 | 191 | |
03b31c0c | 192 | /* This symbol was attached to the rule via %prec. */ |
7478c462 | 193 | symbol *precsym; |
03b31c0c | 194 | |
7478c462 | 195 | location location; |
68f1e3ed | 196 | bool useful; |
ca2a6d15 | 197 | bool is_predicate; |
f499b062 | 198 | |
3f96f4dc | 199 | const char *action; |
7478c462 PE |
200 | location action_location; |
201 | } rule; | |
652a871c | 202 | |
7478c462 | 203 | extern rule *rules; |
652a871c | 204 | |
c8f002c7 | 205 | /* A function that selects a rule. */ |
7478c462 | 206 | typedef bool (*rule_filter) (rule *); |
c8f002c7 | 207 | |
cff03fb2 JD |
208 | /* Return true IFF the rule has a `number' smaller than NRULES. That is, it is |
209 | useful in the grammar. */ | |
210 | bool rule_useful_in_grammar_p (rule *r); | |
c8f002c7 | 211 | |
cff03fb2 JD |
212 | /* Return true IFF the rule has a `number' higher than NRULES. That is, it is |
213 | useless in the grammar. */ | |
214 | bool rule_useless_in_grammar_p (rule *r); | |
c8f002c7 | 215 | |
cff03fb2 JD |
216 | /* Return true IFF the rule is not flagged as useful but is useful in the |
217 | grammar. In other words, it was discarded because of conflicts. */ | |
218 | bool rule_useless_in_parser_p (rule *r); | |
c8f002c7 | 219 | |
7478c462 | 220 | /* Print this rule's number and lhs on OUT. If a PREVIOUS_LHS was |
c8f002c7 AD |
221 | already displayed (by a previous call for another rule), avoid |
222 | useless repetitions. */ | |
7478c462 | 223 | void rule_lhs_print (rule *r, symbol *previous_lhs, FILE *out); |
41d7a5f2 | 224 | void rule_lhs_print_xml (rule *r, FILE *out, int level); |
c8f002c7 AD |
225 | |
226 | /* Return the length of the RHS. */ | |
08c81469 | 227 | size_t rule_rhs_length (rule *r); |
c8f002c7 | 228 | |
7478c462 PE |
229 | /* Print this rule's RHS on OUT. */ |
230 | void rule_rhs_print (rule *r, FILE *out); | |
c8f002c7 | 231 | |
7478c462 PE |
232 | /* Print this rule on OUT. */ |
233 | void rule_print (rule *r, FILE *out); | |
c8f002c7 AD |
234 | |
235 | ||
236 | ||
237 | ||
0e78e603 | 238 | /* Table of the symbols, indexed by the symbol number. */ |
7478c462 | 239 | extern symbol **symbols; |
0e78e603 | 240 | |
680e8701 AD |
241 | /* TOKEN_TRANSLATION -- a table indexed by a token number as returned |
242 | by the user's yylex routine, it yields the internal token number | |
243 | used by the parser and throughout bison. */ | |
7478c462 | 244 | extern symbol_number *token_translations; |
f7d4d87a DM |
245 | extern int max_user_token_number; |
246 | ||
f7d4d87a | 247 | |
c8f002c7 | 248 | |
3067fbef | 249 | /* Dump RITEM for traces. */ |
d33cb3ae | 250 | void ritem_print (FILE *out); |
c2713865 AD |
251 | |
252 | /* Return the size of the longest rule RHS. */ | |
d33cb3ae | 253 | size_t ritem_longest_rhs (void); |
c2713865 | 254 | |
cff03fb2 | 255 | /* Print the grammar's rules that match FILTER on OUT under TITLE. */ |
d33cb3ae | 256 | void grammar_rules_partial_print (FILE *out, const char *title, |
e9690142 | 257 | rule_filter filter); |
9757c359 | 258 | |
cff03fb2 | 259 | /* Print the grammar's useful rules on OUT. */ |
d33cb3ae | 260 | void grammar_rules_print (FILE *out); |
d80fb37a | 261 | /* Print all of the grammar's rules with a "usefulness" attribute. */ |
41d7a5f2 | 262 | void grammar_rules_print_xml (FILE *out, int level); |
6b98e4b5 | 263 | |
78ab8f67 | 264 | /* Dump the grammar. */ |
d33cb3ae | 265 | void grammar_dump (FILE *out, const char *title); |
78ab8f67 | 266 | |
c8f002c7 | 267 | /* Report on STDERR the rules that are not flagged USEFUL, using the |
cff03fb2 JD |
268 | MESSAGE (which can be `rule useless in grammar' when invoked after grammar |
269 | reduction, or `rule useless in parser due to conflicts' after conflicts | |
270 | were taken into account). */ | |
271 | void grammar_rules_useless_report (const char *message); | |
c8f002c7 | 272 | |
5372019f | 273 | /* Free the packed grammar. */ |
d33cb3ae | 274 | void grammar_free (void); |
5372019f | 275 | |
b2ca4022 | 276 | #endif /* !GRAM_H_ */ |