]>
Commit | Line | Data |
---|---|---|
f7d4d87a | 1 | /* Data definitions for internal representation of bison's input, |
d7e1f00c | 2 | Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002 |
99013900 | 3 | Free Software Foundation, Inc. |
f7d4d87a | 4 | |
b2ca4022 | 5 | This file is part of Bison, the GNU Compiler Compiler. |
f7d4d87a | 6 | |
b2ca4022 AD |
7 | Bison is free software; you can redistribute it and/or modify |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
f7d4d87a | 11 | |
b2ca4022 AD |
12 | Bison is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
f7d4d87a | 16 | |
b2ca4022 AD |
17 | You should have received a copy of the GNU General Public License |
18 | along with Bison; see the file COPYING. If not, write to | |
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 | Boston, MA 02111-1307, USA. */ | |
f7d4d87a | 21 | |
b2ca4022 AD |
22 | #ifndef GRAM_H_ |
23 | # define GRAM_H_ | |
f7d4d87a | 24 | |
aea13e97 | 25 | /* Representation of the grammar rules: |
f7d4d87a | 26 | |
aea13e97 AD |
27 | NTOKENS is the number of tokens, and NVARS is the number of |
28 | variables (nonterminals). NSYMS is the total number, ntokens + | |
b2ca4022 | 29 | nvars. |
f7d4d87a | 30 | |
b2ca4022 | 31 | Each symbol (either token or variable) receives a symbol number. |
aea13e97 AD |
32 | Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 |
33 | are for variables. Symbol number zero is the end-of-input token. | |
34 | This token is counted in ntokens. The true number of token values | |
35 | assigned is NTOKENS reduced by one for each alias declaration. | |
36 | ||
37 | The rules receive rule numbers 1 to NRULES in the order they are | |
38 | written. More precisely Bison augments the grammar with the | |
88bce5a2 AD |
39 | initial rule, `$accept: START-SYMBOL $end', which is numbered 1, |
40 | all the user rules are 2, 3 etc. Each time a rule number is | |
41 | presented to the user, we subtract 1, so *displayed* rule numbers | |
42 | are 0, 1, 2... | |
aea13e97 AD |
43 | |
44 | Internally, we cannot use the number 0 for a rule because for | |
45 | instance RITEM stores both symbol (the RHS) and rule numbers: the | |
46 | symbols are shorts >= 0, and rule numbers are stored negative. | |
47 | Therefore 0 cannot be used, since it would be both the rule number | |
88bce5a2 | 48 | 0, and the token $end. |
aea13e97 | 49 | |
fdbcd8e2 | 50 | Actions are accessed via the rule number. |
f7d4d87a | 51 | |
b2ed6e58 | 52 | The rules themselves are described by several arrays: amongst which |
1a2b5d37 | 53 | RITEM, and RULES. |
b2ed6e58 | 54 | |
1a2b5d37 | 55 | RULES is an array of struct rule_s, whose members are: |
b2ed6e58 | 56 | |
03b31c0c | 57 | RULES[R].lhs -- the symbol of the left hand side of rule R. |
b2ed6e58 | 58 | |
aea13e97 AD |
59 | RULES[R].rhs -- the index in RITEM of the beginning of the portion |
60 | for rule R. | |
f7d4d87a | 61 | |
03b31c0c | 62 | RULES[R].prec -- the symbol providing the precedence level of R. |
652a871c | 63 | |
03b31c0c AD |
64 | RULES[R].precsym -- the symbol attached (via %prec) to give its |
65 | precedence to R. Of course, if set, it is equal to `prec', but we | |
66 | need to distinguish one from the other when reducing: a symbol used | |
67 | in a %prec is not useless. | |
652a871c | 68 | |
1a2b5d37 | 69 | RULES[R].assoc -- the associativity of R. |
e41dc700 | 70 | |
88bce5a2 AD |
71 | RULES[R].dprec -- the dynamic precedence level of R (for GLR |
72 | parsing). | |
676385e2 | 73 | |
88bce5a2 AD |
74 | RULES[R].merger -- index of merging function for R (for GLR |
75 | parsing). | |
676385e2 | 76 | |
1a2b5d37 | 77 | RULES[R].location -- the location where R was defined. |
652a871c | 78 | |
03b31c0c AD |
79 | RULES[R].useful -- TRUE iff the rule is used (i.e., FALSE if thrown |
80 | away by reduce). | |
68f1e3ed | 81 | |
b2ed6e58 AD |
82 | The right hand side is stored as symbol numbers in a portion of |
83 | RITEM. | |
f7d4d87a | 84 | |
b2ca4022 AD |
85 | The length of the portion is one greater than the number of symbols |
86 | in the rule's right hand side. The last element in the portion | |
87 | contains minus R, which identifies it as the end of a portion and | |
88 | says which rule it is for. | |
f7d4d87a | 89 | |
a900a624 AD |
90 | The portions of RITEM come in order of increasing rule number. |
91 | NRITEMS is the total length of RITEM. Each element of RITEM is | |
92 | called an "item" and its index in RITEM is an item number. | |
f7d4d87a | 93 | |
b2ca4022 AD |
94 | Item numbers are used in the finite state machine to represent |
95 | places that parsing can get to. | |
f7d4d87a | 96 | |
aea13e97 | 97 | SYMBOLS[I]->prec records the precedence level of each symbol. |
f7d4d87a | 98 | |
b2ca4022 AD |
99 | Precedence levels are assigned in increasing order starting with 1 |
100 | so that numerically higher precedence values mean tighter binding | |
101 | as they ought to. Zero as a symbol or rule's precedence means none | |
102 | is assigned. | |
f7d4d87a | 103 | |
aea13e97 | 104 | Associativities are recorded similarly in SYMBOLS[I]->assoc. */ |
f7d4d87a | 105 | |
8efe435c AD |
106 | # include "location.h" |
107 | # include "symtab.h" | |
f7d4d87a | 108 | |
8efe435c AD |
109 | # define ISTOKEN(s) ((s) < ntokens) |
110 | # define ISVAR(s) ((s) >= ntokens) | |
f7d4d87a | 111 | |
f7d4d87a DM |
112 | extern int nsyms; |
113 | extern int ntokens; | |
114 | extern int nvars; | |
115 | ||
62a3e4f0 | 116 | typedef int item_number_t; |
9222837b | 117 | # define ITEM_NUMBER_MAX ((item_number_t) INT_MAX) |
f704e333 | 118 | # define ITEM_NUMBER_MIN ((item_number_t) INT_MIN) |
62a3e4f0 | 119 | extern item_number_t *ritem; |
0c2d3f4c | 120 | extern unsigned int nritems; |
b2ed6e58 | 121 | |
9222837b AD |
122 | /* There is a weird relationship between, on the one hand, item_number_t and, on the other hand, |
123 | symbol_number_t and rule_number_t: we store the latter in | |
124 | item_number_t. symbol_number_t values are stored as is, while | |
4b3d3a8e | 125 | the negation of (rule_number_t + 1) is stored. |
5fbb0954 | 126 | |
a49aecd5 | 127 | Therefore, a symbol_number_t must be a valid item_number_t, and we |
5fbb0954 | 128 | sometimes have to perform the converse transformation. */ |
8efe435c AD |
129 | # define symbol_number_as_item_number(Tok) ((item_number_t) (Tok)) |
130 | # define item_number_as_symbol_number(Ite) ((symbol_number_t) (Ite)) | |
5fbb0954 | 131 | |
a49aecd5 | 132 | extern symbol_number_t start_symbol; |
f7d4d87a | 133 | |
9222837b AD |
134 | /* Rule numbers. */ |
135 | typedef short rule_number_t; | |
136 | # define RULE_NUMBER_MAX ((rule_number_t) SHRT_MAX) | |
137 | extern rule_number_t nrules; | |
138 | # define int_of_rule_number(RNum) ((int) (RNum)) | |
4b3d3a8e AD |
139 | # define rule_number_as_item_number(RNum) ((item_number_t) (- RNum - 1)) |
140 | # define item_number_as_rule_number(INum) ((rule_number_t) (- INum - 1)) | |
9222837b AD |
141 | |
142 | ||
143 | /*--------. | |
144 | | Rules. | | |
145 | `--------*/ | |
62a3e4f0 | 146 | |
652a871c AD |
147 | typedef struct rule_s |
148 | { | |
c3b407f4 AD |
149 | /* The number of the rule in the source. It is usually the index in |
150 | RULES too, except if there are useless rules. */ | |
9222837b | 151 | rule_number_t user_number; |
d7e1f00c AD |
152 | |
153 | /* The index in RULES. Usually the rule number in the source, | |
154 | except if some rules are useless. */ | |
9222837b | 155 | rule_number_t number; |
c3b407f4 | 156 | |
db8837cb | 157 | symbol_t *lhs; |
62a3e4f0 | 158 | item_number_t *rhs; |
03b31c0c AD |
159 | |
160 | /* This symbol provides both the associativity, and the precedence. */ | |
db8837cb | 161 | symbol_t *prec; |
03b31c0c | 162 | |
676385e2 PH |
163 | short dprec; |
164 | short merger; | |
165 | ||
03b31c0c | 166 | /* This symbol was attached to the rule via %prec. */ |
db8837cb | 167 | symbol_t *precsym; |
03b31c0c | 168 | |
8efe435c | 169 | location_t location; |
68f1e3ed | 170 | bool useful; |
f499b062 | 171 | |
3f96f4dc | 172 | const char *action; |
8efe435c | 173 | location_t action_location; |
652a871c AD |
174 | } rule_t; |
175 | ||
1a2b5d37 | 176 | extern struct rule_s *rules; |
652a871c | 177 | |
c8f002c7 AD |
178 | /* A function that selects a rule. */ |
179 | typedef bool (*rule_filter_t) PARAMS ((rule_t *r)); | |
180 | ||
181 | /* Return true IFF the rule has a `number' smaller than NRULES. */ | |
182 | bool rule_useful_p PARAMS ((rule_t *r)); | |
183 | ||
184 | /* Return true IFF the rule has a `number' higher than NRULES. */ | |
185 | bool rule_useless_p PARAMS ((rule_t *r)); | |
186 | ||
187 | /* Return true IFF the rule is not flagged as useful *and* is useful. | |
188 | In other words, it was discarded because of conflicts. */ | |
189 | bool rule_never_reduced_p PARAMS ((rule_t *r)); | |
190 | ||
191 | /* Print this RULE's number and lhs on OUT. If a PREVIOUS_LHS was | |
192 | already displayed (by a previous call for another rule), avoid | |
193 | useless repetitions. */ | |
194 | void rule_lhs_print PARAMS ((rule_t *rule, symbol_t *previous_lhs, FILE *out)); | |
195 | ||
196 | /* Return the length of the RHS. */ | |
197 | int rule_rhs_length PARAMS ((rule_t *rule)); | |
198 | ||
199 | /* Print this RULE's RHS on OUT. */ | |
200 | void rule_rhs_print PARAMS ((rule_t *rule, FILE *out)); | |
201 | ||
202 | /* Print this RULE on OUT. */ | |
203 | void rule_print PARAMS ((rule_t *rule, FILE *out)); | |
204 | ||
205 | ||
206 | ||
207 | ||
0e78e603 | 208 | /* Table of the symbols, indexed by the symbol number. */ |
db8837cb | 209 | extern symbol_t **symbols; |
0e78e603 | 210 | |
680e8701 AD |
211 | /* TOKEN_TRANSLATIONS -- a table indexed by a token number as returned |
212 | by the user's yylex routine, it yields the internal token number | |
213 | used by the parser and throughout bison. */ | |
a49aecd5 | 214 | extern symbol_number_t *token_translations; |
f7d4d87a DM |
215 | extern int max_user_token_number; |
216 | ||
f7d4d87a | 217 | |
c8f002c7 | 218 | |
676385e2 PH |
219 | /* GLR_PARSER is nonzero if the input file says to use the GLR |
220 | (Generalized LR) parser, and to output some additional | |
221 | information used by the GLR algorithm. */ | |
222 | ||
223 | extern int glr_parser; | |
224 | ||
b2ca4022 AD |
225 | /* PURE_PARSER is nonzero if we should generate a parser that is all pure |
226 | and reentrant. */ | |
f7d4d87a DM |
227 | |
228 | extern int pure_parser; | |
229 | ||
3067fbef | 230 | /* Dump RITEM for traces. */ |
c2713865 AD |
231 | void ritem_print PARAMS ((FILE *out)); |
232 | ||
233 | /* Return the size of the longest rule RHS. */ | |
234 | size_t ritem_longest_rhs PARAMS ((void)); | |
235 | ||
9757c359 AD |
236 | /* Print on OUT, under TITLE, the grammar's rules that are selected |
237 | by FILTER. */ | |
238 | void grammar_rules_partial_print PARAMS ((FILE *out, const char *title, | |
c8f002c7 | 239 | rule_filter_t filter)); |
9757c359 | 240 | |
6b98e4b5 AD |
241 | /* Print the grammar's rules on OUT. */ |
242 | void grammar_rules_print PARAMS ((FILE *out)); | |
243 | ||
78ab8f67 AD |
244 | /* Dump the grammar. */ |
245 | void grammar_dump PARAMS ((FILE *out, const char *title)); | |
246 | ||
c8f002c7 AD |
247 | /* Report on STDERR the rules that are not flagged USEFUL, using the |
248 | MESSAGE (which can be `useless rule' when invoked after grammar | |
249 | reduction, or `never reduced' after conflicts were taken into | |
250 | account). */ | |
251 | void grammar_rules_never_reduced_report PARAMS ((const char *message)); | |
252 | ||
5372019f AD |
253 | /* Free the packed grammar. */ |
254 | void grammar_free PARAMS ((void)); | |
255 | ||
b2ca4022 | 256 | #endif /* !GRAM_H_ */ |