]>
Commit | Line | Data |
---|---|---|
1 | /* Data definitions for internal representation of Bison's input. | |
2 | ||
3 | Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002, 2003, 2004, 2005, 2006 | |
4 | Free Software Foundation, Inc. | |
5 | ||
6 | This file is part of Bison, the GNU Compiler Compiler. | |
7 | ||
8 | Bison is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2, or (at your option) | |
11 | any later version. | |
12 | ||
13 | Bison is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with Bison; see the file COPYING. If not, write to | |
20 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
21 | Boston, MA 02110-1301, USA. */ | |
22 | ||
23 | #ifndef GRAM_H_ | |
24 | # define GRAM_H_ | |
25 | ||
26 | /* Representation of the grammar rules: | |
27 | ||
28 | NTOKENS is the number of tokens, and NVARS is the number of | |
29 | variables (nonterminals). NSYMS is the total number, ntokens + | |
30 | nvars. | |
31 | ||
32 | Each symbol (either token or variable) receives a symbol number. | |
33 | Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 | |
34 | are for variables. Symbol number zero is the end-of-input token. | |
35 | This token is counted in ntokens. The true number of token values | |
36 | assigned is NTOKENS reduced by one for each alias declaration. | |
37 | ||
38 | The rules receive rule numbers 1 to NRULES in the order they are | |
39 | written. More precisely Bison augments the grammar with the | |
40 | initial rule, `$accept: START-SYMBOL $end', which is numbered 1, | |
41 | all the user rules are 2, 3 etc. Each time a rule number is | |
42 | presented to the user, we subtract 1, so *displayed* rule numbers | |
43 | are 0, 1, 2... | |
44 | ||
45 | Internally, we cannot use the number 0 for a rule because for | |
46 | instance RITEM stores both symbol (the RHS) and rule numbers: the | |
47 | symbols are shorts >= 0, and rule numbers are stored negative. | |
48 | Therefore 0 cannot be used, since it would be both the rule number | |
49 | 0, and the token $end). | |
50 | ||
51 | Actions are accessed via the rule number. | |
52 | ||
53 | The rules themselves are described by several arrays: amongst which | |
54 | RITEM, and RULES. | |
55 | ||
56 | RULES is an array of rules, whose members are: | |
57 | ||
58 | RULES[R].lhs -- the symbol of the left hand side of rule R. | |
59 | ||
60 | RULES[R].rhs -- the index in RITEM of the beginning of the portion | |
61 | for rule R. | |
62 | ||
63 | RULES[R].prec -- the symbol providing the precedence level of R. | |
64 | ||
65 | RULES[R].precsym -- the symbol attached (via %prec) to give its | |
66 | precedence to R. Of course, if set, it is equal to `prec', but we | |
67 | need to distinguish one from the other when reducing: a symbol used | |
68 | in a %prec is not useless. | |
69 | ||
70 | The associativity of R is not a member of its own: it is provided by the symbol RULES[R].prec. | |
71 | ||
72 | RULES[R].dprec -- the dynamic precedence level of R (for GLR | |
73 | parsing). | |
74 | ||
75 | RULES[R].merger -- index of merging function for R (for GLR | |
76 | parsing). | |
77 | ||
78 | RULES[R].location -- the location where R was defined. | |
79 | ||
80 | RULES[R].useful -- true iff the rule is used (i.e., false if thrown | |
81 | away by reduce). | |
82 | ||
83 | The right hand side is stored as symbol numbers in a portion of | |
84 | RITEM. | |
85 | ||
86 | The length of the portion is one greater than the number of symbols | |
87 | in the rule's right hand side. The last element in the portion | |
88 | contains -1 - R (see rule_number_as_item_number), which identifies it as the end of a portion and | |
89 | says which rule it is for. | |
90 | ||
91 | The portions of RITEM come in order of increasing rule number. | |
92 | NRITEMS is the total length of RITEM. Each element of RITEM is | |
93 | called an "item" and its index in RITEM is an item number. | |
94 | ||
95 | Item numbers are used in the finite state machine to represent | |
96 | places that parsing can get to. | |
97 | ||
98 | SYMBOLS[I]->prec records the precedence level of each symbol. | |
99 | ||
100 | Precedence levels are assigned in increasing order starting with 1 | |
101 | so that numerically higher precedence values mean tighter binding | |
102 | as they ought to. Zero as a symbol or rule's precedence means none | |
103 | is assigned. | |
104 | ||
105 | Associativities are recorded similarly in SYMBOLS[I]->assoc. */ | |
106 | ||
107 | # include "location.h" | |
108 | # include "symtab.h" | |
109 | ||
/* Classify the symbol number I: numbers below NTOKENS denote tokens,
   all the others denote variables (nonterminals).  */
# define ISTOKEN(i) (ntokens > (i))
# define ISVAR(i)   (ntokens <= (i))
112 | ||
113 | extern int nsyms; /* Total number of symbols: ntokens + nvars. */ | |
114 | extern int ntokens; /* Number of tokens; the end-of-input token is counted. */ | |
115 | extern int nvars; /* Number of variables (nonterminals). */ | |
116 | ||
117 | typedef int item_number; /* An element of RITEM: either a symbol number or an encoded rule number. */ | |
118 | #define ITEM_NUMBER_MAX INT_MAX /* Largest value an item_number can hold. */ | |
119 | extern item_number *ritem; /* RITEM: the right hand sides of the rules, one portion per rule. */ | |
120 | extern unsigned int nritems; /* NRITEMS: the total length of RITEM. */ | |
121 | ||
122 | /* There is a weird relationship between item_number on the one hand, | |
123 | and symbol_number and rule_number on the other: we store the latter | |
124 | two in item_number. symbol_number values are stored as-is, while | |
125 | the negation of (rule_number + 1) is stored. | |
126 | ||
127 | Therefore, a symbol_number must be a valid item_number, and we | |
128 | sometimes have to perform the converse transformation. */ | |
129 | ||
130 | static inline item_number | |
131 | symbol_number_as_item_number (symbol_number sym) | |
132 | { | |
133 | return sym; | |
134 | } | |
135 | ||
136 | static inline symbol_number | |
137 | item_number_as_symbol_number (item_number i) | |
138 | { | |
139 | return i; | |
140 | } | |
141 | ||
142 | static inline bool | |
143 | item_number_is_symbol_number (item_number i) | |
144 | { | |
145 | return i >= 0; | |
146 | } | |
147 | ||
148 | /* Rule numbers. A rule_number is a plain int; RULE_NUMBER_MAX is the largest value it can hold. */ | |
149 | typedef int rule_number; | |
150 | #define RULE_NUMBER_MAX INT_MAX | |
151 | extern rule_number nrules; /* NRULES in the comments of this file. */ | |
152 | ||
153 | static inline item_number | |
154 | rule_number_as_item_number (rule_number r) | |
155 | { | |
156 | return -1 - r; | |
157 | } | |
158 | ||
159 | static inline rule_number | |
160 | item_number_as_rule_number (item_number i) | |
161 | { | |
162 | return -1 - i; | |
163 | } | |
164 | ||
165 | static inline bool | |
166 | item_number_is_rule_number (item_number i) | |
167 | { | |
168 | return i < 0; | |
169 | } | |
170 | ||
171 | /*--------. | |
172 | | Rules. | | |
173 | `--------*/ | |
174 | ||
175 | typedef struct /* One grammar rule; RULES is an array of these. */ | |
176 | { | |
177 | /* The number of the rule in the source. It is usually the index in | |
178 | RULES too, except if there are useless rules. */ | |
179 | rule_number user_number; | |
180 | ||
181 | /* The index in RULES. Usually the rule number in the source, | |
182 | except if some rules are useless. */ | |
183 | rule_number number; | |
184 | ||
185 | symbol *lhs; /* The symbol of the left hand side of the rule. */ | |
186 | item_number *rhs; /* The beginning of the portion of RITEM that holds the rule's right hand side. */ | |
187 | ||
188 | /* This symbol provides both the associativity, and the precedence. */ | |
189 | symbol *prec; | |
190 | ||
191 | int dprec; /* The dynamic precedence level of the rule (for GLR parsing). */ | |
192 | int merger; /* Index of the merging function for the rule (for GLR parsing). */ | |
193 | ||
194 | /* This symbol was attached to the rule via %prec. */ | |
195 | symbol *precsym; | |
196 | ||
197 | location location; /* Where the rule was defined. */ | |
198 | bool useful; /* True iff the rule is used (i.e., false if thrown away by reduce). */ | |
199 | ||
200 | const char *action; /* NOTE(review): presumably the text of the rule's action -- confirm. */ | |
201 | location action_location; /* NOTE(review): presumably where that action appears in the input -- confirm. */ | |
202 | } rule; | |
203 | ||
204 | extern rule *rules; /* RULES: the array of rules. */ | |
205 | ||
206 | /* A function that selects a rule: it takes a rule and returns a bool (presumably true when the rule is selected -- confirm with the callers). */ | |
207 | typedef bool (*rule_filter) (rule *); | |
208 | ||
209 | /* Return true IFF the rule R has a `number' smaller than NRULES. The prototype matches rule_filter. */ | |
210 | bool rule_useful_p (rule *r); | |
211 | ||
212 | /* Return true IFF the rule R has a `number' higher than NRULES. NOTE(review): `higher' presumably includes equality, which would make this the complement of rule_useful_p -- confirm against the definition. */ | |
213 | bool rule_useless_p (rule *r); | |
214 | ||
215 | /* Return true IFF the rule R is not flagged as useful *and* is useful (presumably: its `useful' member is false although rule_useful_p holds -- confirm). | |
216 | In other words, it was discarded because of conflicts. */ | |
217 | bool rule_never_reduced_p (rule *r); | |
218 | ||
219 | /* Print the number and lhs of the rule R on OUT. If a PREVIOUS_LHS was | |
220 | already displayed (by a previous call for another rule), avoid | |
221 | useless repetitions. */ | |
222 | void rule_lhs_print (rule *r, symbol *previous_lhs, FILE *out); | |
223 | ||
224 | /* Return the length of the RHS of the rule R (presumably the number of its symbols, not counting the item that ends its portion of RITEM -- confirm). */ | |
225 | int rule_rhs_length (rule *r); | |
226 | ||
227 | /* Print the RHS of the rule R on OUT. */ | |
228 | void rule_rhs_print (rule *r, FILE *out); | |
229 | ||
230 | /* Print the rule R on OUT. */ | |
231 | void rule_print (rule *r, FILE *out); | |
232 | ||
233 | ||
234 | ||
235 | ||
236 | /* SYMBOLS -- table of the symbols, indexed by the symbol number. */ | |
237 | extern symbol **symbols; | |
238 | ||
239 | /* TOKEN_TRANSLATIONS -- a table indexed by a token number as returned | |
240 | by the user's yylex routine; it yields the internal token number | |
241 | used by the parser and throughout bison. */ | |
242 | extern symbol_number *token_translations; | |
243 | extern int max_user_token_number; /* NOTE(review): presumably the largest token number yylex can return, hence the last index of token_translations -- confirm. */ | |
244 | ||
245 | ||
246 | ||
247 | /* Dump RITEM on OUT, for traces. */ | |
248 | void ritem_print (FILE *out); | |
249 | ||
250 | /* Return the size of the longest rule RHS (presumably counted in symbols, like rule_rhs_length -- confirm). */ | |
251 | size_t ritem_longest_rhs (void); | |
252 | ||
253 | /* Print on OUT, under TITLE, the grammar's rules that are selected | |
254 | by FILTER. */ | |
255 | void grammar_rules_partial_print (FILE *out, const char *title, | |
256 | rule_filter filter); | |
257 | ||
258 | /* Print the grammar's rules on OUT. */ | |
259 | void grammar_rules_print (FILE *out); | |
260 | ||
261 | /* Dump the grammar on OUT (TITLE is presumably printed as the heading of the dump -- confirm). */ | |
262 | void grammar_dump (FILE *out, const char *title); | |
263 | ||
264 | /* Report on STDERR the rules that are not flagged USEFUL, using the | |
265 | MESSAGE (which can be `useless rule' when invoked after grammar | |
266 | reduction, or `never reduced' after conflicts were taken into | |
267 | account). */ | |
268 | void grammar_rules_never_reduced_report (const char *message); | |
269 | ||
270 | /* Free the packed grammar (NOTE(review): presumably the global tables declared in this file, such as RITEM and RULES -- confirm). */ | |
271 | void grammar_free (void); | |
272 | ||
273 | #endif /* !GRAM_H_ */ |