]>
Commit | Line | Data |
---|---|---|
1 | /* Data definitions for internal representation of bison's input, | |
2 | Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002 | |
3 | Free Software Foundation, Inc. | |
4 | ||
5 | This file is part of Bison, the GNU Compiler Compiler. | |
6 | ||
7 | Bison is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Bison is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with Bison; see the file COPYING. If not, write to | |
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 | Boston, MA 02111-1307, USA. */ | |
21 | ||
22 | #ifndef GRAM_H_ | |
23 | # define GRAM_H_ | |
24 | ||
25 | /* Representation of the grammar rules: | |
26 | ||
27 | NTOKENS is the number of tokens, and NVARS is the number of | |
28 | variables (nonterminals). NSYMS is the total number, ntokens + | |
29 | nvars. | |
30 | ||
31 | Each symbol (either token or variable) receives a symbol number. | |
32 | Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 | |
33 | are for variables. Symbol number zero is the end-of-input token. | |
34 | This token is counted in ntokens. The true number of token values | |
35 | assigned is NTOKENS reduced by one for each alias declaration. | |
36 | ||
37 | The rules receive rule numbers 1 to NRULES in the order they are | |
38 | written. More precisely Bison augments the grammar with the | |
39 | initial rule, `$axiom: START-SYMBOL EOF', which is numbered 1, all | |
40 | the user rules are 2, 3 etc. Each time a rule number is presented | |
41 | to the user, we subtract 1, so *displayed* rule numbers are 0, 1, | |
42 | 2... | |
43 | ||
44 | Internally, we cannot use the number 0 for a rule because for | |
45 | instance RITEM stores both symbol (the RHS) and rule numbers: the | |
46 | symbols are shorts >= 0, and rule number are stored negative. | |
47 | Therefore 0 cannot be used, since it would be both the rule number | |
48 | 0, and the token EOF). | |
49 | ||
50 | Actions are accessed via the rule number. | |
51 | ||
52 | The rules themselves are described by several arrays: amongst which | |
53 | RITEM, and RULES. | |
54 | ||
55 | RULES is an array of struct rule_s, which members are: | |
56 | ||
57 | RULES[R].lhs -- the symbol of the left hand side of rule R. | |
58 | ||
59 | RULES[R].rhs -- the index in RITEM of the beginning of the portion | |
60 | for rule R. | |
61 | ||
62 | RULES[R].prec -- the symbol providing the precedence level of R. | |
63 | ||
64 | RULES[R].precsym -- the symbol attached (via %prec) to give its | |
65 | precedence to R. Of course, if set, it is equal to `prec', but we | |
66 | need to distinguish one from the other when reducing: a symbol used | |
67 | in a %prec is not useless. | |
68 | ||
69 | RULES[R].assoc -- the associativity of R. | |
70 | ||
71 | RULES[R].dprec -- the dynamic precedence level of R (for GLR parsing). | |
72 | ||
73 | RULES[R].merger -- index of merging function for R (for GLR parsing). | |
74 | ||
75 | RULES[R].line -- the line where R was defined. | |
76 | ||
77 | RULES[R].useful -- TRUE iff the rule is used (i.e., FALSE if thrown | |
78 | away by reduce). | |
79 | ||
80 | The right hand side is stored as symbol numbers in a portion of | |
81 | RITEM. | |
82 | ||
83 | The length of the portion is one greater than the number of symbols | |
84 | in the rule's right hand side. The last element in the portion | |
85 | contains minus R, which identifies it as the end of a portion and | |
86 | says which rule it is for. | |
87 | ||
88 | The portions of RITEM come in order of increasing rule number. | |
89 | NRITEMS is the total length of RITEM. Each element of RITEM is | |
90 | called an "item" and its index in RITEM is an item number. | |
91 | ||
92 | Item numbers are used in the finite state machine to represent | |
93 | places that parsing can get to. | |
94 | ||
95 | SYMBOLS[I]->prec records the precedence level of each symbol. | |
96 | ||
97 | Precedence levels are assigned in increasing order starting with 1 | |
98 | so that numerically higher precedence values mean tighter binding | |
99 | as they ought to. Zero as a symbol or rule's precedence means none | |
100 | is assigned. | |
101 | ||
102 | Associativities are recorded similarly in SYMBOLS[I]->assoc. */ | |
103 | ||
104 | # include "location.h" | |
105 | # include "symtab.h" | |
106 | ||
107 | # define ISTOKEN(s) ((s) < ntokens) /* Symbol numbers below NTOKENS are tokens. */ | |
108 | # define ISVAR(s) ((s) >= ntokens) /* Symbol numbers from NTOKENS up are variables (nonterminals). */ | |
109 | ||
110 | extern int nsyms; /* Total number of symbols: NTOKENS + NVARS. */ | |
111 | extern int ntokens; /* Number of tokens, including the end-of-input token. */ | |
112 | extern int nvars; /* Number of variables (nonterminals). */ | |
113 | ||
114 | typedef int item_number_t; /* An element of RITEM: a symbol number, or an encoded rule number. */ | |
115 | # define ITEM_NUMBER_MAX ((item_number_t) INT_MAX) /* Largest value an item_number_t can hold. */ | |
116 | # define ITEM_NUMBER_MIN ((item_number_t) INT_MIN) /* Smallest value an item_number_t can hold. */ | |
117 | extern item_number_t *ritem; /* The RITEM array described at the top of this file. */ | |
118 | extern unsigned int nritems; /* Total length of RITEM. */ | |
119 | ||
120 | /* There is a weird relationship between, on the one hand, | |
121 | item_number_t and, on the other hand, symbol_number_t and | |
122 | rule_number_t: we store the latter two in item_number_t. A | |
123 | symbol_number_t is stored as is, while a rule_number_t R is stored as - R - 1. | |
124 | ||
125 | Therefore, a symbol_number_t must be a valid item_number_t, and we | |
126 | sometimes have to perform the converse transformation. */ | |
127 | # define symbol_number_as_item_number(Tok) ((item_number_t) (Tok)) | |
128 | # define item_number_as_symbol_number(Ite) ((symbol_number_t) (Ite)) | |
129 | ||
130 | extern symbol_number_t start_symbol; /* NOTE(review): presumably the symbol number of the grammar's start symbol -- confirm at its definition. */ | |
131 | ||
132 | /* Rule numbers. A rule number R is stored in an item_number_t as - R - 1, which is negative for any R >= 0; the two conversion macros below parenthesize their argument so that compound expressions such as `r + 1' are converted correctly. */ | |
133 | typedef short rule_number_t; | |
134 | # define RULE_NUMBER_MAX ((rule_number_t) SHRT_MAX) | |
135 | extern rule_number_t nrules; | |
136 | # define int_of_rule_number(RNum) ((int) (RNum)) | |
137 | # define rule_number_as_item_number(RNum) ((item_number_t) (- (RNum) - 1)) | |
138 | # define item_number_as_rule_number(INum) ((rule_number_t) (- (INum) - 1)) | |
139 | ||
140 | ||
141 | /*--------. | |
142 | | Rules. | | |
143 | `--------*/ | |
144 | ||
145 | typedef struct rule_s /* One grammar rule; RULES is an array of these. */ | |
146 | { | |
147 | /* The number of the rule in the source. It is usually the index in | |
148 | RULES too, except if there are useless rules. */ | |
149 | rule_number_t user_number; | |
150 | ||
151 | /* The index in RULES. Usually the rule number in the source, | |
152 | except if some rules are useless. */ | |
153 | rule_number_t number; | |
154 | ||
155 | symbol_t *lhs; /* The symbol on the left hand side of the rule. */ | |
156 | item_number_t *rhs; /* Beginning of this rule's portion of RITEM. */ | |
157 | ||
158 | /* This symbol provides both the associativity and the precedence. */ | |
159 | symbol_t *prec; | |
160 | ||
161 | short dprec; /* Dynamic precedence level (for GLR parsing). */ | |
162 | short merger; /* Index of the merging function (for GLR parsing). */ | |
163 | ||
164 | /* This symbol was attached to the rule via %prec. */ | |
165 | symbol_t *precsym; | |
166 | ||
167 | location_t location; /* Where the rule was defined. */ | |
168 | bool useful; /* TRUE iff the rule is used (FALSE if thrown away by reduce). */ | |
169 | ||
170 | const char *action; /* NOTE(review): presumably the text of the rule's action -- confirm. */ | |
171 | location_t action_location; /* NOTE(review): presumably where that action appears in the input -- confirm. */ | |
172 | } rule_t; | |
173 | ||
174 | extern struct rule_s *rules; /* The RULES array described at the top of this file. */ | |
175 | ||
176 | /* Table of the symbols, indexed by the symbol number. */ | |
177 | extern symbol_t **symbols; | |
178 | ||
179 | /* TOKEN_TRANSLATIONS -- a table indexed by a token number as returned | |
180 | by the user's yylex routine; it yields the internal token number | |
181 | used by the parser and throughout Bison. */ | |
182 | extern symbol_number_t *token_translations; | |
183 | extern int max_user_token_number; /* NOTE(review): presumably the largest user token number, i.e. the last valid index of TOKEN_TRANSLATIONS -- confirm. */ | |
184 | ||
185 | ||
186 | /* GLR_PARSER is nonzero if the input file says to use the GLR | |
187 | (Generalized LR) parser, and to output some additional | |
188 | information used by the GLR algorithm. */ | |
189 | ||
190 | extern int glr_parser; | |
191 | ||
192 | /* PURE_PARSER is nonzero if Bison should generate a parser that is | |
193 | pure and reentrant. */ | |
194 | ||
195 | extern int pure_parser; | |
196 | ||
197 | /* Print this RULE's number and lhs on OUT. If a PREVIOUS_LHS was | |
198 | already displayed (by a previous call for another rule), avoid | |
199 | useless repetitions. */ | |
200 | void rule_lhs_print PARAMS ((rule_t *rule, symbol_t *previous_lhs, FILE *out)); | |
201 | ||
202 | /* Return the length of RULE's RHS. */ | |
203 | int rule_rhs_length PARAMS ((rule_t *rule)); | |
204 | ||
205 | /* Print this RULE's RHS on OUT. */ | |
206 | void rule_rhs_print PARAMS ((rule_t *rule, FILE *out)); | |
207 | ||
208 | /* Print this RULE on OUT. */ | |
209 | void rule_print PARAMS ((rule_t *rule, FILE *out)); | |
210 | ||
211 | /* Dump RITEM on OUT, for traces. */ | |
212 | void ritem_print PARAMS ((FILE *out)); | |
213 | ||
214 | /* Return the size of the longest rule RHS. */ | |
215 | size_t ritem_longest_rhs PARAMS ((void)); | |
216 | ||
217 | /* Print the grammar's rules numbered from BEGIN (inclusive) to END | |
218 | (exclusive) on OUT under TITLE. */ | |
219 | void grammar_rules_partial_print PARAMS ((FILE *out, const char *title, | |
220 | rule_number_t begin, | |
221 | rule_number_t end)); | |
222 | ||
223 | /* Print the grammar's rules on OUT. */ | |
224 | void grammar_rules_print PARAMS ((FILE *out)); | |
225 | ||
226 | /* Dump the grammar on OUT. NOTE(review): TITLE presumably labels the dump -- confirm. */ | |
227 | void grammar_dump PARAMS ((FILE *out, const char *title)); | |
228 | ||
229 | /* Free the packed grammar. */ | |
230 | void grammar_free PARAMS ((void)); | |
231 | ||
232 | #endif /* !GRAM_H_ */ |