]>
Commit | Line | Data |
---|---|---|
d0fb370f | 1 | /* Type definitions for nondeterministic finite state machine for bison, |
d954473d | 2 | Copyright 1984, 1989, 2000, 2001 Free Software Foundation, Inc. |
d0fb370f | 3 | |
a70083a3 | 4 | This file is part of Bison, the GNU Compiler Compiler. |
d0fb370f | 5 | |
a70083a3 AD |
6 | Bison is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
d0fb370f | 10 | |
a70083a3 AD |
11 | Bison is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
d0fb370f | 15 | |
a70083a3 AD |
16 | You should have received a copy of the GNU General Public License |
17 | along with Bison; see the file COPYING. If not, write to | |
18 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
d0fb370f RS |
20 | |
21 | ||
22 | /* These type definitions are used to represent a nondeterministic | |
a70083a3 AD |
23 | finite state machine that parses the specified grammar. This |
24 | information is generated by the function generate_states in the | |
25 | file LR0. | |
26 | ||
27 | Each state of the machine is described by a set of items -- | |
28 | particular positions in particular rules -- that are the possible | |
29 | places where parsing could continue when the machine is in this | |
30 | state. The symbols at these items are the allowable inputs that | |
31 | can follow now. | |
32 | ||
9801d40c | 33 | A core represents one state. States are numbered in the NUMBER |
a70083a3 | 34 | field. When generate_states is finished, the starting state is |
9801d40c AD |
35 | state 0 and NSTATES is the number of states. (FIXME: This sentence |
36 | is no longer true: A transition to a state whose state number is | |
37 | NSTATES indicates termination.) All the cores are chained together | |
38 | and FIRST_STATE points to the first one (state 0). | |
a70083a3 AD |
39 | |
40 | For each state there is a particular symbol which must have been | |
41 | the last thing accepted to reach that state. It is the | |
9801d40c | 42 | ACCESSING_SYMBOL of the core. |
a70083a3 | 43 | |
5123689b | 44 | Each core contains a vector of NITEMS items which are the indices |
9801d40c | 45 | in the RITEMS vector of the items that are selected in this state. |
a70083a3 | 46 | |
8b752b00 AD |
47 | The two types of actions are shifts/gotos (push the lookahead token |
48 | and read another/goto to the state designated by a nterm) and | |
49 | reductions (combine the last n things on the stack via a rule, | |
50 | replace them with the symbol that the rule derives, and leave the | |
51 | lookahead token alone). When the states are generated, these | |
52 | actions are represented in two other lists. | |
53 | ||
54 | Each transitions_t structure describes the possible transitions out | |
55 | of one state (the state whose TRANSITIONS member points to it). Each | |
56 | contains a vector of pointers to the states that transitions can go | |
57 | to. The accessing_symbol fields of those states' cores say what | |
58 | kind of input leads to them. | |
59 | ||
60 | A disabled transition (a NULL entry in the vector) should be ignored: | |
61 | conflict resolution deletes transitions by setting them to NULL. | |
a70083a3 AD |
62 | |
63 | Each reductions structure describes the possible reductions at the | |
64 | state whose number is in the number field. The data is a list of | |
65 | nreds rules, represented by their rule numbers. first_reduction | |
66 | points to the list of these structures. | |
67 | ||
68 | Conflict resolution can decide that certain tokens in certain | |
69 | states should explicitly be errors (for implementing %nonassoc). | |
70 | For each state, the tokens that are errors for this reason are | |
8b752b00 | 71 | recorded in an errs structure, which holds the token numbers. |
a70083a3 | 72 | |
8b752b00 | 73 | There is at least one goto transition present in state zero. It |
a70083a3 AD |
74 | leads to a next-to-final state whose accessing_symbol is the |
75 | grammar's start symbol. The next-to-final state has one shift to | |
76 | the final state, whose accessing_symbol is zero (end of input). | |
8b752b00 AD |
77 | The final state has one shift, which goes to the termination state. |
78 | The reason for the extra state at the end is to placate the | |
79 | parser's strategy of making all decisions one token ahead of its | |
80 | actions. */ | |
a70083a3 AD |
81 | |
82 | #ifndef STATE_H_ | |
83 | # define STATE_H_ | |
84 | ||
c0263492 | 85 | # include "bitsetv.h" |
aa2aab3c | 86 | |
d57650a5 AD |
87 | |
88 | /*-------------------. | |
89 | | Numbering states. | | |
90 | `-------------------*/ | |
91 | ||
/* A state number.  It is kept in a short, so state numbers cannot
   exceed SHRT_MAX; STATE_NUMBER_MAX is that bound expressed in the
   state_number_t type.  SHRT_MAX is expected to come from <limits.h>,
   which this header does not include itself.  */
typedef short state_number_t;
# define STATE_NUMBER_MAX ((state_number_t) SHRT_MAX)

/* Convert the state number STATE to a plain int.  */
# define state_number_as_int(State) ((int) (State))


/* A state of the automaton; struct state_s is defined further down in
   this header.  */
typedef struct state_s state_t;
100 | ||
ccaf65bc AD |
101 | /*--------------. |
102 | | Transitions. | | |
103 | `--------------*/ | |
aa2aab3c | 104 | |
ccaf65bc | 105 | typedef struct transtion_s |
a70083a3 | 106 | { |
ccaf65bc | 107 | short num; |
640748ee | 108 | state_t *states[1]; |
ccaf65bc | 109 | } transitions_t; |
d954473d AD |
110 | |
111 | ||
8b752b00 AD |
/* What is the symbol labelling the transition to
   TRANSITIONS->states[Num]?  Can be a token (amongst which the error
   token), or a nonterminal in case of gotos.  The entry must not be
   disabled: the target state is dereferenced.

   In all the macros below TRANSITIONS is parenthesized in the
   expansion, so any pointer expression (`&t', `*(table + 1)', ...)
   may be passed, not just a plain identifier.  */

#define TRANSITION_SYMBOL(Transitions, Num) \
  ((Transitions)->states[Num]->accessing_symbol)

/* Is TRANSITIONS->states[Num] a shift? (as opposed to a goto).  */

#define TRANSITION_IS_SHIFT(Transitions, Num) \
  (ISTOKEN (TRANSITION_SYMBOL (Transitions, Num)))

/* Is TRANSITIONS->states[Num] a goto?  */

#define TRANSITION_IS_GOTO(Transitions, Num) \
  (!TRANSITION_IS_SHIFT (Transitions, Num))

/* Is TRANSITIONS->states[Num] labelled by the error token?  */

#define TRANSITION_IS_ERROR(Transitions, Num) \
  (TRANSITION_SYMBOL (Transitions, Num) == errtoken->number)

/* When resolving an S/R conflict, if the reduction wins, the shift is
   disabled: its slot in TRANSITIONS->states is set to NULL.  */

#define TRANSITION_DISABLE(Transitions, Num) \
  ((Transitions)->states[Num] = NULL)

/* Has TRANSITIONS->states[Num] been disabled?  */

#define TRANSITION_IS_DISABLED(Transitions, Num) \
  ((Transitions)->states[Num] == NULL)
142 | ||
143 | ||
/* Iterate over each transition over a token (shifts).  ITER is an
   integer lvalue used as the index into TRANSITIONS->states; the
   statement that follows the macro runs once per enabled shift.
   Disabled (NULL) entries are skipped, and the loop stops at the
   first enabled entry that is not a shift.

   The filter is written `if (disabled) {} else' rather than
   `if (!disabled)' so that the macro leaves no unmatched `if' behind:
   an `else' written by the caller after the loop body binds to the
   caller's own `if', not to the one hidden in here.  Both arguments
   are parenthesized, so they need not be plain identifiers.  */
#define FOR_EACH_SHIFT(Transitions, Iter)                        \
  for ((Iter) = 0;                                               \
       (Iter) < (Transitions)->num                               \
         && (TRANSITION_IS_DISABLED ((Transitions), (Iter))      \
             || TRANSITION_IS_SHIFT ((Transitions), (Iter)));    \
       ++(Iter))                                                 \
    if (TRANSITION_IS_DISABLED ((Transitions), (Iter)))          \
      {                                                          \
      }                                                          \
    else
152 | ||
9839bbe5 | 153 | |
ccaf65bc | 154 | /* Return the state such these TRANSITIONS contain a shift/goto to it on |
24c7d800 AD |
155 | SYMBOL. Aborts if none found. */ |
156 | struct state_s; | |
ccaf65bc AD |
157 | struct state_s *transitions_to PARAMS ((transitions_t *state, |
158 | symbol_number_t s)); | |
159 | ||
aa2aab3c AD |
160 | |
161 | /*-------. | |
162 | | Errs. | | |
163 | `-------*/ | |
a70083a3 | 164 | |
8a731ca8 | 165 | typedef struct errs_s |
a70083a3 | 166 | { |
d2576365 | 167 | short num; |
640748ee | 168 | symbol_t *symbols[1]; |
8a731ca8 | 169 | } errs_t; |
a70083a3 | 170 | |
640748ee | 171 | errs_t *errs_new PARAMS ((int num, symbol_t **tokens)); |
f59c437a | 172 | |
a70083a3 | 173 | |
aa2aab3c AD |
174 | /*-------------. |
175 | | Reductions. | | |
176 | `-------------*/ | |
a70083a3 | 177 | |
8a731ca8 | 178 | typedef struct reductions_s |
a70083a3 | 179 | { |
d2576365 | 180 | short num; |
640748ee | 181 | rule_t *rules[1]; |
8a731ca8 | 182 | } reductions_t; |
d0fb370f | 183 | |
f693ad14 AD |
184 | |
185 | ||
640748ee AD |
186 | /*---------. |
187 | | States. | | |
188 | `---------*/ | |
f693ad14 | 189 | |
640748ee | 190 | struct state_s |
f693ad14 | 191 | { |
d57650a5 | 192 | state_number_t number; |
a49aecd5 | 193 | symbol_number_t accessing_symbol; |
8b752b00 | 194 | transitions_t *transitions; |
8a731ca8 AD |
195 | reductions_t *reductions; |
196 | errs_t *errs; | |
f693ad14 AD |
197 | |
198 | /* Nonzero if no lookahead is needed to decide what to do in state S. */ | |
199 | char consistent; | |
200 | ||
53d4308d AD |
201 | /* Used in LALR, not LR(0). |
202 | ||
203 | When a state is not consistent (there is an S/R or R/R conflict), | |
204 | lookaheads are needed to enable the reductions. NLOOKAHEADS is | |
205 | the number of lookahead guarded reductions of the | |
206 | LOOKAHEADS_RULE. For each rule LOOKAHEADS_RULE[R], LOOKAHEADS[R] | |
207 | is the bitset of the lookaheads enabling this reduction. */ | |
3877f72b | 208 | int nlookaheads; |
c0263492 AD |
209 | bitsetv lookaheads; |
210 | rule_t **lookaheads_rule; | |
f693ad14 | 211 | |
b408954b AD |
212 | /* If some conflicts were solved thanks to precedence/associativity, |
213 | a human readable description of the resolution. */ | |
214 | const char *solved_conflicts; | |
215 | ||
216 | /* Its items. Must be last, since ITEMS can be arbitrarily large. | |
217 | */ | |
0c2d3f4c | 218 | unsigned short nitems; |
62a3e4f0 | 219 | item_number_t items[1]; |
640748ee | 220 | }; |
f693ad14 | 221 | |
df0e7316 AD |
222 | extern state_number_t nstates; |
223 | extern state_t *final_state; | |
224 | ||
225 | /* Create a new state with ACCESSING_SYMBOL for those items. */ | |
df0e7316 AD |
226 | state_t *state_new PARAMS ((symbol_number_t accessing_symbol, |
227 | size_t core_size, item_number_t *core)); | |
f693ad14 | 228 | |
8b752b00 | 229 | /* Set the transitions of STATE. */ |
ccaf65bc | 230 | void state_transitions_set PARAMS ((state_t *state, |
640748ee | 231 | int num, state_t **transitions)); |
32e1e0a4 | 232 | |
8a731ca8 AD |
233 | /* Set the reductions of STATE. */ |
234 | void state_reductions_set PARAMS ((state_t *state, | |
640748ee | 235 | int num, rule_t **reductions)); |
8b752b00 AD |
236 | |
237 | /* Set the errs of STATE. */ | |
238 | void state_errs_set PARAMS ((state_t *state, | |
640748ee | 239 | int num, symbol_t **errs)); |
8a731ca8 | 240 | |
10e5b8bd AD |
241 | /* Print on OUT all the lookaheads such that this STATE wants to |
242 | reduce this RULE. */ | |
10e5b8bd AD |
243 | void state_rule_lookaheads_print PARAMS ((state_t *state, rule_t *rule, |
244 | FILE *out)); | |
245 | ||
c7ca99d4 AD |
246 | /* Create/destroy the states hash table. */ |
247 | void state_hash_new PARAMS ((void)); | |
248 | void state_hash_free PARAMS ((void)); | |
249 | ||
250 | /* Find the state associated to the CORE, and return it. If it does | |
251 | not exist yet, return NULL. */ | |
252 | state_t *state_hash_lookup PARAMS ((size_t core_size, item_number_t *core)); | |
253 | ||
254 | /* Insert STATE in the state hash table. */ | |
255 | void state_hash_insert PARAMS ((state_t *state)); | |
256 | ||
257 | /* All the states, indexed by the state number. */ | |
258 | extern state_t **states; | |
259 | ||
260 | /* Free all the states. */ | |
261 | void states_free PARAMS ((void)); | |
a70083a3 | 262 | #endif /* !STATE_H_ */ |