]>
Commit | Line | Data |
---|---|---|
1 | /* Type definitions for nondeterministic finite state machine for Bison. | |
2 | ||
3 | Copyright (C) 1984, 1989, 2000, 2001, 2002, 2003, 2004 Free | |
4 | Software Foundation, Inc. | |
5 | ||
6 | This file is part of Bison, the GNU Compiler Compiler. | |
7 | ||
8 | Bison is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2, or (at your option) | |
11 | any later version. | |
12 | ||
13 | Bison is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with Bison; see the file COPYING. If not, write to | |
20 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
21 | Boston, MA 02110-1301, USA. */ | |
22 | ||
23 | ||
24 | /* These type definitions are used to represent a nondeterministic | |
25 | finite state machine that parses the specified grammar. This | |
26 | information is generated by the function generate_states in the | |
27 | file LR0. | |
28 | ||
29 | Each state of the machine is described by a set of items -- | |
30 | particular positions in particular rules -- that are the possible | |
31 | places where parsing could continue when the machine is in this | |
32 | state. The symbols at these items are the allowable inputs that | |
33 | can follow now. | |
34 | ||
35 | A core represents one state. States are numbered in the NUMBER | |
36 | field. When generate_states is finished, the starting state is | |
37 | state 0 and NSTATES is the number of states. (FIXME: This sentence | |
38 | is no longer true: A transition to a state whose state number is | |
39 | NSTATES indicates termination.) All the cores are chained together | |
40 | and FIRST_STATE points to the first one (state 0). | |
41 | ||
42 | For each state there is a particular symbol which must have been | |
43 | the last thing accepted to reach that state. It is the | |
44 | ACCESSING_SYMBOL of the core. | |
45 | ||
46 | Each core contains a vector of NITEMS items which are the indices | |
47 | in the RITEM vector of the items that are selected in this state. | |
48 | ||
49 | The two types of actions are shifts/gotos (push the lookahead token | |
50 | and read another/goto to the state designated by a nterm) and | |
51 | reductions (combine the last n things on the stack via a rule, | |
52 | replace them with the symbol that the rule derives, and leave the | |
53 | lookahead token alone). When the states are generated, these | |
54 | actions are represented in two other lists. | |
55 | ||
56 | Each transition structure describes the possible transitions out | |
57 | of one state, the state whose number is in the number field. Each | |
58 | contains a vector of pointers to the states that transitions can go | |
59 | to. The accessing_symbol fields of those states' cores say what | |
60 | kind of input leads to them. | |
61 | ||
62 | A disabled transition should be ignored: conflict resolution | |
63 | deletes transitions by setting their target state to NULL. | |
64 | ||
65 | Each reductions structure describes the possible reductions at the | |
66 | state whose number is in the number field. rules is an array of | |
67 | num rules. lookahead_tokens is an array of bitsets, one per rule. | |
68 | ||
69 | Conflict resolution can decide that certain tokens in certain | |
70 | states should explicitly be errors (for implementing %nonassoc). | |
71 | For each state, the tokens that are errors for this reason are | |
72 | recorded in an errs structure, which holds the tokens' symbols. | |
73 | ||
74 | There is at least one goto transition present in state zero. It | |
75 | leads to a next-to-final state whose accessing_symbol is the | |
76 | grammar's start symbol. The next-to-final state has one shift to | |
77 | the final state, whose accessing_symbol is zero (end of input). | |
78 | The final state has one shift, which goes to the termination state. | |
79 | The reason for the extra state at the end is to placate the | |
80 | parser's strategy of making all decisions one token ahead of its | |
81 | actions. */ | |
82 | ||
83 | #ifndef STATE_H_ | |
84 | # define STATE_H_ | |
85 | ||
86 | # include <bitset.h> | |
87 | ||
88 | # include "gram.h" | |
89 | # include "symtab.h" | |
90 | ||
91 | ||
92 | /*-------------------. | |
93 | | Numbering states. | | |
94 | `-------------------*/ | |
95 | ||
96 | typedef int state_number; /* Identifies a state; STATES is indexed by it. */ | |
97 | # define STATE_NUMBER_MAXIMUM INT_MAX /* NOTE(review): INT_MAX needs <limits.h>, which is not included here directly -- confirm an earlier include provides it. */ | |
98 | ||
99 | /* Convert state_number S to an int. Currently the identity, since state_number is a typedef of int. */ | |
100 | static inline int | |
101 | state_number_as_int (state_number s) | |
102 | { | |
103 | return s; | |
104 | } | |
105 | ||
106 | ||
107 | typedef struct state state; /* Forward declaration; struct state itself is defined further down. */ | |
108 | ||
109 | /*--------------. | |
110 | | Transitions. | | |
111 | `--------------*/ | |
112 | ||
113 | typedef struct | |
114 | { | |
115 | int num; /* Number of entries in STATES. */ | |
116 | state *states[1]; /* Target states; NULL marks a disabled transition. Declared with one element -- presumably over-allocated to hold NUM entries (confirm in state.c). */ | |
117 | } transitions; | |
118 | ||
119 | ||
120 | /* The symbol labelling the transition to TRANSITIONS->states[Num]: | |
121 | a token (possibly the error token) for shifts, a nonterminal for | |
122 | gotos. Dereferences the target, so it must not be disabled. */ | |
123 | ||
124 | #define TRANSITION_SYMBOL(Transitions, Num) \ | |
125 | ((Transitions)->states[Num]->accessing_symbol) | |
126 | ||
127 | /* Is TRANSITIONS->states[Num] a shift (its symbol is a token), as opposed to a goto? */ | |
128 | ||
129 | #define TRANSITION_IS_SHIFT(Transitions, Num) \ | |
130 | (ISTOKEN (TRANSITION_SYMBOL (Transitions, Num))) | |
131 | ||
132 | /* Is TRANSITIONS->states[Num] a goto, i.e. not a shift? */ | |
133 | ||
134 | #define TRANSITION_IS_GOTO(Transitions, Num) \ | |
135 | (!TRANSITION_IS_SHIFT (Transitions, Num)) | |
136 | ||
137 | /* Is TRANSITIONS->states[Num] labelled by the error token (errtoken)? */ | |
138 | ||
139 | #define TRANSITION_IS_ERROR(Transitions, Num) \ | |
140 | (TRANSITION_SYMBOL (Transitions, Num) == errtoken->number) | |
141 | ||
142 | /* When resolving SR conflicts, if the reduction wins, the shift is | |
143 | disabled by resetting its target state to NULL. */ | |
144 | ||
145 | #define TRANSITION_DISABLE(Transitions, Num) \ | |
146 | ((Transitions)->states[Num] = NULL) | |
147 | ||
148 | #define TRANSITION_IS_DISABLED(Transitions, Num) \ | |
149 | ((Transitions)->states[Num] == NULL) | |
150 | ||
151 | ||
152 | /* Iterate over each enabled transition over a token (shifts). Stops at the first enabled non-shift transition. Iter must be a modifiable integer lvalue; both arguments are evaluated several times. Expands to a for + if, so the loop body is the statement that follows. */ | |
153 | #define FOR_EACH_SHIFT(Transitions, Iter) \ | |
154 | for ((Iter) = 0; \ | |
155 | (Iter) < (Transitions)->num \ | |
156 | && (TRANSITION_IS_DISABLED (Transitions, Iter) \ | |
157 | || TRANSITION_IS_SHIFT (Transitions, Iter)); \ | |
158 | ++(Iter)) \ | |
159 | if (!TRANSITION_IS_DISABLED (Transitions, Iter)) | |
160 | ||
161 | ||
162 | /* Return the state such that SHIFTS contains a shift/goto to it on | |
163 | SYM. Abort if none found. */ | |
164 | struct state *transitions_to (transitions *shifts, symbol_number sym); | |
165 | ||
166 | ||
167 | /*-------. | |
168 | | Errs. | | |
169 | `-------*/ | |
170 | ||
171 | typedef struct | |
172 | { | |
173 | int num; /* Presumably the number of entries in SYMBOLS -- confirm in state.c. */ | |
174 | symbol *symbols[1]; /* Tokens that conflict resolution made explicit errors (for %nonassoc). */ | |
175 | } errs; | |
176 | ||
177 | errs *errs_new (int num, symbol **tokens); /* NOTE(review): undocumented; presumably builds an errs holding the NUM symbols in TOKENS -- confirm in state.c. */ | |
178 | ||
179 | ||
180 | /*-------------. | |
181 | | Reductions. | | |
182 | `-------------*/ | |
183 | ||
184 | typedef struct | |
185 | { | |
186 | int num; /* Number of rules in RULES. */ | |
187 | bitset *lookahead_tokens; /* Array of bitsets, one per rule. */ | |
188 | rule *rules[1]; /* The NUM rules that can be reduced in the state. */ | |
189 | } reductions; | |
190 | ||
191 | ||
192 | ||
193 | /*---------. | |
194 | | states. | | |
195 | `---------*/ | |
196 | ||
197 | struct state | |
198 | { | |
199 | state_number number; /* This state's number. */ | |
200 | symbol_number accessing_symbol; /* Last symbol accepted to reach this state. */ | |
201 | transitions *transitions; /* Shifts and gotos out of this state. */ | |
202 | reductions *reductions; /* Reductions possible in this state. */ | |
203 | errs *errs; /* Tokens that are explicit errors in this state. */ | |
204 | ||
205 | /* Nonzero if no lookahead is needed to decide what to do in this state. */ | |
206 | char consistent; | |
207 | ||
208 | /* If some conflicts were solved thanks to precedence/associativity, | |
209 | a human readable description of the resolution. */ | |
210 | const char *solved_conflicts; | |
211 | ||
212 | /* Conflict resolution sometimes makes states unreachable. Initialized to 0 | |
213 | in state_new and then used by state_remove_unreachable_states after | |
214 | conflicts_solve. */ | |
215 | bool reachable; | |
216 | ||
217 | /* Its NITEMS items, as indices into the RITEM vector. Must be last, | |
218 | since ITEMS can be arbitrarily large. */ | |
219 | size_t nitems; | |
220 | item_number items[1]; | |
221 | }; | |
222 | ||
223 | extern state_number nstates; /* The number of states. */ | |
224 | extern state *final_state; /* Presumably the final state described in the file header comment -- confirm in LR0.c. */ | |
225 | ||
226 | /* Create a new state with ACCESSING_SYMBOL for the CORE_SIZE items in CORE. */ | |
227 | state *state_new (symbol_number accessing_symbol, | |
228 | size_t core_size, item_number *core); | |
229 | ||
230 | /* Set the transitions of state S. */ | |
231 | void state_transitions_set (state *s, int num, state **trans); | |
232 | ||
233 | /* Set the reductions of state S. */ | |
234 | void state_reductions_set (state *s, int num, rule **reds); | |
235 | ||
236 | int state_reduction_find (state *s, rule *r); /* NOTE(review): undocumented; presumably the index of R among S's reductions -- confirm in state.c. */ | |
237 | ||
238 | /* Set the errs of state S. */ | |
239 | void state_errs_set (state *s, int num, symbol **errors); | |
240 | ||
241 | /* Print on OUT all the lookahead tokens such that state S wants to | |
242 | reduce R. */ | |
243 | void state_rule_lookahead_tokens_print (state *s, rule *r, FILE *out); | |
244 | ||
245 | /* Create/destroy the states hash table. */ | |
246 | void state_hash_new (void); | |
247 | void state_hash_free (void); | |
248 | ||
249 | /* Find the state associated with the CORE, and return it. If it does | |
250 | not exist yet, return NULL. */ | |
251 | state *state_hash_lookup (size_t core_size, item_number *core); | |
252 | ||
253 | /* Insert state S in the state hash table. */ | |
254 | void state_hash_insert (state *s); | |
255 | ||
256 | /* Remove unreachable states, renumber remaining states, update NSTATES, and | |
257 | write to OLD_TO_NEW a mapping of old state numbers to new state numbers such | |
258 | that the old value of NSTATES is written as the new state number for removed | |
259 | states. The size of OLD_TO_NEW must be the old value of NSTATES. */ | |
260 | void state_remove_unreachable_states (state_number old_to_new[]); | |
261 | ||
262 | /* All the states, indexed by the state number. */ | |
263 | extern state **states; | |
264 | ||
265 | /* Free all the states. */ | |
266 | void states_free (void); | |
267 | ||
268 | #endif /* !STATE_H_ */ |