]>
Commit | Line | Data |
---|---|---|
1 | /* Type definitions for nondeterministic finite state machine for bison, | |
2 | Copyright 1984, 1989, 2000, 2001 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of Bison, the GNU Compiler Compiler. | |
5 | ||
6 | Bison is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
10 | ||
11 | Bison is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with Bison; see the file COPYING. If not, write to | |
18 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | ||
22 | /* These type definitions are used to represent a nondeterministic | |
23 | finite state machine that parses the specified grammar. This | |
24 | information is generated by the function generate_states in the | |
25 | file LR0. | |
26 | ||
27 | Each state of the machine is described by a set of items -- | |
28 | particular positions in particular rules -- that are the possible | |
29 | places where parsing could continue when the machine is in this | |
30 | state. The symbols at these item positions are the allowable inputs | |
31 | that can follow now. | |
32 | ||
33 | A core represents one state. States are numbered in the NUMBER | |
34 | field. When generate_states is finished, the starting state is | |
35 | state 0 and NSTATES is the number of states. (FIXME: This sentence | |
36 | is no longer true: A transition to a state whose state number is | |
37 | NSTATES indicates termination.) All the cores are chained together | |
38 | and FIRST_STATE points to the first one (state 0). | |
39 | ||
40 | For each state there is a particular symbol which must have been | |
41 | the last thing accepted to reach that state. It is the | |
42 | ACCESSING_SYMBOL of the core. | |
43 | ||
44 | Each core contains a vector of NITEMS items which are the indices | |
45 | in the RITEMS vector of the items that are selected in this state. | |
46 | ||
47 | The two types of transitions are shifts (push the lookahead token | |
48 | and read another) and reductions (combine the last n things on the | |
49 | stack via a rule, replace them with the symbol that the rule | |
50 | derives, and leave the lookahead token alone). When the states are | |
51 | generated, these transitions are represented in two other lists. | |
52 | ||
53 | Each shifts structure describes the possible shift transitions out | |
54 | of one state, the state whose number is in the number field. The | |
55 | shifts structures are linked through next, and first_shift points to | |
56 | them. Each contains a vector of numbers of the states that shift | |
57 | transitions can go to. The accessing_symbol fields of those | |
58 | states' cores say what kind of input leads to them. | |
59 | ||
60 | A shift to state zero should be ignored. Conflict resolution | |
61 | deletes shifts by changing them to zero. | |
62 | ||
63 | Each reductions structure describes the possible reductions at the | |
64 | state whose number is in the number field. The data is a list of | |
65 | nreds rules, represented by their rule numbers. first_reduction | |
66 | points to the list of these structures. | |
67 | ||
68 | Conflict resolution can decide that certain tokens in certain | |
69 | states should explicitly be errors (for implementing %nonassoc). | |
70 | For each state, the tokens that are errors for this reason are | |
71 | recorded in an errs structure, which has the state number in its | |
72 | number field. The rest of the errs structure is full of token | |
73 | numbers. | |
74 | ||
75 | There is at least one shift transition present in state zero. It | |
76 | leads to a next-to-final state whose accessing_symbol is the | |
77 | grammar's start symbol. The next-to-final state has one shift to | |
78 | the final state, whose accessing_symbol is zero (end of input). | |
79 | The final state has one shift, which goes to the termination state | |
80 | (whose number is nstates-1). The reason for the extra state at the | |
81 | end is to placate the parser's strategy of making all decisions one | |
82 | token ahead of its actions. */ | |
83 | ||
84 | #ifndef STATE_H_ | |
85 | # define STATE_H_ | |
86 | ||
87 | # include "bitsetv.h" | |
88 | ||
89 | ||
90 | /*-------------------. | |
91 | | Numbering states. | | |
92 | `-------------------*/ | |
93 | ||
/* Integral type used to number the states of the automaton.  */
typedef short int state_number_t;

/* Largest value a state_number_t can hold.  */
# define STATE_NUMBER_MAX ((state_number_t) SHRT_MAX)

/* Convert the state number STATE to a plain int.  */
# define state_number_as_int(State) ((int) (State))
99 | ||
100 | /*---------. | |
101 | | Shifts. | | |
102 | `---------*/ | |
103 | ||
104 | typedef struct shifts_s | |
105 | { | |
106 | short nshifts; | |
107 | state_number_t shifts[1]; | |
108 | } shifts_t; | |
109 | ||
110 | ||
/* Accessors for one entry, SHIFTS->shifts[Shift], of a shifts
   structure.  In every macro below SHIFTS is a pointer to the shifts
   structure and SHIFT is an index into its shifts array.  The SHIFTS
   argument is parenthesized in each expansion, so any pointer
   expression (e.g. `base + i') can be passed, not just a plain
   identifier.  Each argument is evaluated exactly once.  */

/* The symbol which is shifted by SHIFTS->shifts[Shift], i.e. the
   accessing symbol of the state the transition leads to.  Can be a
   token (amongst which the error token), or a nonterminal in case
   of gotos.  */

#define SHIFT_SYMBOL(Shifts, Shift) \
  (states[(Shifts)->shifts[Shift]]->accessing_symbol)

/* Is SHIFTS->shifts[Shift] a real shift (as opposed to a goto)?  */

#define SHIFT_IS_SHIFT(Shifts, Shift) \
  (ISTOKEN (SHIFT_SYMBOL (Shifts, Shift)))

/* Is SHIFTS->shifts[Shift] a goto?  */

#define SHIFT_IS_GOTO(Shifts, Shift) \
  (!SHIFT_IS_SHIFT (Shifts, Shift))

/* Is SHIFTS->shifts[Shift] the handling of the error token?  */

#define SHIFT_IS_ERROR(Shifts, Shift) \
  (SHIFT_SYMBOL (Shifts, Shift) == errtoken->number)

/* When resolving an S/R conflict, if the reduction wins, the shift
   is disabled.  A disabled entry is marked by storing state number
   0 in it.  */

#define SHIFT_DISABLE(Shifts, Shift) \
  ((Shifts)->shifts[Shift] = 0)

/* Has SHIFTS->shifts[Shift] been disabled by SHIFT_DISABLE?  */

#define SHIFT_IS_DISABLED(Shifts, Shift) \
  ((Shifts)->shifts[Shift] == 0)
141 | ||
142 | /* Return the state such these SHIFTS contain a shift/goto to it on | |
143 | SYMBOL. Aborts if none found. */ | |
144 | struct state_s; | |
145 | struct state_s *shifts_to PARAMS ((shifts_t *shifts, symbol_number_t s)); | |
146 | ||
147 | /*-------. | |
148 | | Errs. | | |
149 | `-------*/ | |
150 | ||
/* The tokens that conflict resolution decided should explicitly be
   errors in one state (used to implement %nonassoc).  ERRS holds
   token numbers.

   ERRS is declared with a single element.  NOTE (review): presumably
   the structure is over-allocated so that the array really carries
   NERRS entries -- confirm against errs_new.  */
struct errs_s
{
  short nerrs;
  short errs[1];
};

typedef struct errs_s errs_t;
156 | ||
157 | errs_t *errs_new PARAMS ((int n)); | |
158 | errs_t *errs_dup PARAMS ((errs_t *src)); | |
159 | ||
160 | ||
161 | /*-------------. | |
162 | | Reductions. | | |
163 | `-------------*/ | |
164 | ||
/* The possible reductions in one state: a list of NREDS rules,
   represented by their rule numbers in RULES.

   RULES is declared with a single element.  NOTE (review): presumably
   the structure is over-allocated so that the array really carries
   NREDS entries -- confirm against the code that allocates it.  */
struct reductions_s
{
  short nreds;
  short rules[1];
};

typedef struct reductions_s reductions_t;
170 | ||
171 | ||
172 | ||
173 | /*----------. | |
174 | | State_t. | | |
175 | `----------*/ | |
176 | ||
177 | typedef struct state_s | |
178 | { | |
179 | state_number_t number; | |
180 | symbol_number_t accessing_symbol; | |
181 | shifts_t *shifts; | |
182 | reductions_t *reductions; | |
183 | errs_t *errs; | |
184 | ||
185 | /* Nonzero if no lookahead is needed to decide what to do in state S. */ | |
186 | char consistent; | |
187 | ||
188 | /* Used in LALR, not LR(0). | |
189 | ||
190 | When a state is not consistent (there is an S/R or R/R conflict), | |
191 | lookaheads are needed to enable the reductions. NLOOKAHEADS is | |
192 | the number of lookahead guarded reductions of the | |
193 | LOOKAHEADS_RULE. For each rule LOOKAHEADS_RULE[R], LOOKAHEADS[R] | |
194 | is the bitset of the lookaheads enabling this reduction. */ | |
195 | int nlookaheads; | |
196 | bitsetv lookaheads; | |
197 | rule_t **lookaheads_rule; | |
198 | ||
199 | /* If some conflicts were solved thanks to precedence/associativity, | |
200 | a human readable description of the resolution. */ | |
201 | const char *solved_conflicts; | |
202 | ||
203 | /* Its items. Must be last, since ITEMS can be arbitrarily large. | |
204 | */ | |
205 | unsigned short nitems; | |
206 | item_number_t items[1]; | |
207 | } state_t; | |
208 | ||
209 | extern state_number_t nstates; | |
210 | extern state_t *final_state; | |
211 | ||
212 | /* Create a new state with ACCESSING_SYMBOL for those items. */ | |
213 | state_t *state_new PARAMS ((symbol_number_t accessing_symbol, | |
214 | size_t core_size, item_number_t *core)); | |
215 | ||
216 | /* Set the shifts of STATE. */ | |
217 | void state_shifts_set PARAMS ((state_t *state, | |
218 | int nshifts, state_number_t *shifts)); | |
219 | ||
220 | /* Set the reductions of STATE. */ | |
221 | void state_reductions_set PARAMS ((state_t *state, | |
222 | int nreductions, short *reductions)); | |
223 | ||
224 | /* Print on OUT all the lookaheads such that this STATE wants to | |
225 | reduce this RULE. */ | |
226 | void state_rule_lookaheads_print PARAMS ((state_t *state, rule_t *rule, | |
227 | FILE *out)); | |
228 | ||
229 | /* Create/destroy the states hash table. */ | |
230 | void state_hash_new PARAMS ((void)); | |
231 | void state_hash_free PARAMS ((void)); | |
232 | ||
233 | /* Find the state associated to the CORE, and return it. If it does | |
234 | not exist yet, return NULL. */ | |
235 | state_t *state_hash_lookup PARAMS ((size_t core_size, item_number_t *core)); | |
236 | ||
237 | /* Insert STATE in the state hash table. */ | |
238 | void state_hash_insert PARAMS ((state_t *state)); | |
239 | ||
240 | /* All the states, indexed by the state number. */ | |
241 | extern state_t **states; | |
242 | ||
243 | /* Free all the states. */ | |
244 | void states_free PARAMS ((void)); | |
245 | #endif /* !STATE_H_ */ |