]>
Commit | Line | Data |
---|---|---|
2073d769 | 1 | /* Type definitions for nondeterministic finite state machine for Bison. |
784573d1 | 2 | |
98744608 JD |
3 | Copyright (C) 1984, 1989, 2000, 2001, 2002, 2003, 2004, 2007, 2009 |
4 | Free Software Foundation, Inc. | |
d0fb370f | 5 | |
a70083a3 | 6 | This file is part of Bison, the GNU Compiler Compiler. |
d0fb370f | 7 | |
f16b0819 | 8 | This program is free software: you can redistribute it and/or modify |
a70083a3 | 9 | it under the terms of the GNU General Public License as published by |
f16b0819 PE |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. | |
d0fb370f | 12 | |
f16b0819 | 13 | This program is distributed in the hope that it will be useful, |
a70083a3 AD |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
d0fb370f | 17 | |
a70083a3 | 18 | You should have received a copy of the GNU General Public License |
f16b0819 | 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
d0fb370f RS |
20 | |
21 | ||
22 | /* These type definitions are used to represent a nondeterministic | |
a70083a3 AD |
23 | finite state machine that parses the specified grammar. This |
24 | information is generated by the function generate_states in the | |
25 | file LR0. | |
26 | ||
27 | Each state of the machine is described by a set of items -- | |
28 | particular positions in particular rules -- that are the possible | |
29 | places where parsing could continue when the machine is in this | |
30 | state. The symbols at these items are the allowable inputs that | |
31 | can follow now. | |
32 | ||
9801d40c | 33 | A core represents one state. States are numbered in the NUMBER |
a70083a3 | 34 | field. When generate_states is finished, the starting state is |
9801d40c AD |
35 | state 0 and NSTATES is the number of states. (FIXME: This sentence |
36 | is no longer true: A transition to a state whose state number is | |
37 | NSTATES indicates termination.) All the cores are chained together | |
38 | and FIRST_STATE points to the first one (state 0). | |
a70083a3 AD |
39 | |
40 | For each state there is a particular symbol which must have been | |
41 | the last thing accepted to reach that state. It is the | |
9801d40c | 42 | ACCESSING_SYMBOL of the core. |
a70083a3 | 43 | |
5123689b | 44 | Each core contains a vector of NITEMS items which are the indices |
b09f4f48 | 45 | in the RITEM vector of the items that are selected in this state. |
a70083a3 | 46 | |
742e4900 | 47 | The two types of actions are shifts/gotos (push the lookahead token |
8b752b00 AD |
48 | and read another/goto to the state designated by a nterm) and |
49 | reductions (combine the last n things on the stack via a rule, | |
50 | replace them with the symbol that the rule derives, and leave the | |
742e4900 | 51 | lookahead token alone). When the states are generated, these |
8b752b00 AD |
52 | actions are represented in two other lists. |
53 | ||
784573d1 | 54 | Each transition structure describes the possible transitions out |
8b752b00 AD |
55 | of one state, the state whose number is in the number field. Each |
56 | contains a vector of pointers to the states that transitions can go | |
57 | to. The accessing_symbol fields of those states' cores say what | |
58 | kind of input leads to them. | |
59 | ||
60 | A disabled transition should be ignored: conflict resolution | |
61 | deletes transitions by setting them to NULL (TRANSITION_DISABLE). | |
a70083a3 AD |
62 | |
63 | Each reductions structure describes the possible reductions at the | |
b09f4f48 JD |
64 | state whose number is in the number field. rules is an array of |
65 | num rules. lookahead_tokens is an array of bitsets, one per rule. | |
a70083a3 AD |
66 | |
67 | Conflict resolution can decide that certain tokens in certain | |
68 | states should explicitly be errors (for implementing %nonassoc). | |
69 | For each state, the tokens that are errors for this reason are | |
8b752b00 | 70 | recorded in an errs structure, which holds the token numbers. |
a70083a3 | 71 | |
8b752b00 | 72 | There is at least one goto transition present in state zero. It |
a70083a3 AD |
73 | leads to a next-to-final state whose accessing_symbol is the |
74 | grammar's start symbol. The next-to-final state has one shift to | |
75 | the final state, whose accessing_symbol is zero (end of input). | |
8b752b00 AD |
76 | The final state has one shift, which goes to the termination state. |
77 | The reason for the extra state at the end is to placate the | |
78 | parser's strategy of making all decisions one token ahead of its | |
79 | actions. */ | |
a70083a3 AD |
80 | |
81 | #ifndef STATE_H_ | |
82 | # define STATE_H_ | |
83 | ||
784573d1 PE |
84 | # include <bitset.h> |
85 | ||
86 | # include "gram.h" | |
87 | # include "symtab.h" | |
aa2aab3c | 88 | |
d57650a5 AD |
89 | |
90 | /*-------------------. | |
91 | | Numbering states. | | |
92 | `-------------------*/ | |
93 | ||
f6fbd3da PE |
94 | typedef int state_number; |
95 | # define STATE_NUMBER_MAXIMUM INT_MAX | |
d57650a5 | 96 | |
784573d1 | 97 | /* Be ready to map a state_number to an int. */ |
2073d769 PE |
98 | static inline int |
99 | state_number_as_int (state_number s) | |
100 | { | |
101 | return s; | |
102 | } | |
d57650a5 | 103 | |
640748ee | 104 | |
784573d1 | 105 | typedef struct state state; |
640748ee | 106 | |
ccaf65bc AD |
107 | /*--------------. |
108 | | Transitions. | | |
109 | `--------------*/ | |
aa2aab3c | 110 | |
784573d1 | 111 | typedef struct |
a70083a3 | 112 | { |
f6fbd3da | 113 | int num; |
784573d1 PE |
114 | state *states[1]; |
115 | } transitions; | |
d954473d AD |
116 | |
117 | ||
8b752b00 AD |
118 | /* What is the symbol labelling the transition to |
119 | TRANSITIONS->states[Num]? It can be a token (including the error | |
120 | token), or a nonterminal in the case of gotos. */ | |
b608206e | 121 | |
8b752b00 | 122 | #define TRANSITION_SYMBOL(Transitions, Num) \ |
640748ee | 123 | (Transitions->states[Num]->accessing_symbol) |
b608206e | 124 | |
8b752b00 | 125 | /* Is the TRANSITIONS->states[Num] a shift? (as opposed to gotos). */ |
aa2aab3c | 126 | |
8b752b00 AD |
127 | #define TRANSITION_IS_SHIFT(Transitions, Num) \ |
128 | (ISTOKEN (TRANSITION_SYMBOL (Transitions, Num))) | |
aa2aab3c | 129 | |
8b752b00 | 130 | /* Is the TRANSITIONS->states[Num] a goto? */ |
aa2aab3c | 131 | |
8b752b00 AD |
132 | #define TRANSITION_IS_GOTO(Transitions, Num) \ |
133 | (!TRANSITION_IS_SHIFT (Transitions, Num)) | |
aa2aab3c | 134 | |
8b752b00 | 135 | /* Is the TRANSITIONS->states[Num] labelled by the error token? */ |
aa2aab3c | 136 | |
8b752b00 AD |
137 | #define TRANSITION_IS_ERROR(Transitions, Num) \ |
138 | (TRANSITION_SYMBOL (Transitions, Num) == errtoken->number) | |
aa2aab3c | 139 | |
9839bbe5 AD |
140 | /* When resolving an SR conflict, if the reduction wins, the shift is
141 | disabled. */ | |
142 | ||
8b752b00 | 143 | #define TRANSITION_DISABLE(Transitions, Num) \ |
640748ee | 144 | (Transitions->states[Num] = NULL) |
9839bbe5 | 145 | |
8b752b00 | 146 | #define TRANSITION_IS_DISABLED(Transitions, Num) \ |
640748ee AD |
147 | (Transitions->states[Num] == NULL) |
148 | ||
149 | ||
150 | /* Iterate over each transition over a token (shifts). */ | |
151 | #define FOR_EACH_SHIFT(Transitions, Iter) \ | |
152 | for (Iter = 0; \ | |
153 | Iter < Transitions->num \ | |
154 | && (TRANSITION_IS_DISABLED (Transitions, Iter) \ | |
155 | || TRANSITION_IS_SHIFT (Transitions, Iter)); \ | |
156 | ++Iter) \ | |
157 | if (!TRANSITION_IS_DISABLED (Transitions, Iter)) | |
158 | ||
9839bbe5 | 159 | |
a737b216 | 160 | /* Return the state such that SHIFTS contains a shift/goto to it on SYM. |
784573d1 | 161 | Abort if none found. */ |
a737b216 | 162 | struct state *transitions_to (transitions *shifts, symbol_number sym); |
ccaf65bc | 163 | |
aa2aab3c AD |
164 | |
165 | /*-------. | |
166 | | Errs. | | |
167 | `-------*/ | |
a70083a3 | 168 | |
784573d1 | 169 | typedef struct |
a70083a3 | 170 | { |
f6fbd3da | 171 | int num; |
784573d1 PE |
172 | symbol *symbols[1]; |
173 | } errs; | |
a70083a3 | 174 | |
784573d1 | 175 | errs *errs_new (int num, symbol **tokens); |
f59c437a | 176 | |
a70083a3 | 177 | |
aa2aab3c AD |
178 | /*-------------. |
179 | | Reductions. | | |
180 | `-------------*/ | |
a70083a3 | 181 | |
784573d1 | 182 | typedef struct |
a70083a3 | 183 | { |
f6fbd3da | 184 | int num; |
742e4900 | 185 | bitset *lookahead_tokens; |
6ce2d93a | 186 | /* Sorted ascendingly on rule number. */ |
784573d1 PE |
187 | rule *rules[1]; |
188 | } reductions; | |
d0fb370f | 189 | |
f693ad14 AD |
190 | |
191 | ||
640748ee | 192 | /*---------. |
784573d1 | 193 | | states. | |
640748ee | 194 | `---------*/ |
f693ad14 | 195 | |
784573d1 | 196 | struct state |
f693ad14 | 197 | { |
784573d1 PE |
198 | state_number number; |
199 | symbol_number accessing_symbol; | |
200 | transitions *transitions; | |
201 | reductions *reductions; | |
202 | errs *errs; | |
f693ad14 | 203 | |
2a6b783d JD |
204 | /* If non-zero, then no lookahead sets on reduce actions are needed to |
205 | decide what to do in this state. */ | |
f693ad14 AD |
206 | char consistent; |
207 | ||
b408954b AD |
208 | /* If some conflicts were solved thanks to precedence/associativity, |
209 | a human readable description of the resolution. */ | |
210 | const char *solved_conflicts; | |
41d7a5f2 | 211 | const char *solved_conflicts_xml; |
b408954b | 212 | |
6ce2d93a JD |
213 | /* Its items. Must be last, since ITEMS can be arbitrarily large. Sorted |
214 | ascendingly on item index in RITEM, which is sorted on rule number. */ | |
f6fbd3da | 215 | size_t nitems; |
784573d1 | 216 | item_number items[1]; |
640748ee | 217 | }; |
f693ad14 | 218 | |
784573d1 PE |
219 | extern state_number nstates; |
220 | extern state *final_state; | |
df0e7316 AD |
221 | |
222 | /* Create a new state with ACCESSING_SYMBOL for those items. */ | |
784573d1 PE |
223 | state *state_new (symbol_number accessing_symbol, |
224 | size_t core_size, item_number *core); | |
f693ad14 | 225 | |
8b752b00 | 226 | /* Set the transitions of STATE. */ |
784573d1 | 227 | void state_transitions_set (state *s, int num, state **trans); |
32e1e0a4 | 228 | |
8a731ca8 | 229 | /* Set the reductions of STATE. */ |
784573d1 | 230 | void state_reductions_set (state *s, int num, rule **reds); |
8b752b00 | 231 | |
784573d1 | 232 | int state_reduction_find (state *s, rule *r); |
cd08e51e | 233 | |
8b752b00 | 234 | /* Set the errs of STATE. */ |
784573d1 | 235 | void state_errs_set (state *s, int num, symbol **errors); |
8a731ca8 | 236 | |
742e4900 | 237 | /* Print on OUT all the lookahead tokens such that this STATE wants to |
784573d1 | 238 | reduce R. */ |
742e4900 | 239 | void state_rule_lookahead_tokens_print (state *s, rule *r, FILE *out); |
41d7a5f2 PE |
240 | void state_rule_lookahead_tokens_print_xml (state *s, rule *r, |
241 | FILE *out, int level); | |
10e5b8bd | 242 | |
c7ca99d4 | 243 | /* Create/destroy the states hash table. */ |
d33cb3ae PE |
244 | void state_hash_new (void); |
245 | void state_hash_free (void); | |
c7ca99d4 AD |
246 | |
247 | /* Find the state associated with the CORE, and return it. If it does | |
248 | not exist yet, return NULL. */ | |
784573d1 | 249 | state *state_hash_lookup (size_t core_size, item_number *core); |
c7ca99d4 AD |
250 | |
251 | /* Insert STATE in the state hash table. */ | |
784573d1 | 252 | void state_hash_insert (state *s); |
c7ca99d4 | 253 | |
5967f0cf JD |
254 | /* Remove unreachable states, renumber remaining states, update NSTATES, and |
255 | write to OLD_TO_NEW a mapping of old state numbers to new state numbers such | |
256 | that the old value of NSTATES is written as the new state number for removed | |
257 | states. The size of OLD_TO_NEW must be the old value of NSTATES. */ | |
258 | void state_remove_unreachable_states (state_number old_to_new[]); | |
259 | ||
c7ca99d4 | 260 | /* All the states, indexed by the state number. */ |
784573d1 | 261 | extern state **states; |
c7ca99d4 AD |
262 | |
263 | /* Free all the states. */ | |
d33cb3ae | 264 | void states_free (void); |
5967f0cf | 265 | |
a70083a3 | 266 | #endif /* !STATE_H_ */ |