| 1 | /* Type definitions for the finite state machine for Bison. |
| 2 | |
| 3 | Copyright (C) 1984, 1989, 2000, 2001, 2002, 2003, 2004, 2007, 2009, |
| 4 | 2010 Free Software Foundation, Inc. |
| 5 | |
| 6 | This file is part of Bison, the GNU Compiler Compiler. |
| 7 | |
| 8 | This program is free software: you can redistribute it and/or modify |
| 9 | it under the terms of the GNU General Public License as published by |
| 10 | the Free Software Foundation, either version 3 of the License, or |
| 11 | (at your option) any later version. |
| 12 | |
| 13 | This program is distributed in the hope that it will be useful, |
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | GNU General Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU General Public License |
| 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
| 20 | |
| 21 | |
| 22 | /* These type definitions are used to represent a nondeterministic |
| 23 | finite state machine that parses the specified grammar. This |
| 24 | information is generated by the function generate_states in the |
| 25 | file LR0. |
| 26 | |
| 27 | Each state of the machine is described by a set of items -- |
| 28 | particular positions in particular rules -- that are the possible |
| 29 | places where parsing could continue when the machine is in this |
| 30 | state. These symbols at these items are the allowable inputs that |
| 31 | can follow now. |
| 32 | |
| 33 | A core represents one state. States are numbered in the NUMBER |
| 34 | field. When generate_states is finished, the starting state is |
| 35 | state 0 and NSTATES is the number of states. (FIXME: This sentence |
| 36 | is no longer true: A transition to a state whose state number is |
| 37 | NSTATES indicates termination.) All the cores are chained together |
| 38 | and FIRST_STATE points to the first one (state 0). |
| 39 | |
| 40 | For each state there is a particular symbol which must have been |
| 41 | the last thing accepted to reach that state. It is the |
| 42 | ACCESSING_SYMBOL of the core. |
| 43 | |
| 44 | Each core contains a vector of NITEMS items which are the indices |
| 45 | in the RITEM vector of the items that are selected in this state. |
| 46 | |
| 47 | The two types of actions are shifts/gotos (push the lookahead token |
| 48 | and read another/goto to the state designated by a nterm) and |
| 49 | reductions (combine the last n things on the stack via a rule, |
| 50 | replace them with the symbol that the rule derives, and leave the |
| 51 | lookahead token alone). When the states are generated, these |
| 52 | actions are represented in two other lists. |
| 53 | |
| 54 | Each transition structure describes the possible transitions out |
| 55 | of one state, the state whose number is in the number field. Each |
| 56 | contains a vector of numbers of the states that transitions can go |
| 57 | to. The accessing_symbol fields of those states' cores say what |
| 58 | kind of input leads to them. |
| 59 | |
| 60 | A transition to state zero should be ignored: conflict resolution |
| 61 | deletes transitions by having them point to zero. |
| 62 | |
| 63 | Each reductions structure describes the possible reductions at the |
| 64 | state whose number is in the number field. rules is an array of |
| 65 | num rules. lookahead_tokens is an array of bitsets, one per rule. |
| 66 | |
| 67 | Conflict resolution can decide that certain tokens in certain |
| 68 | states should explicitly be errors (for implementing %nonassoc). |
| 69 | For each state, the tokens that are errors for this reason are |
| 70 | recorded in an errs structure, which holds the token numbers. |
| 71 | |
| 72 | There is at least one goto transition present in state zero. It |
| 73 | leads to a next-to-final state whose accessing_symbol is the |
| 74 | grammar's start symbol. The next-to-final state has one shift to |
| 75 | the final state, whose accessing_symbol is zero (end of input). |
| 76 | The final state has one shift, which goes to the termination state. |
| 77 | The reason for the extra state at the end is to placate the |
| 78 | parser's strategy of making all decisions one token ahead of its |
| 79 | actions. */ |
| 80 | |
| 81 | #ifndef STATE_H_ |
| 82 | # define STATE_H_ |
| 83 | |
| 84 | # include <bitset.h> |
| 85 | |
| 86 | # include "gram.h" |
| 87 | # include "symtab.h" |
| 88 | |
| 89 | |
| 90 | /*-------------------. |
| 91 | | Numbering states. | |
| 92 | `-------------------*/ |
| 93 | |
| 94 | typedef int state_number; |
| 95 | # define STATE_NUMBER_MAXIMUM INT_MAX |
| 96 | |
| 97 | /* Be ready to map a state_number to an int. */ |
| 98 | static inline int |
| 99 | state_number_as_int (state_number s) |
| 100 | { |
| 101 | return s; |
| 102 | } |
| 103 | |
| 104 | |
| 105 | typedef struct state state; |
| 106 | |
| 107 | /*--------------. |
| 108 | | Transitions. | |
| 109 | `--------------*/ |
| 110 | |
| 111 | typedef struct |
| 112 | { |
| 113 | int num; |
| 114 | state *states[1]; |
| 115 | } transitions; |
| 116 | |
| 117 | |
| 118 | /* What is the symbol labelling the transition to |
| 119 | TRANSITIONS->states[Num]? Can be a token (amongst which the error |
| 120 | token), or non terminals in case of gotos. */ |
| 121 | |
| 122 | #define TRANSITION_SYMBOL(Transitions, Num) \ |
| 123 | (Transitions->states[Num]->accessing_symbol) |
| 124 | |
| 125 | /* Is the TRANSITIONS->states[Num] a shift? (as opposed to gotos). */ |
| 126 | |
| 127 | #define TRANSITION_IS_SHIFT(Transitions, Num) \ |
| 128 | (ISTOKEN (TRANSITION_SYMBOL (Transitions, Num))) |
| 129 | |
| 130 | /* Is the TRANSITIONS->states[Num] a goto?. */ |
| 131 | |
| 132 | #define TRANSITION_IS_GOTO(Transitions, Num) \ |
| 133 | (!TRANSITION_IS_SHIFT (Transitions, Num)) |
| 134 | |
| 135 | /* Is the TRANSITIONS->states[Num] labelled by the error token? */ |
| 136 | |
| 137 | #define TRANSITION_IS_ERROR(Transitions, Num) \ |
| 138 | (TRANSITION_SYMBOL (Transitions, Num) == errtoken->number) |
| 139 | |
| 140 | /* When resolving a SR conflicts, if the reduction wins, the shift is |
| 141 | disabled. */ |
| 142 | |
| 143 | #define TRANSITION_DISABLE(Transitions, Num) \ |
| 144 | (Transitions->states[Num] = NULL) |
| 145 | |
| 146 | #define TRANSITION_IS_DISABLED(Transitions, Num) \ |
| 147 | (Transitions->states[Num] == NULL) |
| 148 | |
| 149 | |
| 150 | /* Iterate over each transition over a token (shifts). */ |
| 151 | #define FOR_EACH_SHIFT(Transitions, Iter) \ |
| 152 | for (Iter = 0; \ |
| 153 | Iter < Transitions->num \ |
| 154 | && (TRANSITION_IS_DISABLED (Transitions, Iter) \ |
| 155 | || TRANSITION_IS_SHIFT (Transitions, Iter)); \ |
| 156 | ++Iter) \ |
| 157 | if (!TRANSITION_IS_DISABLED (Transitions, Iter)) |
| 158 | |
| 159 | |
| 160 | /* Return the state such SHIFTS contain a shift/goto to it on SYM. |
| 161 | Abort if none found. */ |
| 162 | struct state *transitions_to (transitions *shifts, symbol_number sym); |
| 163 | |
| 164 | |
| 165 | /*-------. |
| 166 | | Errs. | |
| 167 | `-------*/ |
| 168 | |
| 169 | typedef struct |
| 170 | { |
| 171 | int num; |
| 172 | symbol *symbols[1]; |
| 173 | } errs; |
| 174 | |
| 175 | errs *errs_new (int num, symbol **tokens); |
| 176 | |
| 177 | |
| 178 | /*-------------. |
| 179 | | Reductions. | |
| 180 | `-------------*/ |
| 181 | |
| 182 | typedef struct |
| 183 | { |
| 184 | int num; |
| 185 | bitset *lookahead_tokens; |
| 186 | /* Sorted ascendingly on rule number. */ |
| 187 | rule *rules[1]; |
| 188 | } reductions; |
| 189 | |
| 190 | |
| 191 | |
| 192 | /*---------. |
| 193 | | states. | |
| 194 | `---------*/ |
| 195 | |
| 196 | struct state_list; |
| 197 | |
| 198 | struct state |
| 199 | { |
| 200 | state_number number; |
| 201 | symbol_number accessing_symbol; |
| 202 | transitions *transitions; |
| 203 | reductions *reductions; |
| 204 | errs *errs; |
| 205 | |
| 206 | /* When an includer (such as ielr.c) needs to store states in a list, the |
| 207 | includer can define struct state_list as the list node structure and can |
| 208 | store in this member a reference to the node containing each state. */ |
| 209 | struct state_list *state_list; |
| 210 | |
| 211 | /* If non-zero, then no lookahead sets on reduce actions are needed to |
| 212 | decide what to do in state S. */ |
| 213 | char consistent; |
| 214 | |
| 215 | /* If some conflicts were solved thanks to precedence/associativity, |
| 216 | a human readable description of the resolution. */ |
| 217 | const char *solved_conflicts; |
| 218 | const char *solved_conflicts_xml; |
| 219 | |
| 220 | /* Its items. Must be last, since ITEMS can be arbitrarily large. Sorted |
| 221 | ascendingly on item index in RITEM, which is sorted on rule number. */ |
| 222 | size_t nitems; |
| 223 | item_number items[1]; |
| 224 | }; |
| 225 | |
| 226 | extern state_number nstates; |
| 227 | extern state *final_state; |
| 228 | |
| 229 | /* Create a new state with ACCESSING_SYMBOL for those items. */ |
| 230 | state *state_new (symbol_number accessing_symbol, |
| 231 | size_t core_size, item_number *core); |
| 232 | state *state_new_isocore (state const *s); |
| 233 | |
| 234 | /* Set the transitions of STATE. */ |
| 235 | void state_transitions_set (state *s, int num, state **trans); |
| 236 | |
| 237 | /* Set the reductions of STATE. */ |
| 238 | void state_reductions_set (state *s, int num, rule **reds); |
| 239 | |
| 240 | int state_reduction_find (state *s, rule *r); |
| 241 | |
| 242 | /* Set the errs of STATE. */ |
| 243 | void state_errs_set (state *s, int num, symbol **errors); |
| 244 | |
| 245 | /* Print on OUT all the lookahead tokens such that this STATE wants to |
| 246 | reduce R. */ |
| 247 | void state_rule_lookahead_tokens_print (state *s, rule *r, FILE *out); |
| 248 | void state_rule_lookahead_tokens_print_xml (state *s, rule *r, |
| 249 | FILE *out, int level); |
| 250 | |
| 251 | /* Create/destroy the states hash table. */ |
| 252 | void state_hash_new (void); |
| 253 | void state_hash_free (void); |
| 254 | |
| 255 | /* Find the state associated to the CORE, and return it. If it does |
| 256 | not exist yet, return NULL. */ |
| 257 | state *state_hash_lookup (size_t core_size, item_number *core); |
| 258 | |
| 259 | /* Insert STATE in the state hash table. */ |
| 260 | void state_hash_insert (state *s); |
| 261 | |
| 262 | /* Remove unreachable states, renumber remaining states, update NSTATES, and |
| 263 | write to OLD_TO_NEW a mapping of old state numbers to new state numbers such |
| 264 | that the old value of NSTATES is written as the new state number for removed |
| 265 | states. The size of OLD_TO_NEW must be the old value of NSTATES. */ |
| 266 | void state_remove_unreachable_states (state_number old_to_new[]); |
| 267 | |
| 268 | /* All the states, indexed by the state number. */ |
| 269 | extern state **states; |
| 270 | |
| 271 | /* Free all the states. */ |
| 272 | void states_free (void); |
| 273 | |
| 274 | #endif /* !STATE_H_ */ |