X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/d0fb370f6686f29d90739486c31f060f1999842a..1266b636740f0c6719d8cc11a5d569084fa37009:/src/state.h diff --git a/src/state.h b/src/state.h index 53f9d094..2811eb72 100644 --- a/src/state.h +++ b/src/state.h @@ -1,137 +1,266 @@ -/* Type definitions for nondeterministic finite state machine for bison, - Copyright (C) 1984, 1989 Free Software Foundation, Inc. +/* Type definitions for nondeterministic finite state machine for Bison. -This file is part of Bison, the GNU Compiler Compiler. + Copyright (C) 1984, 1989, 2000, 2001, 2002, 2003, 2004, 2007, 2009 + Free Software Foundation, Inc. -Bison is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. + This file is part of Bison, the GNU Compiler Compiler. -Bison is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. -You should have received a copy of the GNU General Public License -along with Bison; see the file COPYING. If not, write to -the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ /* These type definitions are used to represent a nondeterministic - finite state machine that parses the specified grammar. - This information is generated by the function generate_states - in the file LR0. - -Each state of the machine is described by a set of items -- -particular positions in particular rules -- that are the possible -places where parsing could continue when the machine is in this state. -These symbols at these items are the allowable inputs that can follow now. - -A core represents one state. States are numbered in the number field. -When generate_states is finished, the starting state is state 0 -and nstates is the number of states. (A transition to a state -whose state number is nstates indicates termination.) All the cores -are chained together and first_state points to the first one (state 0). - -For each state there is a particular symbol which must have been the -last thing accepted to reach that state. It is the accessing_symbol -of the core. - -Each core contains a vector of nitems items which are the indices -in the ritems vector of the items that are selected in this state. - -The link field is used for chaining buckets that hash states by -their itemsets. This is for recognizing equivalent states and -combining them when the states are generated. - -The two types of transitions are shifts (push the lookahead token -and read another) and reductions (combine the last n things on the -stack via a rule, replace them with the symbol that the rule derives, -and leave the lookahead token alone). When the states are generated, -these transitions are represented in two other lists. - -Each shifts structure describes the possible shift transitions out -of one state, the state whose number is in the number field. -The shifts structures are linked through next and first_shift points to them. -Each contains a vector of numbers of the states that shift transitions -can go to. The accessing_symbol fields of those states' cores say what kind -of input leads to them. - -A shift to state zero should be ignored. Conflict resolution -deletes shifts by changing them to zero. - -Each reductions structure describes the possible reductions at the state -whose number is in the number field. The data is a list of nreds rules, -represented by their rule numbers. first_reduction points to the list -of these structures. - -Conflict resolution can decide that certain tokens in certain -states should explicitly be errors (for implementing %nonassoc). -For each state, the tokens that are errors for this reason -are recorded in an errs structure, which has the state number -in its number field. The rest of the errs structure is full -of token numbers. - -There is at least one shift transition present in state zero. -It leads to a next-to-final state whose accessing_symbol is -the grammar's start symbol. The next-to-final state has one shift -to the final state, whose accessing_symbol is zero (end of input). -The final state has one shift, which goes to the termination state -(whose number is nstates-1). -The reason for the extra state at the end is to placate the parser's -strategy of making all decisions one token ahead of its actions. */ - - -typedef - struct core - { - struct core *next; - struct core *link; - short number; - short accessing_symbol; - short nitems; - short items[1]; - } - core; - - - -typedef - struct shifts - { - struct shifts *next; - short number; - short nshifts; - short shifts[1]; - } - shifts; - - - -typedef - struct errs - { - short nerrs; - short errs[1]; - } - errs; - - - -typedef - struct reductions - { - struct reductions *next; - short number; - short nreds; - short rules[1]; - } - reductions; - - - -extern int nstates; -extern core *first_state; -extern shifts *first_shift; -extern reductions *first_reduction; + finite state machine that parses the specified grammar. This + information is generated by the function generate_states in the + file LR0. + + Each state of the machine is described by a set of items -- + particular positions in particular rules -- that are the possible + places where parsing could continue when the machine is in this + state. These symbols at these items are the allowable inputs that + can follow now. + + A core represents one state. States are numbered in the NUMBER + field. When generate_states is finished, the starting state is + state 0 and NSTATES is the number of states. (FIXME: This sentence + is no longer true: A transition to a state whose state number is + NSTATES indicates termination.) All the cores are chained together + and FIRST_STATE points to the first one (state 0). + + For each state there is a particular symbol which must have been + the last thing accepted to reach that state. It is the + ACCESSING_SYMBOL of the core. + + Each core contains a vector of NITEMS items which are the indices + in the RITEM vector of the items that are selected in this state. + + The two types of actions are shifts/gotos (push the lookahead token + and read another/goto to the state designated by a nterm) and + reductions (combine the last n things on the stack via a rule, + replace them with the symbol that the rule derives, and leave the + lookahead token alone). When the states are generated, these + actions are represented in two other lists. + + Each transition structure describes the possible transitions out + of one state, the state whose number is in the number field. Each + contains a vector of numbers of the states that transitions can go + to. The accessing_symbol fields of those states' cores say what + kind of input leads to them. + + A transition to state zero should be ignored: conflict resolution + deletes transitions by having them point to zero. + + Each reductions structure describes the possible reductions at the + state whose number is in the number field. rules is an array of + num rules. lookahead_tokens is an array of bitsets, one per rule. + + Conflict resolution can decide that certain tokens in certain + states should explicitly be errors (for implementing %nonassoc). + For each state, the tokens that are errors for this reason are + recorded in an errs structure, which holds the token numbers. + + There is at least one goto transition present in state zero. It + leads to a next-to-final state whose accessing_symbol is the + grammar's start symbol. The next-to-final state has one shift to + the final state, whose accessing_symbol is zero (end of input). + The final state has one shift, which goes to the termination state. + The reason for the extra state at the end is to placate the + parser's strategy of making all decisions one token ahead of its + actions. */ + +#ifndef STATE_H_ +# define STATE_H_ + +# include + +# include "gram.h" +# include "symtab.h" + + +/*-------------------. +| Numbering states. | +`-------------------*/ + +typedef int state_number; +# define STATE_NUMBER_MAXIMUM INT_MAX + +/* Be ready to map a state_number to an int. */ +static inline int +state_number_as_int (state_number s) +{ + return s; +} + + +typedef struct state state; + +/*--------------. +| Transitions. | +`--------------*/ + +typedef struct +{ + int num; + state *states[1]; +} transitions; + + +/* What is the symbol labelling the transition to + TRANSITIONS->states[Num]? Can be a token (amongst which the error + token), or non terminals in case of gotos. */ + +#define TRANSITION_SYMBOL(Transitions, Num) \ + (Transitions->states[Num]->accessing_symbol) + +/* Is the TRANSITIONS->states[Num] a shift? (as opposed to gotos). */ + +#define TRANSITION_IS_SHIFT(Transitions, Num) \ + (ISTOKEN (TRANSITION_SYMBOL (Transitions, Num))) + +/* Is the TRANSITIONS->states[Num] a goto?. */ + +#define TRANSITION_IS_GOTO(Transitions, Num) \ + (!TRANSITION_IS_SHIFT (Transitions, Num)) + +/* Is the TRANSITIONS->states[Num] labelled by the error token? */ + +#define TRANSITION_IS_ERROR(Transitions, Num) \ + (TRANSITION_SYMBOL (Transitions, Num) == errtoken->number) + +/* When resolving a SR conflicts, if the reduction wins, the shift is + disabled. */ + +#define TRANSITION_DISABLE(Transitions, Num) \ + (Transitions->states[Num] = NULL) + +#define TRANSITION_IS_DISABLED(Transitions, Num) \ + (Transitions->states[Num] == NULL) + + +/* Iterate over each transition over a token (shifts). */ +#define FOR_EACH_SHIFT(Transitions, Iter) \ + for (Iter = 0; \ + Iter < Transitions->num \ + && (TRANSITION_IS_DISABLED (Transitions, Iter) \ + || TRANSITION_IS_SHIFT (Transitions, Iter)); \ + ++Iter) \ + if (!TRANSITION_IS_DISABLED (Transitions, Iter)) + + +/* Return the state such SHIFTS contain a shift/goto to it on SYM. + Abort if none found. */ +struct state *transitions_to (transitions *shifts, symbol_number sym); + + +/*-------. +| Errs. | +`-------*/ + +typedef struct +{ + int num; + symbol *symbols[1]; +} errs; + +errs *errs_new (int num, symbol **tokens); + + +/*-------------. +| Reductions. | +`-------------*/ + +typedef struct +{ + int num; + bitset *lookahead_tokens; + /* Sorted ascendingly on rule number. */ + rule *rules[1]; +} reductions; + + + +/*---------. +| states. | +`---------*/ + +struct state +{ + state_number number; + symbol_number accessing_symbol; + transitions *transitions; + reductions *reductions; + errs *errs; + + /* If non-zero, then no lookahead sets on reduce actions are needed to + decide what to do in state S. */ + char consistent; + + /* If some conflicts were solved thanks to precedence/associativity, + a human readable description of the resolution. */ + const char *solved_conflicts; + const char *solved_conflicts_xml; + + /* Its items. Must be last, since ITEMS can be arbitrarily large. Sorted + ascendingly on item index in RITEM, which is sorted on rule number. */ + size_t nitems; + item_number items[1]; +}; + +extern state_number nstates; +extern state *final_state; + +/* Create a new state with ACCESSING_SYMBOL for those items. */ +state *state_new (symbol_number accessing_symbol, + size_t core_size, item_number *core); + +/* Set the transitions of STATE. */ +void state_transitions_set (state *s, int num, state **trans); + +/* Set the reductions of STATE. */ +void state_reductions_set (state *s, int num, rule **reds); + +int state_reduction_find (state *s, rule *r); + +/* Set the errs of STATE. */ +void state_errs_set (state *s, int num, symbol **errors); + +/* Print on OUT all the lookahead tokens such that this STATE wants to + reduce R. */ +void state_rule_lookahead_tokens_print (state *s, rule *r, FILE *out); +void state_rule_lookahead_tokens_print_xml (state *s, rule *r, + FILE *out, int level); + +/* Create/destroy the states hash table. */ +void state_hash_new (void); +void state_hash_free (void); + +/* Find the state associated to the CORE, and return it. If it does + not exist yet, return NULL. */ +state *state_hash_lookup (size_t core_size, item_number *core); + +/* Insert STATE in the state hash table. */ +void state_hash_insert (state *s); + +/* Remove unreachable states, renumber remaining states, update NSTATES, and + write to OLD_TO_NEW a mapping of old state numbers to new state numbers such + that the old value of NSTATES is written as the new state number for removed + states. The size of OLD_TO_NEW must be the old value of NSTATES. */ +void state_remove_unreachable_states (state_number old_to_new[]); + +/* All the states, indexed by the state number. */ +extern state **states; + +/* Free all the states. */ +void states_free (void); + +#endif /* !STATE_H_ */