X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/c3b407f430a6c4bab6f0ef5160bb0c34290f3abb..a7ee59cfb466c53f7a9bed59df1bb2ee080146b3:/src/gram.h diff --git a/src/gram.h b/src/gram.h index b12a8a91..3b742d56 100644 --- a/src/gram.h +++ b/src/gram.h @@ -1,5 +1,6 @@ -/* Data definitions for internal representation of bison's input, - Copyright 1984, 1986, 1989, 1992, 2001, 2002 +/* Data definitions for internal representation of Bison's input. + + Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -16,8 +17,8 @@ You should have received a copy of the GNU General Public License along with Bison; see the file COPYING. If not, write to - the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ #ifndef GRAM_H_ # define GRAM_H_ @@ -36,41 +37,48 @@ The rules receive rule numbers 1 to NRULES in the order they are written. More precisely Bison augments the grammar with the - initial rule, `$axiom: START-SYMBOL EOF', which is numbered 1, all - the user rules are 2, 3 etc. Each time a rule number is presented - to the user, we subtract 1, so *displayed* rule numbers are 0, 1, - 2... + initial rule, `$accept: START-SYMBOL $end', which is numbered 1, + all the user rules are 2, 3 etc. Each time a rule number is + presented to the user, we subtract 1, so *displayed* rule numbers + are 0, 1, 2... Internally, we cannot use the number 0 for a rule because for instance RITEM stores both symbol (the RHS) and rule numbers: the symbols are shorts >= 0, and rule number are stored negative. Therefore 0 cannot be used, since it would be both the rule number - 0, and the token EOF). + 0, and the token $end). - Actions and guards are accessed via the rule number. + Actions are accessed via the rule number. 
The rules themselves are described by several arrays: amongst which RITEM, and RULES. - RULES is an array of struct rule_s, which members are: + RULES is an array of rules, whose members are: - RULES[R].lhs -- the symbol number of the left hand side of rule R. - If -1, the rule has been thrown out by reduce.c and should be - ignored. + RULES[R].lhs -- the symbol of the left hand side of rule R. RULES[R].rhs -- the index in RITEM of the beginning of the portion for rule R. - RULES[R].prec -- the precedence level of R. + RULES[R].prec -- the symbol providing the precedence level of R. - RULES[R].precsym -- the symbol-number of the symbol in %prec for R - (if any). + RULES[R].precsym -- the symbol attached (via %prec) to give its + precedence to R. Of course, if set, it is equal to `prec', but we + need to distinguish one from the other when reducing: a symbol used + in a %prec is not useless. RULES[R].assoc -- the associativity of R. + RULES[R].dprec -- the dynamic precedence level of R (for GLR + parsing). + + RULES[R].merger -- index of merging function for R (for GLR + parsing). + RULES[R].line -- the line where R was defined. - RULES[R].useful -- TRUE iff the rule is used. + RULES[R].useful -- true iff the rule is used (i.e., false if thrown + away by reduce). The right hand side is stored as symbol numbers in a portion of RITEM. @@ -80,11 +88,9 @@ contains minus R, which identifies it as the end of a portion and says which rule it is for. - The portions of RITEM come in order of increasing rule number and - are followed by an element which is zero to mark the end. nitems - is the total length of ritem, not counting the final zero. Each - element of RITEM is called an "item" and its index in RITEM is an - item number. + The portions of RITEM come in order of increasing rule number. + NRITEMS is the total length of RITEM. Each element of RITEM is + called an "item" and its index in RITEM is an item number. 
Item numbers are used in the finite state machine to represent places that parsing can get to. @@ -98,85 +104,170 @@ Associativities are recorded similarly in SYMBOLS[I]->assoc. */ +# include "location.h" +# include "symtab.h" -#define ISTOKEN(s) ((s) < ntokens) -#define ISVAR(s) ((s) >= ntokens) +# define ISTOKEN(i) ((i) < ntokens) +# define ISVAR(i) ((i) >= ntokens) -extern int nitems; -extern int nrules; extern int nsyms; extern int ntokens; extern int nvars; -extern short *ritem; -extern int nritems; +typedef int item_number; +#define ITEM_NUMBER_MAX INT_MAX +extern item_number *ritem; +extern unsigned int nritems; -extern int start_symbol; +/* There is a weird relationship between item_number on the one hand + and symbol_number and rule_number on the other: we store the latter + two in item_number. symbol_number values are stored as-is, while + for a rule_number the negation of (rule_number + 1) is stored. -/* Associativity values for tokens and rules. */ -typedef enum + Therefore, a symbol_number must be a valid item_number, and we + sometimes have to perform the converse transformation. */ + +static inline item_number +symbol_number_as_item_number (symbol_number sym) { - right_assoc, - left_assoc, - non_assoc -} associativity; + return sym; +} +static inline symbol_number +item_number_as_symbol_number (item_number i) +{ + return i; +} -typedef struct rule_s +static inline bool +item_number_is_symbol_number (item_number i) +{ + return i >= 0; +} + +/* Rule numbers. */ +typedef int rule_number; +#define RULE_NUMBER_MAX INT_MAX +extern rule_number nrules; + +static inline item_number +rule_number_as_item_number (rule_number r) +{ + return -1 - r; +} + +static inline rule_number +item_number_as_rule_number (item_number i) +{ + return -1 - i; +} + +static inline bool +item_number_is_rule_number (item_number i) +{ + return i < 0; +} + +/*--------. +| Rules. | +`--------*/ + +typedef struct { /* The number of the rule in the source. It is usually the index in RULES too, except if there are useless rules. 
*/ - short number; - - short lhs; - short *rhs; - short prec; - short precsym; - associativity assoc; - short line; + rule_number user_number; + + /* The index in RULES. Usually the rule number in the source, + except if some rules are useless. */ + rule_number number; + + symbol *lhs; + item_number *rhs; + + /* This symbol provides both the associativity, and the precedence. */ + symbol *prec; + + int dprec; + int merger; + + /* This symbol was attached to the rule via %prec. */ + symbol *precsym; + + location location; bool useful; const char *action; - short action_line; + location action_location; +} rule; - const char *guard; - short guard_line; -} rule_t; +extern rule *rules; -extern struct rule_s *rules; +/* A function that selects a rule. */ +typedef bool (*rule_filter) (rule *); -/* Table of the symbols, indexed by the symbol number. */ -extern struct bucket **symbols; +/* Return true IFF the rule has a `number' smaller than NRULES. */ +bool rule_useful_p (rule *r); -/* token translation table: indexed by a token number as returned by - the user's yylex routine, it yields the internal token number used - by the parser and throughout bison. */ +/* Return true IFF the rule has a `number' higher than NRULES. */ +bool rule_useless_p (rule *r); -extern short *token_translations; -extern int max_user_token_number; +/* Return true IFF the rule is not flagged as useful *and* is useful. + In other words, it was discarded because of conflicts. */ +bool rule_never_reduced_p (rule *r); + +/* Print this rule's number and lhs on OUT. If a PREVIOUS_LHS was + already displayed (by a previous call for another rule), avoid + useless repetitions. */ +void rule_lhs_print (rule *r, symbol *previous_lhs, FILE *out); -/* SEMANTIC_PARSER is nonzero if the input file says to use the hairy - parser that provides for semantic error recovery. If it is zero, - the yacc-compatible simplified parser is used. */ +/* Return the length of the RHS. 
*/ +int rule_rhs_length (rule *r); -extern int semantic_parser; +/* Print this rule's RHS on OUT. */ +void rule_rhs_print (rule *r, FILE *out); -/* PURE_PARSER is nonzero if should generate a parser that is all pure - and reentrant. */ +/* Print this rule on OUT. */ +void rule_print (rule *r, FILE *out); -extern int pure_parser; -/* ERROR_TOKEN_NUMBER is the token number of the error token. */ -extern int error_token_number; -/* Report the length of the RHS. */ -int rule_rhs_length PARAMS ((rule_t *rule)); +/* Table of the symbols, indexed by the symbol number. */ +extern symbol **symbols; + +/* TOKEN_TRANSLATIONS -- a table indexed by a token number as returned + by the user's yylex routine; it yields the internal token number + used by the parser and throughout bison. */ +extern symbol_number *token_translations; +extern int max_user_token_number; + + /* Dump RITEM for traces. */ -void ritem_print PARAMS ((FILE *out)); +void ritem_print (FILE *out); /* Return the size of the longest rule RHS. */ -size_t ritem_longest_rhs PARAMS ((void)); +size_t ritem_longest_rhs (void); + +/* Print on OUT, under TITLE, the grammar's rules selected by + FILTER. */ +void grammar_rules_partial_print (FILE *out, const char *title, + rule_filter filter); + +/* Print the grammar's rules on OUT. */ +void grammar_rules_print (FILE *out); + +/* Dump the grammar. */ +void grammar_dump (FILE *out, const char *title); + +/* Report on STDERR the rules that are not flagged USEFUL, using the + MESSAGE (which can be `useless rule' when invoked after grammar + reduction, or `never reduced' after conflicts were taken into + account). */ +void grammar_rules_never_reduced_report (const char *message); + +/* Free the packed grammar. */ +void grammar_free (void); #endif /* !GRAM_H_ */