X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/0e78e603f696b4b820231b56fdb21c2a173e6bd3..d1a1114f7f257f89887ea5825ba0d896dfaa747b:/src/gram.h diff --git a/src/gram.h b/src/gram.h index b8d1c93b..de782f7d 100644 --- a/src/gram.h +++ b/src/gram.h @@ -1,5 +1,6 @@ /* Data definitions for internal representation of bison's input, - Copyright 1984, 1986, 1989, 1992, 2001 Free Software Foundation, Inc. + Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002 + Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -21,45 +22,62 @@ #ifndef GRAM_H_ # define GRAM_H_ -/* representation of the grammar rules: +/* Representation of the grammar rules: - ntokens is the number of tokens, and nvars is the number of - variables (nonterminals). nsyms is the total number, ntokens + + NTOKENS is the number of tokens, and NVARS is the number of + variables (nonterminals). NSYMS is the total number, ntokens + nvars. - (the true number of token values assigned is ntokens reduced by one - for each alias declaration) - Each symbol (either token or variable) receives a symbol number. - Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are - for variables. Symbol number zero is the end-of-input token. This - token is counted in ntokens. - - The rules receive rule numbers 1 to nrules in the order they are - written. Actions and guards are accessed via the rule number. + Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 + are for variables. Symbol number zero is the end-of-input token. + This token is counted in ntokens. The true number of token values + assigned is NTOKENS reduced by one for each alias declaration. + + The rules receive rule numbers 1 to NRULES in the order they are + written. More precisely Bison augments the grammar with the + initial rule, `$accept: START-SYMBOL $end', which is numbered 1, + all the user rules are 2, 3 etc. Each time a rule number is + presented to the user, we subtract 1, so *displayed* rule numbers + are 0, 1, 2... + + Internally, we cannot use the number 0 for a rule because for + instance RITEM stores both symbol (the RHS) and rule numbers: the + symbols are shorts >= 0, and rule number are stored negative. + Therefore 0 cannot be used, since it would be both the rule number + 0, and the token $end). + + Actions are accessed via the rule number. The rules themselves are described by several arrays: amongst which - RITEM, and RULE_TABLE. + RITEM, and RULES. + + RULES is an array of struct rule_s, which members are: - RULE_TABLE is an array of struct rule_s, which members are: + RULES[R].lhs -- the symbol of the left hand side of rule R. - RULE_TABLE[R].lhs -- the symbol number of the left hand side of - rule R. If -1, the rule has been thrown out by reduce.c and should - be ignored. + RULES[R].rhs -- the index in RITEM of the beginning of the portion + for rule R. - RULE_TABLE[R].rhs -- the index in RITEM of the beginning of the - portion for rule R. + RULES[R].prec -- the symbol providing the precedence level of R. - RULE_TABLE[R].prec -- the precedence level of R. + RULES[R].precsym -- the symbol attached (via %prec) to give its + precedence to R. Of course, if set, it is equal to `prec', but we + need to distinguish one from the other when reducing: a symbol used + in a %prec is not useless. - RULE_TABLE[R].precsym -- the symbol-number of the symbol in %prec - for R (if any). + RULES[R].assoc -- the associativity of R. - RULE_TABLE[R].assoc -- the associativity of R. + RULES[R].dprec -- the dynamic precedence level of R (for GLR + parsing). - RULE_TABLE[R].line -- the line where R was defined. + RULES[R].merger -- index of merging function for R (for GLR + parsing). - RULE_TABLE[R].useful -- TRUE iff the rule is used. + RULES[R].line -- the line where R was defined. + + RULES[R].useful -- TRUE iff the rule is used (i.e., FALSE if thrown + away by reduce). The right hand side is stored as symbol numbers in a portion of RITEM. @@ -69,100 +87,170 @@ contains minus R, which identifies it as the end of a portion and says which rule it is for. - The portions of RITEM come in order of increasing rule number and - are followed by an element which is zero to mark the end. nitems - is the total length of ritem, not counting the final zero. Each - element of ritem is called an "item" and its index in ritem is an - item number. + The portions of RITEM come in order of increasing rule number. + NRITEMS is the total length of RITEM. Each element of RITEM is + called an "item" and its index in RITEM is an item number. Item numbers are used in the finite state machine to represent places that parsing can get to. - SPREC records the precedence level of each symbol. + SYMBOLS[I]->prec records the precedence level of each symbol. Precedence levels are assigned in increasing order starting with 1 so that numerically higher precedence values mean tighter binding as they ought to. Zero as a symbol or rule's precedence means none is assigned. - Associativities are recorded similarly in rassoc and sassoc. */ + Associativities are recorded similarly in SYMBOLS[I]->assoc. */ +# include "location.h" +# include "symtab.h" -#define ISTOKEN(s) ((s) < ntokens) -#define ISVAR(s) ((s) >= ntokens) +# define ISTOKEN(s) ((s) < ntokens) +# define ISVAR(s) ((s) >= ntokens) -extern int nitems; -extern int nrules; extern int nsyms; extern int ntokens; extern int nvars; -extern short *ritem; -extern int nritems; +typedef int item_number_t; +# define ITEM_NUMBER_MAX ((item_number_t) INT_MAX) +# define ITEM_NUMBER_MIN ((item_number_t) MIN_MAX) +extern item_number_t *ritem; +extern unsigned int nritems; -extern short *sprec; -extern short *sassoc; +/* There is weird relationship between OT1H item_number_t and OTOH + symbol_number_t and rule_number_t: we store the latter in + item_number_t. symbol_number_t are stored as are, while + the negation of (rule_number_t + 1) are stored. -extern int start_symbol; + Therefore, an symbol_number_t must be a valid item_number_t, and we + sometimes have to perform the converse transformation. */ +# define symbol_number_as_item_number(Tok) ((item_number_t) (Tok)) +# define item_number_as_symbol_number(Ite) ((symbol_number_t) (Ite)) -/* associativity values in elements of rassoc, sassoc. */ -typedef enum -{ - right_assoc, - left_assoc, - non_assoc -} associativity; +extern symbol_number_t start_symbol; +/* Rules numbers. */ +typedef short rule_number_t; +# define RULE_NUMBER_MAX ((rule_number_t) SHRT_MAX) +extern rule_number_t nrules; +# define int_of_rule_number(RNum) ((int) (RNum)) +# define rule_number_as_item_number(RNum) ((item_number_t) (- RNum - 1)) +# define item_number_as_rule_number(INum) ((rule_number_t) (- INum - 1)) + + +/*--------. +| Rules. | +`--------*/ typedef struct rule_s { - short lhs; - short rhs; - short prec; - short precsym; - short assoc; - short line; + /* The number of the rule in the source. It is usually the index in + RULES too, except if there are useless rules. */ + rule_number_t user_number; + + /* The index in RULES. Usually the rule number in the source, + except if some rules are useless. */ + rule_number_t number; + + symbol_t *lhs; + item_number_t *rhs; + + /* This symbol provides both the associativity, and the precedence. */ + symbol_t *prec; + + short dprec; + short merger; + + /* This symbol was attached to the rule via %prec. */ + symbol_t *precsym; + + location_t location; bool useful; const char *action; - short action_line; - - const char *guard; - short guard_line; + location_t action_location; } rule_t; -extern struct rule_s *rule_table; +extern struct rule_s *rules; -/* Table of the symbols, indexed by the symbol number. */ -extern struct bucket **symbols; +/* A function that selects a rule. */ +typedef bool (*rule_filter_t) PARAMS ((rule_t *r)); + +/* Return true IFF the rule has a `number' smaller than NRULES. */ +bool rule_useful_p PARAMS ((rule_t *r)); + +/* Return true IFF the rule has a `number' higher than NRULES. */ +bool rule_useless_p PARAMS ((rule_t *r)); + +/* Return true IFF the rule is not flagged as useful *and* is useful. + In other words, it was discarded because of conflicts. */ +bool rule_never_reduced_p PARAMS ((rule_t *r)); -/* token translation table: indexed by a token number as returned by - the user's yylex routine, it yields the internal token number used - by the parser and throughout bison. */ +/* Print this RULE's number and lhs on OUT. If a PREVIOUS_LHS was + already displayed (by a previous call for another rule), avoid + useless repetitions. */ +void rule_lhs_print PARAMS ((rule_t *rule, symbol_t *previous_lhs, FILE *out)); -extern short *token_translations; +/* Return the length of the RHS. */ +int rule_rhs_length PARAMS ((rule_t *rule)); + +/* Print this RULE's RHS on OUT. */ +void rule_rhs_print PARAMS ((rule_t *rule, FILE *out)); + +/* Print this RULE on OUT. */ +void rule_print PARAMS ((rule_t *rule, FILE *out)); + + + + +/* Table of the symbols, indexed by the symbol number. */ +extern symbol_t **symbols; + +/* TOKEN_TRANSLATION -- a table indexed by a token number as returned + by the user's yylex routine, it yields the internal token number + used by the parser and throughout bison. */ +extern symbol_number_t *token_translations; extern int max_user_token_number; -/* SEMANTIC_PARSER is nonzero if the input file says to use the hairy - parser that provides for semantic error recovery. If it is zero, - the yacc-compatible simplified parser is used. */ -extern int semantic_parser; + +/* GLR_PARSER is nonzero if the input file says to use the GLR + (Generalized LR) parser, and to output some additional + information used by the GLR algorithm. */ + +extern int glr_parser; /* PURE_PARSER is nonzero if should generate a parser that is all pure and reentrant. */ extern int pure_parser; -/* ERROR_TOKEN_NUMBER is the token number of the error token. */ - -extern int error_token_number; - - /* Dump RITEM for traces. */ void ritem_print PARAMS ((FILE *out)); /* Return the size of the longest rule RHS. */ size_t ritem_longest_rhs PARAMS ((void)); +/* Print the grammar's rules numbers from BEGIN (inclusive) to END + (exclusive) on OUT under TITLE. */ +void grammar_rules_partial_print PARAMS ((FILE *out, const char *title, + rule_filter_t filter)); + +/* Print the grammar's rules on OUT. */ +void grammar_rules_print PARAMS ((FILE *out)); + +/* Dump the grammar. */ +void grammar_dump PARAMS ((FILE *out, const char *title)); + +/* Report on STDERR the rules that are not flagged USEFUL, using the + MESSAGE (which can be `useless rule' when invoked after grammar + reduction, or `never reduced' after conflicts were taken into + account). */ +void grammar_rules_never_reduced_report PARAMS ((const char *message)); + +/* Free the packed grammar. */ +void grammar_free PARAMS ((void)); + #endif /* !GRAM_H_ */