X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/9f690211058a7f67031248302f1996d96bba2087..cae601224e356773fcd101dce0677f558e7c4357:/src/gram.h diff --git a/src/gram.h b/src/gram.h index 080ce0d9..75f4653e 100644 --- a/src/gram.h +++ b/src/gram.h @@ -1,78 +1,106 @@ /* Data definitions for internal representation of bison's input, - Copyright (C) 1984, 1986, 1989, 1992 Free Software Foundation, Inc. + Copyright 1984, 1986, 1989, 1992, 2001 Free Software Foundation, Inc. -This file is part of Bison, the GNU Compiler Compiler. + This file is part of Bison, the GNU Compiler Compiler. -Bison is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. + Bison is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. -Bison is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + Bison is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with Bison; see the file COPYING. If not, write to -the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + You should have received a copy of the GNU General Public License + along with Bison; see the file COPYING. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ +#ifndef GRAM_H_ +# define GRAM_H_ -/* representation of the grammar rules: +/* Representation of the grammar rules: -ntokens is the number of tokens, and nvars is the number of variables -(nonterminals). nsyms is the total number, ntokens + nvars. + NTOKENS is the number of tokens, and NVARS is the number of + variables (nonterminals). NSYMS is the total number, NTOKENS + + NVARS. - (the true number of token values assigned is ntokens - reduced by one for each alias declaration) + Each symbol (either token or variable) receives a symbol number. + Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 + are for variables. Symbol number zero is the end-of-input token. + This token is counted in NTOKENS. The true number of token values + assigned is NTOKENS reduced by one for each alias declaration. -Each symbol (either token or variable) receives a symbol number. -Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are for -variables. Symbol number zero is the end-of-input token. This token -is counted in ntokens. + The rules receive rule numbers 1 to NRULES in the order they are + written. More precisely, Bison augments the grammar with the + initial rule, `$axiom: START-SYMBOL EOF', which is numbered 1; all + the user rules are 2, 3, etc. Each time a rule number is presented + to the user, we subtract 1, so *displayed* rule numbers are 0, 1, + 2... -The rules receive rule numbers 1 to nrules in the order they are written. -Actions and guards are accessed via the rule number. + Internally, we cannot use the number 0 for a rule because, for + instance, RITEM stores both symbol (the RHS) and rule numbers: the + symbols are shorts >= 0, and rule numbers are stored negative. + Therefore 0 cannot be used, since it would be both the rule number + 0 and the token EOF. -The rules themselves are described by three arrays: rrhs, rlhs and -ritem. rlhs[R] is the symbol number of the left hand side of rule R. 
-The right hand side is stored as symbol numbers in a portion of -ritem. rrhs[R] contains the index in ritem of the beginning of the -portion for rule R. + Actions and guards are accessed via the rule number. -If rlhs[R] is -1, the rule has been thrown out by reduce.c -and should be ignored. + The rules themselves are described by several arrays, amongst which + are RITEM and RULES. -The length of the portion is one greater - than the number of symbols in the rule's right hand side. -The last element in the portion contains minus R, which -identifies it as the end of a portion and says which rule it is for. + RULES is an array of struct rule_s, whose members are: -The portions of ritem come in order of increasing rule number and are -followed by an element which is zero to mark the end. nitems is the -total length of ritem, not counting the final zero. Each element of -ritem is called an "item" and its index in ritem is an item number. + RULES[R].lhs -- the symbol number of the left hand side of rule R. + If -1, the rule has been thrown out by reduce.c and should be + ignored. -Item numbers are used in the finite state machine to represent -places that parsing can get to. + RULES[R].rhs -- the index in RITEM of the beginning of the portion + for rule R. -Precedence levels are recorded in the vectors sprec and rprec. -sprec records the precedence level of each symbol, -rprec the precedence level of each rule. -rprecsym is the symbol-number of the symbol in %prec for this rule (if any). + RULES[R].prec -- the precedence level of R. -Precedence levels are assigned in increasing order starting with 1 so -that numerically higher precedence values mean tighter binding as they -ought to. Zero as a symbol or rule's precedence means none is -assigned. + RULES[R].precsym -- the symbol-number of the symbol in %prec for R + (if any). -Associativities are recorded similarly in rassoc and sassoc. */ + RULES[R].assoc -- the associativity of R. 
+ + RULES[R].line -- the line where R was defined. + + RULES[R].useful -- TRUE iff the rule is used. + + The right hand side is stored as symbol numbers in a portion of + RITEM. + + The length of the portion is one greater than the number of symbols + in the rule's right hand side. The last element in the portion + contains minus R, which identifies it as the end of a portion and + says which rule it is for. + + The portions of RITEM come in order of increasing rule number and + are followed by an element which is zero to mark the end. nitems + is the total length of ritem, not counting the final zero. Each + element of RITEM is called an "item" and its index in RITEM is an + item number. + + Item numbers are used in the finite state machine to represent + places that parsing can get to. + + SYMBOLS[I]->prec records the precedence level of each symbol. + + Precedence levels are assigned in increasing order starting with 1 + so that numerically higher precedence values mean tighter binding + as they ought to. Zero as a symbol or rule's precedence means none + is assigned. + + Associativities are recorded similarly in SYMBOLS[I]->assoc. */ #define ISTOKEN(s) ((s) < ntokens) #define ISVAR(s) ((s) >= ntokens) - extern int nitems; extern int nrules; extern int nsyms; @@ -80,46 +108,68 @@ extern int ntokens; extern int nvars; extern short *ritem; -extern short *rlhs; -extern short *rrhs; -extern short *rprec; -extern short *rprecsym; -extern short *sprec; -extern short *rassoc; -extern short *sassoc; -extern short *rline; /* Source line number of each rule */ +extern int nritems; extern int start_symbol; +/* Associativity values for tokens and rules. 
*/ +typedef enum +{ + right_assoc, + left_assoc, + non_assoc +} associativity; + + +typedef struct rule_s +{ + short lhs; + short rhs; + short prec; + short precsym; + associativity assoc; + short line; + bool useful; + + const char *action; + short action_line; + + const char *guard; + short guard_line; +} rule_t; -/* associativity values in elements of rassoc, sassoc. */ +extern struct rule_s *rules; -#define RIGHT_ASSOC 1 -#define LEFT_ASSOC 2 -#define NON_ASSOC 3 +/* Table of the symbols, indexed by the symbol number. */ +extern struct bucket **symbols; -/* token translation table: -indexed by a token number as returned by the user's yylex routine, -it yields the internal token number used by the parser and throughout bison. -If translations is zero, the translation table is not used because -the two kinds of token numbers are the same. -(It is noted in reader.c that "Nowadays translations is always set to 1...") -*/ +/* Token translation table: indexed by a token number as returned by + the user's yylex routine, it yields the internal token number used + by the parser and throughout bison. */ extern short *token_translations; -extern int translations; extern int max_user_token_number; -/* semantic_parser is nonzero if the input file says to use the hairy parser -that provides for semantic error recovery. If it is zero, the yacc-compatible -simplified parser is used. */ +/* SEMANTIC_PARSER is nonzero if the input file says to use the hairy + parser that provides for semantic error recovery. If it is zero, + the yacc-compatible simplified parser is used. */ extern int semantic_parser; -/* pure_parser is nonzero if should generate a parser that is all pure and reentrant. */ +/* PURE_PARSER is nonzero if we should generate a parser that is all pure + and reentrant. */ extern int pure_parser; -/* error_token_number is the token number of the error token. */ +/* ERROR_TOKEN_NUMBER is the token number of the error token. 
*/ extern int error_token_number; + + +/* Dump RITEM for traces. */ +void ritem_print PARAMS ((FILE *out)); + +/* Return the size of the longest rule RHS. */ +size_t ritem_longest_rhs PARAMS ((void)); + +#endif /* !GRAM_H_ */