| 1 | /* Data definitions for internal representation of Bison's input. |
| 2 | |
| 3 | Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002, 2003 |
| 4 | Free Software Foundation, Inc. |
| 5 | |
| 6 | This file is part of Bison, the GNU Compiler Compiler. |
| 7 | |
| 8 | Bison is free software; you can redistribute it and/or modify |
| 9 | it under the terms of the GNU General Public License as published by |
| 10 | the Free Software Foundation; either version 2, or (at your option) |
| 11 | any later version. |
| 12 | |
| 13 | Bison is distributed in the hope that it will be useful, |
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | GNU General Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU General Public License |
| 19 | along with Bison; see the file COPYING. If not, write to |
| 20 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 21 | Boston, MA 02111-1307, USA. */ |
| 22 | |
| 23 | #ifndef GRAM_H_ |
| 24 | # define GRAM_H_ |
| 25 | |
| 26 | /* Representation of the grammar rules: |
| 27 | |
| 28 | NTOKENS is the number of tokens, and NVARS is the number of |
| 29 | variables (nonterminals). NSYMS is the total number, ntokens + |
| 30 | nvars. |
| 31 | |
| 32 | Each symbol (either token or variable) receives a symbol number. |
| 33 | Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 |
| 34 | are for variables. Symbol number zero is the end-of-input token. |
| 35 | This token is counted in ntokens. The true number of token values |
| 36 | assigned is NTOKENS reduced by one for each alias declaration. |
| 37 | |
| 38 | The rules receive rule numbers 1 to NRULES in the order they are |
| 39 | written. More precisely Bison augments the grammar with the |
| 40 | initial rule, `$accept: START-SYMBOL $end', which is numbered 1, |
| 41 | all the user rules are 2, 3 etc. Each time a rule number is |
| 42 | presented to the user, we subtract 1, so *displayed* rule numbers |
| 43 | are 0, 1, 2... |
| 44 | |
| 45 | Internally, we cannot use the number 0 for a rule because, for |
| 46 | instance, RITEM stores both symbols (the RHS) and rule numbers: the |
| 47 | symbols are shorts >= 0, and rule numbers are stored negative. |
| 48 | Therefore 0 cannot be used, since it would be both the rule number |
| 49 | 0 and the token $end. |
| 50 | |
| 51 | Actions are accessed via the rule number. |
| 52 | |
| 53 | The rules themselves are described by several arrays: amongst which |
| 54 | RITEM, and RULES. |
| 55 | |
| 56 | RULES is an array of rules, whose members are: |
| 57 | |
| 58 | RULES[R].lhs -- the symbol of the left hand side of rule R. |
| 59 | |
| 60 | RULES[R].rhs -- the index in RITEM of the beginning of the portion |
| 61 | for rule R. |
| 62 | |
| 63 | RULES[R].prec -- the symbol providing the precedence level of R. |
| 64 | |
| 65 | RULES[R].precsym -- the symbol attached (via %prec) to give its |
| 66 | precedence to R. Of course, if set, it is equal to `prec', but we |
| 67 | need to distinguish one from the other when reducing: a symbol used |
| 68 | in a %prec is not useless. |
| 69 | |
| 70 | RULES[R].assoc -- the associativity of R. |
| 71 | |
| 72 | RULES[R].dprec -- the dynamic precedence level of R (for GLR |
| 73 | parsing). |
| 74 | |
| 75 | RULES[R].merger -- index of merging function for R (for GLR |
| 76 | parsing). |
| 77 | |
| 78 | RULES[R].location -- the location where R was defined. |
| 79 | |
| 80 | RULES[R].useful -- true iff the rule is used (i.e., false if thrown |
| 81 | away by reduce). |
| 82 | |
| 83 | The right hand side is stored as symbol numbers in a portion of |
| 84 | RITEM. |
| 85 | |
| 86 | The length of the portion is one greater than the number of symbols |
| 87 | in the rule's right hand side. The last element in the portion |
| 88 | contains R encoded as a negative item number (-1 - R, see |
| 89 | rule_number_as_item_number), which identifies it as the end of a portion and says which rule it is for. |
| 90 | |
| 91 | The portions of RITEM come in order of increasing rule number. |
| 92 | NRITEMS is the total length of RITEM. Each element of RITEM is |
| 93 | called an "item" and its index in RITEM is an item number. |
| 94 | |
| 95 | Item numbers are used in the finite state machine to represent |
| 96 | places that parsing can get to. |
| 97 | |
| 98 | SYMBOLS[I]->prec records the precedence level of each symbol. |
| 99 | |
| 100 | Precedence levels are assigned in increasing order starting with 1 |
| 101 | so that numerically higher precedence values mean tighter binding |
| 102 | as they ought to. Zero as a symbol or rule's precedence means none |
| 103 | is assigned. |
| 104 | |
| 105 | Associativities are recorded similarly in SYMBOLS[I]->assoc. */ |
| 106 | |
| 107 | # include "location.h" |
| 108 | # include "symtab.h" |
| 109 | |
/* Symbol-class predicates on a symbol number I.  Token numbers come
   first (0 to ntokens - 1); every number from ntokens upwards denotes
   a variable (nonterminal), so ISVAR is the exact negation of
   ISTOKEN.  */
# define ISTOKEN(i)	((i) < ntokens)
# define ISVAR(i)	(!ISTOKEN (i))
| 112 | |
/* Symbol counts: NTOKENS is the number of tokens, NVARS the number of
   variables (nonterminals), and NSYMS the total, ntokens + nvars.  */
| 113 | extern int nsyms; |
| 114 | extern int ntokens; |
| 115 | extern int nvars; |
| 116 | |
/* item_number is the element type of RITEM; an element's index in
   RITEM is its item number.  NRITEMS is the total length of RITEM.  */
| 117 | typedef int item_number; |
| 118 | extern item_number *ritem; |
| 119 | extern unsigned int nritems; |
| 120 | |
| 121 | /* There is weird relationship between OT1H item_number and OTOH |
| 122 | symbol_number and rule_number: we store the latter in |
| 123 | item_number. symbol_number values are stored as-is, while |
| 124 | the negation of (rule_number + 1) is stored. |
| 125 | |
| 126 | Therefore, a symbol_number must be a valid item_number, and we |
| 127 | sometimes have to perform the converse transformation. */ |
| 128 | |
| 129 | static inline item_number |
| 130 | symbol_number_as_item_number (symbol_number sym) |
| 131 | { |
| 132 | return sym; |
| 133 | } |
| 134 | |
| 135 | static inline symbol_number |
| 136 | item_number_as_symbol_number (item_number i) |
| 137 | { |
| 138 | return i; |
| 139 | } |
| 140 | |
| 141 | /* Rule numbers. */ |
| 142 | typedef short rule_number; |
| 143 | extern rule_number nrules; |
| 144 | |
| 145 | static inline item_number |
| 146 | rule_number_as_item_number (rule_number r) |
| 147 | { |
| 148 | return -1 - r; |
| 149 | } |
| 150 | |
| 151 | static inline rule_number |
| 152 | item_number_as_rule_number (item_number i) |
| 153 | { |
| 154 | return -1 - i; |
| 155 | } |
| 156 | |
| 157 | |
| 158 | /*--------. |
| 159 | | Rules. | |
| 160 | `--------*/ |
| 161 | |
/* Description of one grammar rule; RULES is an array of these.  */
| 162 | typedef struct |
| 163 | { |
| 164 | /* The number of the rule in the source. It is usually the index in |
| 165 | RULES too, except if there are useless rules. */ |
| 166 | rule_number user_number; |
| 167 | |
| 168 | /* The index in RULES. Usually the rule number in the source, |
| 169 | except if some rules are useless. */ |
| 170 | rule_number number; |
| 171 | |
/* LHS is the symbol of the left hand side of the rule.  RHS
   designates the beginning of this rule's portion of RITEM (held
   here as a pointer to item_number).  */
| 172 | symbol *lhs; |
| 173 | item_number *rhs; |
| 174 | |
| 175 | /* This symbol provides both the associativity, and the precedence. */ |
| 176 | symbol *prec; |
| 177 | |
/* Dynamic precedence level of the rule, and index of its merging
   function; both are for GLR parsing.  */
| 178 | short dprec; |
| 179 | short merger; |
| 180 | |
| 181 | /* This symbol was attached to the rule via %prec. Of course, if set, |
   it is equal to PREC, but the two are kept apart because a symbol
   used in a %prec is not useless. */ |
| 182 | symbol *precsym; |
| 183 | |
/* LOCATION is where the rule was defined.  USEFUL is true iff the
   rule is used (i.e., false if thrown away by reduce).  */
| 184 | location location; |
| 185 | bool useful; |
| 186 | |
/* The action attached to the rule.  NOTE(review): ACTION_LOCATION is
   presumably where that action appears in the source -- confirm
   against the reader.  */
| 187 | const char *action; |
| 188 | location action_location; |
| 189 | } rule; |
| 190 | |
/* The array of rules (RULES in the overview at the top of this
   file).  */
| 191 | extern rule *rules; |
| 192 | |
| 193 | /* A function that selects a rule: a predicate taking a rule and |
   returning bool.  rule_useful_p, rule_useless_p and
   rule_never_reduced_p all have this signature. */ |
| 194 | typedef bool (*rule_filter) (rule *); |
| 195 | |
| 196 | /* Return true IFF the rule R has a `number' smaller than NRULES. */ |
| 197 | bool rule_useful_p (rule *r); |
| 198 | |
| 199 | /* Return true IFF the rule R has a `number' higher than NRULES. */ |
| 200 | bool rule_useless_p (rule *r); |
| 201 | |
| 202 | /* Return true IFF the rule R is not flagged as useful *and* is useful. |
| 203 | In other words, it was discarded because of conflicts. */ |
| 204 | bool rule_never_reduced_p (rule *r); |
| 205 | |
| 206 | /* Print the number and lhs of rule R on OUT. If a PREVIOUS_LHS was |
| 207 | already displayed (by a previous call for another rule), avoid |
| 208 | useless repetitions. */ |
| 209 | void rule_lhs_print (rule *r, symbol *previous_lhs, FILE *out); |
| 210 | |
| 211 | /* Return the length of the RHS of rule R. */ |
| 212 | int rule_rhs_length (rule *r); |
| 213 | |
| 214 | /* Print the RHS of rule R on OUT. */ |
| 215 | void rule_rhs_print (rule *r, FILE *out); |
| 216 | |
| 217 | /* Print rule R on OUT. */ |
| 218 | void rule_print (rule *r, FILE *out); |
| 219 | |
| 220 | |
| 221 | |
| 222 | |
| 223 | /* Table of the symbols, indexed by the symbol number. */ |
| 224 | extern symbol **symbols; |
| 225 | |
| 226 | /* TOKEN_TRANSLATIONS -- a table indexed by a token number as returned |
| 227 | by the user's yylex routine, it yields the internal token number |
| 228 | used by the parser and throughout bison. */ |
| 229 | extern symbol_number *token_translations; |
/* NOTE(review): presumably the largest token number the user's yylex
   may return, i.e., the last valid index of TOKEN_TRANSLATIONS --
   confirm against the definition.  */
| 230 | extern int max_user_token_number; |
| 231 | |
| 232 | |
| 233 | |
| 234 | /* GLR_PARSER is nonzero if the input file says to use the GLR |
| 235 | (Generalized LR) parser, and to output some additional |
| 236 | information used by the GLR algorithm. */ |
| 237 | |
| 238 | extern int glr_parser; |
| 239 | |
| 240 | /* PURE_PARSER is nonzero if Bison should generate a parser that is |
| 241 | all pure and reentrant. */ |
| 242 | |
| 243 | extern int pure_parser; |
| 244 | |
| 245 | /* Dump RITEM on OUT, for traces. */ |
| 246 | void ritem_print (FILE *out); |
| 247 | |
| 248 | /* Return the size of the longest rule RHS. */ |
| 249 | size_t ritem_longest_rhs (void); |
| 250 | |
| 251 | /* Print on OUT, under TITLE, the grammar's rules selected by FILTER. |
| 252 | NOTE(review): presumably the rules for which FILTER returns true. */ |
| 253 | void grammar_rules_partial_print (FILE *out, const char *title, |
| 254 | rule_filter filter); |
| 255 | |
| 256 | /* Print the grammar's rules on OUT. */ |
| 257 | void grammar_rules_print (FILE *out); |
| 258 | |
| 259 | /* Dump the grammar on OUT. NOTE(review): TITLE presumably labels the |
   dump -- confirm in the definition. */ |
| 260 | void grammar_dump (FILE *out, const char *title); |
| 261 | |
| 262 | /* Report on STDERR the rules that are not flagged USEFUL, using the |
| 263 | MESSAGE (which can be `useless rule' when invoked after grammar |
| 264 | reduction, or `never reduced' after conflicts were taken into |
| 265 | account). */ |
| 266 | void grammar_rules_never_reduced_report (const char *message); |
| 267 | |
| 268 | /* Free the packed grammar. */ |
| 269 | void grammar_free (void); |
| 270 | |
| 271 | #endif /* !GRAM_H_ */ |