src/gram.h

   1 /* Data definitions for internal representation of bison's input,
   2    Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22 #ifndef GRAM_H_
  23 # define GRAM_H_
  24
  25 /* Representation of the grammar rules:
  26
  27    NTOKENS is the number of tokens, and NVARS is the number of
  28    variables (nonterminals).  NSYMS is the total number, ntokens +
  29    nvars.
  30
  31    Each symbol (either token or variable) receives a symbol number.
  32    Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1
  33    are for variables.  Symbol number zero is the end-of-input token.
  34    This token is counted in ntokens.  The true number of token values
  35    assigned is NTOKENS reduced by one for each alias declaration.
  36
  37    The rules receive rule numbers 1 to NRULES in the order they are
  38    written.  More precisely Bison augments the grammar with the
  39    initial rule, `$accept: START-SYMBOL $end', which is numbered 1,
  40    all the user rules are 2, 3 etc.  Each time a rule number is
  41    presented to the user, we subtract 1, so *displayed* rule numbers
  42    are 0, 1, 2...
  43
  44    Internally, we cannot use the number 0 for a rule because, for
  45    instance, RITEM stores both symbols (the RHS) and rule numbers: the
  46    symbols are shorts >= 0, and rule numbers are stored negative.
  47    Therefore 0 cannot be used, since it would be both the rule number
  48    0, and the token $end.
  49
  50    Actions are accessed via the rule number.
  51
  52    The rules themselves are described by several arrays: amongst which
  53    RITEM, and RULES.
  54
  55    RULES is an array of rules, whose members are:
  56
  57    RULES[R].lhs -- the symbol of the left hand side of rule R.
  58
  59    RULES[R].rhs -- the index in RITEM of the beginning of the portion
  60    for rule R.
  61
  62    RULES[R].prec -- the symbol providing the precedence level of R.
  63
  64    RULES[R].precsym -- the symbol attached (via %prec) to give its
  65    precedence to R.  Of course, if set, it is equal to `prec', but we
  66    need to distinguish one from the other when reducing: a symbol used
  67    in a %prec is not useless.
  68
  69    RULES[R].assoc -- the associativity of R.
  70
  71    RULES[R].dprec -- the dynamic precedence level of R (for GLR
  72    parsing).
  73
  74    RULES[R].merger -- index of merging function for R (for GLR
  75    parsing).
  76
  77    RULES[R].location -- the location where R was defined.
  78
  79    RULES[R].useful -- true iff the rule is used (i.e., false if thrown
  80    away by reduce).
  81
  82    The right hand side is stored as symbol numbers in a portion of
  83    RITEM.
  84
  85    The length of the portion is one greater than the number of symbols
  86    in the rule's right hand side.  The last element in the portion is
  87    negative: it contains the negation of (R + 1), which identifies it
  88    as the end of a portion and says which rule it is for.
  89
  90    The portions of RITEM come in order of increasing rule number.
  91    NRITEMS is the total length of RITEM.  Each element of RITEM is
  92    called an "item" and its index in RITEM is an item number.
  93
  94    Item numbers are used in the finite state machine to represent
  95    places that parsing can get to.
  96
  97    SYMBOLS[I]->prec records the precedence level of each symbol.
  98
  99    Precedence levels are assigned in increasing order starting with 1
 100    so that numerically higher precedence values mean tighter binding
 101    as they ought to.  Zero as a symbol or rule's precedence means none
 102    is assigned.
 103
 104    Associativities are recorded similarly in SYMBOLS[I]->assoc.  */
 105
 106 # include "location.h"
 107 # include "symtab.h"
 108
 109 # define ISTOKEN(s)     ((s) < ntokens)    /* Symbol number S is a token: it is below NTOKENS.  */
 110 # define ISVAR(s)       ((s) >= ntokens)   /* Symbol number S is a variable: it is NTOKENS or more.  */
 111
 112 extern int nsyms;    /* Total number of symbols: NTOKENS + NVARS.  */
 113 extern int ntokens;  /* Number of tokens; the end-of-input token is counted.  */
 114 extern int nvars;    /* Number of variables (nonterminals).  */
 115
 116 typedef int item_number;      /* An element of RITEM: a symbol number, or an encoded rule number.  */
 117 extern item_number *ritem;    /* The right hand sides of the rules, one portion per rule.  */
 118 extern unsigned int nritems;  /* The total length of RITEM.  */
 119
 120 /* item_number has an odd relationship with symbol_number and
 121    rule_number: values of both of the latter types are stored in an
 122    item_number.  A symbol_number is stored as-is, while a
 123    rule_number is stored as the negation of (rule_number + 1).
 124
 125    Therefore, a symbol_number must be a valid item_number, and we
 126    sometimes have to perform the converse transformation.  */
 127 # define symbol_number_as_item_number(Tok) ((item_number) (Tok))
 128 # define item_number_as_symbol_number(Ite) ((symbol_number) (Ite))
 129
 130 extern symbol_number start_symbol;  /* NOTE(review): presumably the number of the grammar's start symbol -- confirm.  */
 131
 132 /* Rule numbers.  */
 133 typedef short rule_number;
 134 extern rule_number nrules;
 135 # define int_of_rule_number(RNum) ((int) (RNum))
 136 # define rule_number_as_item_number(RNum) ((item_number) (- RNum - 1))
 137 # define item_number_as_rule_number(INum) ((rule_number) (- INum - 1))
 138
 139
 140 /*--------.
 141 | Rules.  |
 142 `--------*/
 143
 144 typedef struct  /* One grammar rule, i.e., an element of RULES.  */
 145 {
 146   /* The number of the rule in the source.  It is usually the index in
 147      RULES too, except if there are useless rules.  */
 148   rule_number user_number;
 149
 150   /* The index in RULES.  Usually the rule number in the source,
 151      except if some rules are useless.  */
 152   rule_number number;
 153
 154   symbol *lhs;         /* The symbol of the left hand side.  */
 155   item_number *rhs;    /* The beginning of this rule's portion of RITEM.  */
 156
 157   /* This symbol provides both the associativity and the precedence.  */
 158   symbol *prec;
 159
 160   short dprec;         /* The dynamic precedence level (for GLR parsing).  */
 161   short merger;        /* The index of the merging function (for GLR parsing).  */
 162
 163   /* This symbol was attached to the rule via %prec.  If set, it is
        equal to `prec'; it is kept apart because a symbol used in a
        %prec is not useless.  */
 164   symbol *precsym;
 165
 166   location location;   /* NOTE(review): presumably where the rule was defined -- confirm.  */
 167   bool useful;         /* True iff the rule is used, i.e., false if thrown away by reduce.  */
 168
 169   const char *action;  /* NOTE(review): presumably the text of the rule's action -- confirm.  */
 170   location action_location;  /* NOTE(review): presumably where that action appears -- confirm.  */
 171 } rule;
 172
 173 extern rule *rules;  /* The array of rules (RULES in the description at the top of this file).  */
 174
 175 /* A function that selects a rule: it is given a rule and returns a
        bool (presumably true when the rule is selected).  */
 176 typedef bool (*rule_filter) (rule *);
 177
 178 /* Return true iff the rule R has a `number' smaller than NRULES.  */
 179 bool rule_useful_p (rule *r);
 180
 181 /* Return true iff the rule R has a `number' higher than NRULES.
        NOTE(review): a `number' equal to NRULES is covered neither by
        this comment nor by that of rule_useful_p -- confirm in the
        definition which of the two predicates accepts it.  */
 182 bool rule_useless_p (rule *r);
 183
 184 /* Return true iff the rule R is not flagged as useful (see its
 185    `useful' member) *and* is nevertheless a useful rule (presumably
        in the sense of rule_useful_p -- confirm).  In other words, it
        was discarded because of conflicts.  */
 186 bool rule_never_reduced_p (rule *r);
 187
 188 /* Print the number and the lhs of the rule R on OUT.  If a
 189    PREVIOUS_LHS was already displayed (by a previous call for another
 190    rule), avoid useless repetitions.  */
 191 void rule_lhs_print (rule *r, symbol *previous_lhs, FILE *out);
 192
 193 /* Return the length of the RHS of the rule R.  */
 194 int rule_rhs_length (rule *r);
 195
 196 /* Print the RHS of the rule R on OUT.  */
 197 void rule_rhs_print (rule *r, FILE *out);
 198
 199 /* Print the rule R on OUT.  */
 200 void rule_print (rule *r, FILE *out);
 201
 202
 203
 204
 205 /* SYMBOLS -- the table of the symbols, indexed by the symbol number.  */
 206 extern symbol **symbols;
 207
 208 /* TOKEN_TRANSLATIONS -- a table indexed by a token number as returned
 209    by the user's yylex routine; it yields the internal token number
 210    used by the parser and throughout bison.

        MAX_USER_TOKEN_NUMBER -- NOTE(review): presumably the largest
        token number that yylex may return, hence the last index of
        the table; confirm in the definition.  */
 211 extern symbol_number *token_translations;
 212 extern int max_user_token_number;
 213
 214
 215
 216 /* GLR_PARSER is nonzero if the input file says to use the GLR
 217    (Generalized LR) parser, and to output some additional
 218    information used by the GLR algorithm.  */
 219
 220 extern int glr_parser;
 221
 222 /* PURE_PARSER is nonzero if Bison should generate a parser that is
 223    all pure and reentrant.  */
 224
 225 extern int pure_parser;
 226
 227 /* Dump RITEM on OUT, for traces.  */
 228 void ritem_print (FILE *out);
 229
 230 /* Return the size of the longest RHS among the rules.  */
 231 size_t ritem_longest_rhs (void);
 232
 233 /* Print on OUT, under TITLE, the grammar's rules that are selected
 234    by FILTER (see rule_filter).  */
 235 void grammar_rules_partial_print (FILE *out, const char *title,
 236                                   rule_filter filter);
 237
 238 /* Print the rules of the grammar on OUT.  */
 239 void grammar_rules_print (FILE *out);
 240
 241 /* Dump the grammar on OUT.  NOTE(review): TITLE presumably labels
        the dump -- confirm in the definition.  */
 242 void grammar_dump (FILE *out, const char *title);
 243
 244 /* Report on STDERR the rules that are not flagged as useful, using
 245    MESSAGE (which can be `useless rule' when invoked after grammar
 246    reduction, or `never reduced' after conflicts were taken into
 247    account).  */
 248 void grammar_rules_never_reduced_report (const char *message);
 249
 250 /* Free the packed grammar.  NOTE(review): presumably this releases
        the tables declared in this header -- confirm in the definition
        which ones.  */
 251 void grammar_free (void);
 252
 253 #endif /* !GRAM_H_ */