Let symbols have a location.

[bison.git] / src / gram.h
diff --git a/src/gram.h b/src/gram.h

index c12698b6fe5ae321c2b1db098c12f69329b1268a..ff43ab8f4063c7d02c5d2e0e1933c9001c6219b2 100644 (file)
--- a/src/gram.h
+++ b/src/gram.h
@@ -1,5 +1,6 @@
  /* Data definitions for internal representation of bison's input,
-   Copyright 1984, 1986, 1989, 1992, 2001  Free Software Foundation, Inc.
+   Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002
+   Free Software Foundation, Inc.
  
     This file is part of Bison, the GNU Compiler Compiler.
  
@@ -21,41 +22,56 @@
  #ifndef GRAM_H_
  # define GRAM_H_
  
-/* representation of the grammar rules:
+/* Representation of the grammar rules:
  
-   ntokens is the number of tokens, and nvars is the number of
-   variables (nonterminals).  nsyms is the total number, ntokens +
+   NTOKENS is the number of tokens, and NVARS is the number of
+   variables (nonterminals).  NSYMS is the total number, ntokens +
     nvars.
  
-   (the true number of token values assigned is ntokens reduced by one
-   for each alias declaration)
-
     Each symbol (either token or variable) receives a symbol number.
-   Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are
-   for variables.  Symbol number zero is the end-of-input token.  This
-   token is counted in ntokens.
-
-   The rules receive rule numbers 1 to nrules in the order they are
-   written.  Actions and guards are accessed via the rule number.
+   Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1
+   are for variables.  Symbol number zero is the end-of-input token.
+   This token is counted in NTOKENS.  The true number of token values
+   assigned is NTOKENS reduced by one for each alias declaration.
+
+   The rules receive rule numbers 1 to NRULES in the order they are
+   written.  More precisely, Bison augments the grammar with the
+   initial rule, `$axiom: START-SYMBOL EOF', which is numbered 1; all
+   the user rules are 2, 3, etc.  Each time a rule number is presented
+   to the user, we subtract 1, so *displayed* rule numbers are 0, 1,
+   2...
+
+   Internally, we cannot use the number 0 for a rule because, for
+   instance, RITEM stores both symbol (the RHS) and rule numbers: the
+   symbols are shorts >= 0, and rule numbers are stored negative.
+   Therefore 0 cannot be used, since it would be both the rule number
+   0 and the token EOF.
+
+   Actions are accessed via the rule number.
  
     The rules themselves are described by several arrays: amongst which
-   RITEM, and RULE_TABLE.
+   RITEM, and RULES.
+
+   RULES is an array of struct rule_s, whose members are:
+
+   RULES[R].lhs -- the symbol of the left hand side of rule R.
  
-   RULE_TABLE is an array of struct rule_s, which members are:
+   RULES[R].rhs -- the index in RITEM of the beginning of the portion
+   for rule R.
  
-   RULE_TABLE[R].lhs -- the symbol number of the left hand side of
-   rule R.  If -1, the rule has been thrown out by reduce.c and should
-   be ignored.
+   RULES[R].prec -- the symbol providing the precedence level of R.
  
-   RULE_TABLE[R].rhs -- the index in RITEM of the beginning of the
-   portion for rule R.
+   RULES[R].precsym -- the symbol attached (via %prec) to give its
+   precedence to R.  Of course, if set, it is equal to `prec', but we
+   need to distinguish one from the other when reducing: a symbol used
+   in a %prec is not useless.
  
-   RULE_TABLE[R].prec -- the precedence level of R.
+   RULES[R].assoc -- the associativity of R.
  
-   RULE_TABLE[R].precsym -- the symbol-number of the symbol in %prec
-   for R (if any).
+   RULES[R].location -- the location where R was defined.
  
-   RULE_TABLE[R].assoc -- the associativity of the rule.
+   RULES[R].useful -- TRUE iff the rule is used (i.e., FALSE if thrown
+   away by reduce).
  
     The right hand side is stored as symbol numbers in a portion of
     RITEM.
@@ -65,81 +81,106 @@
     contains minus R, which identifies it as the end of a portion and
     says which rule it is for.
  
-   The portions of RITEM come in order of increasing rule number and
-   are followed by an element which is zero to mark the end.  nitems
-   is the total length of ritem, not counting the final zero.  Each
-   element of ritem is called an "item" and its index in ritem is an
-   item number.
+   The portions of RITEM come in order of increasing rule number.
+   NRITEMS is the total length of RITEM.  Each element of RITEM is
+   called an "item" and its index in RITEM is an item number.
  
     Item numbers are used in the finite state machine to represent
     places that parsing can get to.
  
-   SPREC records the precedence level of each symbol.
+   SYMBOLS[I]->prec records the precedence level of each symbol.
  
     Precedence levels are assigned in increasing order starting with 1
     so that numerically higher precedence values mean tighter binding
     as they ought to.  Zero as a symbol or rule's precedence means none
     is assigned.
  
-   Associativities are recorded similarly in rassoc and sassoc.  */
+   Associativities are recorded similarly in SYMBOLS[I]->assoc.  */
  
+# include "location.h"
+# include "symtab.h"
  
-#define        ISTOKEN(s)      ((s) < ntokens)
-#define        ISVAR(s)        ((s) >= ntokens)
+# define ISTOKEN(s)    ((s) < ntokens)
+# define ISVAR(s)      ((s) >= ntokens)
  
-extern int nitems;
  extern int nrules;
  extern int nsyms;
  extern int ntokens;
  extern int nvars;
  
-extern short *ritem;
+# define ITEM_NUMBER_MAX INT_MAX
+typedef int item_number_t;
+extern item_number_t *ritem;
+extern unsigned int nritems;
  
-extern short *sprec;
-extern short *sassoc;
-extern short *rline;           /* Source line number of each rule */
+/* There is a weird relationship between item_number_t and
+   symbol_number_t: we store symbol_number_t in item_number_t, but an
+   item_number_t can also hold a rule number, as a negative number.
  
-extern int start_symbol;
+   Therefore, a symbol_number_t must be a valid item_number_t, and we
+   sometimes have to perform the converse transformation.  */
+# define symbol_number_as_item_number(Tok) ((item_number_t) (Tok))
+# define item_number_as_symbol_number(Ite) ((symbol_number_t) (Ite))
  
-/* associativity values in elements of rassoc, sassoc.  */
-typedef enum
-{
-  right_assoc,
-  left_assoc,
-  non_assoc
-} associativity;
+extern symbol_number_t start_symbol;
  
  
  typedef struct rule_s
  {
-  short lhs;
-  short rhs;
-  short prec;
-  short precsym;
-  short assoc;
+  /* The number of the rule in the source.  It is usually the index in
+     RULES too, except if there are useless rules.  */
+  short user_number;
+
+  /* The index in RULES.  Usually the rule number in the source,
+     except if some rules are useless.  */
+  short number;
+
+  symbol_t *lhs;
+  item_number_t *rhs;
+
+  /* This symbol provides both the associativity, and the precedence. */
+  symbol_t *prec;
+
+  /* This symbol was attached to the rule via %prec. */
+  symbol_t *precsym;
+
+  location_t location;
+  bool useful;
+
+  const char *action;
+  location_t action_location;
  } rule_t;
  
-extern struct rule_s *rule_table;
+extern struct rule_s *rules;
  
-/* token translation table: indexed by a token number as returned by
-   the user's yylex routine, it yields the internal token number used
-   by the parser and throughout bison.  */
+/* Table of the symbols, indexed by the symbol number. */
+extern symbol_t **symbols;
  
-extern short *token_translations;
+/* TOKEN_TRANSLATIONS -- a table indexed by a token number as returned
+   by the user's yylex routine; it yields the internal token number
+   used by the parser and throughout bison.  */
+extern symbol_number_t *token_translations;
  extern int max_user_token_number;
  
-/* SEMANTIC_PARSER is nonzero if the input file says to use the hairy
-   parser that provides for semantic error recovery.  If it is zero,
-   the yacc-compatible simplified parser is used.  */
-
-extern int semantic_parser;
  
  /* PURE_PARSER is nonzero if should generate a parser that is all pure
     and reentrant.  */
  
  extern int pure_parser;
  
-/* ERROR_TOKEN_NUMBER is the token number of the error token.  */
+/* Report the length of the RHS. */
+int rule_rhs_length PARAMS ((rule_t *rule));
+
+/* Dump RITEM for traces. */
+void ritem_print PARAMS ((FILE *out));
+
+/* Return the size of the longest rule RHS.  */
+size_t ritem_longest_rhs PARAMS ((void));
+
+/* Dump the grammar. */
+void grammar_dump PARAMS ((FILE *out, const char *title));
+
+/* Free the packed grammar. */
+void grammar_free PARAMS ((void));
  
-extern int error_token_number;
  #endif /* !GRAM_H_ */