/* Grammar reduction for Bison.
- Copyright 1988, 1989, 2000, 2001 Free Software Foundation, Inc.
+
+ Copyright (C) 1988, 1989, 2000, 2001, 2002, 2003, 2005, 2006 Free
+ Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
You should have received a copy of the GNU General Public License
along with Bison; see the file COPYING. If not, write to
- the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+ the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
/* Reduce the grammar: Find and eliminate unreachable terminals,
/* Don't eliminate unreachable terminals: They may be used by the
user's parser. */
+#include <config.h>
#include "system.h"
-#include "getargs.h"
+
+#include <bitset.h>
+#include <quotearg.h>
+
+#include "complain.h"
#include "files.h"
-#include "symtab.h"
+#include "getargs.h"
#include "gram.h"
-#include "complain.h"
-#include "reduce.h"
#include "reader.h"
-#include "getargs.h"
-#include "bitset.h"
-
-typedef short *rule;
-
+#include "reduce.h"
+#include "symtab.h"
/* Set of all nonterminals which are not useless. */
static bitset N;
`useless', but no warning should be issued). */
static bitset V1;
-static int nuseful_productions;
-static int nuseless_productions;
+static rule_number nuseful_productions;
+rule_number nuseless_productions;
static int nuseful_nonterminals;
-int nuseless_nonterminals;
+symbol_number nuseless_nonterminals;
\f
/*-------------------------------------------------------------------.
| Another way to do this would be with a set for each production and |
`-------------------------------------------------------------------*/
static bool
-useful_production (int i, bitset N0)
+useful_production (rule_number r, bitset N0)
{
- rule r;
- short n;
+ item_number *rhsp;
/* A production is useful if all of the nonterminals in its RHS appear
in the set of useful nonterminals. */
- for (r = rules[i].rhs; *r >= 0; r++)
- if (ISVAR (n = *r) && !bitset_test (N0, n - ntokens))
- return FALSE;
- return TRUE;
+ for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
+ if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
+ return false;
+ return true;
}
useless_nonterminals (void)
{
bitset Np, Ns;
- int i;
+ rule_number r;
/* N is the set as built so far. Np is the set being built this iteration.
P is the set of all productions which have a RHS all in N. */
while (1)
{
bitset_copy (Np, N);
- for (i = 1; i < nrules + 1; i++)
- if (!bitset_test (P, i)
- && useful_production (i, N))
+ for (r = 0; r < nrules; r++)
+ if (!bitset_test (P, r)
+ && useful_production (r, N))
{
- bitset_set (Np, rules[i].lhs - ntokens);
- bitset_set (P, i);
+ bitset_set (Np, rules[r].lhs->number - ntokens);
+ bitset_set (P, r);
}
if (bitset_equal_p (N, Np))
break;
inaccessable_symbols (void)
{
bitset Vp, Vs, Pp;
- int i;
- short t;
- rule r;
/* Find out which productions are reachable and which symbols are
used. Starting with an empty set of productions and a set of
user can know. */
Vp = bitset_create (nsyms, BITSET_FIXED);
- Pp = bitset_create (nrules + 1, BITSET_FIXED);
+ Pp = bitset_create (nrules, BITSET_FIXED);
/* If the start symbol isn't useful, then nothing will be useful. */
- if (bitset_test (N, start_symbol - ntokens))
+ if (bitset_test (N, accept->number - ntokens))
{
- bitset_set (V, start_symbol);
+ bitset_set (V, accept->number);
while (1)
{
+ rule_number r;
bitset_copy (Vp, V);
- for (i = 1; i < nrules + 1; i++)
+ for (r = 0; r < nrules; r++)
{
- if (!bitset_test (Pp, i)
- && bitset_test (P, i)
- && bitset_test (V, rules[i].lhs))
+ if (!bitset_test (Pp, r)
+ && bitset_test (P, r)
+ && bitset_test (V, rules[r].lhs->number))
{
- for (r = rules[i].rhs; *r >= 0; r++)
- if (ISTOKEN (t = *r) || bitset_test (N, t - ntokens))
- bitset_set (Vp, t);
- bitset_set (Pp, i);
+ item_number *rhsp;
+ for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
+ if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
+ bitset_set (Vp, *rhsp);
+ bitset_set (Pp, r);
}
}
if (bitset_equal_p (V, Vp))
V = Vp;
/* The end-of-input, error, and undefined tokens are internal to Bison. Consider them useful. */
- bitset_set (V, 0); /* end-of-input token */
- bitset_set (V, 1); /* error token */
- bitset_set (V, 2); /* some undefined token */
+ bitset_set (V, endtoken->number); /* end-of-input token */
+ bitset_set (V, errtoken->number); /* error token */
+ bitset_set (V, undeftoken->number); /* some undefined token */
bitset_free (P);
P = Pp;
nuseless_productions = nrules - nuseful_productions;
nuseful_nonterminals = 0;
- for (i = ntokens; i < nsyms; i++)
- if (bitset_test (V, i))
- nuseful_nonterminals++;
+ {
+ symbol_number i;
+ for (i = ntokens; i < nsyms; i++)
+ if (bitset_test (V, i))
+ nuseful_nonterminals++;
+ }
nuseless_nonterminals = nvars - nuseful_nonterminals;
/* A token that was used in %prec should not be warned about. */
- for (i = 1; i < nrules; i++)
- if (rules[i].precsym != 0)
- bitset_set (V1, rules[i].precsym);
+ {
+ rule_number r;
+ for (r = 0; r < nrules; ++r)
+ if (rules[r].precsym != 0)
+ bitset_set (V1, rules[r].precsym->number);
+ }
}
+
+/*-------------------------------------------------------------------.
+| Put the useless productions at the end of RULES, and adjust NRULES |
+| accordingly. |
+`-------------------------------------------------------------------*/
+
static void
reduce_grammar_tables (void)
{
- /* This is turned off because we would need to change the numbers in
- the case statements in the actions file.
-
- We don't disable it via CPP so that it is still checked with the
- rest of the code, to avoid its becoming completely obsolete.
-
- FIXME: I think the comment above demonstrates this code must be
- turned off for *semantic* parser, not in the general case. Try
- to understand this better --akim. */
+ /* Report and flag useless productions. */
+ {
+ rule_number r;
+ for (r = 0; r < nrules; r++)
+ rules[r].useful = bitset_test (P, r);
+ grammar_rules_never_reduced_report (_("useless rule"));
+ }
- if (0)
- /* remove useless productions */
- if (nuseless_productions > 0)
+ /* Map the rules to their new index: useful first, useless
+ afterwards. Kept for later report. */
+ {
+ int useful = 0;
+ int useless = nrules - nuseless_productions;
+ rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
+ rule_number r;
+ for (r = 0; r < nrules; ++r)
+ rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
+ free (rules);
+ rules = rules_sorted;
+
+ /* Renumber the rule markers in RITEMS. */
+ for (r = 0; r < nrules; ++r)
{
- short np, pn, ni, pi;
-
- np = 0;
- ni = 0;
- for (pn = 1; pn < nrules + 1; pn++)
- if (bitset_test (P, pn))
- {
- np++;
- if (pn != np)
- {
- rules[np].lhs = rules[pn].lhs;
- rules[np].line = rules[pn].line;
- rules[np].prec = rules[pn].prec;
- rules[np].assoc = rules[pn].assoc;
- rules[np].rhs = rules[pn].rhs;
- if (rules[np].rhs - ritem != ni)
- {
- pi = rules[np].rhs - ritem;
- rules[np].rhs = ritem + ni;
- while (ritem[pi] >= 0)
- ritem[ni++] = ritem[pi++];
- ritem[ni++] = -np;
- }
- }
- else
- {
- while (ritem[ni++] >= 0)
- /* Nothing. */;
- }
- }
-
- ritem[ni] = 0;
- nrules -= nuseless_productions;
- nitems = ni;
- nritems = ni;
-
- /* Is it worth it to reduce the amount of memory for the
- grammar? Probably not. */
+ item_number *rhsp = rules[r].rhs;
+ for (/* Nothing. */; *rhsp >= 0; ++rhsp)
+ /* Nothing. */;
+ *rhsp = rule_number_as_item_number (r);
+ rules[r].number = r;
}
+ nrules -= nuseless_productions;
+ }
- /* Disable useless productions. */
- if (nuseless_productions > 0)
- {
- int pn;
- for (pn = 1; pn < nrules + 1; pn++)
- rules[pn].useful = bitset_test (P, pn);
- }
+ /* Adjust NRITEMS. */
+ {
+ rule_number r;
+ int length;
+ for (r = nrules; r < nrules + nuseless_productions; ++r)
+ {
+ length = rule_rhs_length (&rules[r]);
+ nritems -= length + 1;
+ }
+ }
}
static void
nonterminals_reduce (void)
{
- int i, n;
+ symbol_number i, n;
/* Map the nonterminals to their new index: useful first, useless
afterwards. Kept for later report. */
- short *nontermmap = XCALLOC (short, nvars) - ntokens;
+ symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap);
n = ntokens;
for (i = ntokens; i < nsyms; i++)
if (bitset_test (V, i))
- nontermmap[i] = n++;
+ nontermmap[i - ntokens] = n++;
for (i = ntokens; i < nsyms; i++)
if (!bitset_test (V, i))
- nontermmap[i] = n++;
+ {
+ nontermmap[i - ntokens] = n++;
+ warn_at (symbols[i]->location, _("useless nonterminal: %s"),
+ symbols[i]->tag);
+ }
/* Shuffle elements of tables indexed by symbol number. */
{
- bucket **symbols_sorted = XMALLOC (bucket *, nvars) - ntokens;
+ symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted);
for (i = ntokens; i < nsyms; i++)
- symbols_sorted[nontermmap[i]] = symbols[i];
+ symbols[i]->number = nontermmap[i - ntokens];
+ for (i = ntokens; i < nsyms; i++)
+ symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
for (i = ntokens; i < nsyms; i++)
- symbols[i] = symbols_sorted[i];
- free (symbols_sorted + ntokens);
+ symbols[i] = symbols_sorted[i - ntokens];
+ free (symbols_sorted);
}
- /* Replace all symbol numbers in valid data structures. */
-
- for (i = 1; i < nrules + 1; i++)
- {
- rules[i].lhs = nontermmap[rules[i].lhs];
- if (ISVAR (rules[i].precsym))
- /* Can this happen? */
- rules[i].precsym = nontermmap[rules[i].precsym];
- }
-
- for (i = 0; i < nritems; ++i)
- if (ISVAR (ritem[i]))
- ritem[i] = nontermmap[ritem[i]];
-
- start_symbol = nontermmap[start_symbol];
+ {
+ rule_number r;
+ for (r = 0; r < nrules; ++r)
+ {
+ item_number *rhsp;
+ for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
+ if (ISVAR (*rhsp))
+ *rhsp = symbol_number_as_item_number (nontermmap[*rhsp
+ - ntokens]);
+ }
+ accept->number = nontermmap[accept->number - ntokens];
+ }
nsyms -= nuseless_nonterminals;
nvars -= nuseless_nonterminals;
- free (nontermmap + ntokens);
+ free (nontermmap);
}
if (nuseless_nonterminals > 0)
{
int i;
- fprintf (out, "%s\n\n", _("Useless nonterminals:"));
+ fprintf (out, "%s\n\n", _("Useless nonterminals"));
for (i = 0; i < nuseless_nonterminals; ++i)
fprintf (out, " %s\n", symbols[nsyms + i]->tag);
fputs ("\n\n", out);
}
{
- bool b = FALSE;
+ bool b = false;
int i;
for (i = 0; i < ntokens; i++)
if (!bitset_test (V, i) && !bitset_test (V1, i))
{
if (!b)
- fprintf (out, "%s\n\n", _("Terminals which are not used:"));
- b = TRUE;
+ fprintf (out, "%s\n\n", _("Terminals which are not used"));
+ b = true;
fprintf (out, " %s\n", symbols[i]->tag);
}
if (b)
}
if (nuseless_productions > 0)
- {
- int i;
- fprintf (out, "%s\n\n", _("Useless rules:"));
- for (i = 1; i < nrules + 1; i++)
- if (!rules[i].useful)
- {
- rule r;
- fprintf (out, "#%-4d ", i - 1);
- fprintf (out, "%s:", symbols[rules[i].lhs]->tag);
- for (r = rules[i].rhs; *r >= 0; r++)
- fprintf (out, " %s", symbols[*r]->tag);
- fputs (";\n", out);
- }
- fputs ("\n\n", out);
- }
+ grammar_rules_partial_print (out, _("Useless rules"),
+ rule_useless_p);
}
\f
-static void
-dump_grammar (FILE *out)
-{
- int i;
- rule r;
-
- fprintf (out, "REDUCED GRAMMAR\n\n");
- fprintf (out,
- "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
- ntokens, nvars, nsyms, nrules, nitems);
- fprintf (out, "Variables\n---------\n\n");
- fprintf (out, "Value Sprec Sassoc Tag\n");
- for (i = ntokens; i < nsyms; i++)
- fprintf (out, "%5d %5d %5d %s\n",
- i,
- symbols[i]->prec, symbols[i]->assoc, symbols[i]->tag);
- fprintf (out, "\n\n");
- fprintf (out, "Rules\n-----\n\n");
- fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
- for (i = 1; i < nrules + 1; i++)
- {
- int rhs_count = 0;
- /* Find the last RHS index in ritems. */
- for (r = rules[i].rhs; *r >= 0; ++r)
- ++rhs_count;
- fprintf (out, "%3d (%2d, %2d, %2d, %2d-%2d) %2d ->",
- i - 1,
- rules[i].prec, rules[i].assoc, rules[i].useful,
- rules[i].rhs - ritem, rules[i].rhs - ritem + rhs_count - 1,
- rules[i].lhs);
- /* Dumped the RHS. */
- for (r = rules[i].rhs; *r >= 0; r++)
- fprintf (out, "%3d", *r);
- fprintf (out, " [%d]\n", -(*r) - 1);
- }
- fprintf (out, "\n\n");
- fprintf (out, "Rules interpreted\n-----------------\n\n");
- for (i = 1; i < nrules + 1; i++)
- {
- fprintf (out, "%-5d %s :", i, symbols[rules[i].lhs]->tag);
- for (r = rules[i].rhs; *r >= 0; r++)
- fprintf (out, " %s", symbols[*r]->tag);
- fputc ('\n', out);
- }
- fprintf (out, "\n\n");
-}
+
nuseless_productions),
nuseless_productions);
- fprintf (stderr, _("%s contains "), infile);
+ fprintf (stderr, "%s: %s: ", grammar_file, _("warning"));
if (nuseless_nonterminals > 0)
fprintf (stderr, ngettext ("%d useless nonterminal",
nuseless_productions),
nuseless_productions);
fprintf (stderr, "\n");
- fflush (stderr);
}
\f
void
/* Allocate the global sets used to compute the reduced grammar */
N = bitset_create (nvars, BITSET_FIXED);
- P = bitset_create (nrules + 1, BITSET_FIXED);
+ P = bitset_create (nrules, BITSET_FIXED);
V = bitset_create (nsyms, BITSET_FIXED);
V1 = bitset_create (nsyms, BITSET_FIXED);
useless_nonterminals ();
inaccessable_symbols ();
- reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
+ reduced = (nuseless_nonterminals + nuseless_productions > 0);
if (!reduced)
return;
reduce_print ();
- if (!bitset_test (N, start_symbol - ntokens))
- fatal (_("Start symbol %s does not derive any sentence"),
- symbols[start_symbol]->tag);
+ if (!bitset_test (N, accept->number - ntokens))
+ fatal_at (startsymbol_location,
+ _("start symbol %s does not derive any sentence"),
+ startsymbol->tag);
- reduce_grammar_tables ();
+ /* First reduce the nonterminals, as they renumber themselves in the
+ whole grammar. If you change the order, nonterms would be
+ renumbered only in the reduced grammar. */
if (nuseless_nonterminals > 0)
nonterminals_reduce ();
+ if (nuseless_productions > 0)
+ reduce_grammar_tables ();
- if (trace_flag)
+ if (trace_flag & trace_grammar)
{
- dump_grammar (stderr);
+ grammar_dump (stderr, "Reduced Grammar");
fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
, and %d productions.\n",
- infile, ntokens, nvars, nrules);
+ grammar_file, ntokens, nvars, nrules);
}
}