/* Grammar reduction for Bison.
- Copyright (C) 1988, 1989, 2000 Free Software Foundation, Inc.
+
+ Copyright (C) 1988, 1989, 2000, 2001, 2002, 2003, 2005, 2006,
+ 2007, 2008 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
- Bison is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- Bison is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with Bison; see the file COPYING. If not, write to
- the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Reduce the grammar: Find and eliminate unreachable terminals,
/* Don't eliminate unreachable terminals: They may be used by the
user's parser. */
+#include <config.h>
#include "system.h"
-#include "getargs.h"
+
+#include <bitset.h>
+#include <quotearg.h>
+
+#include "complain.h"
#include "files.h"
+#include "getargs.h"
#include "gram.h"
-#include "xalloc.h"
-#include "complain.h"
-#include "reduce.h"
+#include "print-xml.h"
#include "reader.h"
-#include "getargs.h"
+#include "reduce.h"
+#include "symtab.h"
-typedef unsigned *BSet;
-typedef short *rule;
+/* Set of all nonterminals which are not useless. */
+static bitset N;
+/* Set of all rules which have no useless nonterminals in their RHS. */
+static bitset P;
-/* N is set of all nonterminals which are not useless. P is set of
- all rules which have no useless nonterminals in their RHS. V is
- the set of all accessible symbols. */
+/* Set of all accessible symbols. */
+static bitset V;
-static BSet N, P, V, V1;
+/* Set of symbols used to define rule precedence (so they are
+ `useless', but no warning should be issued). */
+static bitset V1;
-static int nuseful_productions;
-static int nuseless_productions;
+static rule_number nuseful_productions;
+rule_number nuseless_productions;
static int nuseful_nonterminals;
-static int nuseless_nonterminals;
-\f
-static bool
-bits_equal (BSet L, BSet R, int n)
-{
- int i;
-
- for (i = n - 1; i >= 0; i--)
- if (L[i] != R[i])
- return FALSE;
- return TRUE;
-}
-
-
-static int
-nbits (unsigned i)
-{
- int count = 0;
-
- while (i != 0)
- {
- i ^= (i & ((unsigned) (-(int) i)));
- ++count;
- }
- return count;
-}
-
-
-static int
-bits_size (BSet S, int n)
-{
- int i, count = 0;
-
- for (i = n - 1; i >= 0; i--)
- count += nbits (S[i]);
- return count;
-}
+symbol_number nuseless_nonterminals;
\f
/*-------------------------------------------------------------------.
| Another way to do this would be with a set for each production and |
`-------------------------------------------------------------------*/
static bool
-useful_production (int i, BSet N0)
+useful_production (rule_number r, bitset N0)
{
- rule r;
- short n;
+ item_number *rhsp;
/* Return whether the given rule is useful with respect to N0. A
production is useful if all of the nonterminals in its RHS appear
in N0, the given set of useful nonterminals. N0 is indexed by
nonterminal number minus NTOKENS. */
- for (r = &ritem[rrhs[i]]; *r > 0; r++)
- if (ISVAR (n = *r))
- if (!BITISSET (N0, n - ntokens))
- return FALSE;
- return TRUE;
+ for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
+ if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
+ return false;
+ return true;
}
static void
useless_nonterminals (void)
{
- BSet Np, Ns;
- int i, n;
+ bitset Np, Ns;
+ rule_number r;
/* N is the set built so far. Np is the set being built during this
iteration. P is the set of all productions whose RHS lies entirely
in N. */
- Np = XCALLOC (unsigned, WORDSIZE (nvars));
+ Np = bitset_create (nvars, BITSET_FIXED);
+
/* The set being computed is a set of nonterminals which can derive
the empty string or strings consisting of all terminals. At each
saved to be used when finding useful productions: only
productions in this set will appear in the final grammar. */
- n = 0;
while (1)
{
- for (i = WORDSIZE (nvars) - 1; i >= 0; i--)
- Np[i] = N[i];
- for (i = 1; i <= nrules; i++)
- {
- if (!BITISSET (P, i))
- {
- if (useful_production (i, N))
- {
- SETBIT (Np, rlhs[i] - ntokens);
- SETBIT (P, i);
- }
- }
- }
- if (bits_equal (N, Np, WORDSIZE (nvars)))
+ bitset_copy (Np, N);
+ for (r = 0; r < nrules; r++)
+ if (!bitset_test (P, r)
+ && useful_production (r, N))
+ {
+ bitset_set (Np, rules[r].lhs->number - ntokens);
+ bitset_set (P, r);
+ }
+ if (bitset_equal_p (N, Np))
break; /* Nothing was added during this pass: N is complete. */
Ns = Np; /* Otherwise swap N and Np and run another pass. */
Np = N;
N = Ns;
}
- XFREE (N);
+ bitset_free (N);
N = Np;
}
static void
inaccessable_symbols (void)
{
- BSet Vp, Vs, Pp;
- int i, n;
- short t;
- rule r;
+ bitset Vp, Vs, Pp;
/* Find out which productions are reachable and which symbols are
used. Starting with an empty set of productions and a set of
terminals are printed (if running in verbose mode) so that the
user can know. */
- Vp = XCALLOC (unsigned, WORDSIZE (nsyms));
- Pp = XCALLOC (unsigned, WORDSIZE (nrules + 1));
+ Vp = bitset_create (nsyms, BITSET_FIXED);
+ Pp = bitset_create (nrules, BITSET_FIXED);
/* If the start symbol isn't useful, then nothing will be useful:
the reachability iteration below is run only when it is in N. */
- if (!BITISSET (N, start_symbol - ntokens))
- goto end_iteration;
-
- SETBIT (V, start_symbol);
-
- n = 0;
- while (1)
+ if (bitset_test (N, accept->number - ntokens))
{
- for (i = WORDSIZE (nsyms) - 1; i >= 0; i--)
- Vp[i] = V[i];
- for (i = 1; i <= nrules; i++)
+ bitset_set (V, accept->number);
+
+ while (1)
{
- if (!BITISSET (Pp, i) && BITISSET (P, i) && BITISSET (V, rlhs[i]))
+ rule_number r;
+ bitset_copy (Vp, V);
+ for (r = 0; r < nrules; r++)
{
- for (r = &ritem[rrhs[i]]; *r >= 0; r++)
+ if (!bitset_test (Pp, r)
+ && bitset_test (P, r)
+ && bitset_test (V, rules[r].lhs->number))
{
- if (ISTOKEN (t = *r) || BITISSET (N, t - ntokens))
- {
- SETBIT (Vp, t);
- }
+ item_number *rhsp;
+ for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
+ if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
+ bitset_set (Vp, *rhsp);
+ bitset_set (Pp, r); /* This rule is reachable; the test on Pp
+ above keeps it from being scanned again. */
}
- SETBIT (Pp, i);
}
+ if (bitset_equal_p (V, Vp))
+ break; /* No symbol was added during this pass. */
+ Vs = Vp; /* Otherwise swap V and Vp and run another pass. */
+ Vp = V;
+ V = Vs;
}
- if (bits_equal (V, Vp, WORDSIZE (nsyms)))
- {
- break;
- }
- Vs = Vp;
- Vp = V;
- V = Vs;
}
-end_iteration:
- XFREE (V);
+ bitset_free (V);
V = Vp;
/* The end-of-input, error, and undefined tokens (numbers 0, 1, and 2
in the old code) are internal to Bison. Consider them useful. */
- SETBIT (V, 0); /* end-of-input token */
- SETBIT (V, 1); /* error token */
- SETBIT (V, 2); /* some undefined token */
+ bitset_set (V, endtoken->number); /* end-of-input token */
+ bitset_set (V, errtoken->number); /* error token */
+ bitset_set (V, undeftoken->number); /* some undefined token */
- XFREE (P);
+ bitset_free (P);
P = Pp; /* From here on P holds only the rules marked reachable above. */
- nuseful_productions = bits_size (P, WORDSIZE (nrules + 1));
+ nuseful_productions = bitset_count (P);
nuseless_productions = nrules - nuseful_productions;
nuseful_nonterminals = 0;
- for (i = ntokens; i < nsyms; i++)
- if (BITISSET (V, i))
- nuseful_nonterminals++;
+ {
+ symbol_number i;
+ for (i = ntokens; i < nsyms; i++)
+ if (bitset_test (V, i))
+ nuseful_nonterminals++;
+ }
nuseless_nonterminals = nvars - nuseful_nonterminals;
/* A token that was used in %prec should not be warned about: record
every rule's precedence symbol in V1. */
- for (i = 1; i < nrules; i++)
- if (rprecsym[i] != 0)
- SETBIT (V1, rprecsym[i]);
+ {
+ rule_number r;
+ for (r = 0; r < nrules; ++r)
+ if (rules[r].precsym != 0)
+ bitset_set (V1, rules[r].precsym->number);
+ }
}
+
+/*-------------------------------------------------------------------.
+| Put the useless productions at the end of RULES, and adjust NRULES |
+| accordingly. |
+`-------------------------------------------------------------------*/
+
static void
reduce_grammar_tables (void)
{
-/* This is turned off because we would need to change the numbers
- in the case statements in the actions file. */
-#if 0
- /* remove useless productions */
- if (nuseless_productions > 0)
- {
- short np, pn, ni, pi;
-
- np = 0;
- ni = 0;
- for (pn = 1; pn <= nrules; pn++)
- {
- if (BITISSET (P, pn))
- {
- np++;
- if (pn != np)
- {
- rlhs[np] = rlhs[pn];
- rline[np] = rline[pn];
- rprec[np] = rprec[pn];
- rassoc[np] = rassoc[pn];
- rrhs[np] = rrhs[pn];
- if (rrhs[np] != ni)
- {
- pi = rrhs[np];
- rrhs[np] = ni;
- while (ritem[pi] >= 0)
- ritem[ni++] = ritem[pi++];
- ritem[ni++] = -np;
- }
- }
- else
- {
- while (ritem[ni++] >= 0);
- }
- }
- }
- ritem[ni] = 0;
- nrules -= nuseless_productions;
- nitems = ni;
-
- /* Is it worth it to reduce the amount of memory for the
- grammar? Probably not. */
-
- }
-#endif /* 0 */
- /* Disable useless productions,
- since they may contain useless nonterms
- that would get mapped below to -1 and confuse everyone. */
- if (nuseless_productions > 0)
- {
- int pn;
-
- for (pn = 1; pn <= nrules; pn++)
- {
- if (!BITISSET (P, pn))
- {
- rlhs[pn] = -1;
- }
- }
- }
-
- /* remove useless symbols */
- if (nuseless_nonterminals > 0)
- {
-
- int i, n;
-/* short j; JF unused */
- short *nontermmap;
- rule r;
-
- /* Create a map of nonterminal number to new nonterminal
- number. -1 in the map means it was useless and is being
- eliminated. */
-
- nontermmap = XCALLOC (short, nvars) - ntokens;
- for (i = ntokens; i < nsyms; i++)
- nontermmap[i] = -1;
+ /* Flag each rule as useful or not according to P, then report the
+ useless ones. */
+ {
+ rule_number r;
+ for (r = 0; r < nrules; r++)
+ rules[r].useful = bitset_test (P, r);
+ grammar_rules_useless_report (_("rule useless in grammar"));
+ }
+
+ /* Sort the rules: useful ones first, useless ones afterwards. The
+ useless rules stay in the array, past the new NRULES, for later
+ report. */
+ {
+ int useful = 0;
+ int useless = nrules - nuseless_productions;
+ rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
+ rule_number r;
+ for (r = 0; r < nrules; ++r)
+ rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
+ free (rules);
+ rules = rules_sorted;
+
+ /* Renumber the rule markers in RITEMS: the negative item that ends
+ each RHS is overwritten with the rule's new number. */
+ for (r = 0; r < nrules; ++r)
+ {
+ item_number *rhsp = rules[r].rhs;
+ for (/* Nothing. */; *rhsp >= 0; ++rhsp)
+ /* Nothing. */;
+ *rhsp = rule_number_as_item_number (r);
+ rules[r].number = r;
+ }
+ nrules -= nuseless_productions;
+ }
+
+ /* Adjust NRITEMS: discount the items of the useless rules, which now
+ occupy [NRULES, NRULES + NUSELESS_PRODUCTIONS). Each such rule
+ accounts for its RHS length plus one item for its end marker. */
+ {
+ rule_number r;
+ int length;
+ for (r = nrules; r < nrules + nuseless_productions; ++r)
+ {
+ length = rule_rhs_length (&rules[r]);
+ nritems -= length + 1;
+ }
+ }
+}
- n = ntokens;
- for (i = ntokens; i < nsyms; i++)
- if (BITISSET (V, i))
- nontermmap[i] = n++;
- /* Shuffle elements of tables indexed by symbol number. */
+/*------------------------------.
+| Remove useless nonterminals. |
+`------------------------------*/
- for (i = ntokens; i < nsyms; i++)
- {
- n = nontermmap[i];
- if (n >= 0)
- {
- sassoc[n] = sassoc[i];
- sprec[n] = sprec[i];
- tags[n] = tags[i];
- }
- else
- {
- free (tags[i]);
- }
- }
-
- /* Replace all symbol numbers in valid data structures. */
-
- for (i = 1; i <= nrules; i++)
- {
- /* Ignore the rules disabled above. */
- if (rlhs[i] >= 0)
- rlhs[i] = nontermmap[rlhs[i]];
- if (ISVAR (rprecsym[i]))
- /* Can this happen? */
- rprecsym[i] = nontermmap[rprecsym[i]];
- }
+static void
+nonterminals_reduce (void)
+{
+ symbol_number i, n;
- for (r = ritem; *r; r++)
- if (ISVAR (*r))
- *r = nontermmap[*r];
+ /* Build NONTERMMAP, which maps each nonterminal (indexed by its old
+ number minus NTOKENS) to its new number: useful ones first, useless
+ ones afterwards. A warning is issued for each useless one. Kept
+ for later report. */
- start_symbol = nontermmap[start_symbol];
+ symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap);
+ n = ntokens;
+ for (i = ntokens; i < nsyms; i++)
+ if (bitset_test (V, i))
+ nontermmap[i - ntokens] = n++;
+ for (i = ntokens; i < nsyms; i++)
+ if (!bitset_test (V, i))
+ {
+ nontermmap[i - ntokens] = n++;
+ warn_at (symbols[i]->location, _("nonterminal useless in grammar: %s"),
+ symbols[i]->tag);
+ }
+
+
+ /* Shuffle elements of tables indexed by symbol number: give every
+ nonterminal its new number, then reorder the nonterminal part of
+ SYMBOLS through a temporary array so that each symbol sits at the
+ index equal to its new number. */
+ {
+ symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted);
+
+ for (i = ntokens; i < nsyms; i++)
+ symbols[i]->number = nontermmap[i - ntokens];
+ for (i = ntokens; i < nsyms; i++)
+ symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
+ for (i = ntokens; i < nsyms; i++)
+ symbols[i] = symbols_sorted[i - ntokens];
+ free (symbols_sorted);
+ }
+
+ {
+ rule_number r; /* Rewrite the nonterminals found in every RHS with
+ their new numbers. */
+ for (r = 0; r < nrules; ++r)
+ {
+ item_number *rhsp;
+ for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
+ if (ISVAR (*rhsp))
+ *rhsp = symbol_number_as_item_number (nontermmap[*rhsp
+ - ntokens]);
+ }
+ accept->number = nontermmap[accept->number - ntokens]; /* NOTE(review):
+ if ACCEPT is one of the symbols renumbered in the loop over SYMBOLS
+ above, its number is mapped a second time here; confirm that this
+ is intended or that the mapping is idempotent for ACCEPT. */
+ }
+
+ nsyms -= nuseless_nonterminals; /* SYMBOLS itself is not shrunk: the
+ useless nonterminals remain just past the new NSYMS. */
+ nvars -= nuseless_nonterminals;
+
+ free (nontermmap);
+}
- nsyms -= nuseless_nonterminals;
- nvars -= nuseless_nonterminals;
- free (&nontermmap[ntokens]);
- }
-}
+/*------------------------------------------------------------------.
+| Output the detailed results of the reductions. For FILE.output. |
+`------------------------------------------------------------------*/
-static void
-print_results (void)
+void
+reduce_output (FILE *out)
{
- int i;
-/* short j; JF unused */
- rule r;
- bool b;
-
if (nuseless_nonterminals > 0)
{
- fprintf (foutput, _("Useless nonterminals:\n\n"));
- for (i = ntokens; i < nsyms; i++)
- if (!BITISSET (V, i))
- fprintf (foutput, " %s\n", tags[i]);
+ int i;
+ fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
+ for (i = 0; i < nuseless_nonterminals; ++i) /* The useless nonterminals
+ are the NUSELESS_NONTERMINALS entries of SYMBOLS starting at NSYMS. */
+ fprintf (out, " %s\n", symbols[nsyms + i]->tag);
+ fputs ("\n\n", out);
}
- b = FALSE;
- for (i = 0; i < ntokens; i++)
- {
- if (!BITISSET (V, i) && !BITISSET (V1, i))
+
+ {
+ bool b = false; /* Set once the section header has been printed. */
+ int i;
+ for (i = 0; i < ntokens; i++)
+ if (reduce_token_unused_in_grammar (i))
{
if (!b)
- {
- fprintf (foutput, _("\n\nTerminals which are not used:\n\n"));
- b = TRUE;
- }
- fprintf (foutput, " %s\n", tags[i]);
+ fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
+ b = true;
+ fprintf (out, " %s\n", symbols[i]->tag);
}
- }
+ if (b)
+ fputs ("\n\n", out);
+ }
if (nuseless_productions > 0)
- {
- fprintf (foutput, _("\n\nUseless rules:\n\n"));
- for (i = 1; i <= nrules; i++)
- {
- if (!BITISSET (P, i))
- {
- fprintf (foutput, "#%-4d ", i);
- fprintf (foutput, "%s :\t", tags[rlhs[i]]);
- for (r = &ritem[rrhs[i]]; *r >= 0; r++)
- {
- fprintf (foutput, " %s", tags[*r]);
- }
- fprintf (foutput, ";\n");
- }
- }
- }
- if (nuseless_nonterminals > 0 || nuseless_productions > 0 || b)
- fprintf (foutput, "\n\n");
+ grammar_rules_partial_print (out, _("Rules useless in grammar"),
+ rule_useless_in_grammar_p);
}
\f
-#if 0 /* XXX currently unused. */
-static void
-dump_grammar (void)
-{
- int i;
- rule r;
-
- fprintf (foutput,
- "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
- ntokens, nvars, nsyms, nrules, nitems);
- fprintf (foutput, _("Variables\n---------\n\n"));
- fprintf (foutput, _("Value Sprec Sassoc Tag\n"));
- for (i = ntokens; i < nsyms; i++)
- fprintf (foutput, "%5d %5d %5d %s\n", i, sprec[i], sassoc[i], tags[i]);
- fprintf (foutput, "\n\n");
- fprintf (foutput, _("Rules\n-----\n\n"));
- for (i = 1; i <= nrules; i++)
- {
- fprintf (foutput, "%-5d(%5d%5d)%5d : (@%-5d)",
- i, rprec[i], rassoc[i], rlhs[i], rrhs[i]);
- for (r = &ritem[rrhs[i]]; *r > 0; r++)
- fprintf (foutput, "%5d", *r);
- fprintf (foutput, " [%d]\n", -(*r));
- }
- fprintf (foutput, "\n\n");
- fprintf (foutput, _("Rules interpreted\n-----------------\n\n"));
- for (i = 1; i <= nrules; i++)
- {
- fprintf (foutput, "%-5d %s :", i, tags[rlhs[i]]);
- for (r = &ritem[rrhs[i]]; *r > 0; r++)
- fprintf (foutput, " %s", tags[*r]);
- fprintf (foutput, "\n");
- }
- fprintf (foutput, "\n\n");
-}
-
-#endif
+/*-------------------------------.
+| Report the results to STDERR. |
+`-------------------------------*/
static void
-print_notices (void)
+reduce_print (void)
{
- if (yacc_flag && nuseless_productions)
- fprintf (stderr, _("%d rules never reduced\n"), nuseless_productions);
-
- fprintf (stderr, _("%s contains "), infile);
-
if (nuseless_nonterminals > 0)
{
- fprintf (stderr, _("%d useless nonterminal%s"),
- nuseless_nonterminals,
- (nuseless_nonterminals == 1 ? "" : "s"));
+ fprintf (stderr, "%s: %s: ", grammar_file, _("warning")); /* Message
+ prefix: grammar file name, then the translated word "warning". */
+ fprintf (stderr, ngettext ("%d nonterminal useless in grammar",
+ "%d nonterminals useless in grammar",
+ nuseless_nonterminals),
+ nuseless_nonterminals); /* ngettext picks the singular or plural
+ message according to the count. */
+ fprintf (stderr, "\n");
}
- if (nuseless_nonterminals > 0 && nuseless_productions > 0)
- fprintf (stderr, _(" and "));
-
if (nuseless_productions > 0)
{
- fprintf (stderr, _("%d useless rule%s"),
- nuseless_productions, (nuseless_productions == 1 ? "" : "s"));
+ fprintf (stderr, "%s: %s: ", grammar_file, _("warning")); /* Same
+ prefix as above, on a separate line for the useless rules. */
+ fprintf (stderr, ngettext ("%d rule useless in grammar",
+ "%d rules useless in grammar",
+ nuseless_productions),
+ nuseless_productions); /* NOTE(review): %d assumes rule_number is
+ int-compatible; confirm against its typedef. */
+ fprintf (stderr, "\n");
}
- fprintf (stderr, "\n");
- fflush (stderr);
}
\f
void
/* Allocate the global sets used to compute the reduced grammar */
- N = XCALLOC (unsigned, WORDSIZE (nvars));
- P = XCALLOC (unsigned, WORDSIZE (nrules + 1));
- V = XCALLOC (unsigned, WORDSIZE (nsyms));
- V1 = XCALLOC (unsigned, WORDSIZE (nsyms));
+ N = bitset_create (nvars, BITSET_FIXED);
+ P = bitset_create (nrules, BITSET_FIXED);
+ V = bitset_create (nsyms, BITSET_FIXED);
+ V1 = bitset_create (nsyms, BITSET_FIXED);
useless_nonterminals ();
inaccessable_symbols ();
- reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
-
- if (verbose_flag)
- print_results ();
+ reduced = (nuseless_nonterminals + nuseless_productions > 0);
+ if (!reduced)
+ return;
- if (reduced == FALSE)
- goto done_reducing;
+ reduce_print ();
- print_notices ();
+ if (!bitset_test (N, accept->number - ntokens))
+ fatal_at (startsymbol_location,
+ _("start symbol %s does not derive any sentence"),
+ startsymbol->tag);
- if (!BITISSET (N, start_symbol - ntokens))
- fatal (_("Start symbol %s does not derive any sentence"),
- tags[start_symbol]);
+ /* First reduce the nonterminals, as they renumber themselves in the
+ whole grammar. If you change the order, nonterms would be
+ renumbered only in the reduced grammar. */
+ if (nuseless_nonterminals > 0)
+ nonterminals_reduce ();
+ if (nuseless_productions > 0)
+ reduce_grammar_tables ();
- reduce_grammar_tables ();
-#if 0
- if (verbose_flag)
+ if (trace_flag & trace_grammar)
{
- fprintf (foutput, "REDUCED GRAMMAR\n\n");
- dump_grammar ();
+ grammar_dump (stderr, "Reduced Grammar");
+
+ fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
+, and %d productions.\n",
+ grammar_file, ntokens, nvars, nrules);
}
-#endif
-
- if (statistics_flag)
- fprintf (stderr, _("reduced %s defines %d terminal%s, %d nonterminal%s\
-, and %d production%s.\n"),
- infile,
- ntokens,
- (ntokens == 1 ? "" : "s"),
- nvars,
- (nvars == 1 ? "" : "s"),
- nrules,
- (nrules == 1 ? "" : "s"));
-
-done_reducing:
- /* Free the global sets used to compute the reduced grammar */
-
- XFREE (N);
- XFREE (V);
- XFREE (P);
+}
+
+bool
+reduce_token_unused_in_grammar (symbol_number i)
+{
+ aver (i < ntokens); /* Only token numbers may be queried. */
+ return !bitset_test (V, i) && !bitset_test (V1, i); /* Unused means
+ neither in V (the accessible symbols) nor in V1 (the symbols used
+ to define rule precedence). */
+}
+
+bool
+reduce_nonterminal_useless_in_grammar (symbol_number i)
+{
+ aver (ntokens <= i && i < nsyms + nuseless_nonterminals); /* I must be
+ a nonterminal number, counting the useless ones numbered from NSYMS
+ upwards. */
+ return nsyms <= i; /* Useless nonterminals are exactly those numbered
+ NSYMS or above. */
+}
+
+/*-----------------------------------------------------------.
+| Free the global sets used to compute the reduced grammar. |
+`-----------------------------------------------------------*/
+
+void
+reduce_free (void)
+{
+ bitset_free (N); /* Nonterminals which are not useless. */
+ bitset_free (V); /* Accessible symbols. */
+ bitset_free (V1); /* Symbols used to define rule precedence. */
+ bitset_free (P); /* Rules kept as useful. */
}