The computation of nullable is broken: it doesn't handle empty

[bison.git] / src / reduce.c
diff --git a/src/reduce.c b/src/reduce.c

index d160f9d6f134322439cc7b85dd54a15430d23a6e..d8ea5ed60ba1f95327219cdc8cf51db90e6f10a1 100644 (file)
--- a/src/reduce.c
+++ b/src/reduce.c
@@ -1,5 +1,5 @@
  /* Grammar reduction for Bison.
-   Copyright (C) 1988, 1989, 2000 Free Software Foundation, Inc.
+   Copyright 1988, 1989, 2000, 2001  Free Software Foundation, Inc.
  
     This file is part of Bison, the GNU Compiler Compiler.
  
@@ -29,28 +29,32 @@
  #include "getargs.h"
  #include "files.h"
  #include "gram.h"
-#include "alloc.h"
  #include "complain.h"
  #include "reduce.h"
-
-extern char **tags;            /* reader.c */
-static int statisticsflag;     /* XXXXXXX */
-extern int fixed_outfiles;
+#include "reader.h"
+#include "getargs.h"
  
  typedef unsigned *BSet;
  typedef short *rule;
  
  
-/* N is set of all nonterminals which are not useless.  P is set of
-   all rules which have no useless nonterminals in their RHS.  V is
-   the set of all accessible symbols.  */
+/* Set of all nonterminals which are not useless.  */
+static BSet N;
+
+/* Set of all rules which have no useless nonterminals in their RHS.  */
+static BSet P;
  
-static BSet N, P, V, V1;
+/* Set of all accessible symbols.  */
+static BSet V;
+
+/* Set of symbols used to define rule precedence (so they are
+   `useless', but no warning should be issued).  */
+static BSet V1;
  
  static int nuseful_productions;
  static int nuseless_productions;
  static int nuseful_nonterminals;
-static int nuseless_nonterminals;
+int nuseless_nonterminals;
  \f
  static bool
  bits_equal (BSet L, BSet R, int n)
@@ -103,7 +107,7 @@ useful_production (int i, BSet N0)
    /* A production is useful if all of the nonterminals in its appear
       in the set of useful nonterminals.  */
  
-  for (r = &ritem[rrhs[i]]; *r > 0; r++)
+  for (r = &ritem[rule_table[i].rhs]; *r > 0; r++)
      if (ISVAR (n = *r))
        if (!BITISSET (N0, n - ntokens))
         return FALSE;
@@ -119,12 +123,12 @@ static void
  useless_nonterminals (void)
  {
    BSet Np, Ns;
-  int i, n;
+  int i;
  
    /* N is set as built.  Np is set being built this iteration. P is
       set of all productions which have a RHS all in N.  */
  
-  Np = NEW2 (WORDSIZE (nvars), unsigned);
+  Np = XCALLOC (unsigned, WORDSIZE (nvars));
  
    /* The set being computed is a set of nonterminals which can derive
       the empty string or strings consisting of all terminals. At each
@@ -142,7 +146,6 @@ useless_nonterminals (void)
       saved to be used when finding useful productions: only
       productions in this set will appear in the final grammar.  */
  
-  n = 0;
    while (1)
      {
        for (i = WORDSIZE (nvars) - 1; i >= 0; i--)
@@ -153,7 +156,7 @@ useless_nonterminals (void)
             {
               if (useful_production (i, N))
                 {
-                 SETBIT (Np, rlhs[i] - ntokens);
+                 SETBIT (Np, rule_table[i].lhs - ntokens);
                   SETBIT (P, i);
                 }
             }
@@ -164,7 +167,7 @@ useless_nonterminals (void)
        Np = N;
        N = Ns;
      }
-  FREE (N);
+  XFREE (N);
    N = Np;
  }
  
@@ -173,7 +176,7 @@ static void
  inaccessable_symbols (void)
  {
    BSet Vp, Vs, Pp;
-  int i, n;
+  int i;
    short t;
    rule r;
  
@@ -200,45 +203,39 @@ inaccessable_symbols (void)
       terminals are printed (if running in verbose mode) so that the
       user can know.  */
  
-  Vp = NEW2 (WORDSIZE (nsyms), unsigned);
-  Pp = NEW2 (WORDSIZE (nrules + 1), unsigned);
+  Vp = XCALLOC (unsigned, WORDSIZE (nsyms));
+  Pp = XCALLOC (unsigned, WORDSIZE (nrules + 1));
  
    /* If the start symbol isn't useful, then nothing will be useful. */
-  if (!BITISSET (N, start_symbol - ntokens))
-    goto end_iteration;
-
-  SETBIT (V, start_symbol);
-
-  n = 0;
-  while (1)
+  if (BITISSET (N, start_symbol - ntokens))
      {
-      for (i = WORDSIZE (nsyms) - 1; i >= 0; i--)
-       Vp[i] = V[i];
-      for (i = 1; i <= nrules; i++)
+      SETBIT (V, start_symbol);
+
+      while (1)
         {
-         if (!BITISSET (Pp, i) && BITISSET (P, i) && BITISSET (V, rlhs[i]))
+         for (i = WORDSIZE (nsyms) - 1; i >= 0; i--)
+           Vp[i] = V[i];
+         for (i = 1; i <= nrules; i++)
             {
-             for (r = &ritem[rrhs[i]]; *r >= 0; r++)
+             if (!BITISSET (Pp, i)
+                 && BITISSET (P, i)
+                 && BITISSET (V, rule_table[i].lhs))
                 {
-                 if (ISTOKEN (t = *r) || BITISSET (N, t - ntokens))
-                   {
+                 for (r = &ritem[rule_table[i].rhs]; *r >= 0; r++)
+                   if (ISTOKEN (t = *r) || BITISSET (N, t - ntokens))
                       SETBIT (Vp, t);
-                   }
+                 SETBIT (Pp, i);
                 }
-             SETBIT (Pp, i);
             }
+         if (bits_equal (V, Vp, WORDSIZE (nsyms)))
+           break;
+         Vs = Vp;
+         Vp = V;
+         V = Vs;
         }
-      if (bits_equal (V, Vp, WORDSIZE (nsyms)))
-       {
-         break;
-       }
-      Vs = Vp;
-      Vp = V;
-      V = Vs;
      }
-end_iteration:
  
-  FREE (V);
+  XFREE (V);
    V = Vp;
  
    /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */
@@ -246,7 +243,7 @@ end_iteration:
    SETBIT (V, 1);               /* error token */
    SETBIT (V, 2);               /* some undefined token */
  
-  FREE (P);
+  XFREE (P);
    P = Pp;
  
    nuseful_productions = bits_size (P, WORDSIZE (nrules + 1));
@@ -260,39 +257,46 @@ end_iteration:
  
    /* A token that was used in %prec should not be warned about.  */
    for (i = 1; i < nrules; i++)
-    if (rprecsym[i] != 0)
-      SETBIT (V1, rprecsym[i]);
+    if (rule_table[i].precsym != 0)
+      SETBIT (V1, rule_table[i].precsym);
  }
  
  static void
  reduce_grammar_tables (void)
  {
-/* This is turned off because we would need to change the numbers
-   in the case statements in the actions file.  */
-#if 0
-  /* remove useless productions */
-  if (nuseless_productions > 0)
-    {
-      short np, pn, ni, pi;
+  /* This is turned off because we would need to change the numbers in
+     the case statements in the actions file.
  
-      np = 0;
-      ni = 0;
-      for (pn = 1; pn <= nrules; pn++)
-       {
+     We don't disable it via CPP so that it is still checked with the
+     rest of the code, to avoid its becoming completely obsolete.
+
+     FIXME: I think the comment above demonstrates this code must be
+     turned off for the *semantic* parser, not in the general case.  Try
+     to understand this better --akim.  */
+
+  if (0)
+    /* remove useless productions */
+    if (nuseless_productions > 0)
+      {
+       short np, pn, ni, pi;
+
+       np = 0;
+       ni = 0;
+       for (pn = 1; pn <= nrules; pn++)
           if (BITISSET (P, pn))
             {
               np++;
               if (pn != np)
                 {
-                 rlhs[np] = rlhs[pn];
-                 rline[np] = rline[pn];
-                 rprec[np] = rprec[pn];
-                 rassoc[np] = rassoc[pn];
-                 rrhs[np] = rrhs[pn];
-                 if (rrhs[np] != ni)
+                 rule_table[np].lhs   = rule_table[pn].lhs;
+                 rule_table[np].line  = rule_table[pn].line;
+                 rule_table[np].prec  = rule_table[pn].prec;
+                 rule_table[np].assoc = rule_table[pn].assoc;
+                 rule_table[np].rhs   = rule_table[pn].rhs;
+                 if (rule_table[np].rhs != ni)
                     {
-                     pi = rrhs[np];
-                     rrhs[np] = ni;
+                     pi = rule_table[np].rhs;
+                     rule_table[np].rhs = ni;
                       while (ritem[pi] >= 0)
                         ritem[ni++] = ritem[pi++];
                       ritem[ni++] = -np;
@@ -303,207 +307,220 @@ reduce_grammar_tables (void)
                   while (ritem[ni++] >= 0);
                 }
             }
-       }
-      ritem[ni] = 0;
-      nrules -= nuseless_productions;
-      nitems = ni;
  
-      /* Is it worth it to reduce the amount of memory for the
-         grammar? Probably not.  */
+       ritem[ni] = 0;
+       nrules -= nuseless_productions;
+       nitems = ni;
  
-    }
-#endif /* 0 */
-  /* Disable useless productions,
-     since they may contain useless nonterms
-     that would get mapped below to -1 and confuse everyone.  */
+       /* Is it worth it to reduce the amount of memory for the
+          grammar? Probably not.  */
+      }
+
+  /* Disable useless productions. */
    if (nuseless_productions > 0)
      {
        int pn;
-
        for (pn = 1; pn <= nrules; pn++)
-       {
-         if (!BITISSET (P, pn))
-           {
-             rlhs[pn] = -1;
-           }
-       }
+       rule_table[pn].useful = BITISSET (P, pn);
      }
+}
  
-  /* remove useless symbols */
-  if (nuseless_nonterminals > 0)
-    {
-
-      int i, n;
-/*      short  j; JF unused */
-      short *nontermmap;
-      rule r;
  
-      /* Create a map of nonterminal number to new nonterminal
-        number. -1 in the map means it was useless and is being
-        eliminated.  */
+/*------------------------------.
+| Remove useless nonterminals.  |
+`------------------------------*/
  
-      nontermmap = NEW2 (nvars, short) - ntokens;
-      for (i = ntokens; i < nsyms; i++)
-       nontermmap[i] = -1;
+static void
+nonterminals_reduce (void)
+{
+  int i, n;
+  rule r;
  
-      n = ntokens;
-      for (i = ntokens; i < nsyms; i++)
-       if (BITISSET (V, i))
-         nontermmap[i] = n++;
+  /* Map the nonterminals to their new index: useful first, useless
+     afterwards.  Kept for later report.  */
  
-      /* Shuffle elements of tables indexed by symbol number.  */
+  short *nontermmap = XCALLOC (short, nvars) - ntokens;
+  n = ntokens;
+  for (i = ntokens; i < nsyms; i++)
+    if (BITISSET (V, i))
+      nontermmap[i] = n++;
+  for (i = ntokens; i < nsyms; i++)
+    if (!BITISSET (V, i))
+      nontermmap[i] = n++;
+
+
+  /* Shuffle elements of tables indexed by symbol number.  */
+  {
+    short *sassoc_sorted = XMALLOC (short, nvars) - ntokens;
+    short *sprec_sorted  = XMALLOC (short, nvars) - ntokens;
+    char **tags_sorted   = XMALLOC (char *, nvars) - ntokens;
+
+    for (i = ntokens; i < nsyms; i++)
+      {
+       n = nontermmap[i];
+       sassoc_sorted[n] = sassoc[i];
+       sprec_sorted[n]  = sprec[i];
+       tags_sorted[n]   = tags[i];
+      }
+    for (i = ntokens; i < nsyms; i++)
+      {
+       sassoc[i] = sassoc_sorted[i];
+       sprec[i]  = sprec_sorted[i];
+       tags[i]   = tags_sorted[i];
+      }
+    free (sassoc_sorted + ntokens);
+    free (sprec_sorted + ntokens);
+    free (tags_sorted + ntokens);
+  }
+
+  /* Replace all symbol numbers in valid data structures.  */
  
-      for (i = ntokens; i < nsyms; i++)
-       {
-         n = nontermmap[i];
-         if (n >= 0)
-           {
-             sassoc[n] = sassoc[i];
-             sprec[n] = sprec[i];
-             tags[n] = tags[i];
-           }
-         else
-           {
-             free (tags[i]);
-           }
-       }
+  for (i = 1; i <= nrules; i++)
+    {
+      rule_table[i].lhs = nontermmap[rule_table[i].lhs];
+      if (ISVAR (rule_table[i].precsym))
+       /* Can this happen?  */
+       rule_table[i].precsym = nontermmap[rule_table[i].precsym];
+    }
  
-      /* Replace all symbol numbers in valid data structures.  */
+  for (r = ritem; *r; r++)
+    if (ISVAR (*r))
+      *r = nontermmap[*r];
  
-      for (i = 1; i <= nrules; i++)
-       {
-         /* Ignore the rules disabled above.  */
-         if (rlhs[i] >= 0)
-           rlhs[i] = nontermmap[rlhs[i]];
-         if (ISVAR (rprecsym[i]))
-           /* Can this happen?  */
-           rprecsym[i] = nontermmap[rprecsym[i]];
-       }
+  start_symbol = nontermmap[start_symbol];
  
-      for (r = ritem; *r; r++)
-       if (ISVAR (*r))
-         *r = nontermmap[*r];
+  nsyms -= nuseless_nonterminals;
+  nvars -= nuseless_nonterminals;
  
-      start_symbol = nontermmap[start_symbol];
+  free (nontermmap + ntokens);
+}
  
-      nsyms -= nuseless_nonterminals;
-      nvars -= nuseless_nonterminals;
  
-      free (&nontermmap[ntokens]);
-    }
-}
+/*------------------------------------------------------------------.
+| Output the detailed results of the reductions.  For FILE.output.  |
+`------------------------------------------------------------------*/
  
-static void
-print_results (void)
+void
+reduce_output (FILE *out)
  {
-  int i;
-/*  short j; JF unused */
-  rule r;
-  bool b;
-
    if (nuseless_nonterminals > 0)
      {
-      fprintf (foutput, _("Useless nonterminals:\n\n"));
-      for (i = ntokens; i < nsyms; i++)
-       if (!BITISSET (V, i))
-         fprintf (foutput, "   %s\n", tags[i]);
+      int i;
+      fprintf (out, "%s\n\n", _("Useless nonterminals:"));
+      for (i = 0; i < nuseless_nonterminals; ++i)
+       fprintf (out, "   %s\n", tags[nsyms + i]);
+      fputs ("\n\n", out);
      }
-  b = FALSE;
-  for (i = 0; i < ntokens; i++)
-    {
+
+  {
+    bool b = FALSE;
+    int i;
+    for (i = 0; i < ntokens; i++)
        if (!BITISSET (V, i) && !BITISSET (V1, i))
         {
           if (!b)
-           {
-             fprintf (foutput, _("\n\nTerminals which are not used:\n\n"));
-             b = TRUE;
-           }
-         fprintf (foutput, "   %s\n", tags[i]);
+           fprintf (out, "%s\n\n", _("Terminals which are not used:"));
+         b = TRUE;
+         fprintf (out, "   %s\n", tags[i]);
         }
-    }
+    if (b)
+      fputs ("\n\n", out);
+  }
  
    if (nuseless_productions > 0)
      {
-      fprintf (foutput, _("\n\nUseless rules:\n\n"));
+      int i;
+      fprintf (out, "%s\n\n", _("Useless rules:"));
        for (i = 1; i <= nrules; i++)
-       {
-         if (!BITISSET (P, i))
-           {
-             fprintf (foutput, "#%-4d  ", i);
-             fprintf (foutput, "%s :\t", tags[rlhs[i]]);
-             for (r = &ritem[rrhs[i]]; *r >= 0; r++)
-               {
-                 fprintf (foutput, " %s", tags[*r]);
-               }
-             fprintf (foutput, ";\n");
-           }
-       }
+       if (!rule_table[i].useful)
+         {
+           rule r;
+           fprintf (out, "#%-4d  ", i);
+           fprintf (out, "%s:", tags[rule_table[i].lhs]);
+           for (r = &ritem[rule_table[i].rhs]; *r >= 0; r++)
+             fprintf (out, " %s", tags[*r]);
+           fputs (";\n", out);
+         }
+      fputs ("\n\n", out);
      }
-  if (nuseless_nonterminals > 0 || nuseless_productions > 0 || b)
-    fprintf (foutput, "\n\n");
  }
  \f
-#if 0                          /* XXX currently unused.  */
  static void
-dump_grammar (void)
+dump_grammar (FILE *out)
  {
    int i;
    rule r;
  
-  fprintf (foutput,
+  fprintf (out, "REDUCED GRAMMAR\n\n");
+  fprintf (out,
            "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
            ntokens, nvars, nsyms, nrules, nitems);
-  fprintf (foutput, _("Variables\n---------\n\n"));
-  fprintf (foutput, _("Value  Sprec    Sassoc    Tag\n"));
+  fprintf (out, "Variables\n---------\n\n");
+  fprintf (out, "Value  Sprec  Sassoc  Tag\n");
    for (i = ntokens; i < nsyms; i++)
-    fprintf (foutput, "%5d  %5d  %5d  %s\n", i, sprec[i], sassoc[i], tags[i]);
-  fprintf (foutput, "\n\n");
-  fprintf (foutput, _("Rules\n-----\n\n"));
+    fprintf (out, "%5d  %5d   %5d  %s\n", i, sprec[i], sassoc[i], tags[i]);
+  fprintf (out, "\n\n");
+  fprintf (out, "Rules\n-----\n\n");
+  fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
    for (i = 1; i <= nrules; i++)
      {
-      fprintf (foutput, "%-5d(%5d%5d)%5d : (@%-5d)",
-              i, rprec[i], rassoc[i], rlhs[i], rrhs[i]);
-      for (r = &ritem[rrhs[i]]; *r > 0; r++)
-       fprintf (foutput, "%5d", *r);
-      fprintf (foutput, " [%d]\n", -(*r));
+      int rhs_count = 0;
+      /* Count the RHS symbols, to compute the last RHS index in ritem. */
+      for (r = &ritem[rule_table[i].rhs]; *r > 0; ++r)
+       ++rhs_count;
+      fprintf (out, "%3d (%2d, %2d, %2d, %2d-%2d)   %2d ->",
+              i,
+              rule_table[i].prec, rule_table[i].assoc, rule_table[i].useful,
+              rule_table[i].rhs, rule_table[i].rhs + rhs_count - 1,
+              rule_table[i].lhs);
+      /* Dump the RHS. */
+      for (r = &ritem[rule_table[i].rhs]; *r > 0; r++)
+       fprintf (out, "%3d", *r);
+      fprintf (out, "  [%d]\n", -(*r));
      }
-  fprintf (foutput, "\n\n");
-  fprintf (foutput, _("Rules interpreted\n-----------------\n\n"));
+  fprintf (out, "\n\n");
+  fprintf (out, "Rules interpreted\n-----------------\n\n");
    for (i = 1; i <= nrules; i++)
      {
-      fprintf (foutput, "%-5d  %s :", i, tags[rlhs[i]]);
-      for (r = &ritem[rrhs[i]]; *r > 0; r++)
-       fprintf (foutput, " %s", tags[*r]);
-      fprintf (foutput, "\n");
+      fprintf (out, "%-5d  %s :", i, tags[rule_table[i].lhs]);
+      for (r = &ritem[rule_table[i].rhs]; *r > 0; r++)
+       fprintf (out, " %s", tags[*r]);
+      fputc ('\n', out);
      }
-  fprintf (foutput, "\n\n");
+  fprintf (out, "\n\n");
  }
  
-#endif
  
  
+/*-------------------------------.
+| Report the results to STDERR.  |
+`-------------------------------*/
+
  static void
-print_notices (void)
+reduce_print (void)
  {
-  if (fixed_outfiles && nuseless_productions)
-    fprintf (stderr, _("%d rules never reduced\n"), nuseless_productions);
+  if (yacc_flag && nuseless_productions)
+    fprintf (stderr, ngettext ("%d rule never reduced\n",
+                              "%d rules never reduced\n",
+                              nuseless_productions),
+            nuseless_productions);
  
    fprintf (stderr, _("%s contains "), infile);
  
    if (nuseless_nonterminals > 0)
-    {
-      fprintf (stderr, _("%d useless nonterminal%s"),
-              nuseless_nonterminals,
-              (nuseless_nonterminals == 1 ? "" : "s"));
-    }
+    fprintf (stderr, ngettext ("%d useless nonterminal",
+                              "%d useless nonterminals",
+                              nuseless_nonterminals),
+            nuseless_nonterminals);
+
    if (nuseless_nonterminals > 0 && nuseless_productions > 0)
      fprintf (stderr, _(" and "));
  
    if (nuseless_productions > 0)
-    {
-      fprintf (stderr, _("%d useless rule%s"),
-              nuseless_productions, (nuseless_productions == 1 ? "" : "s"));
-    }
+    fprintf (stderr, ngettext ("%d useless rule",
+                              "%d useless rules",
+                              nuseless_productions),
+            nuseless_productions);
    fprintf (stderr, "\n");
    fflush (stderr);
  }
@@ -515,52 +532,49 @@ reduce_grammar (void)
  
    /* Allocate the global sets used to compute the reduced grammar */
  
-  N = NEW2 (WORDSIZE (nvars), unsigned);
-  P = NEW2 (WORDSIZE (nrules + 1), unsigned);
-  V = NEW2 (WORDSIZE (nsyms), unsigned);
-  V1 = NEW2 (WORDSIZE (nsyms), unsigned);
+  N = XCALLOC (unsigned, WORDSIZE (nvars));
+  P = XCALLOC (unsigned, WORDSIZE (nrules + 1));
+  V = XCALLOC (unsigned, WORDSIZE (nsyms));
+  V1 = XCALLOC (unsigned, WORDSIZE (nsyms));
  
    useless_nonterminals ();
    inaccessable_symbols ();
  
    reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
  
-  if (verboseflag)
-    print_results ();
-
-  if (reduced == FALSE)
-    goto done_reducing;
+  if (!reduced)
+    return;
  
-  print_notices ();
+  reduce_print ();
  
    if (!BITISSET (N, start_symbol - ntokens))
      fatal (_("Start symbol %s does not derive any sentence"),
            tags[start_symbol]);
  
    reduce_grammar_tables ();
-#if 0
-  if (verboseflag)
+  if (nuseless_nonterminals > 0)
+    nonterminals_reduce ();
+
+  if (trace_flag)
      {
-      fprintf (foutput, "REDUCED GRAMMAR\n\n");
-      dump_grammar ();
+      dump_grammar (stderr);
+
+      fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
+, and %d productions.\n",
+              infile, ntokens, nvars, nrules);
      }
-#endif
-  statisticsflag = FALSE;      /* someday getopts should handle this */
-  if (statisticsflag == TRUE)
-    fprintf (stderr, _("reduced %s defines %d terminal%s, %d nonterminal%s\
-, and %d production%s.\n"),
-            infile,
-            ntokens,
-            (ntokens == 1 ? "" : "s"),
-            nvars,
-            (nvars == 1 ? "" : "s"),
-            nrules,
-            (nrules == 1 ? "" : "s"));
-
-done_reducing:
-  /* Free the global sets used to compute the reduced grammar */
-
-  FREE (N);
-  FREE (V);
-  FREE (P);
+}
+
+
+/*-----------------------------------------------------------.
+| Free the global sets used to compute the reduced grammar.  |
+`-----------------------------------------------------------*/
+
+void
+reduce_free (void)
+{
+  XFREE (N);
+  XFREE (V);
+  XFREE (V1);
+  XFREE (P);
  }