* src/closure.c, src/print.c, src/reader.c, src/reduce.c:

[bison.git] / src / reduce.c
diff --git a/src/reduce.c b/src/reduce.c

index 626c195c0432c67d433443b616e763258cab1844..c5e9381d853fa87146cd64ba026cf19a3de9ad41 100644 (file)
--- a/src/reduce.c
+++ b/src/reduce.c
@@ -28,69 +28,34 @@
  #include "system.h"
  #include "getargs.h"
  #include "files.h"
+#include "symtab.h"
  #include "gram.h"
  #include "complain.h"
  #include "reduce.h"
  #include "reader.h"
  #include "getargs.h"
+#include "bitset.h"
  
-typedef unsigned *BSet;
  typedef short *rule;
  
  
  /* Set of all nonterminals which are not useless.  */
-static BSet N;
+static bitset N;
  
  /* Set of all rules which have no useless nonterminals in their RHS.  */
-static BSet P;
+static bitset P;
  
  /* Set of all accessible symbols.  */
-static BSet V;
+static bitset V;
  
  /* Set of symbols used to define rule precedence (so they are
     `useless', but no warning should be issued).  */
-static BSet V1;
+static bitset V1;
  
  static int nuseful_productions;
  static int nuseless_productions;
  static int nuseful_nonterminals;
-static int nuseless_nonterminals;
-\f
-static bool
-bits_equal (BSet L, BSet R, int n)
-{
-  int i;
-
-  for (i = n - 1; i >= 0; i--)
-    if (L[i] != R[i])
-      return FALSE;
-  return TRUE;
-}
-
-
-static int
-nbits (unsigned i)
-{
-  int count = 0;
-
-  while (i != 0)
-    {
-      i ^= (i & ((unsigned) (-(int) i)));
-      ++count;
-    }
-  return count;
-}
-
-
-static int
-bits_size (BSet S, int n)
-{
-  int i, count = 0;
-
-  for (i = n - 1; i >= 0; i--)
-    count += nbits (S[i]);
-  return count;
-}
+int nuseless_nonterminals;
  \f
  /*-------------------------------------------------------------------.
  | Another way to do this would be with a set for each production and |
@@ -99,7 +64,7 @@ bits_size (BSet S, int n)
  `-------------------------------------------------------------------*/
  
  static bool
-useful_production (int i, BSet N0)
+useful_production (int i, bitset N0)
  {
    rule r;
    short n;
@@ -107,10 +72,9 @@ useful_production (int i, BSet N0)
    /* A production is useful if all of the nonterminals in its appear
       in the set of useful nonterminals.  */
  
-  for (r = &ritem[rule_table[i].rhs]; *r > 0; r++)
-    if (ISVAR (n = *r))
-      if (!BITISSET (N0, n - ntokens))
-       return FALSE;
+  for (r = rules[i].rhs; *r >= 0; r++)
+    if (ISVAR (n = *r) && !bitset_test (N0, n - ntokens))
+      return FALSE;
    return TRUE;
  }
  
@@ -122,13 +86,14 @@ useful_production (int i, BSet N0)
  static void
  useless_nonterminals (void)
  {
-  BSet Np, Ns;
+  bitset Np, Ns;
    int i;
  
    /* N is set as built.  Np is set being built this iteration. P is
       set of all productions which have a RHS all in N.  */
  
-  Np = XCALLOC (unsigned, WORDSIZE (nvars));
+  Np = bitset_create (nvars, BITSET_FIXED);
+
  
    /* The set being computed is a set of nonterminals which can derive
       the empty string or strings consisting of all terminals. At each
@@ -148,26 +113,21 @@ useless_nonterminals (void)
  
    while (1)
      {
-      for (i = WORDSIZE (nvars) - 1; i >= 0; i--)
-       Np[i] = N[i];
-      for (i = 1; i <= nrules; i++)
-       {
-         if (!BITISSET (P, i))
-           {
-             if (useful_production (i, N))
-               {
-                 SETBIT (Np, rule_table[i].lhs - ntokens);
-                 SETBIT (P, i);
-               }
-           }
-       }
-      if (bits_equal (N, Np, WORDSIZE (nvars)))
+      bitset_copy (Np, N);
+      for (i = 1; i < nrules + 1; i++)
+       if (!bitset_test (P, i)
+           && useful_production (i, N))
+         {
+           bitset_set (Np, rules[i].lhs - ntokens);
+           bitset_set (P, i);
+         }
+      if (bitset_equal_p (N, Np))
         break;
        Ns = Np;
        Np = N;
        N = Ns;
      }
-  XFREE (N);
+  bitset_free (N);
    N = Np;
  }
  
@@ -175,7 +135,7 @@ useless_nonterminals (void)
  static void
  inaccessable_symbols (void)
  {
-  BSet Vp, Vs, Pp;
+  bitset Vp, Vs, Pp;
    int i;
    short t;
    rule r;
@@ -203,31 +163,30 @@ inaccessable_symbols (void)
       terminals are printed (if running in verbose mode) so that the
       user can know.  */
  
-  Vp = XCALLOC (unsigned, WORDSIZE (nsyms));
-  Pp = XCALLOC (unsigned, WORDSIZE (nrules + 1));
+  Vp = bitset_create (nsyms, BITSET_FIXED);
+  Pp = bitset_create (nrules + 1, BITSET_FIXED);
  
    /* If the start symbol isn't useful, then nothing will be useful. */
-  if (BITISSET (N, start_symbol - ntokens))
+  if (bitset_test (N, start_symbol - ntokens))
      {
-      SETBIT (V, start_symbol);
+      bitset_set (V, start_symbol);
  
        while (1)
         {
-         for (i = WORDSIZE (nsyms) - 1; i >= 0; i--)
-           Vp[i] = V[i];
-         for (i = 1; i <= nrules; i++)
+         bitset_copy (Vp, V);
+         for (i = 1; i < nrules + 1; i++)
             {
-             if (!BITISSET (Pp, i)
-                 && BITISSET (P, i)
-                 && BITISSET (V, rule_table[i].lhs))
+             if (!bitset_test (Pp, i)
+                 && bitset_test (P, i)
+                 && bitset_test (V, rules[i].lhs))
                 {
-                 for (r = &ritem[rule_table[i].rhs]; *r >= 0; r++)
-                   if (ISTOKEN (t = *r) || BITISSET (N, t - ntokens))
-                     SETBIT (Vp, t);
-                 SETBIT (Pp, i);
+                 for (r = rules[i].rhs; *r >= 0; r++)
+                   if (ISTOKEN (t = *r) || bitset_test (N, t - ntokens))
+                     bitset_set (Vp, t);
+                 bitset_set (Pp, i);
                 }
             }
-         if (bits_equal (V, Vp, WORDSIZE (nsyms)))
+         if (bitset_equal_p (V, Vp))
             break;
           Vs = Vp;
           Vp = V;
@@ -235,61 +194,68 @@ inaccessable_symbols (void)
         }
      }
  
-  XFREE (V);
+  bitset_free (V);
    V = Vp;
  
    /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */
-  SETBIT (V, 0);               /* end-of-input token */
-  SETBIT (V, 1);               /* error token */
-  SETBIT (V, 2);               /* some undefined token */
+  bitset_set (V, 0);           /* end-of-input token */
+  bitset_set (V, 1);           /* error token */
+  bitset_set (V, 2);           /* some undefined token */
  
-  XFREE (P);
+  bitset_free (P);
    P = Pp;
  
-  nuseful_productions = bits_size (P, WORDSIZE (nrules + 1));
+  nuseful_productions = bitset_count (P);
    nuseless_productions = nrules - nuseful_productions;
  
    nuseful_nonterminals = 0;
    for (i = ntokens; i < nsyms; i++)
-    if (BITISSET (V, i))
+    if (bitset_test (V, i))
        nuseful_nonterminals++;
    nuseless_nonterminals = nvars - nuseful_nonterminals;
  
    /* A token that was used in %prec should not be warned about.  */
    for (i = 1; i < nrules; i++)
-    if (rule_table[i].precsym != 0)
-      SETBIT (V1, rule_table[i].precsym);
+    if (rules[i].precsym != 0)
+      bitset_set (V1, rules[i].precsym);
  }
  
  static void
  reduce_grammar_tables (void)
  {
-/* This is turned off because we would need to change the numbers
-   in the case statements in the actions file.  */
-#if 0
-  /* remove useless productions */
-  if (nuseless_productions > 0)
-    {
-      short np, pn, ni, pi;
+  /* This is turned off because we would need to change the numbers in
+     the case statements in the actions file.
  
-      np = 0;
-      ni = 0;
-      for (pn = 1; pn <= nrules; pn++)
-       {
-         if (BITISSET (P, pn))
+     We don't disable it via CPP so that it is still checked with the
+     rest of the code, to avoid its becoming completely obsolete.
+
+     FIXME: I think the comment above demonstrates this code must be
+     turned off for the *semantic* parser, not in the general case.  Try
+     to understand this better --akim.  */
+
+  if (0)
+    /* remove useless productions */
+    if (nuseless_productions > 0)
+      {
+       short np, pn, ni, pi;
+
+       np = 0;
+       ni = 0;
+       for (pn = 1; pn < nrules + 1; pn++)
+         if (bitset_test (P, pn))
             {
               np++;
               if (pn != np)
                 {
-                 rule_table[np].lhs = rule_table[pn].lhs;
-                 rline[np] = rline[pn];
-                 rule_table[np].prec = rule_table[pn].prec;
-                 rule_table[np].assoc = rule_table[pn].assoc;
-                 rule_table[np].rhs = rule_table[pn].rhs;
-                 if (rule_table[np].rhs != ni)
+                 rules[np].lhs   = rules[pn].lhs;
+                 rules[np].line  = rules[pn].line;
+                 rules[np].prec  = rules[pn].prec;
+                 rules[np].assoc = rules[pn].assoc;
+                 rules[np].rhs   = rules[pn].rhs;
+                 if (rules[np].rhs - ritem != ni)
                     {
-                     pi = rule_table[np].rhs;
-                     rule_table[np].rhs = ni;
+                     pi = rules[np].rhs - ritem;
+                     rules[np].rhs = ritem + ni;
                       while (ritem[pi] >= 0)
                         ritem[ni++] = ritem[pi++];
                       ritem[ni++] = -np;
@@ -297,33 +263,26 @@ reduce_grammar_tables (void)
                 }
               else
                 {
-                 while (ritem[ni++] >= 0);
+                 while (ritem[ni++] >= 0)
+                   /* Nothing. */;
                 }
             }
-       }
-      ritem[ni] = 0;
-      nrules -= nuseless_productions;
-      nitems = ni;
  
-      /* Is it worth it to reduce the amount of memory for the
-         grammar? Probably not.  */
+       ritem[ni] = 0;
+       nrules -= nuseless_productions;
+       nitems = ni;
+       nritems = ni;
  
-    }
-#endif /* 0 */
-  /* Disable useless productions,
-     since they may contain useless nonterms
-     that would get mapped below to -1 and confuse everyone.  */
+       /* Is it worth it to reduce the amount of memory for the
+          grammar? Probably not.  */
+      }
+
+  /* Disable useless productions. */
    if (nuseless_productions > 0)
      {
        int pn;
-
-      for (pn = 1; pn <= nrules; pn++)
-       {
-         if (!BITISSET (P, pn))
-           {
-             rule_table[pn].lhs = -1;
-           }
-       }
+      for (pn = 1; pn < nrules + 1; pn++)
+       rules[pn].useful = bitset_test (P, pn);
      }
  }
  
@@ -336,67 +295,51 @@ static void
  nonterminals_reduce (void)
  {
    int i, n;
-  rule r;
  
-  /* Create a map of nonterminal number to new nonterminal number. -1
-     in the map means it was useless and is being eliminated.  */
+  /* Map the nonterminals to their new index: useful first, useless
+     afterwards.  The useless ones are kept for the later report.  */
  
    short *nontermmap = XCALLOC (short, nvars) - ntokens;
    n = ntokens;
    for (i = ntokens; i < nsyms; i++)
-    if (BITISSET (V, i))
+    if (bitset_test (V, i))
        nontermmap[i] = n++;
    for (i = ntokens; i < nsyms; i++)
-    if (!BITISSET (V, i))
+    if (!bitset_test (V, i))
        nontermmap[i] = n++;
  
  
    /* Shuffle elements of tables indexed by symbol number.  */
    {
-    short *sassoc_sorted = XMALLOC (short, nvars) - ntokens;
-    short *sprec_sorted  = XMALLOC (short, nvars) - ntokens;
-    char **tags_sorted   = XMALLOC (char *, nvars) - ntokens;
+    bucket **symbols_sorted = XMALLOC (bucket *, nvars) - ntokens;
  
      for (i = ntokens; i < nsyms; i++)
-      {
-       n = nontermmap[i];
-       sassoc_sorted[n] = sassoc[i];
-       sprec_sorted[n]  = sprec[i];
-       tags_sorted[n]   = tags[i];
-      }
+      symbols_sorted[nontermmap[i]] = symbols[i];
      for (i = ntokens; i < nsyms; i++)
-      {
-       sassoc[i] = sassoc_sorted[i];
-       sprec[i]  = sprec_sorted[i];
-       tags[i]   = tags_sorted[i];
-      }
-    free (sassoc_sorted + ntokens);
-    free (sprec_sorted + ntokens);
-    free (tags_sorted + ntokens);
+      symbols[i] = symbols_sorted[i];
+    free (symbols_sorted + ntokens);
    }
  
    /* Replace all symbol numbers in valid data structures.  */
  
-  for (i = 1; i <= nrules; i++)
+  for (i = 1; i < nrules + 1; i++)
      {
-      /* Ignore the rules disabled above.  */
-      if (rule_table[i].lhs >= 0)
-       rule_table[i].lhs = nontermmap[rule_table[i].lhs];
-      if (ISVAR (rule_table[i].precsym))
+      rules[i].lhs = nontermmap[rules[i].lhs];
+      if (ISVAR (rules[i].precsym))
         /* Can this happen?  */
-       rule_table[i].precsym = nontermmap[rule_table[i].precsym];
+       rules[i].precsym = nontermmap[rules[i].precsym];
      }
  
-  for (r = ritem; *r; r++)
-    if (ISVAR (*r))
-      *r = nontermmap[*r];
+  for (i = 0; i < nritems; ++i)
+    if (ISVAR (ritem[i]))
+      ritem[i] = nontermmap[ritem[i]];
  
    start_symbol = nontermmap[start_symbol];
  
    nsyms -= nuseless_nonterminals;
    nvars -= nuseless_nonterminals;
  
-  free (&nontermmap[ntokens]);
+  free (nontermmap + ntokens);
  }
  
  
@@ -412,7 +355,7 @@ reduce_output (FILE *out)
        int i;
        fprintf (out, "%s\n\n", _("Useless nonterminals:"));
        for (i = 0; i < nuseless_nonterminals; ++i)
-       fprintf (out, "   %s\n", tags[nsyms + i]);
+       fprintf (out, "   %s\n", symbols[nsyms + i]->tag);
        fputs ("\n\n", out);
      }
  
@@ -420,12 +363,12 @@ reduce_output (FILE *out)
      bool b = FALSE;
      int i;
      for (i = 0; i < ntokens; i++)
-      if (!BITISSET (V, i) && !BITISSET (V1, i))
+      if (!bitset_test (V, i) && !bitset_test (V1, i))
         {
           if (!b)
             fprintf (out, "%s\n\n", _("Terminals which are not used:"));
           b = TRUE;
-         fprintf (out, "   %s\n", tags[i]);
+         fprintf (out, "   %s\n", symbols[i]->tag);
         }
      if (b)
        fputs ("\n\n", out);
@@ -435,14 +378,14 @@ reduce_output (FILE *out)
      {
        int i;
        fprintf (out, "%s\n\n", _("Useless rules:"));
-      for (i = 1; i <= nrules; i++)
-       if (!BITISSET (P, i))
+      for (i = 1; i < nrules + 1; i++)
+       if (!rules[i].useful)
           {
             rule r;
-           fprintf (out, "#%-4d  ", i);
-           fprintf (out, "%s :\t", tags[rule_table[i].lhs]);
-           for (r = &ritem[rule_table[i].rhs]; *r >= 0; r++)
-             fprintf (out, " %s", tags[*r]);
+           fprintf (out, "#%-4d  ", i - 1);
+           fprintf (out, "%s:", symbols[rules[i].lhs]->tag);
+           for (r = rules[i].rhs; *r >= 0; r++)
+             fprintf (out, " %s", symbols[*r]->tag);
             fputs (";\n", out);
           }
        fputs ("\n\n", out);
@@ -462,29 +405,35 @@ dump_grammar (FILE *out)
    fprintf (out, "Variables\n---------\n\n");
    fprintf (out, "Value  Sprec  Sassoc  Tag\n");
    for (i = ntokens; i < nsyms; i++)
-    fprintf (out, "%5d  %5d   %5d  %s\n", i, sprec[i], sassoc[i], tags[i]);
+    fprintf (out, "%5d  %5d   %5d  %s\n",
+            i,
+            symbols[i]->prec, symbols[i]->assoc, symbols[i]->tag);
    fprintf (out, "\n\n");
    fprintf (out, "Rules\n-----\n\n");
-  fprintf (out, "Num (Prec, Assoc) Lhs : (@Rhs) Ritems [Num?]\n");
-  for (i = 1; i <= nrules; i++)
+  fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
+  for (i = 1; i < nrules + 1; i++)
      {
-      fprintf (out, "%-5d(%5d%5d)%5d : (@%-5d)",
-              i,
-              rule_table[i].prec,
-              rule_table[i].assoc,
-              rule_table[i].lhs,
-              rule_table[i].rhs);
-      for (r = &ritem[rule_table[i].rhs]; *r > 0; r++)
-       fprintf (out, "%5d", *r);
-      fprintf (out, " [%d]\n", -(*r));
+      int rhs_count = 0;
+      /* Find the last RHS index in ritems. */
+      for (r = rules[i].rhs; *r >= 0; ++r)
+       ++rhs_count;
+      fprintf (out, "%3d (%2d, %2d, %2d, %2d-%2d)   %2d ->",
+              i - 1,
+              rules[i].prec, rules[i].assoc, rules[i].useful,
+              rules[i].rhs - ritem, rules[i].rhs - ritem + rhs_count - 1,
+              rules[i].lhs);
+      /* Dump the RHS. */
+      for (r = rules[i].rhs; *r >= 0; r++)
+       fprintf (out, "%3d", *r);
+      fprintf (out, "  [%d]\n", -(*r) - 1);
      }
    fprintf (out, "\n\n");
    fprintf (out, "Rules interpreted\n-----------------\n\n");
-  for (i = 1; i <= nrules; i++)
+  for (i = 1; i < nrules + 1; i++)
      {
-      fprintf (out, "%-5d  %s :", i, tags[rule_table[i].lhs]);
-      for (r = &ritem[rule_table[i].rhs]; *r > 0; r++)
-       fprintf (out, " %s", tags[*r]);
+      fprintf (out, "%-5d  %s :", i, symbols[rules[i].lhs]->tag);
+      for (r = rules[i].rhs; *r >= 0; r++)
+       fprintf (out, " %s", symbols[*r]->tag);
        fputc ('\n', out);
      }
    fprintf (out, "\n\n");
@@ -532,24 +481,23 @@ reduce_grammar (void)
  
    /* Allocate the global sets used to compute the reduced grammar */
  
-  N = XCALLOC (unsigned, WORDSIZE (nvars));
-  P = XCALLOC (unsigned, WORDSIZE (nrules + 1));
-  V = XCALLOC (unsigned, WORDSIZE (nsyms));
-  V1 = XCALLOC (unsigned, WORDSIZE (nsyms));
+  N = bitset_create (nvars, BITSET_FIXED);
+  P =  bitset_create (nrules + 1, BITSET_FIXED);
+  V = bitset_create (nsyms, BITSET_FIXED);
+  V1 = bitset_create (nsyms, BITSET_FIXED);
  
    useless_nonterminals ();
    inaccessable_symbols ();
  
    reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
-
    if (!reduced)
      return;
  
    reduce_print ();
  
-  if (!BITISSET (N, start_symbol - ntokens))
+  if (!bitset_test (N, start_symbol - ntokens))
      fatal (_("Start symbol %s does not derive any sentence"),
-          tags[start_symbol]);
+          symbols[start_symbol]->tag);
  
    reduce_grammar_tables ();
    if (nuseless_nonterminals > 0)
@@ -573,8 +521,8 @@ reduce_grammar (void)
  void
  reduce_free (void)
  {
-  XFREE (N);
-  XFREE (V);
-  XFREE (V1);
-  XFREE (P);
+  bitset_free (N);
+  bitset_free (V);
+  bitset_free (V1);
+  bitset_free (P);
  }