[bison.git] / src / ielr.c

/* IELR main implementation.

   Copyright (C) 2009 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include "system.h"

#include "ielr.h"

#include <bitset.h>
#include <timevar.h>

#include "AnnotationList.h"
#include "derives.h"
#include "getargs.h"
#include "lalr.h"
#include "muscle-tab.h"
#include "nullable.h"
#include "relation.h"
#include "state.h"
#include "symtab.h"

/** Records the value of the \%define variable "lr.type".  */
typedef enum { LR_TYPE__LALR, LR_TYPE__IELR, LR_TYPE__CANONICAL_LR } LrType;

/**
 * \post:
 *   - \c result = a new \c bitset of size \c ::nritems such that any bit \c i
 *     is set iff <tt>ritem[i]</tt> is a nonterminal after which all ritems in
 *     the same RHS are nullable nonterminals.  In other words, the follows of
 *     a goto on <tt>ritem[i]</tt> include the lookahead set of the item.
 */
static bitset
ielr_compute_ritem_sees_lookahead_set (void)
{
  bitset result = bitset_create (nritems, BITSET_FIXED);
  unsigned int i = nritems-1;
  while (i>0)
    {
      --i;
      while (!item_number_is_rule_number (ritem[i])
             && ISVAR (ritem[i])
             && nullable [item_number_as_symbol_number (ritem[i]) - ntokens])
        bitset_set (result, i--);
      if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i]))
        bitset_set (result, i--);
      while (!item_number_is_rule_number (ritem[i]) && i>0)
        --i;
    }
  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "ritem_sees_lookahead_set:\n");
      debug_bitset (result);
      fprintf (stderr, "\n");
    }
  return result;
}

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 * \post:
 *   - Each of \c *edgesp and \c *edge_countsp is a new array of size
 *     \c ::ngotos.
 *   - <tt>(*edgesp)[i]</tt> points to a \c goto_number array of size
 *     <tt>(*edge_countsp)[i]+1</tt>.
 *   - In such a \c goto_number array, the last element is \c ::END_NODE.
 *   - All remaining elements are the indices of the gotos to which there is an
 *     internal follow edge from goto \c i.
 *   - There is an internal follow edge from goto \c i to goto \c j iff both:
 *     - The from states of gotos \c i and \c j are the same.
 *     - The transition nonterminal for goto \c i appears as the first RHS
 *       symbol of at least one production for which both:
 *       - The LHS is the transition symbol of goto \c j.
 *       - All other RHS symbols are nullable nonterminals.
 *     - In other words, the follows of goto \c i include the follows of
 *       goto \c j and it's an internal edge because the from states are the
 *       same.
 */
static void
ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set,
                                    goto_number ***edgesp, int **edge_countsp)
{
  *edgesp = xnmalloc (ngotos, sizeof **edgesp);
  *edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp);
  {
    bitset sources = bitset_create (ngotos, BITSET_FIXED);
    goto_number i;
    for (i = 0; i < ngotos; ++i)
      (*edge_countsp)[i] = 0;
    for (i = 0; i < ngotos; ++i)
      {
        int nsources = 0;
        {
          rule **rulep;
          for (rulep = derives[states[to_state[i]]->accessing_symbol
                               - ntokens];
               *rulep;
               ++rulep)
            {
              /* If there is at least one RHS symbol, if the first RHS symbol
                 is a nonterminal, and if all remaining RHS symbols (if any)
                 are nullable nonterminals, create an edge from the LHS
                 symbol's goto to the first RHS symbol's goto such that the RHS
                 symbol's goto will be the source of the edge after the
                 eventual relation_transpose below.

                 Unlike in ielr_compute_always_follows, I use a bitset for
                 edges rather than an array because it is possible that
                 multiple RHS's with the same first symbol could fit and thus
                 that we could end up with redundant edges.  With the
                 possibility of redundant edges, it's hard to know ahead of
                 time how large to make such an array.  Another possible
                 redundancy is that source and destination might be the same
                 goto.  Eliminating all these possible redundancies now might
                 possibly help performance a little.  I have not proven any of
                 this, but I'm guessing the bitset shouldn't entail much of a
                 performance penalty, if any.  */
              if (bitset_test (ritem_sees_lookahead_set,
                               (*rulep)->rhs - ritem))
                {
                  goto_number source =
                    map_goto (from_state[i],
                              item_number_as_symbol_number (*(*rulep)->rhs));
                  if (i != source && !bitset_test (sources, source))
                    {
                      bitset_set (sources, source);
                      ++nsources;
                      ++(*edge_countsp)[source];
                    }
                }
            }
        }
        if (nsources == 0)
          (*edgesp)[i] = NULL;
        else
          {
            (*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]);
            {
              bitset_iterator biter_source;
              bitset_bindex source;
              int j = 0;
              BITSET_FOR_EACH (biter_source, sources, source, 0)
                (*edgesp)[i][j++] = source;
            }
            (*edgesp)[i][nsources] = END_NODE;
          }
        bitset_zero (sources);
      }
    bitset_free (sources);
  }

  relation_transpose (edgesp, ngotos);

  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "internal_follow_edges:\n");
      relation_print (*edgesp, ngotos, stderr);
    }
}

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 *   - \c internal_follow_edges was computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post:
 *   - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows
 *     is \c ngotos and the number of columns is maximum number of kernel items
 *     in any state.
 *   - <tt>(*follow_kernel_itemsp)[i][j]</tt> is set iff the follows of goto
 *     \c i include the lookahead set of item \c j in the from state of goto
 *     \c i.
 *   - Thus, <tt>(*follow_kernel_itemsp)[i][j]</tt> is always unset if there is
 *     no item \c j in the from state of goto \c i.
 */
static void
ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set,
                                  goto_number **internal_follow_edges,
                                  bitsetv *follow_kernel_itemsp)
{
  {
    size_t max_nitems = 0;
    state_number i;
    for (i = 0; i < nstates; ++i)
      if (states[i]->nitems > max_nitems)
        max_nitems = states[i]->nitems;
    *follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED);
  }
  {
    goto_number i;
    for (i = 0; i < ngotos; ++i)
      {
        size_t nitems = states[from_state[i]]->nitems;
        item_number *items = states[from_state[i]]->items;
        size_t j;
        for (j = 0; j < nitems; ++j)
          /* If this item has this goto and if all subsequent symbols in this
             RHS (if any) are nullable nonterminals, then record this item as
             one whose lookahead set is included in this goto's follows.  */
          if (item_number_is_symbol_number (ritem[items[j]])
              && item_number_as_symbol_number (ritem[items[j]])
                == states[to_state[i]]->accessing_symbol
              && bitset_test (ritem_sees_lookahead_set, items[j]))
            bitset_set ((*follow_kernel_itemsp)[i], j);
      }
  }
  relation_digraph (internal_follow_edges, ngotos, follow_kernel_itemsp);

  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "follow_kernel_items:\n");
      debug_bitsetv (*follow_kernel_itemsp);
    }
}

/**
 * \pre
 *   - \c *edgesp and \c edge_counts were computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post
 *   - \c *always_followsp is a new \c bitsetv with \c ngotos rows and
 *     \c ntokens columns.
 *   - <tt>(*always_followsp)[i][j]</tt> is set iff token \c j is an always
 *     follow (that is, it's computed by internal and successor edges) of goto
 *     \c i.
 *   - Rows of \c *edgesp have been realloc'ed and extended to include
 *     successor follow edges.  \c edge_counts has not been updated.
 */
static void
ielr_compute_always_follows (goto_number ***edgesp,
                             int const edge_counts[],
                             bitsetv *always_followsp)
{
  *always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
  {
    goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array);
    goto_number i;
    for (i = 0; i < ngotos; ++i)
      {
        goto_number nedges = edge_counts[i];
        {
          int j;
          transitions *trans = states[to_state[i]]->transitions;
          FOR_EACH_SHIFT (trans, j)
            bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j));
          for (; j < trans->num; ++j)
            {
              symbol_number sym = TRANSITION_SYMBOL (trans, j);
              if (nullable[sym - ntokens])
                edge_array[nedges++] = map_goto (to_state[i], sym);
            }
        }
        if (nedges - edge_counts[i])
          {
            (*edgesp)[i] =
              xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]);
            memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i],
                    (nedges - edge_counts[i]) * sizeof *(*edgesp)[i]);
            (*edgesp)[i][nedges] = END_NODE;
          }
      }
    free (edge_array);
  }
  relation_digraph (*edgesp, ngotos, always_followsp);

  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "always follow edges:\n");
      relation_print (*edgesp, ngotos, stderr);
      fprintf (stderr, "always_follows:\n");
      debug_bitsetv (*always_followsp);
    }
}

/**
 * \post
 *   - \c result is a new array of size \c ::nstates.
 *   - <tt>result[i]</tt> is an array of pointers to the predecessor
 *     <tt>state</tt>'s of state \c i.
 *   - The last element of such an array is \c NULL.
 */
static state ***
ielr_compute_predecessors (void)
{
  state_number i;
  int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts);
  state ***result = xnmalloc (nstates, sizeof *result);
  for (i = 0; i < nstates; ++i)
    predecessor_counts[i] = 0;
  for (i = 0; i < nstates; ++i)
    {
      int j;
      for (j = 0; j < states[i]->transitions->num; ++j)
        ++predecessor_counts[states[i]->transitions->states[j]->number];
    }
  for (i = 0; i < nstates; ++i)
    {
      result[i] = xnmalloc (predecessor_counts[i]+1, sizeof *result[i]);
      result[i][predecessor_counts[i]] = NULL;
      predecessor_counts[i] = 0;
    }
  for (i = 0; i < nstates; ++i)
    {
      int j;
      for (j = 0; j < states[i]->transitions->num; ++j)
        {
          state_number k = states[i]->transitions->states[j]->number;
          result[k][predecessor_counts[k]++] = states[i];
        }
    }
  free (predecessor_counts);
  return result;
}

/**
 * \post
 *   - \c *follow_kernel_itemsp and \c *always_followsp were computed by
 *     \c ielr_compute_follow_kernel_items and
 *     \c ielr_compute_always_follows.
 *   - Iff <tt>predecessorsp != NULL</tt>, then \c *predecessorsp was computed
 *     by \c ielr_compute_predecessors.
 */
static void
ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp,
                               bitsetv *always_followsp,
                               state ****predecessorsp)
{
  goto_number **edges;
  int *edge_counts;
  {
    bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set ();
    ielr_compute_internal_follow_edges (ritem_sees_lookahead_set,
                                        &edges, &edge_counts);
    ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges,
                                      follow_kernel_itemsp);
    bitset_free (ritem_sees_lookahead_set);
  }
  ielr_compute_always_follows (&edges, edge_counts, always_followsp);
  {
    int i;
    for (i = 0; i < ngotos; ++i)
      free (edges[i]);
  }
  free (edges);
  free (edge_counts);
  if (predecessorsp)
    *predecessorsp = ielr_compute_predecessors ();
}

/**
 * \note
 *   - FIXME: It might be an interesting experiment to compare the space and
 *     time efficiency of computing \c item_lookahead_sets either:
 *     - Fully up front.
 *     - Just-in-time, as implemented below.
 *     - Not at all.  That is, just let annotations continue even when
 *       unnecessary.
 */
bool
ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item,
                         symbol_number lookahead, state ***predecessors,
                         bitset **item_lookahead_sets)
{
  if (!item_lookahead_sets[s->number])
    {
      size_t i;
      item_lookahead_sets[s->number] =
        xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]);
      for (i = 0; i < s->nitems; ++i)
        item_lookahead_sets[s->number][i] = NULL;
    }
  if (!item_lookahead_sets[s->number][item])
    {
      item_lookahead_sets[s->number][item] =
        bitset_create (ntokens, BITSET_FIXED);
      /* If this kernel item is the beginning of a RHS, it must be the kernel
         item in the start state, and so its LHS has no follows and no goto to
         check.  If, instead, this kernel item is the successor of the start
         state's kernel item, there are still no follows and no goto.  This
         situation is fortunate because we want to avoid the - 2 below in both
         cases.

         Actually, IELR(1) should never invoke this function for either of
         those cases because (1) follow_kernel_items will never reference a
         kernel item for this RHS because the end token blocks sight of the
         lookahead set from the RHS's only nonterminal, and (2) no reduction
         has a lookback dependency on this lookahead set.  Nevertheless, I
         didn't change this test to an aver just in case the usage of this
         function evolves to need those two cases.  In both cases, the current
         implementation returns the right result.  */
      if (s->items[item] > 1)
        {
          /* If the LHS symbol of this item isn't known (because this is a
             top-level invocation), go get it.  */
          if (!lhs)
            {
              unsigned int i;
              for (i = s->items[item];
                   !item_number_is_rule_number (ritem[i]);
                   ++i)
                ;
              lhs = rules[item_number_as_rule_number (ritem[i])].lhs->number;
            }
          /* If this kernel item is next to the beginning of the RHS, then
             check all predecessors' goto follows for the LHS.  */
          if (item_number_is_rule_number (ritem[s->items[item] - 2]))
            {
              state **predecessor;
              aver (lhs != accept->number);
              for (predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                bitset_or (item_lookahead_sets[s->number][item],
                           item_lookahead_sets[s->number][item],
                           goto_follows[map_goto ((*predecessor)->number,
                                                  lhs)]);
            }
          /* If this kernel item is later in the RHS, then check all
             predecessor items' lookahead sets.  */
          else
            {
              state **predecessor;
              for (predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                {
                  size_t predecessor_item;
                  for (predecessor_item = 0;
                       predecessor_item < (*predecessor)->nitems;
                       ++predecessor_item)
                    if ((*predecessor)->items[predecessor_item]
                        == s->items[item] - 1)
                      break;
                  aver (predecessor_item != (*predecessor)->nitems);
                  ielr_item_has_lookahead (*predecessor, lhs,
                                           predecessor_item, 0 /*irrelevant*/,
                                           predecessors, item_lookahead_sets);
                  bitset_or (item_lookahead_sets[s->number][item],
                             item_lookahead_sets[s->number][item],
                             item_lookahead_sets[(*predecessor)->number]
                                                [predecessor_item]);
                }
            }
        }
    }
  return bitset_test (item_lookahead_sets[s->number][item], lookahead);
}

/**
 * \pre
 *   - \c follow_kernel_items, \c always_follows, and \c predecessors
 *     were computed by \c ielr_compute_auxiliary_tables.
 * \post
 *   - Each of <tt>*inadequacy_listsp</tt> and <tt>*annotation_listsp</tt>
 *     points to a new array of size \c ::nstates.
 *   - For <tt>0 <= i < ::nstates</tt>:
 *     - <tt>(*inadequacy_listsp)[i]</tt> contains the \c InadequacyList head
 *       node for <tt>states[i]</tt>.
 *     - <tt>(*annotation_listsp)[i]</tt> contains the \c AnnotationList head
 *       node for <tt>states[i]</tt>.
 *   - <tt>*max_annotationsp</tt> is the maximum number of annotations per
 *     state.
 */
static void
ielr_compute_annotation_lists (bitsetv follow_kernel_items,
                               bitsetv always_follows, state ***predecessors,
                               AnnotationIndex *max_annotationsp,
                               InadequacyList ***inadequacy_listsp,
                               AnnotationList ***annotation_listsp,
                               struct obstack *annotations_obstackp)
{
  bitset **item_lookahead_sets =
    xnmalloc (nstates, sizeof *item_lookahead_sets);
  AnnotationIndex *annotation_counts =
    xnmalloc (nstates, sizeof *annotation_counts);
  ContributionIndex max_contributions = 0;
  unsigned int total_annotations = 0;
  state_number i;

  *inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp);
  *annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp);
  for (i = 0; i < nstates; ++i)
    {
      item_lookahead_sets[i] = NULL;
      (*inadequacy_listsp)[i] = NULL;
      (*annotation_listsp)[i] = NULL;
      annotation_counts[i] = 0;
    }
  for (i = 0; i < nstates; ++i)
    AnnotationList__compute_from_inadequacies (states[i], follow_kernel_items,
                                               always_follows, predecessors,
                                               item_lookahead_sets,
                                               *inadequacy_listsp,
                                               *annotation_listsp,
                                               annotation_counts,
                                               &max_contributions,
                                               annotations_obstackp);
  *max_annotationsp = 0;
  for (i = 0; i < nstates; ++i)
    {
      if (annotation_counts[i] > *max_annotationsp)
        *max_annotationsp = annotation_counts[i];
      total_annotations += annotation_counts[i];
    }
  if (trace_flag & trace_ielr)
    {
      for (i = 0; i < nstates; ++i)
        {
          fprintf (stderr, "Inadequacy annotations for state %d:\n", i);
          AnnotationList__debug ((*annotation_listsp)[i],
                                 states[i]->nitems, 2);
        }
      fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates);
      fprintf (stderr, "Average number of annotations per state: %f\n",
               (float)total_annotations/nstates);
      fprintf (stderr, "Max number of annotations per state: %d\n",
               *max_annotationsp);
      fprintf (stderr, "Max number of contributions per annotation: %d\n",
               max_contributions);
    }
  for (i = 0; i < nstates; ++i)
    if (item_lookahead_sets[i])
      {
        size_t j;
        for (j = 0; j < states[i]->nitems; ++j)
          if (item_lookahead_sets[i][j])
            bitset_free (item_lookahead_sets[i][j]);
        free (item_lookahead_sets[i]);
      }
  free (item_lookahead_sets);
  free (annotation_counts);
}

typedef struct state_list {
  struct state_list *next;
  state *state;
  /** Has this state been recomputed as a successor of another state?  */
  bool recomputedAsSuccessor;
  /**
   * \c NULL iff all lookahead sets are empty.  <tt>lookaheads[i] = NULL</tt>
   * iff the lookahead set on item \c i is empty.
   */
  bitset *lookaheads;
  /**
   * nextIsocore is the next state in a circularly linked-list of all states
   * with the same core.  The one originally computed by generate_states in
   * LR0.c is lr0Isocore.
   */
  struct state_list *lr0Isocore;
  struct state_list *nextIsocore;
} state_list;

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - <tt>n->class = nterm_sym</tt>.
 * \post
 *   - \c follow_set contains the follow set for the goto on nonterminal \c n
 *     in state \c s based on the lookaheads stored in <tt>s->lookaheads</tt>.
 */
static void
ielr_compute_goto_follow_set (bitsetv follow_kernel_items,
                              bitsetv always_follows, state_list *s,
                              symbol *n, bitset follow_set)
{
  goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number);
  bitset_copy (follow_set, always_follows[n_goto]);
  if (s->lookaheads)
    {
      bitset_iterator biter_item;
      bitset_bindex item;
      BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0)
        if (s->lookaheads[item])
          bitset_or (follow_set, follow_set, s->lookaheads[item]);
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - \c lookahead_filter was computed by
 *     \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore
 *     of \c t.
 *   - The number of rows in \c lookaheads is at least the number of items in
 *     \c t, and the number of columns is \c ::ntokens.
 * \post
 *   - <tt>lookaheads[i][j]</tt> is set iff both:
 *     - <tt>lookahead_filter[i][j]</tt> is set.
 *     - The isocore of \c t that will be the transition successor of \c s will
 *       inherit from \c s token \c j into the lookahead set of item \c i.
 */
static void
ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows,
                         state_list *s, state *t, bitsetv lookahead_filter,
                         bitsetv lookaheads)
{
  size_t s_item = 0;
  size_t t_item;
  bitsetv_zero (lookaheads);
  for (t_item = 0; t_item < t->nitems; ++t_item)
    {
      /* If this kernel item is the beginning of a RHS, it must be the
         kernel item in the start state, but t is supposed to be a successor
         state.  If, instead, this kernel item is the successor of the start
         state's kernel item, the lookahead set is empty.  This second case is
         a special case to avoid the - 2 below, but the next successor can be
         handled fine without special casing it.  */
      aver (t->items[t_item] != 0);
      if (t->items[t_item] > 1
          && !bitset_empty_p (lookahead_filter[t_item]))
        {
          if (item_number_is_rule_number (ritem[t->items[t_item] - 2]))
            {
              unsigned int rule_item;
              for (rule_item = t->items[t_item];
                   !item_number_is_rule_number (ritem[rule_item]);
                   ++rule_item)
                ;
              ielr_compute_goto_follow_set (
                follow_kernel_items, always_follows, s,
                rules[item_number_as_rule_number (ritem[rule_item])].lhs,
                lookaheads[t_item]);
            }
          else if (s->lookaheads)
            {
              /* We don't have to start the s item search at the beginning
                 every time because items from both states are sorted by their
                 indices in ritem.  */
              for (; s_item < s->state->nitems; ++s_item)
                if (s->state->items[s_item] == t->items[t_item] - 1)
                  break;
              aver (s_item != s->state->nitems);
              if (s->lookaheads[s_item])
                bitset_copy (lookaheads[t_item], s->lookaheads[s_item]);
            }
          bitset_and (lookaheads[t_item],
                      lookaheads[t_item], lookahead_filter[t_item]);
        }
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and all bits in work2 are set.
 *     - \c annotation_lists was computed by \c ielr_compute_annotation_lists.
 *   - The number of rows in each of \c lookaheads and \c work2 is the maximum
 *     number of items in any state.  The number of columns in each is
 *     \c ::ntokens.
 *   - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t.
 *   - \c ::nstates is the total number of states, some not yet fully computed,
 *     in the list ending at \c *last_statep.  It is at least the number of
 *     original LR(0) states.
 *   - The size of \c work1 is at least the number of annotations for the LR(0)
 *     isocore of \c t.
 * \post
 *   - Either:
 *     - In the case that <tt>annotation_lists != NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if
 *       permitted by the annotations for the original LR(0) isocore of \c t.
 *       If this changed the lookaheads in that isocore, those changes were
 *       propagated to all already computed transition successors recursively
 *       possibly resulting in the splitting of some of those successors.
 *     - In the case that <tt>annotation_lists = NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if the
 *       isocore's lookahead sets were identical to those specified by
 *       <tt>lookaheads \@pre</tt>.
 *     - If no such merge was permitted, a new isocore of \c t containing
 *       <tt>lookaheads \@pre</tt> was appended to the state list whose
 *       previous tail was <tt>*last_statep \@pre</tt> and \c ::nstates was
 *       incremented.  It was also appended to \c t's isocore list.
 *   - <tt>*tp</tt> = the isocore of \c t into which
 *     <tt>lookaheads \@pre</tt> was placed/merged.
 *   - \c lookaheads, \c work1, and \c work2 may have been altered.
 */
static void
ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows,
                    AnnotationList **annotation_lists, state *t,
                    bitsetv lookaheads, state_list **last_statep,
                    ContributionIndex work1[], bitsetv work2, state **tp)
{
  state_list *lr0_isocore = t->state_list->lr0Isocore;
  state_list **this_isocorep;
  bool has_lookaheads;

  /* Determine whether there's an isocore of t with which these lookaheads can
     be merged.  */
  {
    AnnotationIndex ai;
    AnnotationList *a;
    if (annotation_lists)
      for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
           a;
           ++ai, a = a->next)
        work1[ai] =
          AnnotationList__computeDominantContribution (
            a, lr0_isocore->state->nitems, lookaheads, false);
    for (this_isocorep = &t->state_list;
         this_isocorep == &t->state_list || *this_isocorep != t->state_list;
         this_isocorep = &(*this_isocorep)->nextIsocore)
      {
        if (!(*this_isocorep)->recomputedAsSuccessor)
          break;
        if (annotation_lists)
          {
            for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
                 a;
                 ++ai, a = a->next)
              {
                if (work1[ai] != ContributionIndex__none)
                  {
                    /* This isocore compatibility test depends on the fact
                       that, if the dominant contributions are the same for the
                       two isocores, then merging their lookahead sets will not
                       produce a state with a different dominant contribution.
                       */
                    ContributionIndex ci =
                      AnnotationList__computeDominantContribution (
                        a, lr0_isocore->state->nitems,
                        (*this_isocorep)->lookaheads, false);
                    if (ci != ContributionIndex__none && work1[ai] != ci)
                      break;
                  }
              }
            if (!a)
              break;
          }
        else
          {
            size_t i;
            for (i = 0; i < t->nitems; ++i)
              {
                if (!(*this_isocorep)->lookaheads
                    || !(*this_isocorep)->lookaheads[i])
                  {
                    if (!bitset_empty_p (lookaheads[i]))
                      break;
                  }
                // bitset_equal_p uses the size of the first argument, so
                // lookaheads[i] must be the second argument.
                else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i],
                                          lookaheads[i]))
                  break;
              }
            if (i == t->nitems)
              break;
          }
      }
  }

  has_lookaheads = false;
  {
    size_t i;
    for (i = 0; i < lr0_isocore->state->nitems; ++i)
      if (!bitset_empty_p (lookaheads[i]))
        {
          has_lookaheads = true;
          break;
        }
  }

  /* Merge with an existing isocore.  */
  if (this_isocorep == &t->state_list || *this_isocorep != t->state_list)
    {
      bool new_lookaheads = false;
      *tp = (*this_isocorep)->state;

      /* Merge lookaheads into the state and record whether any of them are
         actually new.  */
      if (has_lookaheads)
        {
          size_t i;
          if (!(*this_isocorep)->lookaheads)
            {
              (*this_isocorep)->lookaheads =
                xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads);
              for (i = 0; i < t->nitems; ++i)
                (*this_isocorep)->lookaheads[i] = NULL;
            }
          for (i = 0; i < t->nitems; ++i)
            if (!bitset_empty_p (lookaheads[i]))
              {
                if (!(*this_isocorep)->lookaheads[i])
                  (*this_isocorep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                bitset_andn (lookaheads[i],
                             lookaheads[i], (*this_isocorep)->lookaheads[i]);
                bitset_or ((*this_isocorep)->lookaheads[i],
                           lookaheads[i], (*this_isocorep)->lookaheads[i]);
                if (!bitset_empty_p (lookaheads[i]))
                  new_lookaheads = true;
              }
        }

      /* If new lookaheads were merged, propagate those lookaheads to the
         successors, possibly splitting them.  If *tp is being recomputed for
         the first time, this isn't necessary because the main
         ielr_split_states loop will handle the successors later.  */
      if (!(*this_isocorep)->recomputedAsSuccessor)
        (*this_isocorep)->recomputedAsSuccessor = true;
      else if (new_lookaheads)
        {
          int i;
          /* When merging demands identical lookahead sets, it is impossible to
             merge new lookaheads.  */
          aver (annotation_lists);
          for (i = 0; i < (*tp)->transitions->num; ++i)
            {
              state *t2 = (*tp)->transitions->states[i];
              /* At any time, there's at most one state for which we have so
                 far initially recomputed only some of its successors in the
                 main ielr_split_states loop.  Because we recompute successors
                 in order, we can just stop at the first such successor.  Of
                 course, *tp might be a state some of whose successors have
                 been recomputed as successors of other states rather than as
                 successors of *tp.  It's fine if we go ahead and propagate to
                 some of those.  We'll propagate to them again (but stop when
                 we see nothing changes) and to the others when we reach *tp in
                 the main ielr_split_states loop later.  */
              if (!t2->state_list->recomputedAsSuccessor)
                break;
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t2->state_list->lr0Isocore->state->number],
                t2->nitems, work2);
              ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                       (*this_isocorep), t2, work2,
                                       lookaheads);
              /* FIXME: If splitting t2 here, it's possible that lookaheads
                 that had already propagated from *tp to t2 will be left in t2
                 after *tp has been removed as t2's predecessor:
                 - We're going to recompute all lookaheads in phase 4, so these
                   extra lookaheads won't appear in the final parser table.
                 - If t2 has just one annotation, then these extra lookaheads
                   cannot alter the dominating contribution to the associated
                   inadequacy and thus cannot needlessly prevent a future merge
                   of some new state with t2.  We can be sure of this because:
                   - The fact that we're splitting t2 now means that some
                     predecessors (at least one) other than *tp must be
                     propagating contributions to t2.
                   - The fact that t2 was merged in the first place means that,
                     if *tp propagated any contributions, the dominating
                     contribution must be the same as that from those other
                     predecessors.
                   - Thus, if some new state to be merged with t2 in the future
                     proves to be compatible with the contributions propagated
                     by the other predecessors, it will also be compatible with
                     the contributions made by the extra lookaheads left behind
                     by *tp.
                 - However, if t2 has more than one annotation and these extra
                   lookaheads contribute to one of their inadequacies, it's
                   possible these extra lookaheads may needlessly prevent a
                   future merge with t2.  For example:
                   - Let's say there's an inadequacy A that makes the split
                     necessary as follows:
                     - There's currently just one other predecessor and it
                       propagates to t2 some contributions to inadequacy A.
                     - The new lookaheads that we were attempting to propagate
                       from *tp to t2 made contributions to inadequacy A with a
                       different dominating contribution than those from that
                       other predecessor.
                     - The extra lookaheads either make no contribution to
                       inadequacy A or have the same dominating contribution as
                       the contributions from the other predecessor.  Either
                       way, as explained above, they can't prevent a future
                       merge.
                   - Let's say there's an inadequacy B that causes the trouble
                     with future merges as follows:
                     - The extra lookaheads make contributions to inadequacy B.
                     - Those extra contributions did not prevent the original
                       merge to create t2 because the other predecessor
                       propagates to t2 no contributions to inadequacy B.
                     - Thus, those extra contributions may prevent a future
                       merge with t2 even though the merge would be fine if *tp
                       had not left them behind.
                 - Is the latter case common enough to worry about?
                 - Perhaps we should track all predecessors and iterate them
                   now to recreate t2 without those extra lookaheads.  */
              ielr_compute_state (follow_kernel_items, always_follows,
                                  annotation_lists, t2, lookaheads,
                                  last_statep, work1, work2,
                                  &(*tp)->transitions->states[i]);
            }
        }
    }

  /* Create a new isocore.  */
  else
    {
      state_list *old_isocore = *this_isocorep;
      (*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep);
      *last_statep = *this_isocorep;
      (*last_statep)->state = *tp = state_new_isocore (t);
      (*tp)->state_list = *last_statep;
      (*last_statep)->recomputedAsSuccessor = true;
      (*last_statep)->next = NULL;
      (*last_statep)->lookaheads = NULL;
      if (has_lookaheads)
        {
          size_t i;
          (*last_statep)->lookaheads =
            xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads);
          for (i = 0; i < t->nitems; ++i)
            {
              if (bitset_empty_p (lookaheads[i]))
                (*last_statep)->lookaheads[i] = NULL;
              else
                {
                  (*last_statep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                  bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]);
                }
            }
        }
      (*last_statep)->lr0Isocore = lr0_isocore;
      (*last_statep)->nextIsocore = old_isocore;
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and <tt>max_annotations=0</tt>.
 *     - \c annotation_lists and \c max_annotations were computed by
 *       \c ielr_compute_annotation_lists.
 * \post
 *   - \c ::states is of size \c ::nstates (which might be greater than
 *     <tt>::nstates \@pre</tt>) and no longer contains any LR(1)-relative
 *     inadequacy.  \c annotation_lists was used to determine state
 *     compatibility or, if <tt>annotation_lists = NULL</tt>, the canonical
 *     LR(1) state compatibility test was used.
 *   - If <tt>annotation_lists = NULL</tt>, reduction lookahead sets were
 *     computed in all states.  TV_IELR_PHASE4 was pushed while they were
 *     computed from item lookahead sets.
 */
static void
ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows,
                   AnnotationList **annotation_lists,
                   AnnotationIndex max_annotations)
{
  state_list *first_state;
  state_list *last_state;
  bitsetv lookahead_filter = NULL;
  bitsetv lookaheads;

  /* Set up state list and some reusable bitsets.  */
  {
    size_t max_nitems = 0;
    state_number i;
    state_list **nodep = &first_state;
    for (i = 0; i < nstates; ++i)
      {
        *nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep);
        (*nodep)->state = states[i];
        (*nodep)->recomputedAsSuccessor = false;
        (*nodep)->lookaheads = NULL;
        (*nodep)->lr0Isocore = *nodep;
        (*nodep)->nextIsocore = *nodep;
        nodep = &(*nodep)->next;
        if (states[i]->nitems > max_nitems)
          max_nitems = states[i]->nitems;
      }
    *nodep = NULL;
    lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
    if (!annotation_lists)
      bitsetv_ones (lookahead_filter);
    lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
  }

  /* Recompute states.  */
  {
    ContributionIndex *work = xnmalloc (max_annotations, sizeof *work);
    state_list *this_state;
    for (this_state = first_state; this_state; this_state = this_state->next)
      {
        state *s = this_state->state;
        int i;
        for (i = 0; i < s->transitions->num; ++i)
          {
            state *t = s->transitions->states[i];
            if (annotation_lists)
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t->state_list->lr0Isocore->state->number],
                t->nitems, lookahead_filter);
            ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                     this_state, t, lookahead_filter,
                                     lookaheads);
            ielr_compute_state (follow_kernel_items, always_follows,
                                annotation_lists, t, lookaheads, &last_state,
                                work, lookahead_filter,
                                &s->transitions->states[i]);
          }
      }
    free (work);
  }

  bitsetv_free (lookahead_filter);
  bitsetv_free (lookaheads);

  /* Store states back in the states array.  */
  states = xnrealloc (states, nstates, sizeof *states);
  {
    state_list *node;
    for (node = first_state; node; node = node->next)
      states[node->state->number] = node->state;
  }

  /* In the case of canonical LR(1), copy item lookahead sets to reduction
     lookahead sets.  */
  if (!annotation_lists)
    {
      timevar_push (TV_IELR_PHASE4);
      initialize_LA ();
      state_list *node;
      for (node = first_state; node; node = node->next)
        if (!node->state->consistent)
          {
            size_t i = 0;
            item_number *itemset = node->state->items;
            size_t r;
            for (r = 0; r < node->state->reductions->num; ++r)
              {
                rule *this_rule = node->state->reductions->rules[r];
                bitset lookahead_set =
                  node->state->reductions->lookahead_tokens[r];
                if (item_number_is_rule_number (*this_rule->rhs))
                  ielr_compute_goto_follow_set (follow_kernel_items,
                                                always_follows, node,
                                                this_rule->lhs, lookahead_set);
                else if (node->lookaheads)
                  {
                    /* We don't need to start the kernel item search back at
                       i=0 because both items and reductions are sorted on rule
                       number.  */
                    while (!item_number_is_rule_number (ritem[itemset[i]])
                           || item_number_as_rule_number (ritem[itemset[i]])
                              != this_rule->number)
                      {
                        ++i;
                        aver (i < node->state->nitems);
                      }
                    if (node->lookaheads[i])
                      bitset_copy (lookahead_set, node->lookaheads[i]);
                  }
              }
          }
      timevar_pop (TV_IELR_PHASE4);
    }

  /* Free state list.  */
  while (first_state)
    {
      state_list *node = first_state;
      if (node->lookaheads)
        {
          size_t i;
          for (i = 0; i < node->state->nitems; ++i)
            if (node->lookaheads[i])
              bitset_free (node->lookaheads[i]);
          free (node->lookaheads);
        }
      first_state = node->next;
      free (node);
    }
}

void
ielr (void)
{
  LrType lr_type;

  /* Examine user options.  */
  {
    char *type = muscle_percent_define_get ("lr.type");
    if (0 == strcmp (type, "lalr"))
      lr_type = LR_TYPE__LALR;
    else if (0 == strcmp (type, "ielr"))
      lr_type = LR_TYPE__IELR;
    else if (0 == strcmp (type, "canonical-lr"))
      lr_type = LR_TYPE__CANONICAL_LR;
    else
      aver (false);
    free (type);
  }

  /* Phase 0: LALR(1).  */
  timevar_push (TV_LALR);
  if (lr_type == LR_TYPE__CANONICAL_LR)
    set_goto_map ();
  else
    lalr ();
  if (lr_type == LR_TYPE__LALR)
    {
      bitsetv_free (goto_follows);
      timevar_pop (TV_LALR);
      return;
    }
  timevar_pop (TV_LALR);

  {
    bitsetv follow_kernel_items;
    bitsetv always_follows;
    InadequacyList **inadequacy_lists = NULL;
    AnnotationList **annotation_lists = NULL;
    struct obstack annotations_obstack;
    AnnotationIndex max_annotations = 0;

    {
      /* Phase 1: Compute Auxiliary Tables.  */
      state ***predecessors;
      timevar_push (TV_IELR_PHASE1);
      ielr_compute_auxiliary_tables (
        &follow_kernel_items, &always_follows,
        lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors);
      timevar_pop (TV_IELR_PHASE1);

      /* Phase 2: Compute Annotations.  */
      timevar_push (TV_IELR_PHASE2);
      if (lr_type != LR_TYPE__CANONICAL_LR)
        {
          obstack_init (&annotations_obstack);
          ielr_compute_annotation_lists (follow_kernel_items, always_follows,
                                         predecessors, &max_annotations,
                                         &inadequacy_lists, &annotation_lists,
                                         &annotations_obstack);
          {
            state_number i;
            for (i = 0; i < nstates; ++i)
              free (predecessors[i]);
          }
          free (predecessors);
          bitsetv_free (goto_follows);
          lalr_free ();
        }
      timevar_pop (TV_IELR_PHASE2);
    }

    /* Phase 3: Split States.  */
    timevar_push (TV_IELR_PHASE3);
    {
      state_number nstates_lr0 = nstates;
      ielr_split_states (follow_kernel_items, always_follows,
                         annotation_lists, max_annotations);
      if (inadequacy_lists)
        {
          state_number i;
          for (i = 0; i < nstates_lr0; ++i)
            InadequacyList__delete (inadequacy_lists[i]);
        }
    }
    free (inadequacy_lists);
    if (annotation_lists)
      obstack_free (&annotations_obstack, NULL);
    free (annotation_lists);
    bitsetv_free (follow_kernel_items);
    bitsetv_free (always_follows);
    timevar_pop (TV_IELR_PHASE3);
  }

  /* Phase 4: Compute Reduction Lookaheads.  */
  timevar_push (TV_IELR_PHASE4);
  free (goto_map);
  free (from_state);
  free (to_state);
  if (lr_type == LR_TYPE__CANONICAL_LR)
    {
      // Reduction lookaheads are computed in ielr_split_states above but are
      // timed as part of phase 4.
      set_goto_map ();
    }
  else
    {
      lalr ();
      bitsetv_free (goto_follows);
    }
  timevar_pop (TV_IELR_PHASE4);
}