+/* IELR main implementation.
+
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include "system.h"
+
+#include "ielr.h"
+
+#include <bitset.h>
+#include <timevar.h>
+
+#include "AnnotationList.h"
+#include "derives.h"
+#include "getargs.h"
+#include "lalr.h"
+#include "muscle-tab.h"
+#include "nullable.h"
+#include "relation.h"
+#include "state.h"
+#include "symtab.h"
+
+/** Records the value of the \%define variable "lr.type". */
+typedef enum { LR_TYPE__LALR, LR_TYPE__IELR, LR_TYPE__CANONICAL_LR } LrType;
+
+/**
+ * \post:
+ * - \c result = a new \c bitset of size \c ::nritems such that any bit \c i
+ * is set iff <tt>ritem[i]</tt> is a nonterminal after which all ritems in
+ * the same RHS are nullable nonterminals. In other words, the follows of
+ * a goto on <tt>ritem[i]</tt> include the lookahead set of the item.
+ */
+static bitset
+ielr_compute_ritem_sees_lookahead_set (void)
+{
+ bitset result = bitset_create (nritems, BITSET_FIXED);
+ unsigned int i = nritems-1;
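+  /* Scan ritem backwards one rule at a time.  Within each RHS, mark the
+     trailing run of nullable nonterminals, then mark the nonterminal (if
+     any) immediately preceding that run, and finally skip over the rest of
+     the RHS.  */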
+ while (i>0)
+ {
+ --i;
+ while (!item_number_is_rule_number (ritem[i])
+ && ISVAR (ritem[i])
+ && nullable [item_number_as_symbol_number (ritem[i]) - ntokens])
+ bitset_set (result, i--);
+ if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i]))
+ bitset_set (result, i--);
+ while (!item_number_is_rule_number (ritem[i]) && i>0)
+ --i;
+ }
+ if (trace_flag & trace_ielr)
+ {
+ fprintf (stderr, "ritem_sees_lookahead_set:\n");
+ debug_bitset (result);
+ fprintf (stderr, "\n");
+ }
+ return result;
+}
+
+/**
+ * \pre:
+ * - \c ritem_sees_lookahead_set was computed by
+ * \c ielr_compute_ritem_sees_lookahead_set.
+ * \post:
+ * - Each of \c *edgesp and \c *edge_countsp is a new array of size
+ * \c ::ngotos.
+ * - <tt>(*edgesp)[i]</tt> points to a \c goto_number array of size
+ * <tt>(*edge_countsp)[i]+1</tt>.
+ * - In such a \c goto_number array, the last element is \c ::END_NODE.
+ * - All remaining elements are the indices of the gotos to which there is an
+ * internal follow edge from goto \c i.
+ * - There is an internal follow edge from goto \c i to goto \c j iff both:
+ * - The from states of gotos \c i and \c j are the same.
+ * - The transition nonterminal for goto \c i appears as the first RHS
+ * symbol of at least one production for which both:
+ * - The LHS is the transition symbol of goto \c j.
+ * - All other RHS symbols are nullable nonterminals.
+ * - In other words, the follows of goto \c i include the follows of
+ * goto \c j and it's an internal edge because the from states are the
+ * same.
+ */
+static void
+ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set,
+ goto_number ***edgesp, int **edge_countsp)
+{
+ *edgesp = xnmalloc (ngotos, sizeof **edgesp);
+ *edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp);
+ {
+ bitset sources = bitset_create (ngotos, BITSET_FIXED);
+ goto_number i;
+ for (i = 0; i < ngotos; ++i)
+ (*edge_countsp)[i] = 0;
+ for (i = 0; i < ngotos; ++i)
+ {
+ int nsources = 0;
+ {
+ rule **rulep;
+ for (rulep = derives[states[to_state[i]]->accessing_symbol
+ - ntokens];
+ *rulep;
+ ++rulep)
+ {
+ /* If there is at least one RHS symbol, if the first RHS symbol
+ is a nonterminal, and if all remaining RHS symbols (if any)
+ are nullable nonterminals, create an edge from the LHS
+ symbol's goto to the first RHS symbol's goto such that the RHS
+ symbol's goto will be the source of the edge after the
+ eventual relation_transpose below.
+
+ Unlike in ielr_compute_always_follows, I use a bitset for
+ edges rather than an array because it is possible that
+                 multiple RHS's with the same first symbol could fit these
+                 criteria and thus that we could end up with redundant
+                 edges.  With the
+ possibility of redundant edges, it's hard to know ahead of
+ time how large to make such an array. Another possible
+ redundancy is that source and destination might be the same
+ goto. Eliminating all these possible redundancies now might
+ possibly help performance a little. I have not proven any of
+ this, but I'm guessing the bitset shouldn't entail much of a
+ performance penalty, if any. */
+ if (bitset_test (ritem_sees_lookahead_set,
+ (*rulep)->rhs - ritem))
+ {
+ goto_number source =
+ map_goto (from_state[i],
+ item_number_as_symbol_number (*(*rulep)->rhs));
+ if (i != source && !bitset_test (sources, source))
+ {
+ bitset_set (sources, source);
+ ++nsources;
+ ++(*edge_countsp)[source];
+ }
+ }
+ }
+ }
+ if (nsources == 0)
+ (*edgesp)[i] = NULL;
+ else
+ {
+ (*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]);
+ {
+ bitset_iterator biter_source;
+ bitset_bindex source;
+ int j = 0;
+ BITSET_FOR_EACH (biter_source, sources, source, 0)
+ (*edgesp)[i][j++] = source;
+ }
+ (*edgesp)[i][nsources] = END_NODE;
+ }
+ bitset_zero (sources);
+ }
+ bitset_free (sources);
+ }
+
+ relation_transpose (edgesp, ngotos);
+
+ if (trace_flag & trace_ielr)
+ {
+ fprintf (stderr, "internal_follow_edges:\n");
+ relation_print (*edgesp, ngotos, stderr);
+ }
+}
+
+/**
+ * \pre:
+ * - \c ritem_sees_lookahead_set was computed by
+ * \c ielr_compute_ritem_sees_lookahead_set.
+ * - \c internal_follow_edges was computed by
+ * \c ielr_compute_internal_follow_edges.
+ * \post:
+ * - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows
+ *     is \c ngotos and the number of columns is the maximum number of
+ *     kernel items in any state.
+ * - <tt>(*follow_kernel_itemsp)[i][j]</tt> is set iff the follows of goto
+ * \c i include the lookahead set of item \c j in the from state of goto
+ * \c i.
+ * - Thus, <tt>(*follow_kernel_itemsp)[i][j]</tt> is always unset if there is
+ * no item \c j in the from state of goto \c i.
+ */
+static void
+ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set,
+ goto_number **internal_follow_edges,
+ bitsetv *follow_kernel_itemsp)
+{
+ {
+ size_t max_nitems = 0;
+ state_number i;
+ for (i = 0; i < nstates; ++i)
+ if (states[i]->nitems > max_nitems)
+ max_nitems = states[i]->nitems;
+ *follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED);
+ }
+ {
+ goto_number i;
+ for (i = 0; i < ngotos; ++i)
+ {
+ size_t nitems = states[from_state[i]]->nitems;
+ item_number *items = states[from_state[i]]->items;
+ size_t j;
+ for (j = 0; j < nitems; ++j)
+ /* If this item has this goto and if all subsequent symbols in this
+ RHS (if any) are nullable nonterminals, then record this item as
+ one whose lookahead set is included in this goto's follows. */
+ if (item_number_is_symbol_number (ritem[items[j]])
+ && item_number_as_symbol_number (ritem[items[j]])
+ == states[to_state[i]]->accessing_symbol
+ && bitset_test (ritem_sees_lookahead_set, items[j]))
+ bitset_set ((*follow_kernel_itemsp)[i], j);
+ }
+ }
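+  /* Propagate these bits along the internal follow edges: each goto's row
+     also gets the kernel items of the gotos (in the same from state) whose
+     follows it includes.  */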
+ relation_digraph (internal_follow_edges, ngotos, follow_kernel_itemsp);
+
+ if (trace_flag & trace_ielr)
+ {
+ fprintf (stderr, "follow_kernel_items:\n");
+ debug_bitsetv (*follow_kernel_itemsp);
+ }
+}
+
+/**
+ * \pre
+ * - \c *edgesp and \c edge_counts were computed by
+ * \c ielr_compute_internal_follow_edges.
+ * \post
+ * - \c *always_followsp is a new \c bitsetv with \c ngotos rows and
+ * \c ntokens columns.
+ *   - <tt>(*always_followsp)[i][j]</tt> is set iff token \c j is an always
+ *     follow of goto \c i; that is, a follow computed from internal and
+ *     successor follow edges alone, independently of the kernel item
+ *     lookahead sets.
+ * - Rows of \c *edgesp have been realloc'ed and extended to include
+ * successor follow edges. \c edge_counts has not been updated.
+ */
+static void
+ielr_compute_always_follows (goto_number ***edgesp,
+ int const edge_counts[],
+ bitsetv *always_followsp)
+{
+ *always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
+ {
+ goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array);
+ goto_number i;
+ for (i = 0; i < ngotos; ++i)
+ {
+ goto_number nedges = edge_counts[i];
+ {
+ int j;
+ transitions *trans = states[to_state[i]]->transitions;
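+            /* Every token on a shift transition out of this goto's to
+               state is an always follow of this goto.  */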
+ FOR_EACH_SHIFT (trans, j)
+ bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j));
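+            /* The remaining transitions are gotos.  For each goto on a
+               nullable nonterminal, add a successor follow edge so that
+               relation_digraph below merges that goto's follows into this
+               goto's follows.  */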
+ for (; j < trans->num; ++j)
+ {
+ symbol_number sym = TRANSITION_SYMBOL (trans, j);
+ if (nullable[sym - ntokens])
+ edge_array[nedges++] = map_goto (to_state[i], sym);
+ }
+ }
+ if (nedges - edge_counts[i])
+ {
+ (*edgesp)[i] =
+ xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]);
+ memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i],
+ (nedges - edge_counts[i]) * sizeof *(*edgesp)[i]);
+ (*edgesp)[i][nedges] = END_NODE;
+ }
+ }
+ free (edge_array);
+ }
+ relation_digraph (*edgesp, ngotos, always_followsp);
+
+ if (trace_flag & trace_ielr)
+ {
+ fprintf (stderr, "always follow edges:\n");
+ relation_print (*edgesp, ngotos, stderr);
+ fprintf (stderr, "always_follows:\n");
+ debug_bitsetv (*always_followsp);
+ }
+}
+
+/**
+ * \post
+ * - \c result is a new array of size \c ::nstates.
+ * - <tt>result[i]</tt> is an array of pointers to the predecessor
+ *     <tt>state</tt>s of state \c i.
+ * - The last element of such an array is \c NULL.
+ */
+static state ***
+ielr_compute_predecessors (void)
+{
+ state_number i;
+ int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts);
+ state ***result = xnmalloc (nstates, sizeof *result);
+ for (i = 0; i < nstates; ++i)
+ predecessor_counts[i] = 0;
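+  /* Count the transitions into each state.  */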
+ for (i = 0; i < nstates; ++i)
+ {
+ int j;
+ for (j = 0; j < states[i]->transitions->num; ++j)
+ ++predecessor_counts[states[i]->transitions->states[j]->number];
+ }
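+  /* Allocate each state's NULL-terminated predecessor array and reset the
+     counts for the fill pass below.  */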
+ for (i = 0; i < nstates; ++i)
+ {
+ result[i] = xnmalloc (predecessor_counts[i]+1, sizeof *result[i]);
+ result[i][predecessor_counts[i]] = NULL;
+ predecessor_counts[i] = 0;
+ }
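+  /* Record each state as a predecessor of every state it transitions
+     to.  */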
+ for (i = 0; i < nstates; ++i)
+ {
+ int j;
+ for (j = 0; j < states[i]->transitions->num; ++j)
+ {
+ state_number k = states[i]->transitions->states[j]->number;
+ result[k][predecessor_counts[k]++] = states[i];
+ }
+ }
+ free (predecessor_counts);
+ return result;
+}
+
+/**
+ * \post
+ * - \c *follow_kernel_itemsp and \c *always_followsp were computed by
+ * \c ielr_compute_follow_kernel_items and
+ * \c ielr_compute_always_follows.
+ * - Iff <tt>predecessorsp != NULL</tt>, then \c *predecessorsp was computed
+ * by \c ielr_compute_predecessors.
+ */
+static void
+ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp,
+ bitsetv *always_followsp,
+ state ****predecessorsp)
+{
+ goto_number **edges;
+ int *edge_counts;
+ {
+ bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set ();
+ ielr_compute_internal_follow_edges (ritem_sees_lookahead_set,
+ &edges, &edge_counts);
+ ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges,
+ follow_kernel_itemsp);
+ bitset_free (ritem_sees_lookahead_set);
+ }
+ ielr_compute_always_follows (&edges, edge_counts, always_followsp);
+ {
+ int i;
+ for (i = 0; i < ngotos; ++i)
+ free (edges[i]);
+ }
+ free (edges);
+ free (edge_counts);
+ if (predecessorsp)
+ *predecessorsp = ielr_compute_predecessors ();
+}
+
+/**
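+ * Return whether the lookahead set of kernel item \c item in state \c s
+ * contains token \c lookahead, computing item lookahead sets just-in-time
+ * into \c item_lookahead_sets as needed.  \c predecessors was computed by
+ * \c ielr_compute_predecessors, and \c lhs is the LHS of \c item's rule or
+ * 0 if it is not yet known.
+ *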
+ * \note
+ * - FIXME: It might be an interesting experiment to compare the space and
+ * time efficiency of computing \c item_lookahead_sets either:
+ * - Fully up front.
+ * - Just-in-time, as implemented below.
+ * - Not at all. That is, just let annotations continue even when
+ * unnecessary.
+ */
+bool
+ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item,
+ symbol_number lookahead, state ***predecessors,
+ bitset **item_lookahead_sets)
+{
+ if (!item_lookahead_sets[s->number])
+ {
+ size_t i;
+ item_lookahead_sets[s->number] =
+ xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]);
+ for (i = 0; i < s->nitems; ++i)
+ item_lookahead_sets[s->number][i] = NULL;
+ }
+ if (!item_lookahead_sets[s->number][item])
+ {
+ item_lookahead_sets[s->number][item] =
+ bitset_create (ntokens, BITSET_FIXED);
+ /* If this kernel item is the beginning of a RHS, it must be the kernel
+ item in the start state, and so its LHS has no follows and no goto to
+ check. If, instead, this kernel item is the successor of the start
+ state's kernel item, there are still no follows and no goto. This
+ situation is fortunate because we want to avoid the - 2 below in both
+ cases.
+
+ Actually, IELR(1) should never invoke this function for either of
+ those cases because (1) follow_kernel_items will never reference a
+ kernel item for this RHS because the end token blocks sight of the
+ lookahead set from the RHS's only nonterminal, and (2) no reduction
+ has a lookback dependency on this lookahead set. Nevertheless, I
+ didn't change this test to an aver just in case the usage of this
+ function evolves to need those two cases. In both cases, the current
+ implementation returns the right result. */
+ if (s->items[item] > 1)
+ {
+ /* If the LHS symbol of this item isn't known (because this is a
+ top-level invocation), go get it. */
+ if (!lhs)
+ {
+ unsigned int i;
+ for (i = s->items[item];
+ !item_number_is_rule_number (ritem[i]);
+ ++i)
+ ;
+ lhs = rules[item_number_as_rule_number (ritem[i])].lhs->number;
+ }
+ /* If this kernel item is next to the beginning of the RHS, then
+ check all predecessors' goto follows for the LHS. */
+ if (item_number_is_rule_number (ritem[s->items[item] - 2]))
+ {
+ state **predecessor;
+ aver (lhs != accept->number);
+ for (predecessor = predecessors[s->number];
+ *predecessor;
+ ++predecessor)
+ bitset_or (item_lookahead_sets[s->number][item],
+ item_lookahead_sets[s->number][item],
+ goto_follows[map_goto ((*predecessor)->number,
+ lhs)]);
+ }
+ /* If this kernel item is later in the RHS, then check all
+ predecessor items' lookahead sets. */
+ else
+ {
+ state **predecessor;
+ for (predecessor = predecessors[s->number];
+ *predecessor;
+ ++predecessor)
+ {
+ size_t predecessor_item;
+ for (predecessor_item = 0;
+ predecessor_item < (*predecessor)->nitems;
+ ++predecessor_item)
+ if ((*predecessor)->items[predecessor_item]
+ == s->items[item] - 1)
+ break;
+ aver (predecessor_item != (*predecessor)->nitems);
+ ielr_item_has_lookahead (*predecessor, lhs,
+ predecessor_item, 0 /*irrelevant*/,
+ predecessors, item_lookahead_sets);
+ bitset_or (item_lookahead_sets[s->number][item],
+ item_lookahead_sets[s->number][item],
+ item_lookahead_sets[(*predecessor)->number]
+ [predecessor_item]);
+ }
+ }
+ }
+ }
+ return bitset_test (item_lookahead_sets[s->number][item], lookahead);
+}
+
+/**
+ * \pre
+ * - \c follow_kernel_items, \c always_follows, and \c predecessors
+ * were computed by \c ielr_compute_auxiliary_tables.
+ * \post
+ * - Each of <tt>*inadequacy_listsp</tt> and <tt>*annotation_listsp</tt>
+ * points to a new array of size \c ::nstates.
+ * - For <tt>0 <= i < ::nstates</tt>:
+ * - <tt>(*inadequacy_listsp)[i]</tt> contains the \c InadequacyList head
+ * node for <tt>states[i]</tt>.
+ * - <tt>(*annotation_listsp)[i]</tt> contains the \c AnnotationList head
+ * node for <tt>states[i]</tt>.
+ * - <tt>*max_annotationsp</tt> is the maximum number of annotations per
+ * state.
+ */
+static void
+ielr_compute_annotation_lists (bitsetv follow_kernel_items,
+ bitsetv always_follows, state ***predecessors,
+ AnnotationIndex *max_annotationsp,
+ InadequacyList ***inadequacy_listsp,
+ AnnotationList ***annotation_listsp,
+ struct obstack *annotations_obstackp)
+{
+ bitset **item_lookahead_sets =
+ xnmalloc (nstates, sizeof *item_lookahead_sets);
+ AnnotationIndex *annotation_counts =
+ xnmalloc (nstates, sizeof *annotation_counts);
+ ContributionIndex max_contributions = 0;
+ unsigned int total_annotations = 0;
+ state_number i;
+
+ *inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp);
+ *annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp);
+ for (i = 0; i < nstates; ++i)
+ {
+ item_lookahead_sets[i] = NULL;
+ (*inadequacy_listsp)[i] = NULL;
+ (*annotation_listsp)[i] = NULL;
+ annotation_counts[i] = 0;
+ }
+ for (i = 0; i < nstates; ++i)
+ AnnotationList__compute_from_inadequacies (states[i], follow_kernel_items,
+ always_follows, predecessors,
+ item_lookahead_sets,
+ *inadequacy_listsp,
+ *annotation_listsp,
+ annotation_counts,
+ &max_contributions,
+ annotations_obstackp);
+ *max_annotationsp = 0;
+ for (i = 0; i < nstates; ++i)
+ {
+ if (annotation_counts[i] > *max_annotationsp)
+ *max_annotationsp = annotation_counts[i];
+ total_annotations += annotation_counts[i];
+ }
+ if (trace_flag & trace_ielr)
+ {
+ for (i = 0; i < nstates; ++i)
+ {
+ fprintf (stderr, "Inadequacy annotations for state %d:\n", i);
+ AnnotationList__debug ((*annotation_listsp)[i],
+ states[i]->nitems, 2);
+ }
+ fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates);
+ fprintf (stderr, "Average number of annotations per state: %f\n",
+ (float)total_annotations/nstates);
+ fprintf (stderr, "Max number of annotations per state: %d\n",
+ *max_annotationsp);
+ fprintf (stderr, "Max number of contributions per annotation: %d\n",
+ max_contributions);
+ }
+ for (i = 0; i < nstates; ++i)
+ if (item_lookahead_sets[i])
+ {
+ size_t j;
+ for (j = 0; j < states[i]->nitems; ++j)
+ if (item_lookahead_sets[i][j])
+ bitset_free (item_lookahead_sets[i][j]);
+ free (item_lookahead_sets[i]);
+ }
+ free (item_lookahead_sets);
+ free (annotation_counts);
+}
+
+typedef struct state_list {
+ struct state_list *next;
+ state *state;
+ /** Has this state been recomputed as a successor of another state? */
+ bool recomputedAsSuccessor;
+ /**
+ * \c NULL iff all lookahead sets are empty. <tt>lookaheads[i] = NULL</tt>
+ * iff the lookahead set on item \c i is empty.
+ */
+ bitset *lookaheads;
+ /**
+   * nextIsocore is the next state in a circularly linked list of all states
+ * with the same core. The one originally computed by generate_states in
+ * LR0.c is lr0Isocore.
+ */
+ struct state_list *lr0Isocore;
+ struct state_list *nextIsocore;
+} state_list;
+
+/**
+ * \pre
+ * - \c follow_kernel_items and \c always_follows were computed by
+ * \c ielr_compute_auxiliary_tables.
+ * - <tt>n->class = nterm_sym</tt>.
+ * \post
+ * - \c follow_set contains the follow set for the goto on nonterminal \c n
+ * in state \c s based on the lookaheads stored in <tt>s->lookaheads</tt>.
+ */
+static void
+ielr_compute_goto_follow_set (bitsetv follow_kernel_items,
+ bitsetv always_follows, state_list *s,
+ symbol *n, bitset follow_set)
+{
+ goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number);
+ bitset_copy (follow_set, always_follows[n_goto]);
+ if (s->lookaheads)
+ {
+ bitset_iterator biter_item;
+ bitset_bindex item;
+ BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0)
+ if (s->lookaheads[item])
+ bitset_or (follow_set, follow_set, s->lookaheads[item]);
+ }
+}
+
+/**
+ * \pre
+ * - \c follow_kernel_items and \c always_follows were computed by
+ * \c ielr_compute_auxiliary_tables.
+ * - \c lookahead_filter was computed by
+ * \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore
+ * of \c t.
+ * - The number of rows in \c lookaheads is at least the number of items in
+ * \c t, and the number of columns is \c ::ntokens.
+ * \post
+ * - <tt>lookaheads[i][j]</tt> is set iff both:
+ * - <tt>lookahead_filter[i][j]</tt> is set.
+ * - The isocore of \c t that will be the transition successor of \c s will
+ * inherit from \c s token \c j into the lookahead set of item \c i.
+ */
+static void
+ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows,
+ state_list *s, state *t, bitsetv lookahead_filter,
+ bitsetv lookaheads)
+{
+ size_t s_item = 0;
+ size_t t_item;
+ bitsetv_zero (lookaheads);
+ for (t_item = 0; t_item < t->nitems; ++t_item)
+ {
+ /* If this kernel item is the beginning of a RHS, it must be the
+ kernel item in the start state, but t is supposed to be a successor
+ state. If, instead, this kernel item is the successor of the start
+ state's kernel item, the lookahead set is empty. This second case is
+ a special case to avoid the - 2 below, but the next successor can be
+ handled fine without special casing it. */
+ aver (t->items[t_item] != 0);
+ if (t->items[t_item] > 1
+ && !bitset_empty_p (lookahead_filter[t_item]))
+ {
+ if (item_number_is_rule_number (ritem[t->items[t_item] - 2]))
+ {
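+              /* The dot is just after the first RHS symbol, so the new
+                 lookaheads are the follows of the goto on this rule's LHS
+                 in state s.  Scan forward to the rule number in ritem to
+                 find that LHS.  */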
+ unsigned int rule_item;
+ for (rule_item = t->items[t_item];
+ !item_number_is_rule_number (ritem[rule_item]);
+ ++rule_item)
+ ;
+ ielr_compute_goto_follow_set (
+ follow_kernel_items, always_follows, s,
+ rules[item_number_as_rule_number (ritem[rule_item])].lhs,
+ lookaheads[t_item]);
+ }
+ else if (s->lookaheads)
+ {
+ /* We don't have to start the s item search at the beginning
+ every time because items from both states are sorted by their
+ indices in ritem. */
+ for (; s_item < s->state->nitems; ++s_item)
+ if (s->state->items[s_item] == t->items[t_item] - 1)
+ break;
+ aver (s_item != s->state->nitems);
+ if (s->lookaheads[s_item])
+ bitset_copy (lookaheads[t_item], s->lookaheads[s_item]);
+ }
+ bitset_and (lookaheads[t_item],
+ lookaheads[t_item], lookahead_filter[t_item]);
+ }
+ }
+}
+
+/**
+ * \pre
+ * - \c follow_kernel_items and \c always_follows were computed by
+ * \c ielr_compute_auxiliary_tables.
+ * - Either:
+ * - <tt>annotation_lists = NULL</tt> and all bits in work2 are set.
+ * - \c annotation_lists was computed by \c ielr_compute_annotation_lists.
+ * - The number of rows in each of \c lookaheads and \c work2 is the maximum
+ * number of items in any state. The number of columns in each is
+ * \c ::ntokens.
+ * - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t.
+ * - \c ::nstates is the total number of states, some not yet fully computed,
+ * in the list ending at \c *last_statep. It is at least the number of
+ * original LR(0) states.
+ * - The size of \c work1 is at least the number of annotations for the LR(0)
+ * isocore of \c t.
+ * \post
+ * - Either:
+ * - In the case that <tt>annotation_lists != NULL</tt>,
+ * <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if
+ * permitted by the annotations for the original LR(0) isocore of \c t.
+ * If this changed the lookaheads in that isocore, those changes were
+ * propagated to all already computed transition successors recursively
+ * possibly resulting in the splitting of some of those successors.
+ * - In the case that <tt>annotation_lists = NULL</tt>,
+ * <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if the
+ * isocore's lookahead sets were identical to those specified by
+ * <tt>lookaheads \@pre</tt>.
+ * - If no such merge was permitted, a new isocore of \c t containing
+ * <tt>lookaheads \@pre</tt> was appended to the state list whose
+ * previous tail was <tt>*last_statep \@pre</tt> and \c ::nstates was
+ * incremented. It was also appended to \c t's isocore list.
+ * - <tt>*tp</tt> = the isocore of \c t into which
+ * <tt>lookaheads \@pre</tt> was placed/merged.
+ * - \c lookaheads, \c work1, and \c work2 may have been altered.
+ */
+static void
+ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows,
+ AnnotationList **annotation_lists, state *t,
+ bitsetv lookaheads, state_list **last_statep,
+ ContributionIndex work1[], bitsetv work2, state **tp)
+{
+ state_list *lr0_isocore = t->state_list->lr0Isocore;
+ state_list **this_isocorep;
+ bool has_lookaheads;
+
+ /* Determine whether there's an isocore of t with which these lookaheads can
+ be merged. */
+ {
+ AnnotationIndex ai;
+ AnnotationList *a;
+ if (annotation_lists)
+ for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
+ a;
+ ++ai, a = a->next)
+ work1[ai] =
+ AnnotationList__computeDominantContribution (
+ a, lr0_isocore->state->nitems, lookaheads, false);
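+    /* Walk the circular list of t's isocores, starting at t itself.  Stop
+       at the first isocore that has not yet been recomputed as a successor
+       or whose lookaheads are compatible with the new lookaheads;
+       otherwise, wrap back around to t's own node.  */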
+ for (this_isocorep = &t->state_list;
+ this_isocorep == &t->state_list || *this_isocorep != t->state_list;
+ this_isocorep = &(*this_isocorep)->nextIsocore)
+ {
+ if (!(*this_isocorep)->recomputedAsSuccessor)
+ break;
+ if (annotation_lists)
+ {
+ for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
+ a;
+ ++ai, a = a->next)
+ {
+ if (work1[ai] != ContributionIndex__none)
+ {
+ /* This isocore compatibility test depends on the fact
+ that, if the dominant contributions are the same for the
+ two isocores, then merging their lookahead sets will not
+ produce a state with a different dominant contribution.
+ */
+ ContributionIndex ci =
+ AnnotationList__computeDominantContribution (
+ a, lr0_isocore->state->nitems,
+ (*this_isocorep)->lookaheads, false);
+ if (ci != ContributionIndex__none && work1[ai] != ci)
+ break;
+ }
+ }
+ if (!a)
+ break;
+ }
+ else
+ {
+ size_t i;
+ for (i = 0; i < t->nitems; ++i)
+ {
+ if (!(*this_isocorep)->lookaheads
+ || !(*this_isocorep)->lookaheads[i])
+ {
+ if (!bitset_empty_p (lookaheads[i]))
+ break;
+ }
+                /* bitset_equal_p uses the size of the first argument, so
+                   lookaheads[i] must be the second argument.  */
+ else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i],
+ lookaheads[i]))
+ break;
+ }
+ if (i == t->nitems)
+ break;
+ }
+ }
+ }
+
+ has_lookaheads = false;
+ {
+ size_t i;
+ for (i = 0; i < lr0_isocore->state->nitems; ++i)
+ if (!bitset_empty_p (lookaheads[i]))
+ {
+ has_lookaheads = true;
+ break;
+ }
+ }
+
+ /* Merge with an existing isocore. */
+ if (this_isocorep == &t->state_list || *this_isocorep != t->state_list)
+ {
+ bool new_lookaheads = false;
+ *tp = (*this_isocorep)->state;
+
+ /* Merge lookaheads into the state and record whether any of them are
+ actually new. */
+ if (has_lookaheads)
+ {
+ size_t i;
+ if (!(*this_isocorep)->lookaheads)
+ {
+ (*this_isocorep)->lookaheads =
+ xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads);
+ for (i = 0; i < t->nitems; ++i)
+ (*this_isocorep)->lookaheads[i] = NULL;
+ }
+ for (i = 0; i < t->nitems; ++i)
+ if (!bitset_empty_p (lookaheads[i]))
+ {
+ if (!(*this_isocorep)->lookaheads[i])
+ (*this_isocorep)->lookaheads[i] =
+ bitset_create (ntokens, BITSET_FIXED);
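+                /* Strip from lookaheads[i] the tokens already present in
+                   this isocore, then merge the rest in; whatever remains in
+                   lookaheads[i] is genuinely new.  */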
+ bitset_andn (lookaheads[i],
+ lookaheads[i], (*this_isocorep)->lookaheads[i]);
+ bitset_or ((*this_isocorep)->lookaheads[i],
+ lookaheads[i], (*this_isocorep)->lookaheads[i]);
+ if (!bitset_empty_p (lookaheads[i]))
+ new_lookaheads = true;
+ }
+ }
+
+ /* If new lookaheads were merged, propagate those lookaheads to the
+ successors, possibly splitting them. If *tp is being recomputed for
+ the first time, this isn't necessary because the main
+ ielr_split_states loop will handle the successors later. */
+ if (!(*this_isocorep)->recomputedAsSuccessor)
+ (*this_isocorep)->recomputedAsSuccessor = true;
+ else if (new_lookaheads)
+ {
+ int i;
+ /* When merging demands identical lookahead sets, it is impossible to
+ merge new lookaheads. */
+ aver (annotation_lists);
+ for (i = 0; i < (*tp)->transitions->num; ++i)
+ {
+ state *t2 = (*tp)->transitions->states[i];
+ /* At any time, there's at most one state for which we have so
+ far initially recomputed only some of its successors in the
+ main ielr_split_states loop. Because we recompute successors
+ in order, we can just stop at the first such successor. Of
+ course, *tp might be a state some of whose successors have
+ been recomputed as successors of other states rather than as
+ successors of *tp. It's fine if we go ahead and propagate to
+ some of those. We'll propagate to them again (but stop when
+ we see nothing changes) and to the others when we reach *tp in
+ the main ielr_split_states loop later. */
+ if (!t2->state_list->recomputedAsSuccessor)
+ break;
+ AnnotationList__computeLookaheadFilter (
+ annotation_lists[t2->state_list->lr0Isocore->state->number],
+ t2->nitems, work2);
+ ielr_compute_lookaheads (follow_kernel_items, always_follows,
+ (*this_isocorep), t2, work2,
+ lookaheads);
+ /* FIXME: If splitting t2 here, it's possible that lookaheads
+ that had already propagated from *tp to t2 will be left in t2
+ after *tp has been removed as t2's predecessor:
+ - We're going to recompute all lookaheads in phase 4, so these
+ extra lookaheads won't appear in the final parser table.
+ - If t2 has just one annotation, then these extra lookaheads
+ cannot alter the dominating contribution to the associated
+ inadequacy and thus cannot needlessly prevent a future merge
+ of some new state with t2. We can be sure of this because:
+ - The fact that we're splitting t2 now means that some
+ predecessors (at least one) other than *tp must be
+ propagating contributions to t2.
+ - The fact that t2 was merged in the first place means that,
+ if *tp propagated any contributions, the dominating
+ contribution must be the same as that from those other
+ predecessors.
+ - Thus, if some new state to be merged with t2 in the future
+ proves to be compatible with the contributions propagated
+ by the other predecessors, it will also be compatible with
+ the contributions made by the extra lookaheads left behind
+ by *tp.
+ - However, if t2 has more than one annotation and these extra
+ lookaheads contribute to one of their inadequacies, it's
+ possible these extra lookaheads may needlessly prevent a
+ future merge with t2. For example:
+ - Let's say there's an inadequacy A that makes the split
+ necessary as follows:
+ - There's currently just one other predecessor and it
+ propagates to t2 some contributions to inadequacy A.
+ - The new lookaheads that we were attempting to propagate
+ from *tp to t2 made contributions to inadequacy A with a
+ different dominating contribution than those from that
+ other predecessor.
+ - The extra lookaheads either make no contribution to
+ inadequacy A or have the same dominating contribution as
+ the contributions from the other predecessor. Either
+ way, as explained above, they can't prevent a future
+ merge.
+ - Let's say there's an inadequacy B that causes the trouble
+ with future merges as follows:
+ - The extra lookaheads make contributions to inadequacy B.
+ - Those extra contributions did not prevent the original
+ merge to create t2 because the other predecessor
+ propagates to t2 no contributions to inadequacy B.
+ - Thus, those extra contributions may prevent a future
+ merge with t2 even though the merge would be fine if *tp
+ had not left them behind.
+ - Is the latter case common enough to worry about?
+ - Perhaps we should track all predecessors and iterate them
+ now to recreate t2 without those extra lookaheads. */
+ ielr_compute_state (follow_kernel_items, always_follows,
+ annotation_lists, t2, lookaheads,
+ last_statep, work1, work2,
+ &(*tp)->transitions->states[i]);
+ }
+ }
+ }
+
+ /* Create a new isocore. */
+ else
+ {
+ state_list *old_isocore = *this_isocorep;
+ (*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep);
+ *last_statep = *this_isocorep;
+ (*last_statep)->state = *tp = state_new_isocore (t);
+ (*tp)->state_list = *last_statep;
+ (*last_statep)->recomputedAsSuccessor = true;
+ (*last_statep)->next = NULL;
+ (*last_statep)->lookaheads = NULL;
+ if (has_lookaheads)
+ {
+ size_t i;
+ (*last_statep)->lookaheads =
+ xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads);
+ for (i = 0; i < t->nitems; ++i)
+ {
+ if (bitset_empty_p (lookaheads[i]))
+ (*last_statep)->lookaheads[i] = NULL;
+ else
+ {
+ (*last_statep)->lookaheads[i] =
+ bitset_create (ntokens, BITSET_FIXED);
+ bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]);
+ }
+ }
+ }
+ (*last_statep)->lr0Isocore = lr0_isocore;
+ (*last_statep)->nextIsocore = old_isocore;
+ }
+}
+
+/**
+ * \pre
+ * - \c follow_kernel_items and \c always_follows were computed by
+ * \c ielr_compute_auxiliary_tables.
+ * - Either:
+ *   - <tt>annotation_lists = NULL</tt> and <tt>max_annotations = 0</tt>.
+ * - \c annotation_lists and \c max_annotations were computed by
+ * \c ielr_compute_annotation_lists.
+ * \post
+ * - \c ::states is of size \c ::nstates (which might be greater than
+ * <tt>::nstates \@pre</tt>) and no longer contains any LR(1)-relative
+ * inadequacy. \c annotation_lists was used to determine state
+ * compatibility or, if <tt>annotation_lists = NULL</tt>, the canonical
+ * LR(1) state compatibility test was used.
+ * - If <tt>annotation_lists = NULL</tt>, reduction lookahead sets were
+ * computed in all states. TV_IELR_PHASE4 was pushed while they were
+ * computed from item lookahead sets.
+ */
+static void
+ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows,
+ AnnotationList **annotation_lists,
+ AnnotationIndex max_annotations)
+{
+ state_list *first_state;
+ state_list *last_state;
+ bitsetv lookahead_filter = NULL;
+ bitsetv lookaheads;
+
+ /* Set up state list and some reusable bitsets. */
+ {
+ size_t max_nitems = 0;
+ state_number i;
+ state_list **nodep = &first_state;
+ for (i = 0; i < nstates; ++i)
+ {
+ *nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep);
+ (*nodep)->state = states[i];
+ (*nodep)->recomputedAsSuccessor = false;
+ (*nodep)->lookaheads = NULL;
+ (*nodep)->lr0Isocore = *nodep;
+ (*nodep)->nextIsocore = *nodep;
+ nodep = &(*nodep)->next;
+ if (states[i]->nitems > max_nitems)
+ max_nitems = states[i]->nitems;
+ }
+ *nodep = NULL;
+ lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
+ if (!annotation_lists)
+ bitsetv_ones (lookahead_filter);
+ lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
+ }
+
+ /* Recompute states. */
+ {
+ ContributionIndex *work = xnmalloc (max_annotations, sizeof *work);
+ state_list *this_state;
+ for (this_state = first_state; this_state; this_state = this_state->next)
+ {
+ state *s = this_state->state;
+ int i;
+ for (i = 0; i < s->transitions->num; ++i)
+ {
+ state *t = s->transitions->states[i];
+ if (annotation_lists)
+ AnnotationList__computeLookaheadFilter (
+ annotation_lists[t->state_list->lr0Isocore->state->number],
+ t->nitems, lookahead_filter);
+ ielr_compute_lookaheads (follow_kernel_items, always_follows,
+ this_state, t, lookahead_filter,
+ lookaheads);
+ ielr_compute_state (follow_kernel_items, always_follows,
+ annotation_lists, t, lookaheads, &last_state,
+ work, lookahead_filter,
+ &s->transitions->states[i]);
+ }
+ }
+ free (work);
+ }
+
+ bitsetv_free (lookahead_filter);
+ bitsetv_free (lookaheads);
+
+ /* Store states back in the states array. */
+ states = xnrealloc (states, nstates, sizeof *states);
+ {
+ state_list *node;
+ for (node = first_state; node; node = node->next)
+ states[node->state->number] = node->state;
+ }
+
+ /* In the case of canonical LR(1), copy item lookahead sets to reduction
+ lookahead sets. */
+ if (!annotation_lists)
+ {
+      state_list *node;
+      timevar_push (TV_IELR_PHASE4);
+      initialize_LA ();
+ for (node = first_state; node; node = node->next)
+ if (!node->state->consistent)
+ {
+ size_t i = 0;
+ item_number *itemset = node->state->items;
+ size_t r;
+ for (r = 0; r < node->state->reductions->num; ++r)
+ {
+ rule *this_rule = node->state->reductions->rules[r];
+ bitset lookahead_set =
+ node->state->reductions->lookahead_tokens[r];
+ if (item_number_is_rule_number (*this_rule->rhs))
+ ielr_compute_goto_follow_set (follow_kernel_items,
+ always_follows, node,
+ this_rule->lhs, lookahead_set);
+ else if (node->lookaheads)
+ {
+ /* We don't need to start the kernel item search back at
+ i=0 because both items and reductions are sorted on rule
+ number. */
+ while (!item_number_is_rule_number (ritem[itemset[i]])
+ || item_number_as_rule_number (ritem[itemset[i]])
+ != this_rule->number)
+ {
+ ++i;
+ aver (i < node->state->nitems);
+ }
+ if (node->lookaheads[i])
+ bitset_copy (lookahead_set, node->lookaheads[i]);
+ }
+ }
+ }
+ timevar_pop (TV_IELR_PHASE4);
+ }
+
+ /* Free state list. */
+ while (first_state)
+ {
+ state_list *node = first_state;
+ if (node->lookaheads)
+ {
+ size_t i;
+ for (i = 0; i < node->state->nitems; ++i)
+ if (node->lookaheads[i])
+ bitset_free (node->lookaheads[i]);
+ free (node->lookaheads);
+ }
+ first_state = node->next;
+ free (node);
+ }
+}
+
+void
+ielr (void)
+{
+ LrType lr_type;
+
+ /* Examine user options. */
+ {
+ char *type = muscle_percent_define_get ("lr.type");
+ if (0 == strcmp (type, "LALR"))
+ lr_type = LR_TYPE__LALR;
+ else if (0 == strcmp (type, "IELR"))
+ lr_type = LR_TYPE__IELR;
+ else if (0 == strcmp (type, "canonical LR"))
+ lr_type = LR_TYPE__CANONICAL_LR;
+ else
+ aver (false);
+ free (type);
+ }
+
+ /* Phase 0: LALR(1). */
+ timevar_push (TV_LALR);
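+  /* Canonical LR needs only the goto map from this phase; IELR (like LALR)
+     starts from the full LALR(1) computation.  */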
+ if (lr_type == LR_TYPE__CANONICAL_LR)
+ set_goto_map ();
+ else
+ lalr ();
+ if (lr_type == LR_TYPE__LALR)
+ {
+ bitsetv_free (goto_follows);
+ timevar_pop (TV_LALR);
+ return;
+ }
+ timevar_pop (TV_LALR);
+
+ {
+ bitsetv follow_kernel_items;
+ bitsetv always_follows;
+ InadequacyList **inadequacy_lists = NULL;
+ AnnotationList **annotation_lists = NULL;
+ struct obstack annotations_obstack;
+ AnnotationIndex max_annotations = 0;
+
+ {
+ /* Phase 1: Compute Auxiliary Tables. */
+ state ***predecessors;
+ timevar_push (TV_IELR_PHASE1);
+ ielr_compute_auxiliary_tables (
+ &follow_kernel_items, &always_follows,
+ lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors);
+ timevar_pop (TV_IELR_PHASE1);
+
+ /* Phase 2: Compute Annotations. */
+ timevar_push (TV_IELR_PHASE2);
+ if (lr_type != LR_TYPE__CANONICAL_LR)
+ {
+ obstack_init (&annotations_obstack);
+ ielr_compute_annotation_lists (follow_kernel_items, always_follows,
+ predecessors, &max_annotations,
+ &inadequacy_lists, &annotation_lists,
+ &annotations_obstack);
+ {
+ state_number i;
+ for (i = 0; i < nstates; ++i)
+ free (predecessors[i]);
+ }
+ free (predecessors);
+ bitsetv_free (goto_follows);
+ lalr_free ();
+ }
+ timevar_pop (TV_IELR_PHASE2);
+ }
+
+ /* Phase 3: Split States. */
+ timevar_push (TV_IELR_PHASE3);
+ {
+ state_number nstates_lr0 = nstates;
+ ielr_split_states (follow_kernel_items, always_follows,
+ annotation_lists, max_annotations);
+ if (inadequacy_lists)
+ {
+ state_number i;
+ for (i = 0; i < nstates_lr0; ++i)
+ InadequacyList__delete (inadequacy_lists[i]);
+ }
+ }
+ free (inadequacy_lists);
+ if (annotation_lists)
+ obstack_free (&annotations_obstack, NULL);
+ free (annotation_lists);
+ bitsetv_free (follow_kernel_items);
+ bitsetv_free (always_follows);
+ timevar_pop (TV_IELR_PHASE3);
+ }
+
+ /* Phase 4: Compute Reduction Lookaheads. */
+ timevar_push (TV_IELR_PHASE4);
+ free (goto_map);
+ free (from_state);
+ free (to_state);
+ if (lr_type == LR_TYPE__CANONICAL_LR)
+ {
+      /* Reduction lookaheads are computed in ielr_split_states above but
+         are timed as part of phase 4.  */
+ set_goto_map ();
+ }
+ else
+ {
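+      /* For IELR, rerun the LALR(1) algorithm to compute the reduction
+         lookaheads of the now-split state machine; goto_follows is not
+         needed afterwards.  */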
+ lalr ();
+ bitsetv_free (goto_follows);
+ }
+ timevar_pop (TV_IELR_PHASE4);
+}