From: Joel E. Denny Date: Tue, 21 Apr 2009 06:10:57 +0000 (-0400) Subject: Add new files for IELR and canonical LR implementation. X-Git-Tag: v2.5_rc1~258 X-Git-Url: https://git.saurik.com/bison.git/commitdiff_plain/166366b28f579dfb991f8f6fca323847bcc1eb65 Add new files for IELR and canonical LR implementation. * src/AnnotationList.c: New. * src/AnnotationList.h: New. * src/InadequacyList.c: New. * src/InadequacyList.h: New. * src/Sbitset.c: New. * src/Sbitset.h: New. * src/ielr.c: New. * src/ielr.h: New. --- diff --git a/ChangeLog b/ChangeLog index fdbbba9c..962f5342 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2009-04-21 Joel E. Denny + + Add new files for IELR and canonical LR implementation. + * src/AnnotationList.c: New. + * src/AnnotationList.h: New. + * src/InadequacyList.c: New. + * src/InadequacyList.h: New. + * src/Sbitset.c: New. + * src/Sbitset.h: New. + * src/ielr.c: New. + * src/ielr.h: New. + 2009-04-20 Joel E. Denny Implement %define lr.default_rules. diff --git a/src/AnnotationList.c b/src/AnnotationList.c new file mode 100644 index 00000000..a603102a --- /dev/null +++ b/src/AnnotationList.c @@ -0,0 +1,811 @@ +/* IELR's inadequacy annotation list. + + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include "system.h" + +#include "AnnotationList.h" +#include "lalr.h" +#include "ielr.h" + +/** + * \pre + * - annotations_obstackp != NULL. + * \post + * - \c result is a new \c AnnotationList with one node whose: + * - \c inadequacyNode member is \c NULL. + * - \c contributions member is allocated with \c contribution_count + * uninitialized elements. + * - All memory was allocated on \c annotations_obstackp. + */ +static AnnotationList* +AnnotationList__alloc_on_obstack (ContributionIndex contribution_count, + struct obstack *annotations_obstackp) +{ + AnnotationList *result; + size_t contributions_size = + contribution_count * sizeof result->contributions[0]; + result = obstack_alloc (annotations_obstackp, + offsetof (AnnotationList, contributions) + + contributions_size); + result->next = NULL; + result->inadequacyNode = NULL; + return result; +} + +/** + * \pre + * - self != NULL. + * - 0 <= ci < self->inadequacyNode->contributionCount. + * \post + * - \c result = true iff contribution \c ci in \c self represents an + * "always" contribution. + */ +static bool +AnnotationList__isContributionAlways (AnnotationList const *self, + ContributionIndex ci) +{ + aver (0 <= ci && ci < self->inadequacyNode->contributionCount); + return self->contributions[ci] == NULL; +} + +/** + * \pre + * - \c self is a single node. + * - \c self annotates the same state as every other node in \c list, and + * that state has \c nitems kernel items. + * \post + * - If the list \c list already contains an identical annotation to \c self, + * \c self was discarded, \c result is false, and the caller is responsible + * for the memory of \c self. + * - Otherwise, \c list now contains the node \c self, \c result is true, and + * \c list assumes responsibility for the memory of \c self. + * - The sort in \c list is: + * - Sort in reverse order on memory address of the associated inadequacy + * node. Because memory is usually allocated in ascending order (FIXME: + * Is this true enough? Should we keep some sort of global index to + * guarantee it?), this should mean that the insertion position within an + * annotation list is usually near the beginning with other annotations + * associated with the same inadequacy. + * - Next, sort on the first contribution that is different as follows: + * - Sort an always-contribution before a never-contribution before a + * potential-contribution. + * - Two always-contributions are identical. + * - Two never-contributions are identical. + * - For two potential-contributions, sort on the contributions' kernel + * item bitsets interpreted as binary numbers. + * - The sorting has a few effects: + * - It accelerates elimination of identical annotations during insertion. + * - It determines how the output of \c AnnotationList__debug is sorted. + * - Other than that, it's probably not important. + */ +static bool +AnnotationList__insertInto (AnnotationList *self, AnnotationList **list, + size_t nitems) +{ + AnnotationList **node; + for (node = list; *node; node = &(*node)->next) + { + int cmp = 0; + ContributionIndex ci; + if (self->inadequacyNode < (*node)->inadequacyNode) + cmp = 1; + else if ((*node)->inadequacyNode < self->inadequacyNode) + cmp = -1; + else + for (ci = 0; + cmp == 0 && ci < self->inadequacyNode->contributionCount; + ++ci) + { + if (AnnotationList__isContributionAlways (self, ci)) + { + if (!AnnotationList__isContributionAlways (*node, ci)) + cmp = -1; + } + else if (AnnotationList__isContributionAlways (*node, ci)) + cmp = 1; + else + { + size_t item; + for (item = 0; cmp == 0 && item < nitems; ++item) + { + if (!Sbitset__test (self->contributions[ci], item)) + { + if (Sbitset__test ((*node)->contributions[ci], item)) + cmp = -1; + } + else if (!Sbitset__test ((*node)->contributions[ci], item)) + cmp = 1; + } + } + } + if (cmp < 0) + { + self->next = *node; + *node = self; + break; + } + else if (cmp == 0) + { + self = NULL; + break; + } + } + if (!*node) + *node = self; + return self != NULL; +} + +static bitset +AnnotationList__compute_shift_tokens (transitions *trans) +{ + bitset shift_tokens = bitset_create (ntokens, BITSET_FIXED); + int i; + FOR_EACH_SHIFT (trans, i) + bitset_set (shift_tokens, TRANSITION_SYMBOL (trans, i)); + return shift_tokens; +} + +static bitset +AnnotationList__compute_conflicted_tokens (bitset shift_tokens, + reductions *reds) +{ + bitset conflicted_tokens = bitset_create (ntokens, BITSET_FIXED); + bitset conflicted_tokens_rule = bitset_create (ntokens, BITSET_FIXED); + bitset tokens = bitset_create (ntokens, BITSET_FIXED); + int i; + + bitset_copy (tokens, shift_tokens); + for (i = 0; i < reds->num; ++i) + { + bitset_and (conflicted_tokens_rule, tokens, reds->lookahead_tokens[i]); + bitset_or (conflicted_tokens, + conflicted_tokens, conflicted_tokens_rule); + bitset_or (tokens, tokens, reds->lookahead_tokens[i]); + /* Check that rules are sorted on rule number or the next step in + AnnotationList__compute_from_inadequacies will misbehave. */ + aver (i == 0 || reds->rules[i-1] < reds->rules[i]); + } + + bitset_free (tokens); + bitset_free (conflicted_tokens_rule); + + return conflicted_tokens; +} + +static bool +AnnotationList__compute_lhs_contributions (state *s, rule *the_rule, + symbol_number conflicted_token, + bitsetv follow_kernel_items, + bitsetv always_follows, + state ***predecessors, + bitset **item_lookahead_sets, + Sbitset *items, + struct obstack + *annotations_obstackp) +{ + goto_number lhs_goto = map_goto (s->number, the_rule->lhs->number); + if (bitset_test (always_follows[lhs_goto], conflicted_token)) + return true; + *items = Sbitset__new_on_obstack (s->nitems, annotations_obstackp); + { + bitset_iterator biter_item; + bitset_bindex item; + BITSET_FOR_EACH (biter_item, follow_kernel_items[lhs_goto], item, 0) + if (ielr_item_has_lookahead (s, 0, item, conflicted_token, + predecessors, item_lookahead_sets)) + Sbitset__set (*items, item); + } + return false; +} + +static void +AnnotationList__computePredecessorAnnotations (AnnotationList *self, state *s, + bitsetv follow_kernel_items, + bitsetv always_follows, + state ***predecessors, + bitset **item_lookahead_sets, + AnnotationList + **annotation_lists, + AnnotationIndex + *annotation_counts, + struct obstack + *annotations_obstackp) +{ + state **predecessor; + for (predecessor = predecessors[s->number]; *predecessor; ++predecessor) + { + AnnotationList *annotation_node = + AnnotationList__alloc_on_obstack ( + self->inadequacyNode->contributionCount, annotations_obstackp); + annotation_node->inadequacyNode = self->inadequacyNode; + bool potential_contribution = false; + bitset *lookaheads = NULL; + { + ContributionIndex ci; + for (ci = 0; ci < self->inadequacyNode->contributionCount; ++ci) + { + symbol_number contribution_token = + InadequacyList__getContributionToken (self->inadequacyNode, ci) + ->number; + if (AnnotationList__isContributionAlways (self, ci)) + { + annotation_node->contributions[ci] = NULL; + continue; + } + annotation_node->contributions[ci] = + Sbitset__new_on_obstack ((*predecessor)->nitems, + annotations_obstackp); + { + size_t predecessor_item = 0; + Sbitset sbiter_item; + Sbitset__Index self_item; + SBITSET__FOR_EACH (self->contributions[ci], s->nitems, + sbiter_item, self_item) + { + /* If this kernel item is the beginning of a RHS, it must be + the kernel item in the start state, and so it has an empty + lookahead set. Thus, it can't contribute to inadequacies, + and so it should never have been identified as a + contribution. If, instead, this kernel item is the + successor of the start state's kernel item, the lookahead + set is still empty, and so it also should never have been + identified as a contribution. This situation is fortunate + because we want to avoid the - 2 below in both cases. */ + aver (s->items[self_item] > 1); + /* If this kernel item is next to the beginning of the RHS, + then check all of the predecessor's goto follows for the + LHS. */ + if (item_number_is_rule_number (ritem[s->items[self_item] + - 2])) + { + Sbitset items; + unsigned int rulei; + for (rulei = s->items[self_item]; + !item_number_is_rule_number (ritem[rulei]); + ++rulei) + ; + if (AnnotationList__compute_lhs_contributions ( + *predecessor, + &rules[item_number_as_rule_number (ritem[rulei])], + contribution_token, + follow_kernel_items, always_follows, predecessors, + item_lookahead_sets, &items, annotations_obstackp)) + { + obstack_free (annotations_obstackp, + annotation_node->contributions[ci]); + annotation_node->contributions[ci] = NULL; + break; + } + else + { + Sbitset__or (annotation_node->contributions[ci], + annotation_node->contributions[ci], + items, (*predecessor)->nitems); + obstack_free (annotations_obstackp, items); + } + } + /* If this kernel item is later in the RHS, then check the + predecessor item's lookahead set. */ + else + { + /* We don't have to start the predecessor item search at + the beginning every time because items from both + states are sorted by their indices in ritem. */ + for (; + predecessor_item < (*predecessor)->nitems; + ++predecessor_item) + if ((*predecessor)->items[predecessor_item] + == s->items[self_item] - 1) + break; + aver (predecessor_item != (*predecessor)->nitems); + if (ielr_item_has_lookahead (*predecessor, 0, + predecessor_item, + contribution_token, + predecessors, + item_lookahead_sets)) + Sbitset__set (annotation_node->contributions[ci], + predecessor_item); + } + } + } + if (annotation_node->contributions[ci]) + { + Sbitset biter; + Sbitset__Index i; + SBITSET__FOR_EACH (annotation_node->contributions[ci], + (*predecessor)->nitems, biter, i) + { + potential_contribution = true; + if (!lookaheads) + { + size_t j; + lookaheads = xnmalloc ((*predecessor)->nitems, + sizeof *lookaheads); + for (j = 0; j < (*predecessor)->nitems; ++j) + lookaheads[j] = NULL; + } + if (!lookaheads[i]) + lookaheads[i] = bitset_create (ntokens, BITSET_FIXED); + bitset_set (lookaheads[i], contribution_token); + } + } + } + } + + /* If the predecessor has any contributions besides just "always" and + "never" contributions: + - If the dominant contribution is split-stable, the annotation could + not affect merging on this predecessor state or its eventual + predecessor states. Moreover, all contributions that affect + whether the dominant contribution remains dominant must be "always" + or "never" contributions in order for the dominant contribution to + be split-stable. Thus, the dominant contribution computation result + in eventual successor states will not be affected by lookaheads + tracked for this predecessor state. (Also, as in the isocore + compatibility test, we depend on the fact that isocores with equal + dominant contributions will have the same dominant contribution when + merged. Otherwise, we might have to worry that the presence of a + potential contribution might somehow be the culprit of that behavior + and thus need to be tracked regardless of the split stability of the + dominant contribution.) Thus, go ahead and discard the annotation + to save space now plus time during state splitting. + - Otherwise, record the annotation, and compute any resulting + annotations needed on predecessor states. */ + if (potential_contribution) + { + if (ContributionIndex__none + != AnnotationList__computeDominantContribution ( + annotation_node, (*predecessor)->nitems, lookaheads, true)) + { + obstack_free (annotations_obstackp, annotation_node); + annotation_node = NULL; + } + { + size_t i; + for (i = 0; i < (*predecessor)->nitems; ++i) + if (lookaheads[i]) + bitset_free (lookaheads[i]); + free (lookaheads); + } + if (annotation_node) + { + if (AnnotationList__insertInto (annotation_node, + &annotation_lists[(*predecessor) + ->number], + (*predecessor)->nitems)) + { + ++annotation_counts[(*predecessor)->number]; + AnnotationList__computePredecessorAnnotations ( + annotation_node, *predecessor, + follow_kernel_items, always_follows, predecessors, + item_lookahead_sets, annotation_lists, annotation_counts, + annotations_obstackp); + } + else + obstack_free (annotations_obstackp, annotation_node); + } + } + else + obstack_free (annotations_obstackp, annotation_node); + } +} + +void +AnnotationList__compute_from_inadequacies (state *s, + bitsetv follow_kernel_items, + bitsetv always_follows, + state ***predecessors, + bitset **item_lookahead_sets, + InadequacyList **inadequacy_lists, + AnnotationList **annotation_lists, + AnnotationIndex *annotation_counts, + ContributionIndex + *max_contributionsp, + struct obstack + *annotations_obstackp) +{ + bitsetv all_lookaheads; + bitset shift_tokens; + bitset conflicted_tokens; + bitset_iterator biter_conflict; + bitset_bindex conflicted_token; + + /* Return an empty list if s->lookahead_tokens = NULL. */ + if (s->consistent) + return; + + all_lookaheads = bitsetv_create (s->nitems, ntokens, BITSET_FIXED); + bitsetv_ones (all_lookaheads); + shift_tokens = AnnotationList__compute_shift_tokens (s->transitions); + conflicted_tokens = + AnnotationList__compute_conflicted_tokens (shift_tokens, s->reductions); + + /* Add an inadequacy annotation for each conflicted_token. */ + BITSET_FOR_EACH (biter_conflict, conflicted_tokens, conflicted_token, 0) + { + AnnotationList *annotation_node; + /* FIXME: Would a BITSET_FRUGAL or BITEST_SPARSE be more efficient? Now + or convert it inside InadequacyList__new_conflict? */ + bitset actions = bitset_create (s->reductions->num + 1, BITSET_FIXED); + ContributionIndex contribution_count = 0; + bool potential_contribution = false; + + /* Allocate the annotation node. */ + { + int rule_i; + for (rule_i = 0; rule_i < s->reductions->num; ++rule_i) + if (bitset_test (s->reductions->lookahead_tokens[rule_i], + conflicted_token)) + ++contribution_count; + if (bitset_test (shift_tokens, conflicted_token)) + ++contribution_count; + annotation_node = + AnnotationList__alloc_on_obstack (contribution_count, + annotations_obstackp); + } + + /* Add a contribution for each reduction that has conflicted_token as a + lookahead. */ + { + ContributionIndex ci = 0; + int item_i = 0; + int rule_i; + for (rule_i = 0; rule_i < s->reductions->num; ++rule_i) + { + rule *the_rule = s->reductions->rules[rule_i]; + if (bitset_test (s->reductions->lookahead_tokens[rule_i], + conflicted_token)) + { + bitset_set (actions, rule_i); + /* If this reduction is on a kernel item, just add it. */ + if (!item_number_is_rule_number (the_rule->rhs[0])) + { + annotation_node->contributions[ci] = + Sbitset__new_on_obstack (s->nitems, + annotations_obstackp); + /* Catch item_i up to rule_i. This works because both are + sorted on rule number. */ + while (!item_number_is_rule_number ( + ritem[s->items[item_i]]) + || item_number_as_rule_number ( + ritem[s->items[item_i]]) + != the_rule->number) + { + ++item_i; + aver (item_i < s->nitems); + } + Sbitset__set (annotation_node->contributions[ci], item_i); + } + /* Otherwise, add the kernel items whose lookahead sets + contribute the conflicted token to this reduction's + lookahead set. */ + else if (AnnotationList__compute_lhs_contributions ( + s, the_rule, conflicted_token, follow_kernel_items, + always_follows, predecessors, item_lookahead_sets, + &annotation_node->contributions[ci], + annotations_obstackp)) + { + annotation_node->contributions[ci++] = NULL; + continue; + } + /* The lookahead token has to come from somewhere. */ + aver (!Sbitset__isEmpty (annotation_node->contributions[ci], + s->nitems)); + ++ci; + potential_contribution = true; + } + } + } + + /* If there are any contributions besides just "always" contributions: + - If there's also a shift contribution, record it. + - If the dominant contribution is split-stable, then the annotation + could not affect merging, so go ahead and discard the annotation and + the inadequacy to save space now plus time during state splitting. + - Otherwise, record the annotation and the inadequacy, and compute any + resulting annotations needed on predecessor states. */ + if (potential_contribution) + { + if (bitset_test (shift_tokens, conflicted_token)) + { + bitset_set (actions, s->reductions->num); + annotation_node->contributions[contribution_count - 1] = NULL; + } + { + InadequacyList *conflict_node = + InadequacyList__new_conflict (s, symbols[conflicted_token], + actions); + actions = NULL; + annotation_node->inadequacyNode = conflict_node; + if (ContributionIndex__none + != AnnotationList__computeDominantContribution ( + annotation_node, s->nitems, all_lookaheads, true)) + { + obstack_free (annotations_obstackp, annotation_node); + InadequacyList__delete (conflict_node); + } + else + { + InadequacyList__prependTo (conflict_node, + &inadequacy_lists[s->number]); + aver (AnnotationList__insertInto ( + annotation_node, &annotation_lists[s->number], + s->nitems)); + /* This aver makes sure the + AnnotationList__computeDominantContribution check above + does discard annotations in the simplest case of a S/R + conflict with no token precedence. */ + aver (!bitset_test (shift_tokens, conflicted_token) + || symbols[conflicted_token]->prec); + ++annotation_counts[s->number]; + if (contribution_count > *max_contributionsp) + *max_contributionsp = contribution_count; + AnnotationList__computePredecessorAnnotations ( + annotation_node, s, + follow_kernel_items, always_follows, predecessors, + item_lookahead_sets, annotation_lists, annotation_counts, + annotations_obstackp); + } + } + } + else + { + bitset_free (actions); + obstack_free (annotations_obstackp, annotation_node); + } + } + + bitsetv_free (all_lookaheads); + bitset_free (shift_tokens); + bitset_free (conflicted_tokens); +} + +void +AnnotationList__debug (AnnotationList const *self, size_t nitems, int spaces) +{ + AnnotationList const *a; + AnnotationIndex ai; + for (a = self, ai = 0; a; a = a->next, ++ai) + { + { + int j; + for (j = 0; j < spaces; ++j) + putc (' ', stderr); + } + fprintf (stderr, "Annotation %d (manifesting state %d):\n", + ai, a->inadequacyNode->manifestingState->number); + { + ContributionIndex ci; + bitset_bindex rulei = 0; /* init suppresses compiler warning */ + rulei = bitset_first (a->inadequacyNode->inadequacy.conflict.actions); + for (ci = 0; ci < a->inadequacyNode->contributionCount; ++ci) + { + symbol_number token = + InadequacyList__getContributionToken (a->inadequacyNode, ci) + ->number; + { + int j; + for (j = 0; j < spaces+2; ++j) + putc (' ', stderr); + } + if (ci == InadequacyList__getShiftContributionIndex ( + a->inadequacyNode)) + fprintf (stderr, "Contributes shift of token %d.\n", token); + else + { + fprintf (stderr, "Contributes token %d", token); + aver (rulei != BITSET_BINDEX_MAX); + fprintf (stderr, " as lookahead, rule number %d", + a->inadequacyNode->manifestingState + ->reductions->rules[rulei]->number); + rulei = + bitset_next (a->inadequacyNode->inadequacy.conflict.actions, + rulei+1); + if (AnnotationList__isContributionAlways (a, ci)) + fprintf (stderr, " always."); + else + { + fprintf (stderr, ", items: "); + Sbitset__fprint (a->contributions[ci], nitems, stderr); + } + fprintf (stderr, "\n"); + } + } + } + } +} + +void +AnnotationList__computeLookaheadFilter (AnnotationList const *self, + size_t nitems, + bitsetv lookahead_filter) +{ + bitsetv_zero (lookahead_filter); + for (; self; self = self->next) + { + ContributionIndex ci; + for (ci = 0; ci < self->inadequacyNode->contributionCount; ++ci) + if (!AnnotationList__isContributionAlways (self, ci)) + { + Sbitset__Index item; + Sbitset biter; + symbol_number token = + InadequacyList__getContributionToken (self->inadequacyNode, ci) + ->number; + SBITSET__FOR_EACH (self->contributions[ci], nitems, biter, item) + bitset_set (lookahead_filter[item], token); + } + } +} + +/** + * \pre + * - self != NULL. + * - \c nitems is the number of kernel items in the LR(0) state that \c self + * annotates. + * - \c lookaheads describes the lookahead sets on the kernel items of some + * isocore of the LR(0) state that \c self annotates. Either: + * - lookaheads = NULL only if the lookahead set on every kernel + * item is empty. + * - For any 0 <= i < nitems, lookaheads[i] is either: + * - \c NULL only if the lookahead set on kernel item \c i is empty. + * - The (possibly empty) lookahead set on kernel item \c i. + * - 0 <= ci < self->inadequacyNode->contributionCount. + * \post + * - \c result = true iff contribution \c ci in \c self is made by the state + * described by \c lookaheads. + */ +static bool +AnnotationList__stateMakesContribution (AnnotationList const *self, + size_t nitems, ContributionIndex ci, + bitset *lookaheads) +{ + if (AnnotationList__isContributionAlways (self, ci)) + return true; + if (!lookaheads) + return false; + { + symbol_number token = + InadequacyList__getContributionToken (self->inadequacyNode, ci)->number; + Sbitset__Index item; + Sbitset biter; + SBITSET__FOR_EACH (self->contributions[ci], nitems, biter, item) + if (lookaheads[item] && bitset_test (lookaheads[item], token)) + return true; + } + return false; +} + +ContributionIndex +AnnotationList__computeDominantContribution (AnnotationList const *self, + size_t nitems, bitset *lookaheads, + bool require_split_stable) +{ + symbol *token; + ContributionIndex const ci_shift = + InadequacyList__getShiftContributionIndex (self->inadequacyNode); + + token = self->inadequacyNode->inadequacy.conflict.token; + + /* S/R conflict. */ + if (ci_shift != ContributionIndex__none) + { + bool find_stable_domination_over_shift = false; + bool find_stable_error_action_domination = false; + { + ContributionIndex ci; + int actioni; + ContributionIndex ci_rr_dominator = ContributionIndex__none; + int shift_precedence = token->prec; + + /* If the token has no precedence set, shift is always chosen. */ + if (!shift_precedence) + return ci_shift; + + /* Figure out which reductions contribute, which of those would + dominate in a R/R comparison, and whether any reduction dominates + the shift so that the R/R comparison is actually needed. */ + for (ci = 0, actioni = bitset_first (self->inadequacyNode->inadequacy + .conflict.actions); + ci < self->inadequacyNode->contributionCount; + ++ci, actioni = bitset_next (self->inadequacyNode->inadequacy + .conflict.actions, actioni+1)) + { + int reduce_precedence = 0; + if (ci == ci_shift) + continue; + { + rule *r = self->inadequacyNode->manifestingState + ->reductions->rules[actioni]; + if (r->prec) + reduce_precedence = r->prec->prec; + } + /* If there's no need to check whether this reduction actually + contributes because the shift eliminates it from the R/R + comparison anyway, continue to the next reduction. */ + if (reduce_precedence + && (reduce_precedence < shift_precedence + || (reduce_precedence == shift_precedence + && token->assoc == right_assoc))) + continue; + if (!AnnotationList__stateMakesContribution (self, nitems, ci, + lookaheads)) + continue; + /* This uneliminated reduction contributes, so see if it can cause + an error action. */ + if (reduce_precedence == shift_precedence + && token->assoc == non_assoc) + { + /* It's not possible to find split-stable domination over + shift after a potential %nonassoc. */ + if (find_stable_domination_over_shift) + return ContributionIndex__none; + if (!require_split_stable + || AnnotationList__isContributionAlways (self, ci)) + return ContributionIndex__error_action; + find_stable_error_action_domination = true; + } + /* Consider this uneliminated contributing reduction in the R/R + comparison. */ + if (ci_rr_dominator == ContributionIndex__none) + ci_rr_dominator = ci; + /* If precedence is set for this uneliminated contributing + reduction, it dominates the shift, so try to figure out which + reduction dominates the R/R comparison. */ + if (reduce_precedence) + { + /* It's not possible to find split-stable error action + domination after a potential reduction. */ + if (find_stable_error_action_domination) + return ContributionIndex__none; + if (!require_split_stable) + return ci_rr_dominator; + if (!AnnotationList__isContributionAlways (self, + ci_rr_dominator)) + return ContributionIndex__none; + if (AnnotationList__isContributionAlways (self, ci)) + return ci_rr_dominator; + find_stable_domination_over_shift = true; + } + } + } + if (find_stable_domination_over_shift + || find_stable_error_action_domination) + return ContributionIndex__none; + /* No reduce or error action domination found, so shift dominates. */ + return ci_shift; + } + + /* R/R conflict, so the reduction with the lowest rule number dominates. + Fortunately, contributions are sorted by rule number. */ + { + ContributionIndex ci; + for (ci = 0; ci < self->inadequacyNode->contributionCount; ++ci) + if (AnnotationList__stateMakesContribution (self, nitems, ci, + lookaheads)) + { + if (require_split_stable + && !AnnotationList__isContributionAlways (self, ci)) + return ContributionIndex__none; + return ci; + } + } + return ContributionIndex__none; +} diff --git a/src/AnnotationList.h b/src/AnnotationList.h new file mode 100644 index 00000000..02d4a2be --- /dev/null +++ b/src/AnnotationList.h @@ -0,0 +1,177 @@ +/* IELR's inadequacy annotation list. + + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef ANNOTATION_LIST_H_ +# define ANNOTATION_LIST_H_ + +#include +#include "Sbitset.h" +#include "InadequacyList.h" +#include "state.h" + +typedef unsigned int AnnotationIndex; + +/** + * A node in a list of annotations on a particular LR(0) state. Each + * annotation records how isocores of that LR(0) state might contribute to an + * individual inadequacy, which might manifest in a different state. Don't + * break encapsulation by modifying the fields directly. Use the provided + * interface functions. + */ +typedef struct AnnotationList +{ + /** The next node in the list or \c NULL if none. */ + struct AnnotationList *next; + /** The \c InadequacyList node describing how this inadequacy manifests. */ + InadequacyList *inadequacyNode; + /** + * List of how the "always", "never", and potential contributions of the + * inadequacy might be made by isocores of the annotated LR(0) state: + * - The number of rows is the number of contributions. That is, + * AnnotationList::inadequacyNode->contributionCount. + * - The token associated with contribution \c i is + * InadequacyList__getContributionToken (AnnotationList::inadequacyNode, i). + * - Iff AnnotationList::contributions[i] = NULL, contribution + * \c i is an "always" contribution. That is, for every isocore of the + * annotated LR(0) state, its core or the core of one its eventual + * successors will definitely make this contribution to the inadequacy. + * It may contribute by either: + * - Creating a shift of contribution i's token in the state + * that can manifest the inadequacy. + * - Propagating that token to the lookahead set of contribution + * i's reduction in the state that can manifest the + * inadequacy. + * - Otherwise: + * - The number of columns in AnnotationList::contributions[i] + * is the number of kernel items in any isocore of the annotated LR(0) + * state. + * - Iff AnnotationList::contributions[i] is empty, contribution + * \c i is a "never" contribution. That is, no isocore of the + * annotated LR(0) state can make this contribution to the inadequacy. + * - Otherwise, for each bit \c j that is set in + * AnnotationList::contributions[i], if the token associated + * with contribution \c i is present in the lookahead set of kernel + * item \c j of an isocore of the annotated LR(0) state, that isocore + * will make contribution \c i to the inadequacy by propagating the + * contribution's token to the lookahead set of the contribution's + * reduction in the state that can manifest the inadequacy. + */ + Sbitset contributions[1]; +} AnnotationList; + +/** + * \pre + * - s != NULL. + * - \c follow_kernel_items, \c always_follows, and \c predecessors were + * computed by \c ielr_compute_auxiliary_tables. + * - The size of each of \c annotation_lists and \c annotation_counts is + * \c ::nstates. + * \post + * - inadequacy_lists[s->number] now describes all inadequacies that + * manifest in \c s. + * - For every state states[i], annotation_lists[i] now + * contains all annotations associated with all inadequacies that manifest + * in \c s. + * - annotation_counts[i] was incremented by the number of new + * annotations added to states[i]. + * - *max_contributionsp is the higher of: + * - The maximum number of contributions computed per annotation. + * - *max_contributionsp \@pre. + * - All memory for all new annotations was allocated on + * \c annotations_obstackp. + */ +void +AnnotationList__compute_from_inadequacies (state *s, + bitsetv follow_kernel_items, + bitsetv always_follows, + state ***predecessors, + bitset **item_lookahead_sets, + InadequacyList **inadequacy_lists, + AnnotationList **annotation_lists, + AnnotationIndex *annotation_counts, + ContributionIndex + *max_contributionsp, + struct obstack + *annotations_obstackp); + +/** + * \pre + * - self != NULL. + * - \c nitems is the number of kernel items in the LR(0) state that every + * node in the list \c self annotates. + * \post + * - A textual representation of all nodes in the list \c self was printed to + * stderr. \c spaces spaces were printed before each line of the text. + */ +void AnnotationList__debug (AnnotationList const *self, size_t nitems, + int spaces); + +/** + * \pre + * - self != NULL. + * - \c nitems is the number of kernel items in the LR(0) state that \c self + * annotates. + * - The number of rows in \c lookahead_filter is at least \c nitems, and the + * number of columns is \c ::ntokens. + * \post + * - lookahead_filter[i][j] is set iff some annotation in the list + * \c self lists token \c j in kernel item \c i as a contributor. + */ +void AnnotationList__computeLookaheadFilter (AnnotationList const *self, + size_t nitems, + bitsetv lookahead_filter); + +/** + * \pre + * - self != NULL. + * - \c nitems is the number of kernel items in the LR(0) state that \c self + * annotates. + * - \c lookaheads describes the lookahead sets on the kernel items of some + * isocore of the LR(0) state that \c self annotates. Either: + * - lookaheads = NULL only if the lookahead set on every kernel + * item is empty. + * - For any 0 <= i < nitems, lookaheads[i] is either: + * - \c NULL only if the lookahead set on kernel item \c i is empty. + * - The (possibly empty) lookahead set on kernel item \c i. + * \post + * - If require_split_stable = false, \c result = either: + * - \c ContributionIndex__none iff the state described by \c lookaheads + * makes none of the contributions in \c self. + * - The index of the dominating contribution in \c self that is made by + * that state. + * - \c ContributionIndex__error_action to indicate that the inadequacy + * manifests as a conflict and that a syntax error action (because of a + * %nonassoc) dominates instead. + * - Otherwise, \c result is the same as if require_split_stable = + * false except that it is also \c ContributionIndex__none if there + * are contributions made by the state but the dominating contribution is + * not split-stable. By split-stable, we mean that the dominating + * contribution cannot change due to loss of one or more potential + * contributions due to loss of lookaheads due to splitting of the state. + * - After determining which contributions are actually made by the state, + * the algorithm for determining which contribution dominates in the + * conflict is intended to choose exactly the same action as conflicts.c + * would choose... no matter how crazy conflicts.c's choice is. + */ +ContributionIndex +AnnotationList__computeDominantContribution (AnnotationList const *self, + size_t nitems, bitset *lookaheads, + bool require_split_stable); + +#endif /* !ANNOTATION_LIST_H_ */ diff --git a/src/InadequacyList.c b/src/InadequacyList.c new file mode 100644 index 00000000..edf3a0f5 --- /dev/null +++ b/src/InadequacyList.c @@ -0,0 +1,76 @@ +/* IELR's inadequacy list. + + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include "system.h" + +#include "InadequacyList.h" + +ContributionIndex const ContributionIndex__none = -1; +ContributionIndex const ContributionIndex__error_action = -2; + +InadequacyList * +InadequacyList__new_conflict (state *manifesting_state, symbol *token, + bitset actions) +{ + InadequacyList *result = xmalloc (sizeof *result); + result->next = NULL; + result->manifestingState = manifesting_state; + result->contributionCount = bitset_count (actions); + result->inadequacy.conflict.token = token; + result->inadequacy.conflict.actions = actions; + return result; +} + +void +InadequacyList__delete (InadequacyList *self) +{ + while (self) + { + InadequacyList *node = self; + self = self->next; + bitset_free (node->inadequacy.conflict.actions); + free (node); + } +} + +ContributionIndex +InadequacyList__getShiftContributionIndex (InadequacyList const *self) +{ + if (!bitset_test (self->inadequacy.conflict.actions, + self->manifestingState->reductions->num)) + return ContributionIndex__none; + return self->contributionCount - 1; +} + +symbol * +InadequacyList__getContributionToken (InadequacyList const *self, + ContributionIndex i) +{ + aver (0 <= i && i < self->contributionCount); + return self->inadequacy.conflict.token; +} + +void +InadequacyList__prependTo (InadequacyList *self, InadequacyList **list) +{ + InadequacyList *head_old = *list; + *list = self; + self->next = head_old; +} diff --git a/src/InadequacyList.h b/src/InadequacyList.h new file mode 100644 index 00000000..5770f994 --- /dev/null +++ b/src/InadequacyList.h @@ -0,0 +1,135 @@ +/* IELR's inadequacy list. + + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef INADEQUACY_LIST_H_ +# define INADEQUACY_LIST_H_ + +#include +#include "gram.h" +#include "state.h" +#include "symtab.h" + +/** + * For a conflict, each rule in the grammar can have at most one contributing + * reduction except that rule 0 cannot have any because the reduction on rule 0 + * cannot have lookaheads. For a conflict, exactly one shift can contribute. + * Thus the number of rules in the grammar is an upper bound on the number of + * possible contributions to any conflict. The maximum number of possible + * items in a state is also an upper bound, but the \c nitems member of \c + * state is currently a \c size_t and thus, if changed, risks becoming out of + * sync with this type. Whatever the type, it must support negatives for sake + * of the special values below. + */ +typedef rule_number ContributionIndex; + +/* Special \c ContributionIndex used to indicate null result when looking for a + contribution. */ +extern ContributionIndex const ContributionIndex__none; + +/* Special \c ContributionIndex used by + \c AnnotationList__computeDominantContribution to signal when the action + chosen in a conflict is a syntax error because of a %nonassoc. */ +extern ContributionIndex const ContributionIndex__error_action; + +/** + * The description of a conflict. Don't break encapsulation by modifying the + * fields directly. Use the provided interface functions for + * \c InadequacyList. + */ +typedef struct { + /** The \c token passed to \c InadequacyList__new_conflict. */ + symbol *token; + /** The \c actions passed to \c InadequacyList__new_conflict. */ + bitset actions; +} Conflict; + +/** + * A node in a list that describes all the inadequacies that manifest in a + * particular state. Don't break encapsulation by modifying the fields + * directly. Use the provided interface functions. + */ +typedef struct InadequacyList { + struct InadequacyList *next; + state *manifestingState; + ContributionIndex contributionCount; + union { + Conflict conflict; + } inadequacy; +} InadequacyList; + +/** + * \pre + * - manifesting_state != NULL. + * - \c token is a token. + * - The size of \c actions is + * manifesting_state->reductions->num + 1. + * \post + * - \c result is a new \c InadequacyList with one node indicating that, in + * \c manifesting_state, the following actions are in conflict on \c token: + * - Shift iff + * bitset_test (actions, manifesting_state->reductions->num). + * - For any \c i such that + * 0 <= i < manifesting_state->reductions->num, the reduction + * for the rule manifesting_state->reductions->rules[i] iff + * actions[i] is set. + * - \c result assumes responsibility for the memory of \c actions. + */ +InadequacyList *InadequacyList__new_conflict (state *manifesting_state, + symbol *token, bitset actions); + +/** + * \post + * - All memory associated with all nodes in the list \c self was freed. + */ +void InadequacyList__delete (InadequacyList *self); + +/** + * \pre + * - self != NULL. + * \post + * - \c result = either: + * - \c ContributionIndex__none iff there is no shift contribution in + * \c self (perhaps because \c self isn't a conflict). + * - The index of the shift contribution, otherwise. + */ +ContributionIndex +InadequacyList__getShiftContributionIndex (InadequacyList const *self); + +/** + * \pre + * - self != NULL. + * - 0 <= i < self->contributionCount. + * \post + * - \c result = the token associated with contribution \c i in the + * inadequacy described by the node \c self. + */ +symbol *InadequacyList__getContributionToken (InadequacyList const *self, + ContributionIndex i); + +/** + * \pre + * - \c self is a single node. + * - list != NULL. + * \post + * - \c list now contains \c self as its first node. + * - \c list assumes responsibility for the memory of \c self. + */ +void InadequacyList__prependTo (InadequacyList *self, InadequacyList **list); + +#endif /* !INADEQUACY_LIST_H_ */ diff --git a/src/Sbitset.c b/src/Sbitset.c new file mode 100644 index 00000000..0c1fedfc --- /dev/null +++ b/src/Sbitset.c @@ -0,0 +1,78 @@ +/* A simple, memory-efficient bitset implementation. + + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include "system.h" + +#include "Sbitset.h" + +Sbitset +Sbitset__new (Sbitset__Index nbits) +{ + /* Some functions, like Sbitset__last_byte_mask, will fail if nbits = 0. */ + aver (nbits); + return xcalloc (1, Sbitset__nbytes (nbits)); +} + +Sbitset +Sbitset__new_on_obstack (Sbitset__Index nbits, struct obstack *obstackp) +{ + char *result; + char *ptr; + char *end; + aver (nbits); + result = obstack_alloc (obstackp, Sbitset__nbytes (nbits)); + for (ptr = result, end = result + Sbitset__nbytes (nbits); ptr < end; ++ptr) + *ptr = 0; + return result; +} + +void +Sbitset__delete (Sbitset self) +{ + free (self); +} + +bool +Sbitset__isEmpty (Sbitset self, Sbitset__Index nbits) +{ + char *last = self + Sbitset__nbytes (nbits) - 1; + for (; self < last; ++self) + if (*self != 0) + return false; + return ((*last) & Sbitset__last_byte_mask (nbits)) == 0; +} + +void +Sbitset__fprint(Sbitset self, Sbitset__Index nbits, FILE *file) +{ + Sbitset__Index i; + Sbitset itr; + bool first = true; + fprintf (file, "nbits = %d, set = {", nbits); + SBITSET__FOR_EACH (self, nbits, itr, i) + { + if (first) + first = false; + else + fprintf (file, ","); + fprintf (file, " %d", i); + } + fprintf (file, " }"); +} diff --git a/src/Sbitset.h b/src/Sbitset.h new file mode 100644 index 00000000..4b4b7508 --- /dev/null +++ b/src/Sbitset.h @@ -0,0 +1,87 @@ +/* A simple, memory-efficient bitset implementation. + + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef SBITSET_H_ +# define SBITSET_H_ + +typedef char *Sbitset; +typedef size_t Sbitset__Index; + +#define Sbitset__nbytes(NBITS) (((NBITS)+7)/8) +#define Sbitset__byteAddress(SELF, INDEX) (((SELF) + (INDEX)/8)) +#define Sbitset__bit_mask(INDEX) (0x1 << (7 - (INDEX)%8)) +#define Sbitset__last_byte_mask(NBITS) (0xff << (7 - ((NBITS)-1)%8)) + +/* nbits must not be 0. */ +Sbitset Sbitset__new (Sbitset__Index nbits); +Sbitset Sbitset__new_on_obstack (Sbitset__Index nbits, + struct obstack *obstackp); +void Sbitset__delete (Sbitset self); + +#define Sbitset__test(SELF, INDEX) \ + ((*Sbitset__byteAddress ((SELF), (INDEX)) & Sbitset__bit_mask (INDEX)) != 0) + +bool Sbitset__isEmpty (Sbitset self, Sbitset__Index nbits); + +void Sbitset__fprint(Sbitset self, Sbitset__Index nbits, FILE *file); + +#define Sbitset__set(SELF, INDEX) \ +do { \ + *Sbitset__byteAddress ((SELF), (INDEX)) = \ + *Sbitset__byteAddress ((SELF), (INDEX)) | Sbitset__bit_mask (INDEX); \ +} while(0) + +#define Sbitset__reset(SELF, INDEX) \ +do { \ + *Sbitset__byteAddress ((SELF), (INDEX)) = \ + *Sbitset__byteAddress ((SELF), (INDEX)) & ~Sbitset__bit_mask (INDEX); \ +} while(0) + +/* NBITS is the size of the bitset. More than NBITS bits might be reset. */ +#define Sbitset__zero(SELF, NBITS) \ +do { \ + memset (SELF, 0, Sbitset__nbytes (NBITS)); \ +} while(0) + +/* NBITS is the size of the bitset. More than NBITS bits might be set. */ +#define Sbitset__ones(SELF, NBITS) \ +do { \ + memset (SELF, 0xff, Sbitset__nbytes (NBITS)); \ +} while(0) + +/* NBITS is the size of every bitset. More than NBITS bits might be set. */ +#define Sbitset__or(SELF, OTHER1, OTHER2, NBITS) \ +do { \ + char *ptr_self = (SELF); \ + char *ptr_other1 = (OTHER1); \ + char *ptr_other2 = (OTHER2); \ + char *end_self = ptr_self + Sbitset__nbytes (NBITS); \ + for (; ptr_self < end_self; ++ptr_self, ++ptr_other1, ++ptr_other2) \ + *ptr_self = *ptr_other1 | *ptr_other2; \ +} while(0) + +#define SBITSET__FOR_EACH(SELF, NBITS, ITER, INDEX) \ + for ((ITER) = (SELF); (ITER) < (SELF) + Sbitset__nbytes (NBITS); ++(ITER)) \ + if (*(ITER) != 0) \ + for ((INDEX) = ((ITER)-(SELF))*8; \ + (INDEX) < (NBITS) && (SELF)+(INDEX)/8 < (ITER)+1; \ + ++(INDEX)) \ + if (((*ITER) & Sbitset__bit_mask (INDEX)) != 0) + +#endif /* !SBITSET_H_ */ diff --git a/src/ielr.c b/src/ielr.c new file mode 100644 index 00000000..2c89a5b1 --- /dev/null +++ b/src/ielr.c @@ -0,0 +1,1200 @@ +/* IELR main implementation. + + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include "system.h" + +#include "ielr.h" + +#include +#include + +#include "AnnotationList.h" +#include "derives.h" +#include "getargs.h" +#include "lalr.h" +#include "muscle_tab.h" +#include "nullable.h" +#include "relation.h" +#include "state.h" +#include "symtab.h" + +/** Records the value of the \%define variable "lr.type". */ +typedef enum { LR_TYPE__LALR, LR_TYPE__IELR, LR_TYPE__CANONICAL_LR } LrType; + +/** + * \post: + * - \c result = a new \c bitset of size \c ::nritems such that any bit \c i + * is set iff ritem[i] is a nonterminal after which all ritems in + * the same RHS are nullable nonterminals. In other words, the follows of + * a goto on ritem[i] include the lookahead set of the item. + */ +static bitset +ielr_compute_ritem_sees_lookahead_set (void) +{ + bitset result = bitset_create (nritems, BITSET_FIXED); + unsigned int i = nritems-1; + while (i>0) + { + --i; + while (!item_number_is_rule_number (ritem[i]) + && ISVAR (ritem[i]) + && nullable [item_number_as_symbol_number (ritem[i]) - ntokens]) + bitset_set (result, i--); + if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i])) + bitset_set (result, i--); + while (!item_number_is_rule_number (ritem[i]) && i>0) + --i; + } + if (trace_flag & trace_ielr) + { + fprintf (stderr, "ritem_sees_lookahead_set:\n"); + debug_bitset (result); + fprintf (stderr, "\n"); + } + return result; +} + +/** + * \pre: + * - \c ritem_sees_lookahead_set was computed by + * \c ielr_compute_ritem_sees_lookahead_set. + * \post: + * - Each of \c *edgesp and \c *edge_countsp is a new array of size + * \c ::ngotos. + * - (*edgesp)[i] points to a \c goto_number array of size + * (*edge_countsp)[i]+1. + * - In such a \c goto_number array, the last element is \c ::END_NODE. + * - All remaining elements are the indices of the gotos to which there is an + * internal follow edge from goto \c i. + * - There is an internal follow edge from goto \c i to goto \c j iff both: + * - The from states of gotos \c i and \c j are the same. + * - The transition nonterminal for goto \c i appears as the first RHS + * symbol of at least one production for which both: + * - The LHS is the transition symbol of goto \c j. + * - All other RHS symbols are nullable nonterminals. + * - In other words, the follows of goto \c i include the follows of + * goto \c j and it's an internal edge because the from states are the + * same. + */ +static void +ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set, + goto_number ***edgesp, int **edge_countsp) +{ + *edgesp = xnmalloc (ngotos, sizeof **edgesp); + *edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp); + { + bitset sources = bitset_create (ngotos, BITSET_FIXED); + goto_number i; + for (i = 0; i < ngotos; ++i) + (*edge_countsp)[i] = 0; + for (i = 0; i < ngotos; ++i) + { + int nsources = 0; + { + rule **rulep; + for (rulep = derives[states[to_state[i]]->accessing_symbol + - ntokens]; + *rulep; + ++rulep) + { + /* If there is at least one RHS symbol, if the first RHS symbol + is a nonterminal, and if all remaining RHS symbols (if any) + are nullable nonterminals, create an edge from the LHS + symbol's goto to the first RHS symbol's goto such that the RHS + symbol's goto will be the source of the edge after the + eventual relation_transpose below. + + Unlike in ielr_compute_always_follows, I use a bitset for + edges rather than an array because it is possible that + multiple RHS's with the same first symbol could fit and thus + that we could end up with redundant edges. With the + possibility of redundant edges, it's hard to know ahead of + time how large to make such an array. Another possible + redundancy is that source and destination might be the same + goto. Eliminating all these possible redundancies now might + possibly help performance a little. I have not proven any of + this, but I'm guessing the bitset shouldn't entail much of a + performance penalty, if any. */ + if (bitset_test (ritem_sees_lookahead_set, + (*rulep)->rhs - ritem)) + { + goto_number source = + map_goto (from_state[i], + item_number_as_symbol_number (*(*rulep)->rhs)); + if (i != source && !bitset_test (sources, source)) + { + bitset_set (sources, source); + ++nsources; + ++(*edge_countsp)[source]; + } + } + } + } + if (nsources == 0) + (*edgesp)[i] = NULL; + else + { + (*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]); + { + bitset_iterator biter_source; + bitset_bindex source; + int j = 0; + BITSET_FOR_EACH (biter_source, sources, source, 0) + (*edgesp)[i][j++] = source; + } + (*edgesp)[i][nsources] = END_NODE; + } + bitset_zero (sources); + } + bitset_free (sources); + } + + relation_transpose (edgesp, ngotos); + + if (trace_flag & trace_ielr) + { + fprintf (stderr, "internal_follow_edges:\n"); + relation_print (*edgesp, ngotos, stderr); + } +} + +/** + * \pre: + * - \c ritem_sees_lookahead_set was computed by + * \c ielr_compute_ritem_sees_lookahead_set. + * - \c internal_follow_edges was computed by + * \c ielr_compute_internal_follow_edges. + * \post: + * - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows + * is \c ngotos and the number of columns is maximum number of kernel items + * in any state. + * - (*follow_kernel_itemsp)[i][j] is set iff the follows of goto + * \c i include the lookahead set of item \c j in the from state of goto + * \c i. + * - Thus, (*follow_kernel_itemsp)[i][j] is always unset if there is + * no item \c j in the from state of goto \c i. + */ +static void +ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set, + goto_number **internal_follow_edges, + bitsetv *follow_kernel_itemsp) +{ + { + size_t max_nitems = 0; + state_number i; + for (i = 0; i < nstates; ++i) + if (states[i]->nitems > max_nitems) + max_nitems = states[i]->nitems; + *follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED); + } + { + goto_number i; + for (i = 0; i < ngotos; ++i) + { + size_t nitems = states[from_state[i]]->nitems; + item_number *items = states[from_state[i]]->items; + size_t j; + for (j = 0; j < nitems; ++j) + /* If this item has this goto and if all subsequent symbols in this + RHS (if any) are nullable nonterminals, then record this item as + one whose lookahead set is included in this goto's follows. */ + if (item_number_is_symbol_number (ritem[items[j]]) + && item_number_as_symbol_number (ritem[items[j]]) + == states[to_state[i]]->accessing_symbol + && bitset_test (ritem_sees_lookahead_set, items[j])) + bitset_set ((*follow_kernel_itemsp)[i], j); + } + } + relation_digraph (internal_follow_edges, ngotos, follow_kernel_itemsp); + + if (trace_flag & trace_ielr) + { + fprintf (stderr, "follow_kernel_items:\n"); + debug_bitsetv (*follow_kernel_itemsp); + } +} + +/** + * \pre + * - \c *edgesp and \c edge_counts were computed by + * \c ielr_compute_internal_follow_edges. + * \post + * - \c *always_followsp is a new \c bitsetv with \c ngotos rows and + * \c ntokens columns. + * - (*always_followsp)[i][j] is set iff token \c j is an always + * follow (that is, it's computed by internal and successor edges) of goto + * \c i. + * - Rows of \c *edgesp have been realloc'ed and extended to include + * successor follow edges. \c edge_counts has not been updated. + */ +static void +ielr_compute_always_follows (goto_number ***edgesp, + int const edge_counts[], + bitsetv *always_followsp) +{ + *always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED); + { + goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array); + goto_number i; + for (i = 0; i < ngotos; ++i) + { + goto_number nedges = edge_counts[i]; + { + int j; + transitions *trans = states[to_state[i]]->transitions; + FOR_EACH_SHIFT (trans, j) + bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j)); + for (; j < trans->num; ++j) + { + symbol_number sym = TRANSITION_SYMBOL (trans, j); + if (nullable[sym - ntokens]) + edge_array[nedges++] = map_goto (to_state[i], sym); + } + } + if (nedges - edge_counts[i]) + { + (*edgesp)[i] = + xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]); + memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i], + (nedges - edge_counts[i]) * sizeof *(*edgesp)[i]); + (*edgesp)[i][nedges] = END_NODE; + } + } + free (edge_array); + } + relation_digraph (*edgesp, ngotos, always_followsp); + + if (trace_flag & trace_ielr) + { + fprintf (stderr, "always follow edges:\n"); + relation_print (*edgesp, ngotos, stderr); + fprintf (stderr, "always_follows:\n"); + debug_bitsetv (*always_followsp); + } +} + +/** + * \post + * - \c result is a new array of size \c ::nstates. + * - result[i] is an array of pointers to the predecessor + * state's of state \c i. + * - The last element of such an array is \c NULL. + */ +static state *** +ielr_compute_predecessors (void) +{ + state_number i; + int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts); + state ***result = xnmalloc (nstates, sizeof *result); + for (i = 0; i < nstates; ++i) + predecessor_counts[i] = 0; + for (i = 0; i < nstates; ++i) + { + int j; + for (j = 0; j < states[i]->transitions->num; ++j) + ++predecessor_counts[states[i]->transitions->states[j]->number]; + } + for (i = 0; i < nstates; ++i) + { + result[i] = xnmalloc (predecessor_counts[i]+1, sizeof *result[i]); + result[i][predecessor_counts[i]] = NULL; + predecessor_counts[i] = 0; + } + for (i = 0; i < nstates; ++i) + { + int j; + for (j = 0; j < states[i]->transitions->num; ++j) + { + state_number k = states[i]->transitions->states[j]->number; + result[k][predecessor_counts[k]++] = states[i]; + } + } + free (predecessor_counts); + return result; +} + +/** + * \post + * - \c *follow_kernel_itemsp and \c *always_followsp were computed by + * \c ielr_compute_follow_kernel_items and + * \c ielr_compute_always_follows. + * - Iff predecessorsp != NULL, then \c *predecessorsp was computed + * by \c ielr_compute_predecessors. + */ +static void +ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp, + bitsetv *always_followsp, + state ****predecessorsp) +{ + goto_number **edges; + int *edge_counts; + { + bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set (); + ielr_compute_internal_follow_edges (ritem_sees_lookahead_set, + &edges, &edge_counts); + ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges, + follow_kernel_itemsp); + bitset_free (ritem_sees_lookahead_set); + } + ielr_compute_always_follows (&edges, edge_counts, always_followsp); + { + int i; + for (i = 0; i < ngotos; ++i) + free (edges[i]); + } + free (edges); + free (edge_counts); + if (predecessorsp) + *predecessorsp = ielr_compute_predecessors (); +} + +/** + * \note + * - FIXME: It might be an interesting experiment to compare the space and + * time efficiency of computing \c item_lookahead_sets either: + * - Fully up front. + * - Just-in-time, as implemented below. + * - Not at all. That is, just let annotations continue even when + * unnecessary. + */ +bool +ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item, + symbol_number lookahead, state ***predecessors, + bitset **item_lookahead_sets) +{ + if (!item_lookahead_sets[s->number]) + { + size_t i; + item_lookahead_sets[s->number] = + xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]); + for (i = 0; i < s->nitems; ++i) + item_lookahead_sets[s->number][i] = NULL; + } + if (!item_lookahead_sets[s->number][item]) + { + item_lookahead_sets[s->number][item] = + bitset_create (ntokens, BITSET_FIXED); + /* If this kernel item is the beginning of a RHS, it must be the kernel + item in the start state, and so its LHS has no follows and no goto to + check. If, instead, this kernel item is the successor of the start + state's kernel item, there are still no follows and no goto. This + situation is fortunate because we want to avoid the - 2 below in both + cases. + + Actually, IELR(1) should never invoke this function for either of + those cases because (1) follow_kernel_items will never reference a + kernel item for this RHS because the end token blocks sight of the + lookahead set from the RHS's only nonterminal, and (2) no reduction + has a lookback dependency on this lookahead set. Nevertheless, I + didn't change this test to an aver just in case the usage of this + function evolves to need those two cases. In both cases, the current + implementation returns the right result. */ + if (s->items[item] > 1) + { + /* If the LHS symbol of this item isn't known (because this is a + top-level invocation), go get it. */ + if (!lhs) + { + unsigned int i; + for (i = s->items[item]; + !item_number_is_rule_number (ritem[i]); + ++i) + ; + lhs = rules[item_number_as_rule_number (ritem[i])].lhs->number; + } + /* If this kernel item is next to the beginning of the RHS, then + check all predecessors' goto follows for the LHS. */ + if (item_number_is_rule_number (ritem[s->items[item] - 2])) + { + state **predecessor; + aver (lhs != accept->number); + for (predecessor = predecessors[s->number]; + *predecessor; + ++predecessor) + bitset_or (item_lookahead_sets[s->number][item], + item_lookahead_sets[s->number][item], + goto_follows[map_goto ((*predecessor)->number, + lhs)]); + } + /* If this kernel item is later in the RHS, then check all + predecessor items' lookahead sets. */ + else + { + state **predecessor; + for (predecessor = predecessors[s->number]; + *predecessor; + ++predecessor) + { + size_t predecessor_item; + for (predecessor_item = 0; + predecessor_item < (*predecessor)->nitems; + ++predecessor_item) + if ((*predecessor)->items[predecessor_item] + == s->items[item] - 1) + break; + aver (predecessor_item != (*predecessor)->nitems); + ielr_item_has_lookahead (*predecessor, lhs, + predecessor_item, 0 /*irrelevant*/, + predecessors, item_lookahead_sets); + bitset_or (item_lookahead_sets[s->number][item], + item_lookahead_sets[s->number][item], + item_lookahead_sets[(*predecessor)->number] + [predecessor_item]); + } + } + } + } + return bitset_test (item_lookahead_sets[s->number][item], lookahead); +} + +/** + * \pre + * - \c follow_kernel_items, \c always_follows, and \c predecessors + * were computed by \c ielr_compute_auxiliary_tables. + * \post + * - Each of *inadequacy_listsp and *annotation_listsp + * points to a new array of size \c ::nstates. + * - For 0 <= i < ::nstates: + * - (*inadequacy_listsp)[i] contains the \c InadequacyList head + * node for states[i]. + * - (*annotation_listsp)[i] contains the \c AnnotationList head + * node for states[i]. + * - *max_annotationsp is the maximum number of annotations per + * state. + */ +static void +ielr_compute_annotation_lists (bitsetv follow_kernel_items, + bitsetv always_follows, state ***predecessors, + AnnotationIndex *max_annotationsp, + InadequacyList ***inadequacy_listsp, + AnnotationList ***annotation_listsp, + struct obstack *annotations_obstackp) +{ + bitset **item_lookahead_sets = + xnmalloc (nstates, sizeof *item_lookahead_sets); + AnnotationIndex *annotation_counts = + xnmalloc (nstates, sizeof *annotation_counts); + ContributionIndex max_contributions = 0; + unsigned int total_annotations = 0; + state_number i; + + *inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp); + *annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp); + for (i = 0; i < nstates; ++i) + { + item_lookahead_sets[i] = NULL; + (*inadequacy_listsp)[i] = NULL; + (*annotation_listsp)[i] = NULL; + annotation_counts[i] = 0; + } + for (i = 0; i < nstates; ++i) + AnnotationList__compute_from_inadequacies (states[i], follow_kernel_items, + always_follows, predecessors, + item_lookahead_sets, + *inadequacy_listsp, + *annotation_listsp, + annotation_counts, + &max_contributions, + annotations_obstackp); + *max_annotationsp = 0; + for (i = 0; i < nstates; ++i) + { + if (annotation_counts[i] > *max_annotationsp) + *max_annotationsp = annotation_counts[i]; + total_annotations += annotation_counts[i]; + } + if (trace_flag & trace_ielr) + { + for (i = 0; i < nstates; ++i) + { + fprintf (stderr, "Inadequacy annotations for state %d:\n", i); + AnnotationList__debug ((*annotation_listsp)[i], + states[i]->nitems, 2); + } + fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates); + fprintf (stderr, "Average number of annotations per state: %f\n", + (float)total_annotations/nstates); + fprintf (stderr, "Max number of annotations per state: %d\n", + *max_annotationsp); + fprintf (stderr, "Max number of contributions per annotation: %d\n", + max_contributions); + } + for (i = 0; i < nstates; ++i) + if (item_lookahead_sets[i]) + { + size_t j; + for (j = 0; j < states[i]->nitems; ++j) + if (item_lookahead_sets[i][j]) + bitset_free (item_lookahead_sets[i][j]); + free (item_lookahead_sets[i]); + } + free (item_lookahead_sets); + free (annotation_counts); +} + +typedef struct state_list { + struct state_list *next; + state *state; + /** Has this state been recomputed as a successor of another state? */ + bool recomputedAsSuccessor; + /** + * \c NULL iff all lookahead sets are empty. lookaheads[i] = NULL + * iff the lookahead set on item \c i is empty. + */ + bitset *lookaheads; + /** + * nextIsocore is the next state in a circularly linked-list of all states + * with the same core. The one originally computed by generate_states in + * LR0.c is lr0Isocore. + */ + struct state_list *lr0Isocore; + struct state_list *nextIsocore; +} state_list; + +/** + * \pre + * - \c follow_kernel_items and \c always_follows were computed by + * \c ielr_compute_auxiliary_tables. + * - n->class = nterm_sym. + * \post + * - \c follow_set contains the follow set for the goto on nonterminal \c n + * in state \c s based on the lookaheads stored in s->lookaheads. + */ +static void +ielr_compute_goto_follow_set (bitsetv follow_kernel_items, + bitsetv always_follows, state_list *s, + symbol *n, bitset follow_set) +{ + goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number); + bitset_copy (follow_set, always_follows[n_goto]); + if (s->lookaheads) + { + bitset_iterator biter_item; + bitset_bindex item; + BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0) + if (s->lookaheads[item]) + bitset_or (follow_set, follow_set, s->lookaheads[item]); + } +} + +/** + * \pre + * - \c follow_kernel_items and \c always_follows were computed by + * \c ielr_compute_auxiliary_tables. + * - \c lookahead_filter was computed by + * \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore + * of \c t. + * - The number of rows in \c lookaheads is at least the number of items in + * \c t, and the number of columns is \c ::ntokens. + * \post + * - lookaheads[i][j] is set iff both: + * - lookahead_filter[i][j] is set. + * - The isocore of \c t that will be the transition successor of \c s will + * inherit from \c s token \c j into the lookahead set of item \c i. + */ +static void +ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows, + state_list *s, state *t, bitsetv lookahead_filter, + bitsetv lookaheads) +{ + size_t s_item = 0; + size_t t_item; + bitsetv_zero (lookaheads); + for (t_item = 0; t_item < t->nitems; ++t_item) + { + /* If this kernel item is the beginning of a RHS, it must be the + kernel item in the start state, but t is supposed to be a successor + state. If, instead, this kernel item is the successor of the start + state's kernel item, the lookahead set is empty. This second case is + a special case to avoid the - 2 below, but the next successor can be + handled fine without special casing it. */ + aver (t->items[t_item] != 0); + if (t->items[t_item] > 1 + && !bitset_empty_p (lookahead_filter[t_item])) + { + if (item_number_is_rule_number (ritem[t->items[t_item] - 2])) + { + unsigned int rule_item; + for (rule_item = t->items[t_item]; + !item_number_is_rule_number (ritem[rule_item]); + ++rule_item) + ; + ielr_compute_goto_follow_set ( + follow_kernel_items, always_follows, s, + rules[item_number_as_rule_number (ritem[rule_item])].lhs, + lookaheads[t_item]); + } + else if (s->lookaheads) + { + /* We don't have to start the s item search at the beginning + every time because items from both states are sorted by their + indices in ritem. */ + for (; s_item < s->state->nitems; ++s_item) + if (s->state->items[s_item] == t->items[t_item] - 1) + break; + aver (s_item != s->state->nitems); + if (s->lookaheads[s_item]) + bitset_copy (lookaheads[t_item], s->lookaheads[s_item]); + } + bitset_and (lookaheads[t_item], + lookaheads[t_item], lookahead_filter[t_item]); + } + } +} + +/** + * \pre + * - \c follow_kernel_items and \c always_follows were computed by + * \c ielr_compute_auxiliary_tables. + * - Either: + * - annotation_lists = NULL and all bits in work2 are set. + * - \c annotation_lists was computed by \c ielr_compute_annotation_lists. + * - The number of rows in each of \c lookaheads and \c work2 is the maximum + * number of items in any state. The number of columns in each is + * \c ::ntokens. + * - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t. + * - \c ::nstates is the total number of states, some not yet fully computed, + * in the list ending at \c *last_statep. It is at least the number of + * original LR(0) states. + * - The size of \c work1 is at least the number of annotations for the LR(0) + * isocore of \c t. + * \post + * - Either: + * - In the case that annotation_lists != NULL, + * lookaheads \@pre was merged with some isocore of \c t if + * permitted by the annotations for the original LR(0) isocore of \c t. + * If this changed the lookaheads in that isocore, those changes were + * propagated to all already computed transition successors recursively + * possibly resulting in the splitting of some of those successors. + * - In the case that annotation_lists = NULL, + * lookaheads \@pre was merged with some isocore of \c t if the + * isocore's lookahead sets were identical to those specified by + * lookaheads \@pre. + * - If no such merge was permitted, a new isocore of \c t containing + * lookaheads \@pre was appended to the state list whose + * previous tail was *last_statep \@pre and \c ::nstates was + * incremented. It was also appended to \c t's isocore list. + * - *tp = the isocore of \c t into which + * lookaheads \@pre was placed/merged. + * - \c lookaheads, \c work1, and \c work2 may have been altered. + */ +static void +ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows, + AnnotationList **annotation_lists, state *t, + bitsetv lookaheads, state_list **last_statep, + ContributionIndex work1[], bitsetv work2, state **tp) +{ + state_list *lr0_isocore = t->state_list->lr0Isocore; + state_list **this_isocorep; + bool has_lookaheads; + + /* Determine whether there's an isocore of t with which these lookaheads can + be merged. */ + { + AnnotationIndex ai; + AnnotationList *a; + if (annotation_lists) + for (ai = 0, a = annotation_lists[lr0_isocore->state->number]; + a; + ++ai, a = a->next) + work1[ai] = + AnnotationList__computeDominantContribution ( + a, lr0_isocore->state->nitems, lookaheads, false); + for (this_isocorep = &t->state_list; + this_isocorep == &t->state_list || *this_isocorep != t->state_list; + this_isocorep = &(*this_isocorep)->nextIsocore) + { + if (!(*this_isocorep)->recomputedAsSuccessor) + break; + if (annotation_lists) + { + for (ai = 0, a = annotation_lists[lr0_isocore->state->number]; + a; + ++ai, a = a->next) + { + if (work1[ai] != ContributionIndex__none) + { + /* This isocore compatibility test depends on the fact + that, if the dominant contributions are the same for the + two isocores, then merging their lookahead sets will not + produce a state with a different dominant contribution. + */ + ContributionIndex ci = + AnnotationList__computeDominantContribution ( + a, lr0_isocore->state->nitems, + (*this_isocorep)->lookaheads, false); + if (ci != ContributionIndex__none && work1[ai] != ci) + break; + } + } + if (!a) + break; + } + else + { + size_t i; + for (i = 0; i < t->nitems; ++i) + { + if (!(*this_isocorep)->lookaheads + || !(*this_isocorep)->lookaheads[i]) + { + if (!bitset_empty_p (lookaheads[i])) + break; + } + // bitset_equal_p uses the size of the first argument, so + // lookaheads[i] must be the second argument. + else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i], + lookaheads[i])) + break; + } + if (i == t->nitems) + break; + } + } + } + + has_lookaheads = false; + { + size_t i; + for (i = 0; i < lr0_isocore->state->nitems; ++i) + if (!bitset_empty_p (lookaheads[i])) + { + has_lookaheads = true; + break; + } + } + + /* Merge with an existing isocore. */ + if (this_isocorep == &t->state_list || *this_isocorep != t->state_list) + { + bool new_lookaheads = false; + *tp = (*this_isocorep)->state; + + /* Merge lookaheads into the state and record whether any of them are + actually new. */ + if (has_lookaheads) + { + size_t i; + if (!(*this_isocorep)->lookaheads) + { + (*this_isocorep)->lookaheads = + xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads); + for (i = 0; i < t->nitems; ++i) + (*this_isocorep)->lookaheads[i] = NULL; + } + for (i = 0; i < t->nitems; ++i) + if (!bitset_empty_p (lookaheads[i])) + { + if (!(*this_isocorep)->lookaheads[i]) + (*this_isocorep)->lookaheads[i] = + bitset_create (ntokens, BITSET_FIXED); + bitset_andn (lookaheads[i], + lookaheads[i], (*this_isocorep)->lookaheads[i]); + bitset_or ((*this_isocorep)->lookaheads[i], + lookaheads[i], (*this_isocorep)->lookaheads[i]); + if (!bitset_empty_p (lookaheads[i])) + new_lookaheads = true; + } + } + + /* If new lookaheads were merged, propagate those lookaheads to the + successors, possibly splitting them. If *tp is being recomputed for + the first time, this isn't necessary because the main + ielr_split_states loop will handle the successors later. */ + if (!(*this_isocorep)->recomputedAsSuccessor) + (*this_isocorep)->recomputedAsSuccessor = true; + else if (new_lookaheads) + { + int i; + /* When merging demands identical lookahead sets, it is impossible to + merge new lookaheads. */ + aver (annotation_lists); + for (i = 0; i < (*tp)->transitions->num; ++i) + { + state *t2 = (*tp)->transitions->states[i]; + /* At any time, there's at most one state for which we have so + far initially recomputed only some of its successors in the + main ielr_split_states loop. Because we recompute successors + in order, we can just stop at the first such successor. Of + course, *tp might be a state some of whose successors have + been recomputed as successors of other states rather than as + successors of *tp. It's fine if we go ahead and propagate to + some of those. We'll propagate to them again (but stop when + we see nothing changes) and to the others when we reach *tp in + the main ielr_split_states loop later. */ + if (!t2->state_list->recomputedAsSuccessor) + break; + AnnotationList__computeLookaheadFilter ( + annotation_lists[t2->state_list->lr0Isocore->state->number], + t2->nitems, work2); + ielr_compute_lookaheads (follow_kernel_items, always_follows, + (*this_isocorep), t2, work2, + lookaheads); + /* FIXME: If splitting t2 here, it's possible that lookaheads + that had already propagated from *tp to t2 will be left in t2 + after *tp has been removed as t2's predecessor: + - We're going to recompute all lookaheads in phase 4, so these + extra lookaheads won't appear in the final parser table. + - If t2 has just one annotation, then these extra lookaheads + cannot alter the dominating contribution to the associated + inadequacy and thus cannot needlessly prevent a future merge + of some new state with t2. We can be sure of this because: + - The fact that we're splitting t2 now means that some + predecessors (at least one) other than *tp must be + propagating contributions to t2. + - The fact that t2 was merged in the first place means that, + if *tp propagated any contributions, the dominating + contribution must be the same as that from those other + predecessors. + - Thus, if some new state to be merged with t2 in the future + proves to be compatible with the contributions propagated + by the other predecessors, it will also be compatible with + the contributions made by the extra lookaheads left behind + by *tp. + - However, if t2 has more than one annotation and these extra + lookaheads contribute to one of their inadequacies, it's + possible these extra lookaheads may needlessly prevent a + future merge with t2. For example: + - Let's say there's an inadequacy A that makes the split + necessary as follows: + - There's currently just one other predecessor and it + propagates to t2 some contributions to inadequacy A. + - The new lookaheads that we were attempting to propagate + from *tp to t2 made contributions to inadequacy A with a + different dominating contribution than those from that + other predecessor. + - The extra lookaheads either make no contribution to + inadequacy A or have the same dominating contribution as + the contributions from the other predecessor. Either + way, as explained above, they can't prevent a future + merge. + - Let's say there's an inadequacy B that causes the trouble + with future merges as follows: + - The extra lookaheads make contributions to inadequacy B. + - Those extra contributions did not prevent the original + merge to create t2 because the other predecessor + propagates to t2 no contributions to inadequacy B. + - Thus, those extra contributions may prevent a future + merge with t2 even though the merge would be fine if *tp + had not left them behind. + - Is the latter case common enough to worry about? + - Perhaps we should track all predecessors and iterate them + now to recreate t2 without those extra lookaheads. */ + ielr_compute_state (follow_kernel_items, always_follows, + annotation_lists, t2, lookaheads, + last_statep, work1, work2, + &(*tp)->transitions->states[i]); + } + } + } + + /* Create a new isocore. */ + else + { + state_list *old_isocore = *this_isocorep; + (*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep); + *last_statep = *this_isocorep; + (*last_statep)->state = *tp = state_new_isocore (t); + (*tp)->state_list = *last_statep; + (*last_statep)->recomputedAsSuccessor = true; + (*last_statep)->next = NULL; + (*last_statep)->lookaheads = NULL; + if (has_lookaheads) + { + size_t i; + (*last_statep)->lookaheads = + xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads); + for (i = 0; i < t->nitems; ++i) + { + if (bitset_empty_p (lookaheads[i])) + (*last_statep)->lookaheads[i] = NULL; + else + { + (*last_statep)->lookaheads[i] = + bitset_create (ntokens, BITSET_FIXED); + bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]); + } + } + } + (*last_statep)->lr0Isocore = lr0_isocore; + (*last_statep)->nextIsocore = old_isocore; + } +} + +/** + * \pre + * - \c follow_kernel_items and \c always_follows were computed by + * \c ielr_compute_auxiliary_tables. + * - Either: + * - annotation_lists = NULL and max_annotations=0. + * - \c annotation_lists and \c max_annotations were computed by + * \c ielr_compute_annotation_lists. + * \post + * - \c ::states is of size \c ::nstates (which might be greater than + * ::nstates \@pre) and no longer contains any LR(1)-relative + * inadequacy. \c annotation_lists was used to determine state + * compatibility or, if annotation_lists = NULL, the canonical + * LR(1) state compatibility test was used. + * - If annotation_lists = NULL, reduction lookahead sets were + * computed in all states. TV_IELR_PHASE4 was pushed while they were + * computed from item lookahead sets. + */ +static void +ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows, + AnnotationList **annotation_lists, + AnnotationIndex max_annotations) +{ + state_list *first_state; + state_list *last_state; + bitsetv lookahead_filter = NULL; + bitsetv lookaheads; + + /* Set up state list and some reusable bitsets. */ + { + size_t max_nitems = 0; + state_number i; + state_list **nodep = &first_state; + for (i = 0; i < nstates; ++i) + { + *nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep); + (*nodep)->state = states[i]; + (*nodep)->recomputedAsSuccessor = false; + (*nodep)->lookaheads = NULL; + (*nodep)->lr0Isocore = *nodep; + (*nodep)->nextIsocore = *nodep; + nodep = &(*nodep)->next; + if (states[i]->nitems > max_nitems) + max_nitems = states[i]->nitems; + } + *nodep = NULL; + lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED); + if (!annotation_lists) + bitsetv_ones (lookahead_filter); + lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED); + } + + /* Recompute states. */ + { + ContributionIndex *work = xnmalloc (max_annotations, sizeof *work); + state_list *this_state; + for (this_state = first_state; this_state; this_state = this_state->next) + { + state *s = this_state->state; + int i; + for (i = 0; i < s->transitions->num; ++i) + { + state *t = s->transitions->states[i]; + if (annotation_lists) + AnnotationList__computeLookaheadFilter ( + annotation_lists[t->state_list->lr0Isocore->state->number], + t->nitems, lookahead_filter); + ielr_compute_lookaheads (follow_kernel_items, always_follows, + this_state, t, lookahead_filter, + lookaheads); + ielr_compute_state (follow_kernel_items, always_follows, + annotation_lists, t, lookaheads, &last_state, + work, lookahead_filter, + &s->transitions->states[i]); + } + } + free (work); + } + + bitsetv_free (lookahead_filter); + bitsetv_free (lookaheads); + + /* Store states back in the states array. */ + states = xnrealloc (states, nstates, sizeof *states); + { + state_list *node; + for (node = first_state; node; node = node->next) + states[node->state->number] = node->state; + } + + /* In the case of canonical LR(1), copy item lookahead sets to reduction + lookahead sets. */ + if (!annotation_lists) + { + timevar_push (TV_IELR_PHASE4); + initialize_LA (); + state_list *node; + for (node = first_state; node; node = node->next) + if (!node->state->consistent) + { + size_t i = 0; + item_number *itemset = node->state->items; + size_t r; + for (r = 0; r < node->state->reductions->num; ++r) + { + rule *this_rule = node->state->reductions->rules[r]; + bitset lookahead_set = + node->state->reductions->lookahead_tokens[r]; + if (item_number_is_rule_number (*this_rule->rhs)) + ielr_compute_goto_follow_set (follow_kernel_items, + always_follows, node, + this_rule->lhs, lookahead_set); + else if (node->lookaheads) + { + /* We don't need to start the kernel item search back at + i=0 because both items and reductions are sorted on rule + number. */ + while (!item_number_is_rule_number (ritem[itemset[i]]) + || item_number_as_rule_number (ritem[itemset[i]]) + != this_rule->number) + { + ++i; + aver (i < node->state->nitems); + } + if (node->lookaheads[i]) + bitset_copy (lookahead_set, node->lookaheads[i]); + } + } + } + timevar_pop (TV_IELR_PHASE4); + } + + /* Free state list. */ + while (first_state) + { + state_list *node = first_state; + if (node->lookaheads) + { + size_t i; + for (i = 0; i < node->state->nitems; ++i) + if (node->lookaheads[i]) + bitset_free (node->lookaheads[i]); + free (node->lookaheads); + } + first_state = node->next; + free (node); + } +} + +void +ielr (void) +{ + LrType lr_type; + + /* Examine user options. */ + { + char *type = muscle_percent_define_get ("lr.type"); + if (0 == strcmp (type, "LALR")) + lr_type = LR_TYPE__LALR; + else if (0 == strcmp (type, "IELR")) + lr_type = LR_TYPE__IELR; + else if (0 == strcmp (type, "canonical LR")) + lr_type = LR_TYPE__CANONICAL_LR; + else + aver (false); + free (type); + } + + /* Phase 0: LALR(1). */ + timevar_push (TV_LALR); + if (lr_type == LR_TYPE__CANONICAL_LR) + set_goto_map (); + else + lalr (); + if (lr_type == LR_TYPE__LALR) + { + bitsetv_free (goto_follows); + timevar_pop (TV_LALR); + return; + } + timevar_pop (TV_LALR); + + { + bitsetv follow_kernel_items; + bitsetv always_follows; + InadequacyList **inadequacy_lists = NULL; + AnnotationList **annotation_lists = NULL; + struct obstack annotations_obstack; + AnnotationIndex max_annotations = 0; + + { + /* Phase 1: Compute Auxiliary Tables. */ + state ***predecessors; + timevar_push (TV_IELR_PHASE1); + ielr_compute_auxiliary_tables ( + &follow_kernel_items, &always_follows, + lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors); + timevar_pop (TV_IELR_PHASE1); + + /* Phase 2: Compute Annotations. */ + timevar_push (TV_IELR_PHASE2); + if (lr_type != LR_TYPE__CANONICAL_LR) + { + obstack_init (&annotations_obstack); + ielr_compute_annotation_lists (follow_kernel_items, always_follows, + predecessors, &max_annotations, + &inadequacy_lists, &annotation_lists, + &annotations_obstack); + { + state_number i; + for (i = 0; i < nstates; ++i) + free (predecessors[i]); + } + free (predecessors); + bitsetv_free (goto_follows); + lalr_free (); + } + timevar_pop (TV_IELR_PHASE2); + } + + /* Phase 3: Split States. */ + timevar_push (TV_IELR_PHASE3); + { + state_number nstates_lr0 = nstates; + ielr_split_states (follow_kernel_items, always_follows, + annotation_lists, max_annotations); + if (inadequacy_lists) + { + state_number i; + for (i = 0; i < nstates_lr0; ++i) + InadequacyList__delete (inadequacy_lists[i]); + } + } + free (inadequacy_lists); + if (annotation_lists) + obstack_free (&annotations_obstack, NULL); + free (annotation_lists); + bitsetv_free (follow_kernel_items); + bitsetv_free (always_follows); + timevar_pop (TV_IELR_PHASE3); + } + + /* Phase 4: Compute Reduction Lookaheads. */ + timevar_push (TV_IELR_PHASE4); + free (goto_map); + free (from_state); + free (to_state); + if (lr_type == LR_TYPE__CANONICAL_LR) + { + // Reduction lookaheads are computed in ielr_split_states above but are + // timed as part of phase 4. + set_goto_map (); + } + else + { + lalr (); + bitsetv_free (goto_follows); + } + timevar_pop (TV_IELR_PHASE4); +} diff --git a/src/ielr.h b/src/ielr.h new file mode 100644 index 00000000..27b3a445 --- /dev/null +++ b/src/ielr.h @@ -0,0 +1,46 @@ +/* IELR main implementation. + + Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef IELR_H_ +# define IELR_H_ + +#include + +#include "state.h" + +/** + * \pre + * - \c ::states is of size \c ::nstates and defines an LALR(1) parser for + * the users's grammar. + * - \c ::ntokens is the number of tokens in the grammar. + * \post + * - \c ::states is of size \c ::nstates (which might be greater than + * ::nstates \@pre) and defines the type of parser specified by + * the value of the \c \%define variable \c lr.type. Its value can be: + * - \c "LALR". + * - \c "IELR". + * - \c "canonical LR". + */ +void ielr (void); + +bool ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item, + symbol_number lookahead, state ***predecessors, + bitset **item_lookahead_sets); + +#endif /* !IELR_H_ */