1 /* IELR main implementation.
3 Copyright (C) 2009 Free Software Foundation, Inc.
5 This file is part of Bison, the GNU Compiler Compiler.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
28 #include "AnnotationList.h"
32 #include "muscle-tab.h"
/** Records the value of the \%define variable "lr.type".  */
typedef enum
{
  LR_TYPE__LALR,          /**< lr.type = "lalr": plain LALR(1) tables.  */
  LR_TYPE__IELR,          /**< lr.type = "ielr": IELR(1) tables.  */
  LR_TYPE__CANONICAL_LR   /**< lr.type = "canonical-lr": canonical LR(1).  */
} LrType;
43 * - \c result = a new \c bitset of size \c ::nritems such that any bit \c i
44 * is set iff <tt>ritem[i]</tt> is a nonterminal after which all ritems in
45 * the same RHS are nullable nonterminals. In other words, the follows of
46 * a goto on <tt>ritem[i]</tt> include the lookahead set of the item.
49 ielr_compute_ritem_sees_lookahead_set (void)
51 bitset result
= bitset_create (nritems
, BITSET_FIXED
);
52 unsigned int i
= nritems
-1;
56 while (!item_number_is_rule_number (ritem
[i
])
58 && nullable
[item_number_as_symbol_number (ritem
[i
]) - ntokens
])
59 bitset_set (result
, i
--);
60 if (!item_number_is_rule_number (ritem
[i
]) && ISVAR (ritem
[i
]))
61 bitset_set (result
, i
--);
62 while (!item_number_is_rule_number (ritem
[i
]) && i
>0)
65 if (trace_flag
& trace_ielr
)
67 fprintf (stderr
, "ritem_sees_lookahead_set:\n");
68 debug_bitset (result
);
69 fprintf (stderr
, "\n");
76 * - \c ritem_sees_lookahead_set was computed by
77 * \c ielr_compute_ritem_sees_lookahead_set.
79 * - Each of \c *edgesp and \c *edge_countsp is a new array of size
81 * - <tt>(*edgesp)[i]</tt> points to a \c goto_number array of size
82 * <tt>(*edge_countsp)[i]+1</tt>.
83 * - In such a \c goto_number array, the last element is \c ::END_NODE.
84 * - All remaining elements are the indices of the gotos to which there is an
85 * internal follow edge from goto \c i.
86 * - There is an internal follow edge from goto \c i to goto \c j iff both:
87 * - The from states of gotos \c i and \c j are the same.
88 * - The transition nonterminal for goto \c i appears as the first RHS
89 * symbol of at least one production for which both:
90 * - The LHS is the transition symbol of goto \c j.
91 * - All other RHS symbols are nullable nonterminals.
92 * - In other words, the follows of goto \c i include the follows of
93 * goto \c j and it's an internal edge because the from states are the
97 ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set
,
98 goto_number
***edgesp
, int **edge_countsp
)
100 *edgesp
= xnmalloc (ngotos
, sizeof **edgesp
);
101 *edge_countsp
= xnmalloc (ngotos
, sizeof **edge_countsp
);
103 bitset sources
= bitset_create (ngotos
, BITSET_FIXED
);
105 for (i
= 0; i
< ngotos
; ++i
)
106 (*edge_countsp
)[i
] = 0;
107 for (i
= 0; i
< ngotos
; ++i
)
112 for (rulep
= derives
[states
[to_state
[i
]]->accessing_symbol
117 /* If there is at least one RHS symbol, if the first RHS symbol
118 is a nonterminal, and if all remaining RHS symbols (if any)
119 are nullable nonterminals, create an edge from the LHS
120 symbol's goto to the first RHS symbol's goto such that the RHS
121 symbol's goto will be the source of the edge after the
122 eventual relation_transpose below.
124 Unlike in ielr_compute_always_follows, I use a bitset for
125 edges rather than an array because it is possible that
126 multiple RHS's with the same first symbol could fit and thus
127 that we could end up with redundant edges. With the
128 possibility of redundant edges, it's hard to know ahead of
129 time how large to make such an array. Another possible
130 redundancy is that source and destination might be the same
131 goto. Eliminating all these possible redundancies now might
132 possibly help performance a little. I have not proven any of
133 this, but I'm guessing the bitset shouldn't entail much of a
134 performance penalty, if any. */
135 if (bitset_test (ritem_sees_lookahead_set
,
136 (*rulep
)->rhs
- ritem
))
139 map_goto (from_state
[i
],
140 item_number_as_symbol_number (*(*rulep
)->rhs
));
141 if (i
!= source
&& !bitset_test (sources
, source
))
143 bitset_set (sources
, source
);
145 ++(*edge_countsp
)[source
];
154 (*edgesp
)[i
] = xnmalloc (nsources
+ 1, sizeof *(*edgesp
)[i
]);
156 bitset_iterator biter_source
;
157 bitset_bindex source
;
159 BITSET_FOR_EACH (biter_source
, sources
, source
, 0)
160 (*edgesp
)[i
][j
++] = source
;
162 (*edgesp
)[i
][nsources
] = END_NODE
;
164 bitset_zero (sources
);
166 bitset_free (sources
);
169 relation_transpose (edgesp
, ngotos
);
171 if (trace_flag
& trace_ielr
)
173 fprintf (stderr
, "internal_follow_edges:\n");
174 relation_print (*edgesp
, ngotos
, stderr
);
180 * - \c ritem_sees_lookahead_set was computed by
181 * \c ielr_compute_ritem_sees_lookahead_set.
182 * - \c internal_follow_edges was computed by
183 * \c ielr_compute_internal_follow_edges.
185 * - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows
186 * is \c ngotos and the number of columns is maximum number of kernel items
188 * - <tt>(*follow_kernel_itemsp)[i][j]</tt> is set iff the follows of goto
189 * \c i include the lookahead set of item \c j in the from state of goto
191 * - Thus, <tt>(*follow_kernel_itemsp)[i][j]</tt> is always unset if there is
192 * no item \c j in the from state of goto \c i.
195 ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set
,
196 goto_number
**internal_follow_edges
,
197 bitsetv
*follow_kernel_itemsp
)
200 size_t max_nitems
= 0;
202 for (i
= 0; i
< nstates
; ++i
)
203 if (states
[i
]->nitems
> max_nitems
)
204 max_nitems
= states
[i
]->nitems
;
205 *follow_kernel_itemsp
= bitsetv_create (ngotos
, max_nitems
, BITSET_FIXED
);
209 for (i
= 0; i
< ngotos
; ++i
)
211 size_t nitems
= states
[from_state
[i
]]->nitems
;
212 item_number
*items
= states
[from_state
[i
]]->items
;
214 for (j
= 0; j
< nitems
; ++j
)
215 /* If this item has this goto and if all subsequent symbols in this
216 RHS (if any) are nullable nonterminals, then record this item as
217 one whose lookahead set is included in this goto's follows. */
218 if (item_number_is_symbol_number (ritem
[items
[j
]])
219 && item_number_as_symbol_number (ritem
[items
[j
]])
220 == states
[to_state
[i
]]->accessing_symbol
221 && bitset_test (ritem_sees_lookahead_set
, items
[j
]))
222 bitset_set ((*follow_kernel_itemsp
)[i
], j
);
225 relation_digraph (internal_follow_edges
, ngotos
, follow_kernel_itemsp
);
227 if (trace_flag
& trace_ielr
)
229 fprintf (stderr
, "follow_kernel_items:\n");
230 debug_bitsetv (*follow_kernel_itemsp
);
236 * - \c *edgesp and \c edge_counts were computed by
237 * \c ielr_compute_internal_follow_edges.
239 * - \c *always_followsp is a new \c bitsetv with \c ngotos rows and
240 * \c ntokens columns.
241 * - <tt>(*always_followsp)[i][j]</tt> is set iff token \c j is an always
242 * follow (that is, it's computed by internal and successor edges) of goto
244 * - Rows of \c *edgesp have been realloc'ed and extended to include
245 * successor follow edges. \c edge_counts has not been updated.
248 ielr_compute_always_follows (goto_number
***edgesp
,
249 int const edge_counts
[],
250 bitsetv
*always_followsp
)
252 *always_followsp
= bitsetv_create (ngotos
, ntokens
, BITSET_FIXED
);
254 goto_number
*edge_array
= xnmalloc (ngotos
, sizeof *edge_array
);
256 for (i
= 0; i
< ngotos
; ++i
)
258 goto_number nedges
= edge_counts
[i
];
261 transitions
*trans
= states
[to_state
[i
]]->transitions
;
262 FOR_EACH_SHIFT (trans
, j
)
263 bitset_set ((*always_followsp
)[i
], TRANSITION_SYMBOL (trans
, j
));
264 for (; j
< trans
->num
; ++j
)
266 symbol_number sym
= TRANSITION_SYMBOL (trans
, j
);
267 if (nullable
[sym
- ntokens
])
268 edge_array
[nedges
++] = map_goto (to_state
[i
], sym
);
271 if (nedges
- edge_counts
[i
])
274 xnrealloc ((*edgesp
)[i
], nedges
+ 1, sizeof *(*edgesp
)[i
]);
275 memcpy ((*edgesp
)[i
] + edge_counts
[i
], edge_array
+ edge_counts
[i
],
276 (nedges
- edge_counts
[i
]) * sizeof *(*edgesp
)[i
]);
277 (*edgesp
)[i
][nedges
] = END_NODE
;
282 relation_digraph (*edgesp
, ngotos
, always_followsp
);
284 if (trace_flag
& trace_ielr
)
286 fprintf (stderr
, "always follow edges:\n");
287 relation_print (*edgesp
, ngotos
, stderr
);
288 fprintf (stderr
, "always_follows:\n");
289 debug_bitsetv (*always_followsp
);
295 * - \c result is a new array of size \c ::nstates.
296 * - <tt>result[i]</tt> is an array of pointers to the predecessor
297 * <tt>state</tt>'s of state \c i.
298 * - The last element of such an array is \c NULL.
301 ielr_compute_predecessors (void)
304 int *predecessor_counts
= xnmalloc (nstates
, sizeof *predecessor_counts
);
305 state
***result
= xnmalloc (nstates
, sizeof *result
);
306 for (i
= 0; i
< nstates
; ++i
)
307 predecessor_counts
[i
] = 0;
308 for (i
= 0; i
< nstates
; ++i
)
311 for (j
= 0; j
< states
[i
]->transitions
->num
; ++j
)
312 ++predecessor_counts
[states
[i
]->transitions
->states
[j
]->number
];
314 for (i
= 0; i
< nstates
; ++i
)
316 result
[i
] = xnmalloc (predecessor_counts
[i
]+1, sizeof *result
[i
]);
317 result
[i
][predecessor_counts
[i
]] = NULL
;
318 predecessor_counts
[i
] = 0;
320 for (i
= 0; i
< nstates
; ++i
)
323 for (j
= 0; j
< states
[i
]->transitions
->num
; ++j
)
325 state_number k
= states
[i
]->transitions
->states
[j
]->number
;
326 result
[k
][predecessor_counts
[k
]++] = states
[i
];
329 free (predecessor_counts
);
335 * - \c *follow_kernel_itemsp and \c *always_followsp were computed by
336 * \c ielr_compute_follow_kernel_items and
337 * \c ielr_compute_always_follows.
338 * - Iff <tt>predecessorsp != NULL</tt>, then \c *predecessorsp was computed
339 * by \c ielr_compute_predecessors.
342 ielr_compute_auxiliary_tables (bitsetv
*follow_kernel_itemsp
,
343 bitsetv
*always_followsp
,
344 state
****predecessorsp
)
349 bitset ritem_sees_lookahead_set
= ielr_compute_ritem_sees_lookahead_set ();
350 ielr_compute_internal_follow_edges (ritem_sees_lookahead_set
,
351 &edges
, &edge_counts
);
352 ielr_compute_follow_kernel_items (ritem_sees_lookahead_set
, edges
,
353 follow_kernel_itemsp
);
354 bitset_free (ritem_sees_lookahead_set
);
356 ielr_compute_always_follows (&edges
, edge_counts
, always_followsp
);
359 for (i
= 0; i
< ngotos
; ++i
)
365 *predecessorsp
= ielr_compute_predecessors ();
370 * - FIXME: It might be an interesting experiment to compare the space and
371 * time efficiency of computing \c item_lookahead_sets either:
373 * - Just-in-time, as implemented below.
374 * - Not at all. That is, just let annotations continue even when
378 ielr_item_has_lookahead (state
*s
, symbol_number lhs
, size_t item
,
379 symbol_number lookahead
, state
***predecessors
,
380 bitset
**item_lookahead_sets
)
382 if (!item_lookahead_sets
[s
->number
])
385 item_lookahead_sets
[s
->number
] =
386 xnmalloc (s
->nitems
, sizeof item_lookahead_sets
[s
->number
][0]);
387 for (i
= 0; i
< s
->nitems
; ++i
)
388 item_lookahead_sets
[s
->number
][i
] = NULL
;
390 if (!item_lookahead_sets
[s
->number
][item
])
392 item_lookahead_sets
[s
->number
][item
] =
393 bitset_create (ntokens
, BITSET_FIXED
);
394 /* If this kernel item is the beginning of a RHS, it must be the kernel
395 item in the start state, and so its LHS has no follows and no goto to
396 check. If, instead, this kernel item is the successor of the start
397 state's kernel item, there are still no follows and no goto. This
398 situation is fortunate because we want to avoid the - 2 below in both
401 Actually, IELR(1) should never invoke this function for either of
402 those cases because (1) follow_kernel_items will never reference a
403 kernel item for this RHS because the end token blocks sight of the
404 lookahead set from the RHS's only nonterminal, and (2) no reduction
405 has a lookback dependency on this lookahead set. Nevertheless, I
406 didn't change this test to an aver just in case the usage of this
407 function evolves to need those two cases. In both cases, the current
408 implementation returns the right result. */
409 if (s
->items
[item
] > 1)
411 /* If the LHS symbol of this item isn't known (because this is a
412 top-level invocation), go get it. */
416 for (i
= s
->items
[item
];
417 !item_number_is_rule_number (ritem
[i
]);
420 lhs
= rules
[item_number_as_rule_number (ritem
[i
])].lhs
->number
;
422 /* If this kernel item is next to the beginning of the RHS, then
423 check all predecessors' goto follows for the LHS. */
424 if (item_number_is_rule_number (ritem
[s
->items
[item
] - 2]))
427 aver (lhs
!= accept
->number
);
428 for (predecessor
= predecessors
[s
->number
];
431 bitset_or (item_lookahead_sets
[s
->number
][item
],
432 item_lookahead_sets
[s
->number
][item
],
433 goto_follows
[map_goto ((*predecessor
)->number
,
436 /* If this kernel item is later in the RHS, then check all
437 predecessor items' lookahead sets. */
441 for (predecessor
= predecessors
[s
->number
];
445 size_t predecessor_item
;
446 for (predecessor_item
= 0;
447 predecessor_item
< (*predecessor
)->nitems
;
449 if ((*predecessor
)->items
[predecessor_item
]
450 == s
->items
[item
] - 1)
452 aver (predecessor_item
!= (*predecessor
)->nitems
);
453 ielr_item_has_lookahead (*predecessor
, lhs
,
454 predecessor_item
, 0 /*irrelevant*/,
455 predecessors
, item_lookahead_sets
);
456 bitset_or (item_lookahead_sets
[s
->number
][item
],
457 item_lookahead_sets
[s
->number
][item
],
458 item_lookahead_sets
[(*predecessor
)->number
]
464 return bitset_test (item_lookahead_sets
[s
->number
][item
], lookahead
);
469 * - \c follow_kernel_items, \c always_follows, and \c predecessors
470 * were computed by \c ielr_compute_auxiliary_tables.
472 * - Each of <tt>*inadequacy_listsp</tt> and <tt>*annotation_listsp</tt>
473 * points to a new array of size \c ::nstates.
474 * - For <tt>0 <= i < ::nstates</tt>:
475 * - <tt>(*inadequacy_listsp)[i]</tt> contains the \c InadequacyList head
476 * node for <tt>states[i]</tt>.
477 * - <tt>(*annotation_listsp)[i]</tt> contains the \c AnnotationList head
478 * node for <tt>states[i]</tt>.
479 * - <tt>*max_annotationsp</tt> is the maximum number of annotations per
483 ielr_compute_annotation_lists (bitsetv follow_kernel_items
,
484 bitsetv always_follows
, state
***predecessors
,
485 AnnotationIndex
*max_annotationsp
,
486 InadequacyList
***inadequacy_listsp
,
487 AnnotationList
***annotation_listsp
,
488 struct obstack
*annotations_obstackp
)
490 bitset
**item_lookahead_sets
=
491 xnmalloc (nstates
, sizeof *item_lookahead_sets
);
492 AnnotationIndex
*annotation_counts
=
493 xnmalloc (nstates
, sizeof *annotation_counts
);
494 ContributionIndex max_contributions
= 0;
495 unsigned int total_annotations
= 0;
498 *inadequacy_listsp
= xnmalloc (nstates
, sizeof **inadequacy_listsp
);
499 *annotation_listsp
= xnmalloc (nstates
, sizeof **annotation_listsp
);
500 for (i
= 0; i
< nstates
; ++i
)
502 item_lookahead_sets
[i
] = NULL
;
503 (*inadequacy_listsp
)[i
] = NULL
;
504 (*annotation_listsp
)[i
] = NULL
;
505 annotation_counts
[i
] = 0;
507 for (i
= 0; i
< nstates
; ++i
)
508 AnnotationList__compute_from_inadequacies (states
[i
], follow_kernel_items
,
509 always_follows
, predecessors
,
515 annotations_obstackp
);
516 *max_annotationsp
= 0;
517 for (i
= 0; i
< nstates
; ++i
)
519 if (annotation_counts
[i
] > *max_annotationsp
)
520 *max_annotationsp
= annotation_counts
[i
];
521 total_annotations
+= annotation_counts
[i
];
523 if (trace_flag
& trace_ielr
)
525 for (i
= 0; i
< nstates
; ++i
)
527 fprintf (stderr
, "Inadequacy annotations for state %d:\n", i
);
528 AnnotationList__debug ((*annotation_listsp
)[i
],
529 states
[i
]->nitems
, 2);
531 fprintf (stderr
, "Number of LR(0)/LALR(1) states: %d\n", nstates
);
532 fprintf (stderr
, "Average number of annotations per state: %f\n",
533 (float)total_annotations
/nstates
);
534 fprintf (stderr
, "Max number of annotations per state: %d\n",
536 fprintf (stderr
, "Max number of contributions per annotation: %d\n",
539 for (i
= 0; i
< nstates
; ++i
)
540 if (item_lookahead_sets
[i
])
543 for (j
= 0; j
< states
[i
]->nitems
; ++j
)
544 if (item_lookahead_sets
[i
][j
])
545 bitset_free (item_lookahead_sets
[i
][j
]);
546 free (item_lookahead_sets
[i
]);
548 free (item_lookahead_sets
);
549 free (annotation_counts
);
552 typedef struct state_list
{
553 struct state_list
*next
;
555 /** Has this state been recomputed as a successor of another state? */
556 bool recomputedAsSuccessor
;
558 * \c NULL iff all lookahead sets are empty. <tt>lookaheads[i] = NULL</tt>
559 * iff the lookahead set on item \c i is empty.
563 * nextIsocore is the next state in a circularly linked-list of all states
564 * with the same core. The one originally computed by generate_states in
565 * LR0.c is lr0Isocore.
567 struct state_list
*lr0Isocore
;
568 struct state_list
*nextIsocore
;
573 * - \c follow_kernel_items and \c always_follows were computed by
574 * \c ielr_compute_auxiliary_tables.
575 * - <tt>n->class = nterm_sym</tt>.
577 * - \c follow_set contains the follow set for the goto on nonterminal \c n
578 * in state \c s based on the lookaheads stored in <tt>s->lookaheads</tt>.
581 ielr_compute_goto_follow_set (bitsetv follow_kernel_items
,
582 bitsetv always_follows
, state_list
*s
,
583 symbol
*n
, bitset follow_set
)
585 goto_number n_goto
= map_goto (s
->lr0Isocore
->state
->number
, n
->number
);
586 bitset_copy (follow_set
, always_follows
[n_goto
]);
589 bitset_iterator biter_item
;
591 BITSET_FOR_EACH (biter_item
, follow_kernel_items
[n_goto
], item
, 0)
592 if (s
->lookaheads
[item
])
593 bitset_or (follow_set
, follow_set
, s
->lookaheads
[item
]);
599 * - \c follow_kernel_items and \c always_follows were computed by
600 * \c ielr_compute_auxiliary_tables.
601 * - \c lookahead_filter was computed by
602 * \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore
604 * - The number of rows in \c lookaheads is at least the number of items in
605 * \c t, and the number of columns is \c ::ntokens.
607 * - <tt>lookaheads[i][j]</tt> is set iff both:
608 * - <tt>lookahead_filter[i][j]</tt> is set.
609 * - The isocore of \c t that will be the transition successor of \c s will
610 * inherit from \c s token \c j into the lookahead set of item \c i.
613 ielr_compute_lookaheads (bitsetv follow_kernel_items
, bitsetv always_follows
,
614 state_list
*s
, state
*t
, bitsetv lookahead_filter
,
619 bitsetv_zero (lookaheads
);
620 for (t_item
= 0; t_item
< t
->nitems
; ++t_item
)
622 /* If this kernel item is the beginning of a RHS, it must be the
623 kernel item in the start state, but t is supposed to be a successor
624 state. If, instead, this kernel item is the successor of the start
625 state's kernel item, the lookahead set is empty. This second case is
626 a special case to avoid the - 2 below, but the next successor can be
627 handled fine without special casing it. */
628 aver (t
->items
[t_item
] != 0);
629 if (t
->items
[t_item
] > 1
630 && !bitset_empty_p (lookahead_filter
[t_item
]))
632 if (item_number_is_rule_number (ritem
[t
->items
[t_item
] - 2]))
634 unsigned int rule_item
;
635 for (rule_item
= t
->items
[t_item
];
636 !item_number_is_rule_number (ritem
[rule_item
]);
639 ielr_compute_goto_follow_set (
640 follow_kernel_items
, always_follows
, s
,
641 rules
[item_number_as_rule_number (ritem
[rule_item
])].lhs
,
644 else if (s
->lookaheads
)
646 /* We don't have to start the s item search at the beginning
647 every time because items from both states are sorted by their
649 for (; s_item
< s
->state
->nitems
; ++s_item
)
650 if (s
->state
->items
[s_item
] == t
->items
[t_item
] - 1)
652 aver (s_item
!= s
->state
->nitems
);
653 if (s
->lookaheads
[s_item
])
654 bitset_copy (lookaheads
[t_item
], s
->lookaheads
[s_item
]);
656 bitset_and (lookaheads
[t_item
],
657 lookaheads
[t_item
], lookahead_filter
[t_item
]);
664 * - \c follow_kernel_items and \c always_follows were computed by
665 * \c ielr_compute_auxiliary_tables.
667 * - <tt>annotation_lists = NULL</tt> and all bits in work2 are set.
668 * - \c annotation_lists was computed by \c ielr_compute_annotation_lists.
669 * - The number of rows in each of \c lookaheads and \c work2 is the maximum
670 * number of items in any state. The number of columns in each is
672 * - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t.
673 * - \c ::nstates is the total number of states, some not yet fully computed,
674 * in the list ending at \c *last_statep. It is at least the number of
675 * original LR(0) states.
676 * - The size of \c work1 is at least the number of annotations for the LR(0)
680 * - In the case that <tt>annotation_lists != NULL</tt>,
681 * <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if
682 * permitted by the annotations for the original LR(0) isocore of \c t.
683 * If this changed the lookaheads in that isocore, those changes were
684 * propagated to all already computed transition successors recursively
685 * possibly resulting in the splitting of some of those successors.
686 * - In the case that <tt>annotation_lists = NULL</tt>,
687 * <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if the
688 * isocore's lookahead sets were identical to those specified by
689 * <tt>lookaheads \@pre</tt>.
690 * - If no such merge was permitted, a new isocore of \c t containing
691 * <tt>lookaheads \@pre</tt> was appended to the state list whose
692 * previous tail was <tt>*last_statep \@pre</tt> and \c ::nstates was
693 * incremented. It was also appended to \c t's isocore list.
694 * - <tt>*tp</tt> = the isocore of \c t into which
695 * <tt>lookaheads \@pre</tt> was placed/merged.
696 * - \c lookaheads, \c work1, and \c work2 may have been altered.
699 ielr_compute_state (bitsetv follow_kernel_items
, bitsetv always_follows
,
700 AnnotationList
**annotation_lists
, state
*t
,
701 bitsetv lookaheads
, state_list
**last_statep
,
702 ContributionIndex work1
[], bitsetv work2
, state
**tp
)
704 state_list
*lr0_isocore
= t
->state_list
->lr0Isocore
;
705 state_list
**this_isocorep
;
708 /* Determine whether there's an isocore of t with which these lookaheads can
713 if (annotation_lists
)
714 for (ai
= 0, a
= annotation_lists
[lr0_isocore
->state
->number
];
718 AnnotationList__computeDominantContribution (
719 a
, lr0_isocore
->state
->nitems
, lookaheads
, false);
720 for (this_isocorep
= &t
->state_list
;
721 this_isocorep
== &t
->state_list
|| *this_isocorep
!= t
->state_list
;
722 this_isocorep
= &(*this_isocorep
)->nextIsocore
)
724 if (!(*this_isocorep
)->recomputedAsSuccessor
)
726 if (annotation_lists
)
728 for (ai
= 0, a
= annotation_lists
[lr0_isocore
->state
->number
];
732 if (work1
[ai
] != ContributionIndex__none
)
734 /* This isocore compatibility test depends on the fact
735 that, if the dominant contributions are the same for the
736 two isocores, then merging their lookahead sets will not
737 produce a state with a different dominant contribution.
739 ContributionIndex ci
=
740 AnnotationList__computeDominantContribution (
741 a
, lr0_isocore
->state
->nitems
,
742 (*this_isocorep
)->lookaheads
, false);
743 if (ci
!= ContributionIndex__none
&& work1
[ai
] != ci
)
753 for (i
= 0; i
< t
->nitems
; ++i
)
755 if (!(*this_isocorep
)->lookaheads
756 || !(*this_isocorep
)->lookaheads
[i
])
758 if (!bitset_empty_p (lookaheads
[i
]))
761 // bitset_equal_p uses the size of the first argument, so
762 // lookaheads[i] must be the second argument.
763 else if (!bitset_equal_p ((*this_isocorep
)->lookaheads
[i
],
773 has_lookaheads
= false;
776 for (i
= 0; i
< lr0_isocore
->state
->nitems
; ++i
)
777 if (!bitset_empty_p (lookaheads
[i
]))
779 has_lookaheads
= true;
784 /* Merge with an existing isocore. */
785 if (this_isocorep
== &t
->state_list
|| *this_isocorep
!= t
->state_list
)
787 bool new_lookaheads
= false;
788 *tp
= (*this_isocorep
)->state
;
790 /* Merge lookaheads into the state and record whether any of them are
795 if (!(*this_isocorep
)->lookaheads
)
797 (*this_isocorep
)->lookaheads
=
798 xnmalloc (t
->nitems
, sizeof (*this_isocorep
)->lookaheads
);
799 for (i
= 0; i
< t
->nitems
; ++i
)
800 (*this_isocorep
)->lookaheads
[i
] = NULL
;
802 for (i
= 0; i
< t
->nitems
; ++i
)
803 if (!bitset_empty_p (lookaheads
[i
]))
805 if (!(*this_isocorep
)->lookaheads
[i
])
806 (*this_isocorep
)->lookaheads
[i
] =
807 bitset_create (ntokens
, BITSET_FIXED
);
808 bitset_andn (lookaheads
[i
],
809 lookaheads
[i
], (*this_isocorep
)->lookaheads
[i
]);
810 bitset_or ((*this_isocorep
)->lookaheads
[i
],
811 lookaheads
[i
], (*this_isocorep
)->lookaheads
[i
]);
812 if (!bitset_empty_p (lookaheads
[i
]))
813 new_lookaheads
= true;
817 /* If new lookaheads were merged, propagate those lookaheads to the
818 successors, possibly splitting them. If *tp is being recomputed for
819 the first time, this isn't necessary because the main
820 ielr_split_states loop will handle the successors later. */
821 if (!(*this_isocorep
)->recomputedAsSuccessor
)
822 (*this_isocorep
)->recomputedAsSuccessor
= true;
823 else if (new_lookaheads
)
826 /* When merging demands identical lookahead sets, it is impossible to
827 merge new lookaheads. */
828 aver (annotation_lists
);
829 for (i
= 0; i
< (*tp
)->transitions
->num
; ++i
)
831 state
*t2
= (*tp
)->transitions
->states
[i
];
832 /* At any time, there's at most one state for which we have so
833 far initially recomputed only some of its successors in the
834 main ielr_split_states loop. Because we recompute successors
835 in order, we can just stop at the first such successor. Of
836 course, *tp might be a state some of whose successors have
837 been recomputed as successors of other states rather than as
838 successors of *tp. It's fine if we go ahead and propagate to
839 some of those. We'll propagate to them again (but stop when
840 we see nothing changes) and to the others when we reach *tp in
841 the main ielr_split_states loop later. */
842 if (!t2
->state_list
->recomputedAsSuccessor
)
844 AnnotationList__computeLookaheadFilter (
845 annotation_lists
[t2
->state_list
->lr0Isocore
->state
->number
],
847 ielr_compute_lookaheads (follow_kernel_items
, always_follows
,
848 (*this_isocorep
), t2
, work2
,
850 /* FIXME: If splitting t2 here, it's possible that lookaheads
851 that had already propagated from *tp to t2 will be left in t2
852 after *tp has been removed as t2's predecessor:
853 - We're going to recompute all lookaheads in phase 4, so these
854 extra lookaheads won't appear in the final parser table.
855 - If t2 has just one annotation, then these extra lookaheads
856 cannot alter the dominating contribution to the associated
857 inadequacy and thus cannot needlessly prevent a future merge
858 of some new state with t2. We can be sure of this because:
859 - The fact that we're splitting t2 now means that some
860 predecessors (at least one) other than *tp must be
861 propagating contributions to t2.
862 - The fact that t2 was merged in the first place means that,
863 if *tp propagated any contributions, the dominating
864 contribution must be the same as that from those other
866 - Thus, if some new state to be merged with t2 in the future
867 proves to be compatible with the contributions propagated
868 by the other predecessors, it will also be compatible with
869 the contributions made by the extra lookaheads left behind
871 - However, if t2 has more than one annotation and these extra
872 lookaheads contribute to one of their inadequacies, it's
873 possible these extra lookaheads may needlessly prevent a
874 future merge with t2. For example:
875 - Let's say there's an inadequacy A that makes the split
876 necessary as follows:
877 - There's currently just one other predecessor and it
878 propagates to t2 some contributions to inadequacy A.
879 - The new lookaheads that we were attempting to propagate
880 from *tp to t2 made contributions to inadequacy A with a
881 different dominating contribution than those from that
883 - The extra lookaheads either make no contribution to
884 inadequacy A or have the same dominating contribution as
885 the contributions from the other predecessor. Either
886 way, as explained above, they can't prevent a future
888 - Let's say there's an inadequacy B that causes the trouble
889 with future merges as follows:
890 - The extra lookaheads make contributions to inadequacy B.
891 - Those extra contributions did not prevent the original
892 merge to create t2 because the other predecessor
893 propagates to t2 no contributions to inadequacy B.
894 - Thus, those extra contributions may prevent a future
895 merge with t2 even though the merge would be fine if *tp
896 had not left them behind.
897 - Is the latter case common enough to worry about?
898 - Perhaps we should track all predecessors and iterate them
899 now to recreate t2 without those extra lookaheads. */
900 ielr_compute_state (follow_kernel_items
, always_follows
,
901 annotation_lists
, t2
, lookaheads
,
902 last_statep
, work1
, work2
,
903 &(*tp
)->transitions
->states
[i
]);
908 /* Create a new isocore. */
911 state_list
*old_isocore
= *this_isocorep
;
912 (*last_statep
)->next
= *this_isocorep
= xmalloc (sizeof **last_statep
);
913 *last_statep
= *this_isocorep
;
914 (*last_statep
)->state
= *tp
= state_new_isocore (t
);
915 (*tp
)->state_list
= *last_statep
;
916 (*last_statep
)->recomputedAsSuccessor
= true;
917 (*last_statep
)->next
= NULL
;
918 (*last_statep
)->lookaheads
= NULL
;
922 (*last_statep
)->lookaheads
=
923 xnmalloc (t
->nitems
, sizeof (*last_statep
)->lookaheads
);
924 for (i
= 0; i
< t
->nitems
; ++i
)
926 if (bitset_empty_p (lookaheads
[i
]))
927 (*last_statep
)->lookaheads
[i
] = NULL
;
930 (*last_statep
)->lookaheads
[i
] =
931 bitset_create (ntokens
, BITSET_FIXED
);
932 bitset_copy ((*last_statep
)->lookaheads
[i
], lookaheads
[i
]);
936 (*last_statep
)->lr0Isocore
= lr0_isocore
;
937 (*last_statep
)->nextIsocore
= old_isocore
;
943 * - \c follow_kernel_items and \c always_follows were computed by
944 * \c ielr_compute_auxiliary_tables.
946 * - <tt>annotation_lists = NULL</tt> and <tt>max_annotations=0</tt>.
947 * - \c annotation_lists and \c max_annotations were computed by
948 * \c ielr_compute_annotation_lists.
950 * - \c ::states is of size \c ::nstates (which might be greater than
951 * <tt>::nstates \@pre</tt>) and no longer contains any LR(1)-relative
952 * inadequacy. \c annotation_lists was used to determine state
953 * compatibility or, if <tt>annotation_lists = NULL</tt>, the canonical
954 * LR(1) state compatibility test was used.
955 * - If <tt>annotation_lists = NULL</tt>, reduction lookahead sets were
956 * computed in all states. TV_IELR_PHASE4 was pushed while they were
957 * computed from item lookahead sets.
960 ielr_split_states (bitsetv follow_kernel_items
, bitsetv always_follows
,
961 AnnotationList
**annotation_lists
,
962 AnnotationIndex max_annotations
)
964 state_list
*first_state
;
965 state_list
*last_state
;
966 bitsetv lookahead_filter
= NULL
;
969 /* Set up state list and some reusable bitsets. */
971 size_t max_nitems
= 0;
973 state_list
**nodep
= &first_state
;
974 for (i
= 0; i
< nstates
; ++i
)
976 *nodep
= states
[i
]->state_list
= last_state
= xmalloc (sizeof **nodep
);
977 (*nodep
)->state
= states
[i
];
978 (*nodep
)->recomputedAsSuccessor
= false;
979 (*nodep
)->lookaheads
= NULL
;
980 (*nodep
)->lr0Isocore
= *nodep
;
981 (*nodep
)->nextIsocore
= *nodep
;
982 nodep
= &(*nodep
)->next
;
983 if (states
[i
]->nitems
> max_nitems
)
984 max_nitems
= states
[i
]->nitems
;
987 lookahead_filter
= bitsetv_create (max_nitems
, ntokens
, BITSET_FIXED
);
988 if (!annotation_lists
)
989 bitsetv_ones (lookahead_filter
);
990 lookaheads
= bitsetv_create (max_nitems
, ntokens
, BITSET_FIXED
);
993 /* Recompute states. */
995 ContributionIndex
*work
= xnmalloc (max_annotations
, sizeof *work
);
996 state_list
*this_state
;
997 for (this_state
= first_state
; this_state
; this_state
= this_state
->next
)
999 state
*s
= this_state
->state
;
1001 for (i
= 0; i
< s
->transitions
->num
; ++i
)
1003 state
*t
= s
->transitions
->states
[i
];
1004 if (annotation_lists
)
1005 AnnotationList__computeLookaheadFilter (
1006 annotation_lists
[t
->state_list
->lr0Isocore
->state
->number
],
1007 t
->nitems
, lookahead_filter
);
1008 ielr_compute_lookaheads (follow_kernel_items
, always_follows
,
1009 this_state
, t
, lookahead_filter
,
1011 ielr_compute_state (follow_kernel_items
, always_follows
,
1012 annotation_lists
, t
, lookaheads
, &last_state
,
1013 work
, lookahead_filter
,
1014 &s
->transitions
->states
[i
]);
1020 bitsetv_free (lookahead_filter
);
1021 bitsetv_free (lookaheads
);
1023 /* Store states back in the states array. */
1024 states
= xnrealloc (states
, nstates
, sizeof *states
);
1027 for (node
= first_state
; node
; node
= node
->next
)
1028 states
[node
->state
->number
] = node
->state
;
1031 /* In the case of canonical LR(1), copy item lookahead sets to reduction
1033 if (!annotation_lists
)
1035 timevar_push (TV_IELR_PHASE4
);
1038 for (node
= first_state
; node
; node
= node
->next
)
1039 if (!node
->state
->consistent
)
1042 item_number
*itemset
= node
->state
->items
;
1044 for (r
= 0; r
< node
->state
->reductions
->num
; ++r
)
1046 rule
*this_rule
= node
->state
->reductions
->rules
[r
];
1047 bitset lookahead_set
=
1048 node
->state
->reductions
->lookahead_tokens
[r
];
1049 if (item_number_is_rule_number (*this_rule
->rhs
))
1050 ielr_compute_goto_follow_set (follow_kernel_items
,
1051 always_follows
, node
,
1052 this_rule
->lhs
, lookahead_set
);
1053 else if (node
->lookaheads
)
1055 /* We don't need to start the kernel item search back at
1056 i=0 because both items and reductions are sorted on rule
1058 while (!item_number_is_rule_number (ritem
[itemset
[i
]])
1059 || item_number_as_rule_number (ritem
[itemset
[i
]])
1060 != this_rule
->number
)
1063 aver (i
< node
->state
->nitems
);
1065 if (node
->lookaheads
[i
])
1066 bitset_copy (lookahead_set
, node
->lookaheads
[i
]);
1070 timevar_pop (TV_IELR_PHASE4
);
1073 /* Free state list. */
1076 state_list
*node
= first_state
;
1077 if (node
->lookaheads
)
1080 for (i
= 0; i
< node
->state
->nitems
; ++i
)
1081 if (node
->lookaheads
[i
])
1082 bitset_free (node
->lookaheads
[i
]);
1083 free (node
->lookaheads
);
1085 first_state
= node
->next
;
1095 /* Examine user options. */
1097 char *type
= muscle_percent_define_get ("lr.type");
1098 if (0 == strcmp (type
, "lalr"))
1099 lr_type
= LR_TYPE__LALR
;
1100 else if (0 == strcmp (type
, "ielr"))
1101 lr_type
= LR_TYPE__IELR
;
1102 else if (0 == strcmp (type
, "canonical-lr"))
1103 lr_type
= LR_TYPE__CANONICAL_LR
;
1109 /* Phase 0: LALR(1). */
1110 timevar_push (TV_LALR
);
1111 if (lr_type
== LR_TYPE__CANONICAL_LR
)
1115 if (lr_type
== LR_TYPE__LALR
)
1117 bitsetv_free (goto_follows
);
1118 timevar_pop (TV_LALR
);
1121 timevar_pop (TV_LALR
);
1124 bitsetv follow_kernel_items
;
1125 bitsetv always_follows
;
1126 InadequacyList
**inadequacy_lists
= NULL
;
1127 AnnotationList
**annotation_lists
= NULL
;
1128 struct obstack annotations_obstack
;
1129 AnnotationIndex max_annotations
= 0;
1132 /* Phase 1: Compute Auxiliary Tables. */
1133 state
***predecessors
;
1134 timevar_push (TV_IELR_PHASE1
);
1135 ielr_compute_auxiliary_tables (
1136 &follow_kernel_items
, &always_follows
,
1137 lr_type
== LR_TYPE__CANONICAL_LR
? NULL
: &predecessors
);
1138 timevar_pop (TV_IELR_PHASE1
);
1140 /* Phase 2: Compute Annotations. */
1141 timevar_push (TV_IELR_PHASE2
);
1142 if (lr_type
!= LR_TYPE__CANONICAL_LR
)
1144 obstack_init (&annotations_obstack
);
1145 ielr_compute_annotation_lists (follow_kernel_items
, always_follows
,
1146 predecessors
, &max_annotations
,
1147 &inadequacy_lists
, &annotation_lists
,
1148 &annotations_obstack
);
1151 for (i
= 0; i
< nstates
; ++i
)
1152 free (predecessors
[i
]);
1154 free (predecessors
);
1155 bitsetv_free (goto_follows
);
1158 timevar_pop (TV_IELR_PHASE2
);
1161 /* Phase 3: Split States. */
1162 timevar_push (TV_IELR_PHASE3
);
1164 state_number nstates_lr0
= nstates
;
1165 ielr_split_states (follow_kernel_items
, always_follows
,
1166 annotation_lists
, max_annotations
);
1167 if (inadequacy_lists
)
1170 for (i
= 0; i
< nstates_lr0
; ++i
)
1171 InadequacyList__delete (inadequacy_lists
[i
]);
1174 free (inadequacy_lists
);
1175 if (annotation_lists
)
1176 obstack_free (&annotations_obstack
, NULL
);
1177 free (annotation_lists
);
1178 bitsetv_free (follow_kernel_items
);
1179 bitsetv_free (always_follows
);
1180 timevar_pop (TV_IELR_PHASE3
);
1183 /* Phase 4: Compute Reduction Lookaheads. */
1184 timevar_push (TV_IELR_PHASE4
);
1188 if (lr_type
== LR_TYPE__CANONICAL_LR
)
1190 // Reduction lookaheads are computed in ielr_split_states above but are
1191 // timed as part of phase 4.
1197 bitsetv_free (goto_follows
);
1199 timevar_pop (TV_IELR_PHASE4
);