]> git.saurik.com Git - bison.git/blob - src/ielr.c
f1b314a380f4acf1e10304910ecc5d68c1f71a3a
[bison.git] / src / ielr.c
1 /* IELR main implementation.
2
3 Copyright (C) 2009-2010 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
20 #include <config.h>
21 #include "system.h"
22
23 #include "ielr.h"
24
25 #include <bitset.h>
26 #include <timevar.h>
27
28 #include "AnnotationList.h"
29 #include "derives.h"
30 #include "getargs.h"
31 #include "lalr.h"
32 #include "muscle-tab.h"
33 #include "nullable.h"
34 #include "relation.h"
35 #include "state.h"
36 #include "symtab.h"
37
38 /** Records the value of the \%define variable lr.type. */
39 typedef enum { LR_TYPE__LALR, LR_TYPE__IELR, LR_TYPE__CANONICAL_LR } LrType;
40
41 /**
42 * \post:
43 * - \c result = a new \c bitset of size \c ::nritems such that any bit \c i
44 * is set iff <tt>ritem[i]</tt> is a nonterminal after which all ritems in
45 * the same RHS are nullable nonterminals. In other words, the follows of
46 * a goto on <tt>ritem[i]</tt> include the lookahead set of the item.
47 */
48 static bitset
49 ielr_compute_ritem_sees_lookahead_set (void)
50 {
51 bitset result = bitset_create (nritems, BITSET_FIXED);
52 unsigned int i = nritems-1;
53 while (i>0)
54 {
55 --i;
56 while (!item_number_is_rule_number (ritem[i])
57 && ISVAR (ritem[i])
58 && nullable [item_number_as_symbol_number (ritem[i]) - ntokens])
59 bitset_set (result, i--);
60 if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i]))
61 bitset_set (result, i--);
62 while (!item_number_is_rule_number (ritem[i]) && i>0)
63 --i;
64 }
65 if (trace_flag & trace_ielr)
66 {
67 fprintf (stderr, "ritem_sees_lookahead_set:\n");
68 debug_bitset (result);
69 fprintf (stderr, "\n");
70 }
71 return result;
72 }
73
74 /**
75 * \pre:
76 * - \c ritem_sees_lookahead_set was computed by
77 * \c ielr_compute_ritem_sees_lookahead_set.
78 * \post:
79 * - Each of \c *edgesp and \c *edge_countsp is a new array of size
80 * \c ::ngotos.
81 * - <tt>(*edgesp)[i]</tt> points to a \c goto_number array of size
82 * <tt>(*edge_countsp)[i]+1</tt>.
83 * - In such a \c goto_number array, the last element is \c ::END_NODE.
84 * - All remaining elements are the indices of the gotos to which there is an
85 * internal follow edge from goto \c i.
86 * - There is an internal follow edge from goto \c i to goto \c j iff both:
87 * - The from states of gotos \c i and \c j are the same.
88 * - The transition nonterminal for goto \c i appears as the first RHS
89 * symbol of at least one production for which both:
90 * - The LHS is the transition symbol of goto \c j.
91 * - All other RHS symbols are nullable nonterminals.
92 * - In other words, the follows of goto \c i include the follows of
93 * goto \c j and it's an internal edge because the from states are the
94 * same.
95 */
96 static void
97 ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set,
98 goto_number ***edgesp, int **edge_countsp)
99 {
100 *edgesp = xnmalloc (ngotos, sizeof **edgesp);
101 *edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp);
102 {
103 bitset sources = bitset_create (ngotos, BITSET_FIXED);
104 goto_number i;
105 for (i = 0; i < ngotos; ++i)
106 (*edge_countsp)[i] = 0;
107 for (i = 0; i < ngotos; ++i)
108 {
109 int nsources = 0;
110 {
111 rule **rulep;
112 for (rulep = derives[states[to_state[i]]->accessing_symbol
113 - ntokens];
114 *rulep;
115 ++rulep)
116 {
117 /* If there is at least one RHS symbol, if the first RHS symbol
118 is a nonterminal, and if all remaining RHS symbols (if any)
119 are nullable nonterminals, create an edge from the LHS
120 symbol's goto to the first RHS symbol's goto such that the RHS
121 symbol's goto will be the source of the edge after the
122 eventual relation_transpose below.
123
124 Unlike in ielr_compute_always_follows, I use a bitset for
125 edges rather than an array because it is possible that
126 multiple RHS's with the same first symbol could fit and thus
127 that we could end up with redundant edges. With the
128 possibility of redundant edges, it's hard to know ahead of
129 time how large to make such an array. Another possible
130 redundancy is that source and destination might be the same
131 goto. Eliminating all these possible redundancies now might
132 possibly help performance a little. I have not proven any of
133 this, but I'm guessing the bitset shouldn't entail much of a
134 performance penalty, if any. */
135 if (bitset_test (ritem_sees_lookahead_set,
136 (*rulep)->rhs - ritem))
137 {
138 goto_number source =
139 map_goto (from_state[i],
140 item_number_as_symbol_number (*(*rulep)->rhs));
141 if (i != source && !bitset_test (sources, source))
142 {
143 bitset_set (sources, source);
144 ++nsources;
145 ++(*edge_countsp)[source];
146 }
147 }
148 }
149 }
150 if (nsources == 0)
151 (*edgesp)[i] = NULL;
152 else
153 {
154 (*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]);
155 {
156 bitset_iterator biter_source;
157 bitset_bindex source;
158 int j = 0;
159 BITSET_FOR_EACH (biter_source, sources, source, 0)
160 (*edgesp)[i][j++] = source;
161 }
162 (*edgesp)[i][nsources] = END_NODE;
163 }
164 bitset_zero (sources);
165 }
166 bitset_free (sources);
167 }
168
169 relation_transpose (edgesp, ngotos);
170
171 if (trace_flag & trace_ielr)
172 {
173 fprintf (stderr, "internal_follow_edges:\n");
174 relation_print (*edgesp, ngotos, stderr);
175 }
176 }
177
178 /**
179 * \pre:
180 * - \c ritem_sees_lookahead_set was computed by
181 * \c ielr_compute_ritem_sees_lookahead_set.
182 * - \c internal_follow_edges was computed by
183 * \c ielr_compute_internal_follow_edges.
184 * \post:
185 * - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows
186 * is \c ngotos and the number of columns is maximum number of kernel items
187 * in any state.
188 * - <tt>(*follow_kernel_itemsp)[i][j]</tt> is set iff the follows of goto
189 * \c i include the lookahead set of item \c j in the from state of goto
190 * \c i.
191 * - Thus, <tt>(*follow_kernel_itemsp)[i][j]</tt> is always unset if there is
192 * no item \c j in the from state of goto \c i.
193 */
194 static void
195 ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set,
196 goto_number **internal_follow_edges,
197 bitsetv *follow_kernel_itemsp)
198 {
199 {
200 size_t max_nitems = 0;
201 state_number i;
202 for (i = 0; i < nstates; ++i)
203 if (states[i]->nitems > max_nitems)
204 max_nitems = states[i]->nitems;
205 *follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED);
206 }
207 {
208 goto_number i;
209 for (i = 0; i < ngotos; ++i)
210 {
211 size_t nitems = states[from_state[i]]->nitems;
212 item_number *items = states[from_state[i]]->items;
213 size_t j;
214 for (j = 0; j < nitems; ++j)
215 /* If this item has this goto and if all subsequent symbols in this
216 RHS (if any) are nullable nonterminals, then record this item as
217 one whose lookahead set is included in this goto's follows. */
218 if (item_number_is_symbol_number (ritem[items[j]])
219 && item_number_as_symbol_number (ritem[items[j]])
220 == states[to_state[i]]->accessing_symbol
221 && bitset_test (ritem_sees_lookahead_set, items[j]))
222 bitset_set ((*follow_kernel_itemsp)[i], j);
223 }
224 }
225 relation_digraph (internal_follow_edges, ngotos, follow_kernel_itemsp);
226
227 if (trace_flag & trace_ielr)
228 {
229 fprintf (stderr, "follow_kernel_items:\n");
230 debug_bitsetv (*follow_kernel_itemsp);
231 }
232 }
233
234 /**
235 * \pre
236 * - \c *edgesp and \c edge_counts were computed by
237 * \c ielr_compute_internal_follow_edges.
238 * \post
239 * - \c *always_followsp is a new \c bitsetv with \c ngotos rows and
240 * \c ntokens columns.
241 * - <tt>(*always_followsp)[i][j]</tt> is set iff token \c j is an always
242 * follow (that is, it's computed by internal and successor edges) of goto
243 * \c i.
244 * - Rows of \c *edgesp have been realloc'ed and extended to include
245 * successor follow edges. \c edge_counts has not been updated.
246 */
247 static void
248 ielr_compute_always_follows (goto_number ***edgesp,
249 int const edge_counts[],
250 bitsetv *always_followsp)
251 {
252 *always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
253 {
254 goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array);
255 goto_number i;
256 for (i = 0; i < ngotos; ++i)
257 {
258 goto_number nedges = edge_counts[i];
259 {
260 int j;
261 transitions *trans = states[to_state[i]]->transitions;
262 FOR_EACH_SHIFT (trans, j)
263 bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j));
264 for (; j < trans->num; ++j)
265 {
266 symbol_number sym = TRANSITION_SYMBOL (trans, j);
267 if (nullable[sym - ntokens])
268 edge_array[nedges++] = map_goto (to_state[i], sym);
269 }
270 }
271 if (nedges - edge_counts[i])
272 {
273 (*edgesp)[i] =
274 xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]);
275 memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i],
276 (nedges - edge_counts[i]) * sizeof *(*edgesp)[i]);
277 (*edgesp)[i][nedges] = END_NODE;
278 }
279 }
280 free (edge_array);
281 }
282 relation_digraph (*edgesp, ngotos, always_followsp);
283
284 if (trace_flag & trace_ielr)
285 {
286 fprintf (stderr, "always follow edges:\n");
287 relation_print (*edgesp, ngotos, stderr);
288 fprintf (stderr, "always_follows:\n");
289 debug_bitsetv (*always_followsp);
290 }
291 }
292
293 /**
294 * \post
295 * - \c result is a new array of size \c ::nstates.
296 * - <tt>result[i]</tt> is an array of pointers to the predecessor
297 * <tt>state</tt>'s of state \c i.
298 * - The last element of such an array is \c NULL.
299 */
300 static state ***
301 ielr_compute_predecessors (void)
302 {
303 state_number i;
304 int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts);
305 state ***result = xnmalloc (nstates, sizeof *result);
306 for (i = 0; i < nstates; ++i)
307 predecessor_counts[i] = 0;
308 for (i = 0; i < nstates; ++i)
309 {
310 int j;
311 for (j = 0; j < states[i]->transitions->num; ++j)
312 ++predecessor_counts[states[i]->transitions->states[j]->number];
313 }
314 for (i = 0; i < nstates; ++i)
315 {
316 result[i] = xnmalloc (predecessor_counts[i]+1, sizeof *result[i]);
317 result[i][predecessor_counts[i]] = NULL;
318 predecessor_counts[i] = 0;
319 }
320 for (i = 0; i < nstates; ++i)
321 {
322 int j;
323 for (j = 0; j < states[i]->transitions->num; ++j)
324 {
325 state_number k = states[i]->transitions->states[j]->number;
326 result[k][predecessor_counts[k]++] = states[i];
327 }
328 }
329 free (predecessor_counts);
330 return result;
331 }
332
333 /**
334 * \post
335 * - \c *follow_kernel_itemsp and \c *always_followsp were computed by
336 * \c ielr_compute_follow_kernel_items and
337 * \c ielr_compute_always_follows.
338 * - Iff <tt>predecessorsp != NULL</tt>, then \c *predecessorsp was computed
339 * by \c ielr_compute_predecessors.
340 */
341 static void
342 ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp,
343 bitsetv *always_followsp,
344 state ****predecessorsp)
345 {
346 goto_number **edges;
347 int *edge_counts;
348 {
349 bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set ();
350 ielr_compute_internal_follow_edges (ritem_sees_lookahead_set,
351 &edges, &edge_counts);
352 ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges,
353 follow_kernel_itemsp);
354 bitset_free (ritem_sees_lookahead_set);
355 }
356 ielr_compute_always_follows (&edges, edge_counts, always_followsp);
357 {
358 int i;
359 for (i = 0; i < ngotos; ++i)
360 free (edges[i]);
361 }
362 free (edges);
363 free (edge_counts);
364 if (predecessorsp)
365 *predecessorsp = ielr_compute_predecessors ();
366 }
367
368 /**
369 * \note
370 * - FIXME: It might be an interesting experiment to compare the space and
371 * time efficiency of computing \c item_lookahead_sets either:
372 * - Fully up front.
373 * - Just-in-time, as implemented below.
374 * - Not at all. That is, just let annotations continue even when
375 * unnecessary.
376 */
377 bool
378 ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item,
379 symbol_number lookahead, state ***predecessors,
380 bitset **item_lookahead_sets)
381 {
382 if (!item_lookahead_sets[s->number])
383 {
384 size_t i;
385 item_lookahead_sets[s->number] =
386 xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]);
387 for (i = 0; i < s->nitems; ++i)
388 item_lookahead_sets[s->number][i] = NULL;
389 }
390 if (!item_lookahead_sets[s->number][item])
391 {
392 item_lookahead_sets[s->number][item] =
393 bitset_create (ntokens, BITSET_FIXED);
394 /* If this kernel item is the beginning of a RHS, it must be the kernel
395 item in the start state, and so its LHS has no follows and no goto to
396 check. If, instead, this kernel item is the successor of the start
397 state's kernel item, there are still no follows and no goto. This
398 situation is fortunate because we want to avoid the - 2 below in both
399 cases.
400
401 Actually, IELR(1) should never invoke this function for either of
402 those cases because (1) follow_kernel_items will never reference a
403 kernel item for this RHS because the end token blocks sight of the
404 lookahead set from the RHS's only nonterminal, and (2) no reduction
405 has a lookback dependency on this lookahead set. Nevertheless, I
406 didn't change this test to an aver just in case the usage of this
407 function evolves to need those two cases. In both cases, the current
408 implementation returns the right result. */
409 if (s->items[item] > 1)
410 {
411 /* If the LHS symbol of this item isn't known (because this is a
412 top-level invocation), go get it. */
413 if (!lhs)
414 {
415 unsigned int i;
416 for (i = s->items[item];
417 !item_number_is_rule_number (ritem[i]);
418 ++i)
419 ;
420 lhs = rules[item_number_as_rule_number (ritem[i])].lhs->number;
421 }
422 /* If this kernel item is next to the beginning of the RHS, then
423 check all predecessors' goto follows for the LHS. */
424 if (item_number_is_rule_number (ritem[s->items[item] - 2]))
425 {
426 state **predecessor;
427 aver (lhs != accept->number);
428 for (predecessor = predecessors[s->number];
429 *predecessor;
430 ++predecessor)
431 bitset_or (item_lookahead_sets[s->number][item],
432 item_lookahead_sets[s->number][item],
433 goto_follows[map_goto ((*predecessor)->number,
434 lhs)]);
435 }
436 /* If this kernel item is later in the RHS, then check all
437 predecessor items' lookahead sets. */
438 else
439 {
440 state **predecessor;
441 for (predecessor = predecessors[s->number];
442 *predecessor;
443 ++predecessor)
444 {
445 size_t predecessor_item;
446 for (predecessor_item = 0;
447 predecessor_item < (*predecessor)->nitems;
448 ++predecessor_item)
449 if ((*predecessor)->items[predecessor_item]
450 == s->items[item] - 1)
451 break;
452 aver (predecessor_item != (*predecessor)->nitems);
453 ielr_item_has_lookahead (*predecessor, lhs,
454 predecessor_item, 0 /*irrelevant*/,
455 predecessors, item_lookahead_sets);
456 bitset_or (item_lookahead_sets[s->number][item],
457 item_lookahead_sets[s->number][item],
458 item_lookahead_sets[(*predecessor)->number]
459 [predecessor_item]);
460 }
461 }
462 }
463 }
464 return bitset_test (item_lookahead_sets[s->number][item], lookahead);
465 }
466
467 /**
468 * \pre
469 * - \c follow_kernel_items, \c always_follows, and \c predecessors
470 * were computed by \c ielr_compute_auxiliary_tables.
471 * \post
472 * - Each of <tt>*inadequacy_listsp</tt> and <tt>*annotation_listsp</tt>
473 * points to a new array of size \c ::nstates.
474 * - For <tt>0 <= i < ::nstates</tt>:
475 * - <tt>(*inadequacy_listsp)[i]</tt> contains the \c InadequacyList head
476 * node for <tt>states[i]</tt>.
477 * - <tt>(*annotation_listsp)[i]</tt> contains the \c AnnotationList head
478 * node for <tt>states[i]</tt>.
479 * - <tt>*max_annotationsp</tt> is the maximum number of annotations per
480 * state.
481 */
482 static void
483 ielr_compute_annotation_lists (bitsetv follow_kernel_items,
484 bitsetv always_follows, state ***predecessors,
485 AnnotationIndex *max_annotationsp,
486 InadequacyList ***inadequacy_listsp,
487 AnnotationList ***annotation_listsp,
488 struct obstack *annotations_obstackp)
489 {
490 bitset **item_lookahead_sets =
491 xnmalloc (nstates, sizeof *item_lookahead_sets);
492 AnnotationIndex *annotation_counts =
493 xnmalloc (nstates, sizeof *annotation_counts);
494 ContributionIndex max_contributions = 0;
495 unsigned int total_annotations = 0;
496 state_number i;
497
498 *inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp);
499 *annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp);
500 for (i = 0; i < nstates; ++i)
501 {
502 item_lookahead_sets[i] = NULL;
503 (*inadequacy_listsp)[i] = NULL;
504 (*annotation_listsp)[i] = NULL;
505 annotation_counts[i] = 0;
506 }
507 {
508 InadequacyListNodeCount inadequacy_list_node_count = 0;
509 for (i = 0; i < nstates; ++i)
510 AnnotationList__compute_from_inadequacies (
511 states[i], follow_kernel_items, always_follows, predecessors,
512 item_lookahead_sets, *inadequacy_listsp, *annotation_listsp,
513 annotation_counts, &max_contributions, annotations_obstackp,
514 &inadequacy_list_node_count);
515 }
516 *max_annotationsp = 0;
517 for (i = 0; i < nstates; ++i)
518 {
519 if (annotation_counts[i] > *max_annotationsp)
520 *max_annotationsp = annotation_counts[i];
521 total_annotations += annotation_counts[i];
522 }
523 if (trace_flag & trace_ielr)
524 {
525 for (i = 0; i < nstates; ++i)
526 {
527 fprintf (stderr, "Inadequacy annotations for state %d:\n", i);
528 AnnotationList__debug ((*annotation_listsp)[i],
529 states[i]->nitems, 2);
530 }
531 fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates);
532 fprintf (stderr, "Average number of annotations per state: %f\n",
533 (float)total_annotations/nstates);
534 fprintf (stderr, "Max number of annotations per state: %d\n",
535 *max_annotationsp);
536 fprintf (stderr, "Max number of contributions per annotation: %d\n",
537 max_contributions);
538 }
539 for (i = 0; i < nstates; ++i)
540 if (item_lookahead_sets[i])
541 {
542 size_t j;
543 for (j = 0; j < states[i]->nitems; ++j)
544 if (item_lookahead_sets[i][j])
545 bitset_free (item_lookahead_sets[i][j]);
546 free (item_lookahead_sets[i]);
547 }
548 free (item_lookahead_sets);
549 free (annotation_counts);
550 }
551
552 typedef struct state_list {
553 struct state_list *next;
554 state *state;
555 /** Has this state been recomputed as a successor of another state? */
556 bool recomputedAsSuccessor;
557 /**
558 * \c NULL iff all lookahead sets are empty. <tt>lookaheads[i] = NULL</tt>
559 * iff the lookahead set on item \c i is empty.
560 */
561 bitset *lookaheads;
562 /**
563 * nextIsocore is the next state in a circularly linked-list of all states
564 * with the same core. The one originally computed by generate_states in
565 * LR0.c is lr0Isocore.
566 */
567 struct state_list *lr0Isocore;
568 struct state_list *nextIsocore;
569 } state_list;
570
571 /**
572 * \pre
573 * - \c follow_kernel_items and \c always_follows were computed by
574 * \c ielr_compute_auxiliary_tables.
575 * - <tt>n->class = nterm_sym</tt>.
576 * \post
577 * - \c follow_set contains the follow set for the goto on nonterminal \c n
578 * in state \c s based on the lookaheads stored in <tt>s->lookaheads</tt>.
579 */
580 static void
581 ielr_compute_goto_follow_set (bitsetv follow_kernel_items,
582 bitsetv always_follows, state_list *s,
583 symbol *n, bitset follow_set)
584 {
585 goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number);
586 bitset_copy (follow_set, always_follows[n_goto]);
587 if (s->lookaheads)
588 {
589 bitset_iterator biter_item;
590 bitset_bindex item;
591 BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0)
592 if (s->lookaheads[item])
593 bitset_or (follow_set, follow_set, s->lookaheads[item]);
594 }
595 }
596
597 /**
598 * \pre
599 * - \c follow_kernel_items and \c always_follows were computed by
600 * \c ielr_compute_auxiliary_tables.
601 * - \c lookahead_filter was computed by
602 * \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore
603 * of \c t.
604 * - The number of rows in \c lookaheads is at least the number of items in
605 * \c t, and the number of columns is \c ::ntokens.
606 * \post
607 * - <tt>lookaheads[i][j]</tt> is set iff both:
608 * - <tt>lookahead_filter[i][j]</tt> is set.
609 * - The isocore of \c t that will be the transition successor of \c s will
610 * inherit from \c s token \c j into the lookahead set of item \c i.
611 */
612 static void
613 ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows,
614 state_list *s, state *t, bitsetv lookahead_filter,
615 bitsetv lookaheads)
616 {
617 size_t s_item = 0;
618 size_t t_item;
619 bitsetv_zero (lookaheads);
620 for (t_item = 0; t_item < t->nitems; ++t_item)
621 {
622 /* If this kernel item is the beginning of a RHS, it must be the
623 kernel item in the start state, but t is supposed to be a successor
624 state. If, instead, this kernel item is the successor of the start
625 state's kernel item, the lookahead set is empty. This second case is
626 a special case to avoid the - 2 below, but the next successor can be
627 handled fine without special casing it. */
628 aver (t->items[t_item] != 0);
629 if (t->items[t_item] > 1
630 && !bitset_empty_p (lookahead_filter[t_item]))
631 {
632 if (item_number_is_rule_number (ritem[t->items[t_item] - 2]))
633 {
634 unsigned int rule_item;
635 for (rule_item = t->items[t_item];
636 !item_number_is_rule_number (ritem[rule_item]);
637 ++rule_item)
638 ;
639 ielr_compute_goto_follow_set (
640 follow_kernel_items, always_follows, s,
641 rules[item_number_as_rule_number (ritem[rule_item])].lhs,
642 lookaheads[t_item]);
643 }
644 else if (s->lookaheads)
645 {
646 /* We don't have to start the s item search at the beginning
647 every time because items from both states are sorted by their
648 indices in ritem. */
649 for (; s_item < s->state->nitems; ++s_item)
650 if (s->state->items[s_item] == t->items[t_item] - 1)
651 break;
652 aver (s_item != s->state->nitems);
653 if (s->lookaheads[s_item])
654 bitset_copy (lookaheads[t_item], s->lookaheads[s_item]);
655 }
656 bitset_and (lookaheads[t_item],
657 lookaheads[t_item], lookahead_filter[t_item]);
658 }
659 }
660 }
661
662 /**
663 * \pre
664 * - \c follow_kernel_items and \c always_follows were computed by
665 * \c ielr_compute_auxiliary_tables.
666 * - Either:
667 * - <tt>annotation_lists = NULL</tt> and all bits in work2 are set.
668 * - \c annotation_lists was computed by \c ielr_compute_annotation_lists.
669 * - The number of rows in each of \c lookaheads and \c work2 is the maximum
670 * number of items in any state. The number of columns in each is
671 * \c ::ntokens.
672 * - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t.
673 * - \c ::nstates is the total number of states, some not yet fully computed,
674 * in the list ending at \c *last_statep. It is at least the number of
675 * original LR(0) states.
676 * - The size of \c work1 is at least the number of annotations for the LR(0)
677 * isocore of \c t.
678 * \post
679 * - Either:
680 * - In the case that <tt>annotation_lists != NULL</tt>,
681 * <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if
682 * permitted by the annotations for the original LR(0) isocore of \c t.
683 * If this changed the lookaheads in that isocore, those changes were
684 * propagated to all already computed transition successors recursively
685 * possibly resulting in the splitting of some of those successors.
686 * - In the case that <tt>annotation_lists = NULL</tt>,
687 * <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if the
688 * isocore's lookahead sets were identical to those specified by
689 * <tt>lookaheads \@pre</tt>.
690 * - If no such merge was permitted, a new isocore of \c t containing
691 * <tt>lookaheads \@pre</tt> was appended to the state list whose
692 * previous tail was <tt>*last_statep \@pre</tt> and \c ::nstates was
693 * incremented. It was also appended to \c t's isocore list.
694 * - <tt>*tp</tt> = the isocore of \c t into which
695 * <tt>lookaheads \@pre</tt> was placed/merged.
696 * - \c lookaheads, \c work1, and \c work2 may have been altered.
697 */
698 static void
699 ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows,
700 AnnotationList **annotation_lists, state *t,
701 bitsetv lookaheads, state_list **last_statep,
702 ContributionIndex work1[], bitsetv work2, state **tp)
703 {
704 state_list *lr0_isocore = t->state_list->lr0Isocore;
705 state_list **this_isocorep;
706 bool has_lookaheads;
707
708 /* Determine whether there's an isocore of t with which these lookaheads can
709 be merged. */
710 {
711 AnnotationIndex ai;
712 AnnotationList *a;
713 if (annotation_lists)
714 for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
715 a;
716 ++ai, a = a->next)
717 work1[ai] =
718 AnnotationList__computeDominantContribution (
719 a, lr0_isocore->state->nitems, lookaheads, false);
720 for (this_isocorep = &t->state_list;
721 this_isocorep == &t->state_list || *this_isocorep != t->state_list;
722 this_isocorep = &(*this_isocorep)->nextIsocore)
723 {
724 if (!(*this_isocorep)->recomputedAsSuccessor)
725 break;
726 if (annotation_lists)
727 {
728 for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
729 a;
730 ++ai, a = a->next)
731 {
732 if (work1[ai] != ContributionIndex__none)
733 {
734 /* This isocore compatibility test depends on the fact
735 that, if the dominant contributions are the same for the
736 two isocores, then merging their lookahead sets will not
737 produce a state with a different dominant contribution.
738 */
739 ContributionIndex ci =
740 AnnotationList__computeDominantContribution (
741 a, lr0_isocore->state->nitems,
742 (*this_isocorep)->lookaheads, false);
743 if (ci != ContributionIndex__none && work1[ai] != ci)
744 break;
745 }
746 }
747 if (!a)
748 break;
749 }
750 else
751 {
752 size_t i;
753 for (i = 0; i < t->nitems; ++i)
754 {
755 if (!(*this_isocorep)->lookaheads
756 || !(*this_isocorep)->lookaheads[i])
757 {
758 if (!bitset_empty_p (lookaheads[i]))
759 break;
760 }
761 // bitset_equal_p uses the size of the first argument, so
762 // lookaheads[i] must be the second argument.
763 else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i],
764 lookaheads[i]))
765 break;
766 }
767 if (i == t->nitems)
768 break;
769 }
770 }
771 }
772
773 has_lookaheads = false;
774 {
775 size_t i;
776 for (i = 0; i < lr0_isocore->state->nitems; ++i)
777 if (!bitset_empty_p (lookaheads[i]))
778 {
779 has_lookaheads = true;
780 break;
781 }
782 }
783
784 /* Merge with an existing isocore. */
785 if (this_isocorep == &t->state_list || *this_isocorep != t->state_list)
786 {
787 bool new_lookaheads = false;
788 *tp = (*this_isocorep)->state;
789
790 /* Merge lookaheads into the state and record whether any of them are
791 actually new. */
792 if (has_lookaheads)
793 {
794 size_t i;
795 if (!(*this_isocorep)->lookaheads)
796 {
797 (*this_isocorep)->lookaheads =
798 xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads);
799 for (i = 0; i < t->nitems; ++i)
800 (*this_isocorep)->lookaheads[i] = NULL;
801 }
802 for (i = 0; i < t->nitems; ++i)
803 if (!bitset_empty_p (lookaheads[i]))
804 {
805 if (!(*this_isocorep)->lookaheads[i])
806 (*this_isocorep)->lookaheads[i] =
807 bitset_create (ntokens, BITSET_FIXED);
808 bitset_andn (lookaheads[i],
809 lookaheads[i], (*this_isocorep)->lookaheads[i]);
810 bitset_or ((*this_isocorep)->lookaheads[i],
811 lookaheads[i], (*this_isocorep)->lookaheads[i]);
812 if (!bitset_empty_p (lookaheads[i]))
813 new_lookaheads = true;
814 }
815 }
816
817 /* If new lookaheads were merged, propagate those lookaheads to the
818 successors, possibly splitting them. If *tp is being recomputed for
819 the first time, this isn't necessary because the main
820 ielr_split_states loop will handle the successors later. */
821 if (!(*this_isocorep)->recomputedAsSuccessor)
822 (*this_isocorep)->recomputedAsSuccessor = true;
823 else if (new_lookaheads)
824 {
825 int i;
826 /* When merging demands identical lookahead sets, it is impossible to
827 merge new lookaheads. */
828 aver (annotation_lists);
829 for (i = 0; i < (*tp)->transitions->num; ++i)
830 {
831 state *t2 = (*tp)->transitions->states[i];
832 /* At any time, there's at most one state for which we have so
833 far initially recomputed only some of its successors in the
834 main ielr_split_states loop. Because we recompute successors
835 in order, we can just stop at the first such successor. Of
836 course, *tp might be a state some of whose successors have
837 been recomputed as successors of other states rather than as
838 successors of *tp. It's fine if we go ahead and propagate to
839 some of those. We'll propagate to them again (but stop when
840 we see nothing changes) and to the others when we reach *tp in
841 the main ielr_split_states loop later. */
842 if (!t2->state_list->recomputedAsSuccessor)
843 break;
844 AnnotationList__computeLookaheadFilter (
845 annotation_lists[t2->state_list->lr0Isocore->state->number],
846 t2->nitems, work2);
847 ielr_compute_lookaheads (follow_kernel_items, always_follows,
848 (*this_isocorep), t2, work2,
849 lookaheads);
850 /* FIXME: If splitting t2 here, it's possible that lookaheads
851 that had already propagated from *tp to t2 will be left in t2
852 after *tp has been removed as t2's predecessor:
853 - We're going to recompute all lookaheads in phase 4, so these
854 extra lookaheads won't appear in the final parser table.
855 - If t2 has just one annotation, then these extra lookaheads
856 cannot alter the dominating contribution to the associated
857 inadequacy and thus cannot needlessly prevent a future merge
858 of some new state with t2. We can be sure of this because:
859 - The fact that we're splitting t2 now means that some
860 predecessors (at least one) other than *tp must be
861 propagating contributions to t2.
862 - The fact that t2 was merged in the first place means that,
863 if *tp propagated any contributions, the dominating
864 contribution must be the same as that from those other
865 predecessors.
866 - Thus, if some new state to be merged with t2 in the future
867 proves to be compatible with the contributions propagated
868 by the other predecessors, it will also be compatible with
869 the contributions made by the extra lookaheads left behind
870 by *tp.
871 - However, if t2 has more than one annotation and these extra
872 lookaheads contribute to one of their inadequacies, it's
873 possible these extra lookaheads may needlessly prevent a
874 future merge with t2. For example:
875 - Let's say there's an inadequacy A that makes the split
876 necessary as follows:
877 - There's currently just one other predecessor and it
878 propagates to t2 some contributions to inadequacy A.
879 - The new lookaheads that we were attempting to propagate
880 from *tp to t2 made contributions to inadequacy A with a
881 different dominating contribution than those from that
882 other predecessor.
883 - The extra lookaheads either make no contribution to
884 inadequacy A or have the same dominating contribution as
885 the contributions from the other predecessor. Either
886 way, as explained above, they can't prevent a future
887 merge.
888 - Let's say there's an inadequacy B that causes the trouble
889 with future merges as follows:
890 - The extra lookaheads make contributions to inadequacy B.
891 - Those extra contributions did not prevent the original
892 merge to create t2 because the other predecessor
893 propagates to t2 no contributions to inadequacy B.
894 - Thus, those extra contributions may prevent a future
895 merge with t2 even though the merge would be fine if *tp
896 had not left them behind.
897 - Is the latter case common enough to worry about?
898 - Perhaps we should track all predecessors and iterate them
899 now to recreate t2 without those extra lookaheads. */
900 ielr_compute_state (follow_kernel_items, always_follows,
901 annotation_lists, t2, lookaheads,
902 last_statep, work1, work2,
903 &(*tp)->transitions->states[i]);
904 }
905 }
906 }
907
908 /* Create a new isocore. */
909 else
910 {
911 state_list *old_isocore = *this_isocorep;
912 (*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep);
913 *last_statep = *this_isocorep;
914 (*last_statep)->state = *tp = state_new_isocore (t);
915 (*tp)->state_list = *last_statep;
916 (*last_statep)->recomputedAsSuccessor = true;
917 (*last_statep)->next = NULL;
918 (*last_statep)->lookaheads = NULL;
919 if (has_lookaheads)
920 {
921 size_t i;
922 (*last_statep)->lookaheads =
923 xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads);
924 for (i = 0; i < t->nitems; ++i)
925 {
926 if (bitset_empty_p (lookaheads[i]))
927 (*last_statep)->lookaheads[i] = NULL;
928 else
929 {
930 (*last_statep)->lookaheads[i] =
931 bitset_create (ntokens, BITSET_FIXED);
932 bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]);
933 }
934 }
935 }
936 (*last_statep)->lr0Isocore = lr0_isocore;
937 (*last_statep)->nextIsocore = old_isocore;
938 }
939 }
940
941 /**
942 * \pre
943 * - \c follow_kernel_items and \c always_follows were computed by
944 * \c ielr_compute_auxiliary_tables.
945 * - Either:
946 * - <tt>annotation_lists = NULL</tt> and <tt>max_annotations=0</tt>.
947 * - \c annotation_lists and \c max_annotations were computed by
948 * \c ielr_compute_annotation_lists.
949 * \post
950 * - \c ::states is of size \c ::nstates (which might be greater than
951 * <tt>::nstates \@pre</tt>) and no longer contains any LR(1)-relative
952 * inadequacy. \c annotation_lists was used to determine state
953 * compatibility or, if <tt>annotation_lists = NULL</tt>, the canonical
954 * LR(1) state compatibility test was used.
955 * - If <tt>annotation_lists = NULL</tt>, reduction lookahead sets were
956 * computed in all states. TV_IELR_PHASE4 was pushed while they were
957 * computed from item lookahead sets.
958 */
959 static void
960 ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows,
961 AnnotationList **annotation_lists,
962 AnnotationIndex max_annotations)
963 {
964 state_list *first_state;
965 state_list *last_state;
966 bitsetv lookahead_filter = NULL;
967 bitsetv lookaheads;
968
969 /* Set up state list and some reusable bitsets. */
970 {
971 size_t max_nitems = 0;
972 state_number i;
973 state_list **nodep = &first_state;
974 for (i = 0; i < nstates; ++i)
975 {
976 *nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep);
977 (*nodep)->state = states[i];
978 (*nodep)->recomputedAsSuccessor = false;
979 (*nodep)->lookaheads = NULL;
980 (*nodep)->lr0Isocore = *nodep;
981 (*nodep)->nextIsocore = *nodep;
982 nodep = &(*nodep)->next;
983 if (states[i]->nitems > max_nitems)
984 max_nitems = states[i]->nitems;
985 }
986 *nodep = NULL;
987 lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
988 if (!annotation_lists)
989 bitsetv_ones (lookahead_filter);
990 lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
991 }
992
993 /* Recompute states. */
994 {
995 ContributionIndex *work = xnmalloc (max_annotations, sizeof *work);
996 state_list *this_state;
997 for (this_state = first_state; this_state; this_state = this_state->next)
998 {
999 state *s = this_state->state;
1000 int i;
1001 for (i = 0; i < s->transitions->num; ++i)
1002 {
1003 state *t = s->transitions->states[i];
1004 if (annotation_lists)
1005 AnnotationList__computeLookaheadFilter (
1006 annotation_lists[t->state_list->lr0Isocore->state->number],
1007 t->nitems, lookahead_filter);
1008 ielr_compute_lookaheads (follow_kernel_items, always_follows,
1009 this_state, t, lookahead_filter,
1010 lookaheads);
1011 ielr_compute_state (follow_kernel_items, always_follows,
1012 annotation_lists, t, lookaheads, &last_state,
1013 work, lookahead_filter,
1014 &s->transitions->states[i]);
1015 }
1016 }
1017 free (work);
1018 }
1019
1020 bitsetv_free (lookahead_filter);
1021 bitsetv_free (lookaheads);
1022
1023 /* Store states back in the states array. */
1024 states = xnrealloc (states, nstates, sizeof *states);
1025 {
1026 state_list *node;
1027 for (node = first_state; node; node = node->next)
1028 states[node->state->number] = node->state;
1029 }
1030
1031 /* In the case of canonical LR(1), copy item lookahead sets to reduction
1032 lookahead sets. */
1033 if (!annotation_lists)
1034 {
1035 timevar_push (TV_IELR_PHASE4);
1036 initialize_LA ();
1037 state_list *node;
1038 for (node = first_state; node; node = node->next)
1039 if (!node->state->consistent)
1040 {
1041 size_t i = 0;
1042 item_number *itemset = node->state->items;
1043 size_t r;
1044 for (r = 0; r < node->state->reductions->num; ++r)
1045 {
1046 rule *this_rule = node->state->reductions->rules[r];
1047 bitset lookahead_set =
1048 node->state->reductions->lookahead_tokens[r];
1049 if (item_number_is_rule_number (*this_rule->rhs))
1050 ielr_compute_goto_follow_set (follow_kernel_items,
1051 always_follows, node,
1052 this_rule->lhs, lookahead_set);
1053 else if (node->lookaheads)
1054 {
1055 /* We don't need to start the kernel item search back at
1056 i=0 because both items and reductions are sorted on rule
1057 number. */
1058 while (!item_number_is_rule_number (ritem[itemset[i]])
1059 || item_number_as_rule_number (ritem[itemset[i]])
1060 != this_rule->number)
1061 {
1062 ++i;
1063 aver (i < node->state->nitems);
1064 }
1065 if (node->lookaheads[i])
1066 bitset_copy (lookahead_set, node->lookaheads[i]);
1067 }
1068 }
1069 }
1070 timevar_pop (TV_IELR_PHASE4);
1071 }
1072
1073 /* Free state list. */
1074 while (first_state)
1075 {
1076 state_list *node = first_state;
1077 if (node->lookaheads)
1078 {
1079 size_t i;
1080 for (i = 0; i < node->state->nitems; ++i)
1081 if (node->lookaheads[i])
1082 bitset_free (node->lookaheads[i]);
1083 free (node->lookaheads);
1084 }
1085 first_state = node->next;
1086 free (node);
1087 }
1088 }
1089
1090 void
1091 ielr (void)
1092 {
1093 LrType lr_type;
1094
1095 /* Examine user options. */
1096 {
1097 char *type = muscle_percent_define_get ("lr.type");
1098 if (0 == strcmp (type, "lalr"))
1099 lr_type = LR_TYPE__LALR;
1100 else if (0 == strcmp (type, "ielr"))
1101 lr_type = LR_TYPE__IELR;
1102 else if (0 == strcmp (type, "canonical-lr"))
1103 lr_type = LR_TYPE__CANONICAL_LR;
1104 else
1105 aver (false);
1106 free (type);
1107 }
1108
1109 /* Phase 0: LALR(1). */
1110 timevar_push (TV_LALR);
1111 if (lr_type == LR_TYPE__CANONICAL_LR)
1112 set_goto_map ();
1113 else
1114 lalr ();
1115 if (lr_type == LR_TYPE__LALR)
1116 {
1117 bitsetv_free (goto_follows);
1118 timevar_pop (TV_LALR);
1119 return;
1120 }
1121 timevar_pop (TV_LALR);
1122
1123 {
1124 bitsetv follow_kernel_items;
1125 bitsetv always_follows;
1126 InadequacyList **inadequacy_lists = NULL;
1127 AnnotationList **annotation_lists = NULL;
1128 struct obstack annotations_obstack;
1129 AnnotationIndex max_annotations = 0;
1130
1131 {
1132 /* Phase 1: Compute Auxiliary Tables. */
1133 state ***predecessors;
1134 timevar_push (TV_IELR_PHASE1);
1135 ielr_compute_auxiliary_tables (
1136 &follow_kernel_items, &always_follows,
1137 lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors);
1138 timevar_pop (TV_IELR_PHASE1);
1139
1140 /* Phase 2: Compute Annotations. */
1141 timevar_push (TV_IELR_PHASE2);
1142 if (lr_type != LR_TYPE__CANONICAL_LR)
1143 {
1144 obstack_init (&annotations_obstack);
1145 ielr_compute_annotation_lists (follow_kernel_items, always_follows,
1146 predecessors, &max_annotations,
1147 &inadequacy_lists, &annotation_lists,
1148 &annotations_obstack);
1149 {
1150 state_number i;
1151 for (i = 0; i < nstates; ++i)
1152 free (predecessors[i]);
1153 }
1154 free (predecessors);
1155 bitsetv_free (goto_follows);
1156 lalr_free ();
1157 }
1158 timevar_pop (TV_IELR_PHASE2);
1159 }
1160
1161 /* Phase 3: Split States. */
1162 timevar_push (TV_IELR_PHASE3);
1163 {
1164 state_number nstates_lr0 = nstates;
1165 ielr_split_states (follow_kernel_items, always_follows,
1166 annotation_lists, max_annotations);
1167 if (inadequacy_lists)
1168 {
1169 state_number i;
1170 for (i = 0; i < nstates_lr0; ++i)
1171 InadequacyList__delete (inadequacy_lists[i]);
1172 }
1173 }
1174 free (inadequacy_lists);
1175 if (annotation_lists)
1176 obstack_free (&annotations_obstack, NULL);
1177 free (annotation_lists);
1178 bitsetv_free (follow_kernel_items);
1179 bitsetv_free (always_follows);
1180 timevar_pop (TV_IELR_PHASE3);
1181 }
1182
1183 /* Phase 4: Compute Reduction Lookaheads. */
1184 timevar_push (TV_IELR_PHASE4);
1185 free (goto_map);
1186 free (from_state);
1187 free (to_state);
1188 if (lr_type == LR_TYPE__CANONICAL_LR)
1189 {
1190 // Reduction lookaheads are computed in ielr_split_states above but are
1191 // timed as part of phase 4.
1192 set_goto_map ();
1193 }
1194 else
1195 {
1196 lalr ();
1197 bitsetv_free (goto_follows);
1198 }
1199 timevar_pop (TV_IELR_PHASE4);
1200 }