]>
Commit | Line | Data |
---|---|---|
742e4900 | 1 | /* Compute lookahead criteria for Bison. |
5362cd5f | 2 | |
3209eb1c | 3 | Copyright (C) 1984, 1986, 1989, 2000-2015 Free Software Foundation, |
575619af | 4 | Inc. |
d0fb370f | 5 | |
340ef489 | 6 | This file is part of Bison, the GNU Compiler Compiler. |
d0fb370f | 7 | |
f16b0819 | 8 | This program is free software: you can redistribute it and/or modify |
340ef489 | 9 | it under the terms of the GNU General Public License as published by |
f16b0819 PE |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. | |
d0fb370f | 12 | |
f16b0819 | 13 | This program is distributed in the hope that it will be useful, |
340ef489 AD |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
d0fb370f | 17 | |
340ef489 | 18 | You should have received a copy of the GNU General Public License |
f16b0819 | 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
d0fb370f RS |
20 | |
21 | ||
fca9c5ef | 22 | /* Find which rules need lookahead in each state, and which lookahead |
340ef489 | 23 | tokens they accept. */ |
d0fb370f | 24 | |
2cec9080 | 25 | #include <config.h> |
d0fb370f | 26 | #include "system.h" |
5dde5425 PE |
27 | |
28 | #include <bitset.h> | |
29 | #include <bitsetv.h> | |
5dde5425 | 30 | |
b2ca4022 | 31 | #include "LR0.h" |
a0f6b076 | 32 | #include "complain.h" |
340ef489 | 33 | #include "derives.h" |
f67d13aa | 34 | #include "getargs.h" |
5dde5425 PE |
35 | #include "gram.h" |
36 | #include "lalr.h" | |
7254f6a8 | 37 | #include "muscle-tab.h" |
5dde5425 PE |
38 | #include "nullable.h" |
39 | #include "reader.h" | |
40 | #include "relation.h" | |
41 | #include "symtab.h" | |
d0fb370f | 42 | |
4cf44c00 | 43 | goto_number *goto_map; |
db34f798 | 44 | goto_number ngotos; |
4cf44c00 PE |
45 | state_number *from_state; |
46 | state_number *to_state; | |
db34f798 | 47 | bitsetv goto_follows = NULL; |
e68e0410 AD |
48 | |
49 | /* Linked list of goto numbers. */ | |
5dde5425 | 50 | typedef struct goto_list |
e68e0410 | 51 | { |
5dde5425 PE |
52 | struct goto_list *next; |
53 | goto_number value; | |
54 | } goto_list; | |
e68e0410 AD |
55 | |
56 | ||
b09f4f48 | 57 | /* LA is an NLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule |
3325ddc4 AD |
58 | LArule[l] is applicable in the appropriate state when the next |
59 | token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j, | |
60 | it is a conflict. */ | |
61 | ||
62 | static bitsetv LA = NULL; | |
d200e455 | 63 | size_t nLA; |
9703cc49 | 64 | |
d0fb370f | 65 | |
5dde5425 PE |
66 | static goto_number **includes; |
67 | static goto_list **lookback; | |
aa2aab3c AD |
68 | |
69 | ||
db34f798 | 70 | void |
d2729d44 | 71 | set_goto_map (void) |
d0fb370f | 72 | { |
5dde5425 | 73 | state_number s; |
a339aef4 | 74 | goto_number *temp_map = xnmalloc (nvars + 1, sizeof *temp_map); |
d0fb370f | 75 | |
da2a7671 | 76 | goto_map = xcalloc (nvars + 1, sizeof *goto_map); |
d0fb370f | 77 | ngotos = 0; |
5dde5425 | 78 | for (s = 0; s < nstates; ++s) |
7215de24 | 79 | { |
5dde5425 | 80 | transitions *sp = states[s]->transitions; |
d57650a5 | 81 | int i; |
ccaf65bc | 82 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
e9690142 JD |
83 | { |
84 | ngotos++; | |
60e3ecc7 | 85 | |
e9690142 JD |
86 | /* Abort if (ngotos + 1) would overflow. */ |
87 | aver (ngotos != GOTO_NUMBER_MAXIMUM); | |
60e3ecc7 | 88 | |
e9690142 JD |
89 | goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; |
90 | } | |
7215de24 | 91 | } |
d0fb370f | 92 | |
d0b0fefa | 93 | { |
60e3ecc7 | 94 | goto_number k = 0; |
d57650a5 | 95 | int i; |
d0b0fefa AD |
96 | for (i = ntokens; i < nsyms; i++) |
97 | { | |
e9690142 JD |
98 | temp_map[i - ntokens] = k; |
99 | k += goto_map[i - ntokens]; | |
d0b0fefa | 100 | } |
d0fb370f | 101 | |
d0b0fefa | 102 | for (i = ntokens; i < nsyms; i++) |
5362cd5f | 103 | goto_map[i - ntokens] = temp_map[i - ntokens]; |
d0fb370f | 104 | |
5362cd5f PE |
105 | goto_map[nsyms - ntokens] = ngotos; |
106 | temp_map[nsyms - ntokens] = ngotos; | |
d0b0fefa | 107 | } |
d0fb370f | 108 | |
da2a7671 PE |
109 | from_state = xcalloc (ngotos, sizeof *from_state); |
110 | to_state = xcalloc (ngotos, sizeof *to_state); | |
d0fb370f | 111 | |
5dde5425 | 112 | for (s = 0; s < nstates; ++s) |
d0fb370f | 113 | { |
5dde5425 | 114 | transitions *sp = states[s]->transitions; |
d57650a5 | 115 | int i; |
ccaf65bc | 116 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
e9690142 JD |
117 | { |
118 | goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; | |
119 | from_state[k] = s; | |
120 | to_state[k] = sp->states[i]->number; | |
121 | } | |
d0fb370f RS |
122 | } |
123 | ||
afbb696d | 124 | free (temp_map); |
d0fb370f RS |
125 | } |
126 | ||
127 | ||
db34f798 | 128 | goto_number |
5dde5425 | 129 | map_goto (state_number s0, symbol_number sym) |
d0fb370f | 130 | { |
a339aef4 AD |
131 | goto_number low = goto_map[sym - ntokens]; |
132 | goto_number high = goto_map[sym - ntokens + 1] - 1; | |
d0fb370f | 133 | |
58a84254 | 134 | for (;;) |
d0fb370f | 135 | { |
a339aef4 AD |
136 | goto_number middle; |
137 | state_number s; | |
4f82b42a | 138 | aver (low <= high); |
d0fb370f RS |
139 | middle = (low + high) / 2; |
140 | s = from_state[middle]; | |
5dde5425 | 141 | if (s == s0) |
e9690142 | 142 | return middle; |
5dde5425 | 143 | else if (s < s0) |
e9690142 | 144 | low = middle + 1; |
d0fb370f | 145 | else |
e9690142 | 146 | high = middle - 1; |
d0fb370f | 147 | } |
d0fb370f RS |
148 | } |
149 | ||
150 | ||
4a120d45 | 151 | static void |
d2729d44 | 152 | initialize_F (void) |
d0fb370f | 153 | { |
da2a7671 PE |
154 | goto_number **reads = xnmalloc (ngotos, sizeof *reads); |
155 | goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); | |
60e3ecc7 | 156 | goto_number nedges = 0; |
d0fb370f | 157 | |
60e3ecc7 | 158 | goto_number i; |
d0fb370f | 159 | |
db34f798 | 160 | goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED); |
d0fb370f | 161 | |
d0fb370f RS |
162 | for (i = 0; i < ngotos; i++) |
163 | { | |
5dde5425 PE |
164 | state_number stateno = to_state[i]; |
165 | transitions *sp = states[stateno]->transitions; | |
d0fb370f | 166 | |
d954473d | 167 | int j; |
640748ee | 168 | FOR_EACH_SHIFT (sp, j) |
e9690142 | 169 | bitset_set (goto_follows[i], TRANSITION_SYMBOL (sp, j)); |
d0fb370f | 170 | |
ccaf65bc | 171 | for (; j < sp->num; j++) |
e9690142 JD |
172 | { |
173 | symbol_number sym = TRANSITION_SYMBOL (sp, j); | |
174 | if (nullable[sym - ntokens]) | |
175 | edge[nedges++] = map_goto (stateno, sym); | |
176 | } | |
a0f6b076 | 177 | |
da2a7671 | 178 | if (nedges == 0) |
e9690142 | 179 | reads[i] = NULL; |
da2a7671 | 180 | else |
e9690142 JD |
181 | { |
182 | reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]); | |
183 | memcpy (reads[i], edge, nedges * sizeof edge[0]); | |
184 | reads[i][nedges] = END_NODE; | |
185 | nedges = 0; | |
186 | } | |
d0fb370f RS |
187 | } |
188 | ||
db34f798 | 189 | relation_digraph (reads, ngotos, &goto_follows); |
d0fb370f RS |
190 | |
191 | for (i = 0; i < ngotos; i++) | |
afbb696d | 192 | free (reads[i]); |
d0fb370f | 193 | |
afbb696d PE |
194 | free (reads); |
195 | free (edge); | |
d0fb370f RS |
196 | } |
197 | ||
198 | ||
4a120d45 | 199 | static void |
60e3ecc7 | 200 | add_lookback_edge (state *s, rule *r, goto_number gotono) |
d0fb370f | 201 | { |
5dde5425 | 202 | int ri = state_reduction_find (s, r); |
da2a7671 | 203 | goto_list *sp = xmalloc (sizeof *sp); |
742e4900 | 204 | sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri]; |
d0fb370f | 205 | sp->value = gotono; |
742e4900 | 206 | lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp; |
d0fb370f RS |
207 | } |
208 | ||
209 | ||
d0fb370f | 210 | |
4a120d45 | 211 | static void |
720d742f | 212 | build_relations (void) |
d0fb370f | 213 | { |
da2a7671 PE |
214 | goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); |
215 | state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1); | |
60e3ecc7 | 216 | goto_number i; |
720d742f | 217 | |
da2a7671 | 218 | includes = xnmalloc (ngotos, sizeof *includes); |
d0fb370f RS |
219 | |
220 | for (i = 0; i < ngotos; i++) | |
221 | { | |
9887c18a | 222 | int nedges = 0; |
5dde5425 PE |
223 | symbol_number symbol1 = states[to_state[i]]->accessing_symbol; |
224 | rule **rulep; | |
d0fb370f | 225 | |
5362cd5f | 226 | for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++) |
e9690142 JD |
227 | { |
228 | bool done; | |
229 | int length = 1; | |
230 | item_number const *rp; | |
231 | state *s = states[from_state[i]]; | |
232 | states1[0] = s->number; | |
233 | ||
234 | for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++) | |
235 | { | |
236 | s = transitions_to (s->transitions, | |
237 | item_number_as_symbol_number (*rp)); | |
238 | states1[length++] = s->number; | |
239 | } | |
240 | ||
241 | if (!s->consistent) | |
242 | add_lookback_edge (s, *rulep, i); | |
243 | ||
244 | length--; | |
245 | done = false; | |
246 | while (!done) | |
247 | { | |
248 | done = true; | |
249 | /* Each rhs ends in a rule number, and there is a | |
250 | sentinel (ritem[-1]=0) before the first rhs, so it is safe to | |
251 | decrement RP here. */ | |
252 | rp--; | |
253 | if (ISVAR (*rp)) | |
254 | { | |
255 | /* Downcasting from item_number to symbol_number. */ | |
256 | edge[nedges++] = map_goto (states1[--length], | |
257 | item_number_as_symbol_number (*rp)); | |
258 | if (nullable[*rp - ntokens]) | |
259 | done = false; | |
260 | } | |
261 | } | |
262 | } | |
d0fb370f | 263 | |
da2a7671 | 264 | if (nedges == 0) |
e9690142 | 265 | includes[i] = NULL; |
da2a7671 | 266 | else |
e9690142 JD |
267 | { |
268 | int j; | |
269 | includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]); | |
270 | for (j = 0; j < nedges; j++) | |
271 | includes[i][j] = edge[j]; | |
272 | includes[i][nedges] = END_NODE; | |
273 | } | |
d0fb370f RS |
274 | } |
275 | ||
afbb696d PE |
276 | free (edge); |
277 | free (states1); | |
9887c18a | 278 | |
0e4d5753 | 279 | relation_transpose (&includes, ngotos); |
d0fb370f RS |
280 | } |
281 | ||
282 | ||
720d742f | 283 | |
4a120d45 | 284 | static void |
720d742f | 285 | compute_FOLLOWS (void) |
d0fb370f | 286 | { |
60e3ecc7 | 287 | goto_number i; |
d0fb370f | 288 | |
db34f798 | 289 | relation_digraph (includes, ngotos, &goto_follows); |
d0fb370f RS |
290 | |
291 | for (i = 0; i < ngotos; i++) | |
afbb696d | 292 | free (includes[i]); |
d0fb370f | 293 | |
afbb696d | 294 | free (includes); |
d0fb370f RS |
295 | } |
296 | ||
297 | ||
4a120d45 | 298 | static void |
742e4900 | 299 | compute_lookahead_tokens (void) |
d0fb370f | 300 | { |
d200e455 | 301 | size_t i; |
5dde5425 | 302 | goto_list *sp; |
d0fb370f | 303 | |
d200e455 | 304 | for (i = 0; i < nLA; i++) |
ddcd5fdf | 305 | for (sp = lookback[i]; sp; sp = sp->next) |
db34f798 | 306 | bitset_or (LA[i], LA[i], goto_follows[sp->value]); |
d0fb370f | 307 | |
ddcd5fdf | 308 | /* Free LOOKBACK. */ |
d200e455 | 309 | for (i = 0; i < nLA; i++) |
5dde5425 | 310 | LIST_FREE (goto_list, lookback[i]); |
d0fb370f | 311 | |
afbb696d | 312 | free (lookback); |
720d742f | 313 | } |
d0fb370f | 314 | |
d0fb370f | 315 | |
742e4900 JD |
316 | /*----------------------------------------------------. |
317 | | Count the number of lookahead tokens required for S | | |
318 | | (N_LOOKAHEAD_TOKENS member). | | |
319 | `----------------------------------------------------*/ | |
5edafffd | 320 | |
cd08e51e | 321 | static int |
110ef36a | 322 | state_lookahead_tokens_count (state *s, bool default_reduction_only_for_accept) |
5edafffd | 323 | { |
742e4900 | 324 | int n_lookahead_tokens = 0; |
5dde5425 PE |
325 | reductions *rp = s->reductions; |
326 | transitions *sp = s->transitions; | |
cd08e51e | 327 | |
7254f6a8 JD |
328 | /* Transitions are only disabled during conflict resolution, and that |
329 | hasn't happened yet, so there should be no need to check that | |
330 | transition 0 hasn't been disabled before checking if it is a shift. | |
331 | However, this check was performed at one time, so we leave it as an | |
332 | aver. */ | |
2a6b783d | 333 | aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0)); |
7254f6a8 JD |
334 | |
335 | /* We need a lookahead either to distinguish different reductions | |
336 | (i.e., there are two or more), or to distinguish a reduction from a | |
337 | shift. Otherwise, it is straightforward, and the state is | |
45eebca4 | 338 | 'consistent'. However, do not treat a state with any reductions as |
e459fb0e JD |
339 | consistent unless it is the accepting state (because there is never |
340 | a lookahead token that makes sense there, and so no lookahead token | |
341 | should be read) if the user has otherwise disabled default | |
342 | reductions. */ | |
cd08e51e | 343 | if (rp->num > 1 |
7254f6a8 JD |
344 | || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0)) |
345 | || (rp->num == 1 && rp->rules[0]->number != 0 | |
110ef36a | 346 | && default_reduction_only_for_accept)) |
742e4900 | 347 | n_lookahead_tokens += rp->num; |
cd08e51e | 348 | else |
5dde5425 | 349 | s->consistent = 1; |
cd08e51e | 350 | |
742e4900 | 351 | return n_lookahead_tokens; |
5edafffd AD |
352 | } |
353 | ||
7c6b64d0 | 354 | |
742e4900 JD |
355 | /*----------------------------------------------------. |
356 | | Compute LA, NLA, and the lookahead_tokens members. | | |
357 | `----------------------------------------------------*/ | |
c0263492 | 358 | |
db34f798 | 359 | void |
cd08e51e | 360 | initialize_LA (void) |
c0263492 | 361 | { |
5dde5425 | 362 | state_number i; |
cd08e51e | 363 | bitsetv pLA; |
110ef36a | 364 | bool default_reduction_only_for_accept; |
7254f6a8 | 365 | { |
110ef36a | 366 | char *default_reductions = |
f3bc3386 | 367 | muscle_percent_define_get ("lr.default-reduction"); |
f518dbaf | 368 | default_reduction_only_for_accept = STREQ (default_reductions, "accepting"); |
110ef36a | 369 | free (default_reductions); |
7254f6a8 | 370 | } |
c0263492 | 371 | |
742e4900 | 372 | /* Compute the total number of reductions requiring a lookahead. */ |
cd08e51e AD |
373 | nLA = 0; |
374 | for (i = 0; i < nstates; i++) | |
7254f6a8 | 375 | nLA += |
110ef36a JD |
376 | state_lookahead_tokens_count (states[i], |
377 | default_reduction_only_for_accept); | |
cd08e51e AD |
378 | /* Avoid having to special case 0. */ |
379 | if (!nLA) | |
380 | nLA = 1; | |
381 | ||
382 | pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED); | |
cd08e51e | 383 | |
742e4900 JD |
384 | /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions |
385 | require lookahead tokens. */ | |
c0263492 AD |
386 | for (i = 0; i < nstates; i++) |
387 | { | |
7254f6a8 | 388 | int count = |
110ef36a JD |
389 | state_lookahead_tokens_count (states[i], |
390 | default_reduction_only_for_accept); | |
cd08e51e | 391 | if (count) |
e9690142 JD |
392 | { |
393 | states[i]->reductions->lookahead_tokens = pLA; | |
394 | pLA += count; | |
395 | } | |
c0263492 AD |
396 | } |
397 | } | |
398 | ||
399 | ||
742e4900 JD |
400 | /*---------------------------------------------. |
401 | | Output the lookahead tokens for each state. | | |
402 | `---------------------------------------------*/ | |
7c6b64d0 AD |
403 | |
404 | static void | |
742e4900 | 405 | lookahead_tokens_print (FILE *out) |
7c6b64d0 | 406 | { |
5dde5425 | 407 | state_number i; |
742e4900 | 408 | fprintf (out, "Lookahead tokens: BEGIN\n"); |
7c6b64d0 AD |
409 | for (i = 0; i < nstates; ++i) |
410 | { | |
5dde5425 | 411 | reductions *reds = states[i]->reductions; |
613f5e1a | 412 | bitset_iterator iter; |
742e4900 | 413 | int n_lookahead_tokens = 0; |
cd08e51e | 414 | |
742e4900 | 415 | if (reds->lookahead_tokens) |
a339aef4 AD |
416 | { |
417 | int j; | |
418 | for (j = 0; j < reds->num; ++j) | |
419 | if (reds->lookahead_tokens[j]) | |
420 | ++n_lookahead_tokens; | |
421 | } | |
613f5e1a | 422 | |
742e4900 | 423 | fprintf (out, "State %d: %d lookahead tokens\n", |
e9690142 | 424 | i, n_lookahead_tokens); |
7c6b64d0 | 425 | |
742e4900 | 426 | if (reds->lookahead_tokens) |
a339aef4 AD |
427 | { |
428 | int j, k; | |
429 | for (j = 0; j < reds->num; ++j) | |
430 | BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0) | |
431 | fprintf (out, " on %d (%s) -> rule %d\n", | |
432 | k, symbols[k]->tag, | |
433 | reds->rules[j]->number); | |
434 | } | |
7c6b64d0 | 435 | } |
742e4900 | 436 | fprintf (out, "Lookahead tokens: END\n"); |
7c6b64d0 AD |
437 | } |
438 | ||
720d742f AD |
439 | void |
440 | lalr (void) | |
441 | { | |
720d742f AD |
442 | initialize_LA (); |
443 | set_goto_map (); | |
444 | initialize_F (); | |
db34f798 | 445 | lookback = xcalloc (nLA, sizeof *lookback); |
720d742f AD |
446 | build_relations (); |
447 | compute_FOLLOWS (); | |
742e4900 | 448 | compute_lookahead_tokens (); |
7c6b64d0 | 449 | |
273a74fa | 450 | if (trace_flag & trace_sets) |
742e4900 | 451 | lookahead_tokens_print (stderr); |
d0fb370f | 452 | } |
3325ddc4 AD |
453 | |
454 | ||
5967f0cf JD |
455 | void |
456 | lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old) | |
457 | { | |
458 | goto_number ngotos_reachable = 0; | |
459 | symbol_number nonterminal = 0; | |
460 | aver (nsyms == nvars + ntokens); | |
14462c2b JD |
461 | { |
462 | goto_number i; | |
463 | for (i = 0; i < ngotos; ++i) | |
464 | { | |
465 | while (i == goto_map[nonterminal]) | |
466 | goto_map[nonterminal++] = ngotos_reachable; | |
467 | /* If old_to_new[from_state[i]] = nstates_old, remove this goto | |
468 | entry. */ | |
469 | if (old_to_new[from_state[i]] != nstates_old) | |
470 | { | |
471 | /* from_state[i] is not removed, so it and thus to_state[i] are | |
472 | reachable, so to_state[i] != nstates_old. */ | |
473 | aver (old_to_new[to_state[i]] != nstates_old); | |
474 | from_state[ngotos_reachable] = old_to_new[from_state[i]]; | |
475 | to_state[ngotos_reachable] = old_to_new[to_state[i]]; | |
476 | ++ngotos_reachable; | |
477 | } | |
478 | } | |
479 | } | |
5967f0cf JD |
480 | while (nonterminal <= nvars) |
481 | { | |
482 | aver (ngotos == goto_map[nonterminal]); | |
483 | goto_map[nonterminal++] = ngotos_reachable; | |
484 | } | |
485 | ngotos = ngotos_reachable; | |
486 | } | |
487 | ||
488 | ||
3325ddc4 AD |
489 | void |
490 | lalr_free (void) | |
491 | { | |
5dde5425 | 492 | state_number s; |
3325ddc4 | 493 | for (s = 0; s < nstates; ++s) |
742e4900 | 494 | states[s]->reductions->lookahead_tokens = NULL; |
3325ddc4 | 495 | bitsetv_free (LA); |
3325ddc4 | 496 | } |