]>
Commit | Line | Data |
---|---|---|
742e4900 | 1 | /* Compute lookahead criteria for Bison. |
5362cd5f | 2 | |
e9ad4aec | 3 | Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004, 2005, |
75ad86ee | 4 | 2006, 2007 Free Software Foundation, Inc. |
d0fb370f | 5 | |
340ef489 | 6 | This file is part of Bison, the GNU Compiler Compiler. |
d0fb370f | 7 | |
f16b0819 | 8 | This program is free software: you can redistribute it and/or modify |
340ef489 | 9 | it under the terms of the GNU General Public License as published by |
f16b0819 PE |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. | |
d0fb370f | 12 | |
f16b0819 | 13 | This program is distributed in the hope that it will be useful, |
340ef489 AD |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
d0fb370f | 17 | |
340ef489 | 18 | You should have received a copy of the GNU General Public License |
f16b0819 | 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
d0fb370f RS |
20 | |
21 | ||
720d742f | 22 | /* Compute how to make the finite state machine deterministic; find |
742e4900 | 23 | which rules need lookahead in each state, and which lookahead |
340ef489 | 24 | tokens they accept. */ |
d0fb370f | 25 | |
2cec9080 | 26 | #include <config.h> |
d0fb370f | 27 | #include "system.h" |
5dde5425 PE |
28 | |
29 | #include <bitset.h> | |
30 | #include <bitsetv.h> | |
31 | #include <quotearg.h> | |
32 | ||
b2ca4022 | 33 | #include "LR0.h" |
a0f6b076 | 34 | #include "complain.h" |
340ef489 | 35 | #include "derives.h" |
f67d13aa | 36 | #include "getargs.h" |
5dde5425 PE |
37 | #include "gram.h" |
38 | #include "lalr.h" | |
39 | #include "nullable.h" | |
40 | #include "reader.h" | |
41 | #include "relation.h" | |
42 | #include "symtab.h" | |
d0fb370f | 43 | |
4cf44c00 PE |
44 | goto_number *goto_map; |
45 | static goto_number ngotos; | |
46 | state_number *from_state; | |
47 | state_number *to_state; | |
e68e0410 AD |
48 | |
49 | /* Linked list of goto numbers. */ | |
5dde5425 | 50 | typedef struct goto_list |
e68e0410 | 51 | { |
5dde5425 PE |
52 | struct goto_list *next; |
53 | goto_number value; | |
54 | } goto_list; | |
e68e0410 AD |
55 | |
56 | ||
b09f4f48 | 57 | /* LA is an NLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule |
3325ddc4 AD |
58 | LArule[l] is applicable in the appropriate state when the next |
59 | token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j, | |
60 | it is a conflict. */ | |
61 | ||
62 | static bitsetv LA = NULL; | |
d200e455 | 63 | size_t nLA; |
9703cc49 | 64 | |
d0fb370f | 65 | |
fe961097 | 66 | /* And for the famous F variable, which name is so descriptive that a |
ddcd5fdf | 67 | comment is hardly needed. <grin>. */ |
914feea9 | 68 | static bitsetv F = NULL; |
ddcd5fdf | 69 | |
5dde5425 PE |
70 | static goto_number **includes; |
71 | static goto_list **lookback; | |
aa2aab3c AD |
72 | |
73 | ||
720d742f | 74 | |
bb527fc2 | 75 | |
4a120d45 | 76 | static void |
d2729d44 | 77 | set_goto_map (void) |
d0fb370f | 78 | { |
5dde5425 PE |
79 | state_number s; |
80 | goto_number *temp_map; | |
d0fb370f | 81 | |
da2a7671 PE |
82 | goto_map = xcalloc (nvars + 1, sizeof *goto_map); |
83 | temp_map = xnmalloc (nvars + 1, sizeof *temp_map); | |
d0fb370f RS |
84 | |
85 | ngotos = 0; | |
5dde5425 | 86 | for (s = 0; s < nstates; ++s) |
7215de24 | 87 | { |
5dde5425 | 88 | transitions *sp = states[s]->transitions; |
d57650a5 | 89 | int i; |
ccaf65bc | 90 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
7215de24 | 91 | { |
7215de24 | 92 | ngotos++; |
60e3ecc7 PE |
93 | |
94 | /* Abort if (ngotos + 1) would overflow. */ | |
4f82b42a | 95 | aver (ngotos != GOTO_NUMBER_MAXIMUM); |
60e3ecc7 | 96 | |
5362cd5f | 97 | goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; |
7215de24 AD |
98 | } |
99 | } | |
d0fb370f | 100 | |
d0b0fefa | 101 | { |
60e3ecc7 | 102 | goto_number k = 0; |
d57650a5 | 103 | int i; |
d0b0fefa AD |
104 | for (i = ntokens; i < nsyms; i++) |
105 | { | |
5362cd5f PE |
106 | temp_map[i - ntokens] = k; |
107 | k += goto_map[i - ntokens]; | |
d0b0fefa | 108 | } |
d0fb370f | 109 | |
d0b0fefa | 110 | for (i = ntokens; i < nsyms; i++) |
5362cd5f | 111 | goto_map[i - ntokens] = temp_map[i - ntokens]; |
d0fb370f | 112 | |
5362cd5f PE |
113 | goto_map[nsyms - ntokens] = ngotos; |
114 | temp_map[nsyms - ntokens] = ngotos; | |
d0b0fefa | 115 | } |
d0fb370f | 116 | |
da2a7671 PE |
117 | from_state = xcalloc (ngotos, sizeof *from_state); |
118 | to_state = xcalloc (ngotos, sizeof *to_state); | |
d0fb370f | 119 | |
5dde5425 | 120 | for (s = 0; s < nstates; ++s) |
d0fb370f | 121 | { |
5dde5425 | 122 | transitions *sp = states[s]->transitions; |
d57650a5 | 123 | int i; |
ccaf65bc | 124 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
d0fb370f | 125 | { |
60e3ecc7 | 126 | goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; |
5dde5425 | 127 | from_state[k] = s; |
640748ee | 128 | to_state[k] = sp->states[i]->number; |
d0fb370f RS |
129 | } |
130 | } | |
131 | ||
afbb696d | 132 | free (temp_map); |
d0fb370f RS |
133 | } |
134 | ||
135 | ||
136 | ||
43591cec AD |
137 | /*----------------------------------------------------------. |
138 | | Map a state/symbol pair into its numeric representation. | | |
139 | `----------------------------------------------------------*/ | |
d0fb370f | 140 | |
60e3ecc7 | 141 | static goto_number |
5dde5425 | 142 | map_goto (state_number s0, symbol_number sym) |
d0fb370f | 143 | { |
60e3ecc7 PE |
144 | goto_number high; |
145 | goto_number low; | |
146 | goto_number middle; | |
5dde5425 | 147 | state_number s; |
d0fb370f | 148 | |
5362cd5f PE |
149 | low = goto_map[sym - ntokens]; |
150 | high = goto_map[sym - ntokens + 1] - 1; | |
d0fb370f | 151 | |
58a84254 | 152 | for (;;) |
d0fb370f | 153 | { |
4f82b42a | 154 | aver (low <= high); |
d0fb370f RS |
155 | middle = (low + high) / 2; |
156 | s = from_state[middle]; | |
5dde5425 | 157 | if (s == s0) |
36281465 | 158 | return middle; |
5dde5425 | 159 | else if (s < s0) |
d0fb370f RS |
160 | low = middle + 1; |
161 | else | |
162 | high = middle - 1; | |
163 | } | |
d0fb370f RS |
164 | } |
165 | ||
166 | ||
4a120d45 | 167 | static void |
d2729d44 | 168 | initialize_F (void) |
d0fb370f | 169 | { |
da2a7671 PE |
170 | goto_number **reads = xnmalloc (ngotos, sizeof *reads); |
171 | goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); | |
60e3ecc7 | 172 | goto_number nedges = 0; |
d0fb370f | 173 | |
60e3ecc7 | 174 | goto_number i; |
d0fb370f | 175 | |
914feea9 | 176 | F = bitsetv_create (ngotos, ntokens, BITSET_FIXED); |
d0fb370f | 177 | |
d0fb370f RS |
178 | for (i = 0; i < ngotos; i++) |
179 | { | |
5dde5425 PE |
180 | state_number stateno = to_state[i]; |
181 | transitions *sp = states[stateno]->transitions; | |
d0fb370f | 182 | |
d954473d | 183 | int j; |
640748ee | 184 | FOR_EACH_SHIFT (sp, j) |
ccaf65bc | 185 | bitset_set (F[i], TRANSITION_SYMBOL (sp, j)); |
d0fb370f | 186 | |
ccaf65bc | 187 | for (; j < sp->num; j++) |
d954473d | 188 | { |
5dde5425 | 189 | symbol_number sym = TRANSITION_SYMBOL (sp, j); |
5362cd5f | 190 | if (nullable[sym - ntokens]) |
5dde5425 | 191 | edge[nedges++] = map_goto (stateno, sym); |
d954473d | 192 | } |
a0f6b076 | 193 | |
da2a7671 PE |
194 | if (nedges == 0) |
195 | reads[i] = NULL; | |
196 | else | |
d954473d | 197 | { |
da2a7671 PE |
198 | reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]); |
199 | memcpy (reads[i], edge, nedges * sizeof edge[0]); | |
60e3ecc7 | 200 | reads[i][nedges] = END_NODE; |
d954473d | 201 | nedges = 0; |
d0fb370f | 202 | } |
d0fb370f RS |
203 | } |
204 | ||
0e4d5753 | 205 | relation_digraph (reads, ngotos, &F); |
d0fb370f RS |
206 | |
207 | for (i = 0; i < ngotos; i++) | |
afbb696d | 208 | free (reads[i]); |
d0fb370f | 209 | |
afbb696d PE |
210 | free (reads); |
211 | free (edge); | |
d0fb370f RS |
212 | } |
213 | ||
214 | ||
4a120d45 | 215 | static void |
60e3ecc7 | 216 | add_lookback_edge (state *s, rule *r, goto_number gotono) |
d0fb370f | 217 | { |
5dde5425 | 218 | int ri = state_reduction_find (s, r); |
da2a7671 | 219 | goto_list *sp = xmalloc (sizeof *sp); |
742e4900 | 220 | sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri]; |
d0fb370f | 221 | sp->value = gotono; |
742e4900 | 222 | lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp; |
d0fb370f RS |
223 | } |
224 | ||
225 | ||
d0fb370f | 226 | |
4a120d45 | 227 | static void |
720d742f | 228 | build_relations (void) |
d0fb370f | 229 | { |
da2a7671 PE |
230 | goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); |
231 | state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1); | |
60e3ecc7 | 232 | goto_number i; |
720d742f | 233 | |
da2a7671 | 234 | includes = xnmalloc (ngotos, sizeof *includes); |
d0fb370f RS |
235 | |
236 | for (i = 0; i < ngotos; i++) | |
237 | { | |
9887c18a | 238 | int nedges = 0; |
5dde5425 PE |
239 | symbol_number symbol1 = states[to_state[i]]->accessing_symbol; |
240 | rule **rulep; | |
d0fb370f | 241 | |
5362cd5f | 242 | for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++) |
720d742f | 243 | { |
d0829076 | 244 | bool done; |
80a69750 | 245 | int length = 1; |
e9ad4aec | 246 | item_number const *rp; |
5dde5425 PE |
247 | state *s = states[from_state[i]]; |
248 | states1[0] = s->number; | |
d0fb370f | 249 | |
e9ad4aec | 250 | for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++) |
720d742f | 251 | { |
5dde5425 PE |
252 | s = transitions_to (s->transitions, |
253 | item_number_as_symbol_number (*rp)); | |
254 | states1[length++] = s->number; | |
720d742f AD |
255 | } |
256 | ||
5dde5425 PE |
257 | if (!s->consistent) |
258 | add_lookback_edge (s, *rulep, i); | |
720d742f AD |
259 | |
260 | length--; | |
d0829076 | 261 | done = false; |
720d742f AD |
262 | while (!done) |
263 | { | |
d0829076 | 264 | done = true; |
fa6aa7b3 | 265 | /* Each rhs ends in a rule number, and there is a |
e9ad4aec PE |
266 | sentinel before the first rhs, so it is safe to |
267 | decrement RP here. */ | |
720d742f | 268 | rp--; |
e9ad4aec | 269 | if (ISVAR (*rp)) |
720d742f | 270 | { |
5dde5425 | 271 | /* Downcasting from item_number to symbol_number. */ |
5fbb0954 | 272 | edge[nedges++] = map_goto (states1[--length], |
a49aecd5 | 273 | item_number_as_symbol_number (*rp)); |
5362cd5f | 274 | if (nullable[*rp - ntokens]) |
d0829076 | 275 | done = false; |
720d742f AD |
276 | } |
277 | } | |
d0fb370f RS |
278 | } |
279 | ||
da2a7671 PE |
280 | if (nedges == 0) |
281 | includes[i] = NULL; | |
282 | else | |
720d742f | 283 | { |
9887c18a | 284 | int j; |
da2a7671 | 285 | includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]); |
720d742f | 286 | for (j = 0; j < nedges; j++) |
80a69750 | 287 | includes[i][j] = edge[j]; |
60e3ecc7 | 288 | includes[i][nedges] = END_NODE; |
720d742f | 289 | } |
d0fb370f RS |
290 | } |
291 | ||
afbb696d PE |
292 | free (edge); |
293 | free (states1); | |
9887c18a | 294 | |
0e4d5753 | 295 | relation_transpose (&includes, ngotos); |
d0fb370f RS |
296 | } |
297 | ||
298 | ||
720d742f | 299 | |
4a120d45 | 300 | static void |
720d742f | 301 | compute_FOLLOWS (void) |
d0fb370f | 302 | { |
60e3ecc7 | 303 | goto_number i; |
d0fb370f | 304 | |
0e4d5753 | 305 | relation_digraph (includes, ngotos, &F); |
d0fb370f RS |
306 | |
307 | for (i = 0; i < ngotos; i++) | |
afbb696d | 308 | free (includes[i]); |
d0fb370f | 309 | |
afbb696d | 310 | free (includes); |
d0fb370f RS |
311 | } |
312 | ||
313 | ||
4a120d45 | 314 | static void |
742e4900 | 315 | compute_lookahead_tokens (void) |
d0fb370f | 316 | { |
d200e455 | 317 | size_t i; |
5dde5425 | 318 | goto_list *sp; |
d0fb370f | 319 | |
d200e455 | 320 | for (i = 0; i < nLA; i++) |
ddcd5fdf | 321 | for (sp = lookback[i]; sp; sp = sp->next) |
0fb1ffb1 | 322 | bitset_or (LA[i], LA[i], F[sp->value]); |
d0fb370f | 323 | |
ddcd5fdf | 324 | /* Free LOOKBACK. */ |
d200e455 | 325 | for (i = 0; i < nLA; i++) |
5dde5425 | 326 | LIST_FREE (goto_list, lookback[i]); |
d0fb370f | 327 | |
afbb696d | 328 | free (lookback); |
914feea9 | 329 | bitsetv_free (F); |
720d742f | 330 | } |
d0fb370f | 331 | |
d0fb370f | 332 | |
742e4900 JD |
333 | /*----------------------------------------------------. |
334 | | Count the number of lookahead tokens required for S | | |
335 | | (N_LOOKAHEAD_TOKENS member). | | |
336 | `----------------------------------------------------*/ | |
5edafffd | 337 | |
cd08e51e | 338 | static int |
742e4900 | 339 | state_lookahead_tokens_count (state *s) |
5edafffd | 340 | { |
742e4900 | 341 | int n_lookahead_tokens = 0; |
5dde5425 PE |
342 | reductions *rp = s->reductions; |
343 | transitions *sp = s->transitions; | |
cd08e51e | 344 | |
742e4900 | 345 | /* We need a lookahead either to distinguish different |
cd08e51e AD |
346 | reductions (i.e., there are two or more), or to distinguish a |
347 | reduction from a shift. Otherwise, it is straightforward, | |
2a6b783d JD |
348 | and the state is `consistent'. There is no need to check that |
349 | transition 0 hasn't been disabled before checking if it is a | |
350 | shift since transitions are only disabled during conflict | |
351 | resolution, and that hasn't happened yet. */ | |
352 | aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0)); | |
cd08e51e | 353 | if (rp->num > 1 |
2a6b783d | 354 | || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0))) |
742e4900 | 355 | n_lookahead_tokens += rp->num; |
cd08e51e | 356 | else |
5dde5425 | 357 | s->consistent = 1; |
cd08e51e | 358 | |
742e4900 | 359 | return n_lookahead_tokens; |
5edafffd AD |
360 | } |
361 | ||
7c6b64d0 | 362 | |
742e4900 JD |
363 | /*----------------------------------------------------. |
364 | | Compute LA, NLA, and the lookahead_tokens members. | | |
365 | `----------------------------------------------------*/ | |
c0263492 AD |
366 | |
367 | static void | |
cd08e51e | 368 | initialize_LA (void) |
c0263492 | 369 | { |
5dde5425 | 370 | state_number i; |
cd08e51e | 371 | bitsetv pLA; |
c0263492 | 372 | |
742e4900 | 373 | /* Compute the total number of reductions requiring a lookahead. */ |
cd08e51e AD |
374 | nLA = 0; |
375 | for (i = 0; i < nstates; i++) | |
742e4900 | 376 | nLA += state_lookahead_tokens_count (states[i]); |
cd08e51e AD |
377 | /* Avoid having to special case 0. */ |
378 | if (!nLA) | |
379 | nLA = 1; | |
380 | ||
381 | pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED); | |
da2a7671 | 382 | lookback = xcalloc (nLA, sizeof *lookback); |
cd08e51e | 383 | |
742e4900 JD |
384 | /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions |
385 | require lookahead tokens. */ | |
c0263492 AD |
386 | for (i = 0; i < nstates; i++) |
387 | { | |
742e4900 | 388 | int count = state_lookahead_tokens_count (states[i]); |
cd08e51e AD |
389 | if (count) |
390 | { | |
742e4900 | 391 | states[i]->reductions->lookahead_tokens = pLA; |
cd08e51e AD |
392 | pLA += count; |
393 | } | |
c0263492 AD |
394 | } |
395 | } | |
396 | ||
397 | ||
742e4900 JD |
398 | /*---------------------------------------------. |
399 | | Output the lookahead tokens for each state. | | |
400 | `---------------------------------------------*/ | |
7c6b64d0 AD |
401 | |
402 | static void | |
742e4900 | 403 | lookahead_tokens_print (FILE *out) |
7c6b64d0 | 404 | { |
5dde5425 | 405 | state_number i; |
602bbf31 | 406 | int j, k; |
742e4900 | 407 | fprintf (out, "Lookahead tokens: BEGIN\n"); |
7c6b64d0 AD |
408 | for (i = 0; i < nstates; ++i) |
409 | { | |
5dde5425 | 410 | reductions *reds = states[i]->reductions; |
613f5e1a | 411 | bitset_iterator iter; |
742e4900 | 412 | int n_lookahead_tokens = 0; |
cd08e51e | 413 | |
742e4900 | 414 | if (reds->lookahead_tokens) |
cd08e51e | 415 | for (k = 0; k < reds->num; ++k) |
742e4900 JD |
416 | if (reds->lookahead_tokens[k]) |
417 | ++n_lookahead_tokens; | |
613f5e1a | 418 | |
742e4900 JD |
419 | fprintf (out, "State %d: %d lookahead tokens\n", |
420 | i, n_lookahead_tokens); | |
7c6b64d0 | 421 | |
742e4900 | 422 | if (reds->lookahead_tokens) |
cd08e51e | 423 | for (j = 0; j < reds->num; ++j) |
742e4900 | 424 | BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0) |
cd08e51e | 425 | { |
427c0dda | 426 | fprintf (out, " on %d (%s) -> rule %d\n", |
cd08e51e AD |
427 | k, symbols[k]->tag, |
428 | reds->rules[j]->number); | |
429 | }; | |
7c6b64d0 | 430 | } |
742e4900 | 431 | fprintf (out, "Lookahead tokens: END\n"); |
7c6b64d0 AD |
432 | } |
433 | ||
720d742f AD |
434 | void |
435 | lalr (void) | |
436 | { | |
720d742f AD |
437 | initialize_LA (); |
438 | set_goto_map (); | |
439 | initialize_F (); | |
440 | build_relations (); | |
441 | compute_FOLLOWS (); | |
742e4900 | 442 | compute_lookahead_tokens (); |
7c6b64d0 | 443 | |
273a74fa | 444 | if (trace_flag & trace_sets) |
742e4900 | 445 | lookahead_tokens_print (stderr); |
d0fb370f | 446 | } |
3325ddc4 AD |
447 | |
448 | ||
5967f0cf JD |
449 | void |
450 | lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old) | |
451 | { | |
452 | goto_number ngotos_reachable = 0; | |
453 | symbol_number nonterminal = 0; | |
454 | aver (nsyms == nvars + ntokens); | |
14462c2b JD |
455 | { |
456 | goto_number i; | |
457 | for (i = 0; i < ngotos; ++i) | |
458 | { | |
459 | while (i == goto_map[nonterminal]) | |
460 | goto_map[nonterminal++] = ngotos_reachable; | |
461 | /* If old_to_new[from_state[i]] = nstates_old, remove this goto | |
462 | entry. */ | |
463 | if (old_to_new[from_state[i]] != nstates_old) | |
464 | { | |
465 | /* from_state[i] is not removed, so it and thus to_state[i] are | |
466 | reachable, so to_state[i] != nstates_old. */ | |
467 | aver (old_to_new[to_state[i]] != nstates_old); | |
468 | from_state[ngotos_reachable] = old_to_new[from_state[i]]; | |
469 | to_state[ngotos_reachable] = old_to_new[to_state[i]]; | |
470 | ++ngotos_reachable; | |
471 | } | |
472 | } | |
473 | } | |
5967f0cf JD |
474 | while (nonterminal <= nvars) |
475 | { | |
476 | aver (ngotos == goto_map[nonterminal]); | |
477 | goto_map[nonterminal++] = ngotos_reachable; | |
478 | } | |
479 | ngotos = ngotos_reachable; | |
480 | } | |
481 | ||
482 | ||
3325ddc4 AD |
483 | void |
484 | lalr_free (void) | |
485 | { | |
5dde5425 | 486 | state_number s; |
3325ddc4 | 487 | for (s = 0; s < nstates; ++s) |
742e4900 | 488 | states[s]->reductions->lookahead_tokens = NULL; |
3325ddc4 | 489 | bitsetv_free (LA); |
3325ddc4 | 490 | } |