]>
Commit | Line | Data |
---|---|---|
5362cd5f PE |
1 | /* Compute look-ahead criteria for Bison. |
2 | ||
e9ad4aec PE |
3 | Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004, 2005, |
4 | 2006 Free Software Foundation, Inc. | |
d0fb370f | 5 | |
340ef489 | 6 | This file is part of Bison, the GNU Compiler Compiler. |
d0fb370f | 7 | |
340ef489 AD |
8 | Bison is free software; you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2, or (at your option) | |
11 | any later version. | |
d0fb370f | 12 | |
340ef489 AD |
13 | Bison is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
d0fb370f | 17 | |
340ef489 AD |
18 | You should have received a copy of the GNU General Public License |
19 | along with Bison; see the file COPYING. If not, write to | |
0fb669f9 PE |
20 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
21 | Boston, MA 02110-1301, USA. */ | |
d0fb370f RS |
22 | |
23 | ||
720d742f | 24 | /* Compute how to make the finite state machine deterministic; find |
8dd162d3 | 25 | which rules need look-ahead in each state, and which look-ahead |
340ef489 | 26 | tokens they accept. */ |
d0fb370f | 27 | |
2cec9080 | 28 | #include <config.h> |
d0fb370f | 29 | #include "system.h" |
5dde5425 PE |
30 | |
31 | #include <bitset.h> | |
32 | #include <bitsetv.h> | |
33 | #include <quotearg.h> | |
34 | ||
b2ca4022 | 35 | #include "LR0.h" |
a0f6b076 | 36 | #include "complain.h" |
340ef489 | 37 | #include "derives.h" |
f67d13aa | 38 | #include "getargs.h" |
5dde5425 PE |
39 | #include "gram.h" |
40 | #include "lalr.h" | |
41 | #include "nullable.h" | |
42 | #include "reader.h" | |
43 | #include "relation.h" | |
44 | #include "symtab.h" | |
d0fb370f | 45 | |
4cf44c00 PE |
46 | goto_number *goto_map; |
47 | static goto_number ngotos; | |
48 | state_number *from_state; | |
49 | state_number *to_state; | |
e68e0410 AD |
50 | |
51 | /* Linked list of goto numbers. */ | |
5dde5425 | 52 | typedef struct goto_list |
e68e0410 | 53 | { |
5dde5425 PE |
54 | struct goto_list *next; |
55 | goto_number value; | |
56 | } goto_list; | |
e68e0410 AD |
57 | |
58 | ||
3325ddc4 AD |
59 | /* LA is a LR by NTOKENS matrix of bits. LA[l, i] is 1 if the rule |
60 | LArule[l] is applicable in the appropriate state when the next | |
61 | token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j, | |
62 | it is a conflict. */ | |
63 | ||
64 | static bitsetv LA = NULL; | |
d200e455 | 65 | size_t nLA; |
9703cc49 | 66 | |
d0fb370f | 67 | |
fe961097 | 68 | /* And for the famous F variable, which name is so descriptive that a |
ddcd5fdf | 69 | comment is hardly needed. <grin>. */ |
914feea9 | 70 | static bitsetv F = NULL; |
ddcd5fdf | 71 | |
5dde5425 PE |
72 | static goto_number **includes; |
73 | static goto_list **lookback; | |
aa2aab3c AD |
74 | |
75 | ||
720d742f | 76 | |
bb527fc2 | 77 | |
4a120d45 | 78 | static void |
d2729d44 | 79 | set_goto_map (void) |
d0fb370f | 80 | { |
5dde5425 PE |
81 | state_number s; |
82 | goto_number *temp_map; | |
d0fb370f | 83 | |
da2a7671 PE |
84 | goto_map = xcalloc (nvars + 1, sizeof *goto_map); |
85 | temp_map = xnmalloc (nvars + 1, sizeof *temp_map); | |
d0fb370f RS |
86 | |
87 | ngotos = 0; | |
5dde5425 | 88 | for (s = 0; s < nstates; ++s) |
7215de24 | 89 | { |
5dde5425 | 90 | transitions *sp = states[s]->transitions; |
d57650a5 | 91 | int i; |
ccaf65bc | 92 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
7215de24 | 93 | { |
7215de24 | 94 | ngotos++; |
60e3ecc7 PE |
95 | |
96 | /* Abort if (ngotos + 1) would overflow. */ | |
97 | if (ngotos == GOTO_NUMBER_MAXIMUM) | |
4cf44c00 | 98 | abort (); |
60e3ecc7 | 99 | |
5362cd5f | 100 | goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; |
7215de24 AD |
101 | } |
102 | } | |
d0fb370f | 103 | |
d0b0fefa | 104 | { |
60e3ecc7 | 105 | goto_number k = 0; |
d57650a5 | 106 | int i; |
d0b0fefa AD |
107 | for (i = ntokens; i < nsyms; i++) |
108 | { | |
5362cd5f PE |
109 | temp_map[i - ntokens] = k; |
110 | k += goto_map[i - ntokens]; | |
d0b0fefa | 111 | } |
d0fb370f | 112 | |
d0b0fefa | 113 | for (i = ntokens; i < nsyms; i++) |
5362cd5f | 114 | goto_map[i - ntokens] = temp_map[i - ntokens]; |
d0fb370f | 115 | |
5362cd5f PE |
116 | goto_map[nsyms - ntokens] = ngotos; |
117 | temp_map[nsyms - ntokens] = ngotos; | |
d0b0fefa | 118 | } |
d0fb370f | 119 | |
da2a7671 PE |
120 | from_state = xcalloc (ngotos, sizeof *from_state); |
121 | to_state = xcalloc (ngotos, sizeof *to_state); | |
d0fb370f | 122 | |
5dde5425 | 123 | for (s = 0; s < nstates; ++s) |
d0fb370f | 124 | { |
5dde5425 | 125 | transitions *sp = states[s]->transitions; |
d57650a5 | 126 | int i; |
ccaf65bc | 127 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
d0fb370f | 128 | { |
60e3ecc7 | 129 | goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; |
5dde5425 | 130 | from_state[k] = s; |
640748ee | 131 | to_state[k] = sp->states[i]->number; |
d0fb370f RS |
132 | } |
133 | } | |
134 | ||
afbb696d | 135 | free (temp_map); |
d0fb370f RS |
136 | } |
137 | ||
138 | ||
139 | ||
43591cec AD |
140 | /*----------------------------------------------------------. |
141 | | Map a state/symbol pair into its numeric representation. | | |
142 | `----------------------------------------------------------*/ | |
d0fb370f | 143 | |
60e3ecc7 | 144 | static goto_number |
5dde5425 | 145 | map_goto (state_number s0, symbol_number sym) |
d0fb370f | 146 | { |
60e3ecc7 PE |
147 | goto_number high; |
148 | goto_number low; | |
149 | goto_number middle; | |
5dde5425 | 150 | state_number s; |
d0fb370f | 151 | |
5362cd5f PE |
152 | low = goto_map[sym - ntokens]; |
153 | high = goto_map[sym - ntokens + 1] - 1; | |
d0fb370f | 154 | |
58a84254 | 155 | for (;;) |
d0fb370f | 156 | { |
58a84254 PE |
157 | if (high < low) |
158 | abort (); | |
d0fb370f RS |
159 | middle = (low + high) / 2; |
160 | s = from_state[middle]; | |
5dde5425 | 161 | if (s == s0) |
36281465 | 162 | return middle; |
5dde5425 | 163 | else if (s < s0) |
d0fb370f RS |
164 | low = middle + 1; |
165 | else | |
166 | high = middle - 1; | |
167 | } | |
d0fb370f RS |
168 | } |
169 | ||
170 | ||
4a120d45 | 171 | static void |
d2729d44 | 172 | initialize_F (void) |
d0fb370f | 173 | { |
da2a7671 PE |
174 | goto_number **reads = xnmalloc (ngotos, sizeof *reads); |
175 | goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); | |
60e3ecc7 | 176 | goto_number nedges = 0; |
d0fb370f | 177 | |
60e3ecc7 | 178 | goto_number i; |
d0fb370f | 179 | |
914feea9 | 180 | F = bitsetv_create (ngotos, ntokens, BITSET_FIXED); |
d0fb370f | 181 | |
d0fb370f RS |
182 | for (i = 0; i < ngotos; i++) |
183 | { | |
5dde5425 PE |
184 | state_number stateno = to_state[i]; |
185 | transitions *sp = states[stateno]->transitions; | |
d0fb370f | 186 | |
d954473d | 187 | int j; |
640748ee | 188 | FOR_EACH_SHIFT (sp, j) |
ccaf65bc | 189 | bitset_set (F[i], TRANSITION_SYMBOL (sp, j)); |
d0fb370f | 190 | |
ccaf65bc | 191 | for (; j < sp->num; j++) |
d954473d | 192 | { |
5dde5425 | 193 | symbol_number sym = TRANSITION_SYMBOL (sp, j); |
5362cd5f | 194 | if (nullable[sym - ntokens]) |
5dde5425 | 195 | edge[nedges++] = map_goto (stateno, sym); |
d954473d | 196 | } |
a0f6b076 | 197 | |
da2a7671 PE |
198 | if (nedges == 0) |
199 | reads[i] = NULL; | |
200 | else | |
d954473d | 201 | { |
da2a7671 PE |
202 | reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]); |
203 | memcpy (reads[i], edge, nedges * sizeof edge[0]); | |
60e3ecc7 | 204 | reads[i][nedges] = END_NODE; |
d954473d | 205 | nedges = 0; |
d0fb370f | 206 | } |
d0fb370f RS |
207 | } |
208 | ||
0e4d5753 | 209 | relation_digraph (reads, ngotos, &F); |
d0fb370f RS |
210 | |
211 | for (i = 0; i < ngotos; i++) | |
afbb696d | 212 | free (reads[i]); |
d0fb370f | 213 | |
afbb696d PE |
214 | free (reads); |
215 | free (edge); | |
d0fb370f RS |
216 | } |
217 | ||
218 | ||
4a120d45 | 219 | static void |
60e3ecc7 | 220 | add_lookback_edge (state *s, rule *r, goto_number gotono) |
d0fb370f | 221 | { |
5dde5425 | 222 | int ri = state_reduction_find (s, r); |
da2a7671 | 223 | goto_list *sp = xmalloc (sizeof *sp); |
8dd162d3 | 224 | sp->next = lookback[(s->reductions->look_ahead_tokens - LA) + ri]; |
d0fb370f | 225 | sp->value = gotono; |
8dd162d3 | 226 | lookback[(s->reductions->look_ahead_tokens - LA) + ri] = sp; |
d0fb370f RS |
227 | } |
228 | ||
229 | ||
d0fb370f | 230 | |
4a120d45 | 231 | static void |
720d742f | 232 | build_relations (void) |
d0fb370f | 233 | { |
da2a7671 PE |
234 | goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); |
235 | state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1); | |
60e3ecc7 | 236 | goto_number i; |
720d742f | 237 | |
da2a7671 | 238 | includes = xnmalloc (ngotos, sizeof *includes); |
d0fb370f RS |
239 | |
240 | for (i = 0; i < ngotos; i++) | |
241 | { | |
9887c18a | 242 | int nedges = 0; |
5dde5425 PE |
243 | symbol_number symbol1 = states[to_state[i]]->accessing_symbol; |
244 | rule **rulep; | |
d0fb370f | 245 | |
5362cd5f | 246 | for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++) |
720d742f | 247 | { |
d0829076 | 248 | bool done; |
80a69750 | 249 | int length = 1; |
e9ad4aec | 250 | item_number const *rp; |
5dde5425 PE |
251 | state *s = states[from_state[i]]; |
252 | states1[0] = s->number; | |
d0fb370f | 253 | |
e9ad4aec | 254 | for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++) |
720d742f | 255 | { |
5dde5425 PE |
256 | s = transitions_to (s->transitions, |
257 | item_number_as_symbol_number (*rp)); | |
258 | states1[length++] = s->number; | |
720d742f AD |
259 | } |
260 | ||
5dde5425 PE |
261 | if (!s->consistent) |
262 | add_lookback_edge (s, *rulep, i); | |
720d742f AD |
263 | |
264 | length--; | |
d0829076 | 265 | done = false; |
720d742f AD |
266 | while (!done) |
267 | { | |
d0829076 | 268 | done = true; |
e9ad4aec PE |
269 | /* Each rhs ends in an item number, and there is a |
270 | sentinel before the first rhs, so it is safe to | |
271 | decrement RP here. */ | |
720d742f | 272 | rp--; |
e9ad4aec | 273 | if (ISVAR (*rp)) |
720d742f | 274 | { |
5dde5425 | 275 | /* Downcasting from item_number to symbol_number. */ |
5fbb0954 | 276 | edge[nedges++] = map_goto (states1[--length], |
a49aecd5 | 277 | item_number_as_symbol_number (*rp)); |
5362cd5f | 278 | if (nullable[*rp - ntokens]) |
d0829076 | 279 | done = false; |
720d742f AD |
280 | } |
281 | } | |
d0fb370f RS |
282 | } |
283 | ||
da2a7671 PE |
284 | if (nedges == 0) |
285 | includes[i] = NULL; | |
286 | else | |
720d742f | 287 | { |
9887c18a | 288 | int j; |
da2a7671 | 289 | includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]); |
720d742f | 290 | for (j = 0; j < nedges; j++) |
80a69750 | 291 | includes[i][j] = edge[j]; |
60e3ecc7 | 292 | includes[i][nedges] = END_NODE; |
720d742f | 293 | } |
d0fb370f RS |
294 | } |
295 | ||
afbb696d PE |
296 | free (edge); |
297 | free (states1); | |
9887c18a | 298 | |
0e4d5753 | 299 | relation_transpose (&includes, ngotos); |
d0fb370f RS |
300 | } |
301 | ||
302 | ||
720d742f | 303 | |
4a120d45 | 304 | static void |
720d742f | 305 | compute_FOLLOWS (void) |
d0fb370f | 306 | { |
60e3ecc7 | 307 | goto_number i; |
d0fb370f | 308 | |
0e4d5753 | 309 | relation_digraph (includes, ngotos, &F); |
d0fb370f RS |
310 | |
311 | for (i = 0; i < ngotos; i++) | |
afbb696d | 312 | free (includes[i]); |
d0fb370f | 313 | |
afbb696d | 314 | free (includes); |
d0fb370f RS |
315 | } |
316 | ||
317 | ||
4a120d45 | 318 | static void |
8dd162d3 | 319 | compute_look_ahead_tokens (void) |
d0fb370f | 320 | { |
d200e455 | 321 | size_t i; |
5dde5425 | 322 | goto_list *sp; |
d0fb370f | 323 | |
d200e455 | 324 | for (i = 0; i < nLA; i++) |
ddcd5fdf | 325 | for (sp = lookback[i]; sp; sp = sp->next) |
0fb1ffb1 | 326 | bitset_or (LA[i], LA[i], F[sp->value]); |
d0fb370f | 327 | |
ddcd5fdf | 328 | /* Free LOOKBACK. */ |
d200e455 | 329 | for (i = 0; i < nLA; i++) |
5dde5425 | 330 | LIST_FREE (goto_list, lookback[i]); |
d0fb370f | 331 | |
afbb696d | 332 | free (lookback); |
914feea9 | 333 | bitsetv_free (F); |
720d742f | 334 | } |
d0fb370f | 335 | |
d0fb370f | 336 | |
8dd162d3 PE |
337 | /*-----------------------------------------------------. |
338 | | Count the number of look-ahead tokens required for S | | |
339 | | (N_LOOK_AHEAD_TOKENS member). | | |
340 | `-----------------------------------------------------*/ | |
5edafffd | 341 | |
cd08e51e | 342 | static int |
8dd162d3 | 343 | state_look_ahead_tokens_count (state *s) |
5edafffd | 344 | { |
cd08e51e | 345 | int k; |
8dd162d3 | 346 | int n_look_ahead_tokens = 0; |
5dde5425 PE |
347 | reductions *rp = s->reductions; |
348 | transitions *sp = s->transitions; | |
cd08e51e | 349 | |
8dd162d3 | 350 | /* We need a look-ahead either to distinguish different |
cd08e51e AD |
351 | reductions (i.e., there are two or more), or to distinguish a |
352 | reduction from a shift. Otherwise, it is straightforward, | |
353 | and the state is `consistent'. */ | |
354 | if (rp->num > 1 | |
355 | || (rp->num == 1 && sp->num && | |
356 | !TRANSITION_IS_DISABLED (sp, 0) && TRANSITION_IS_SHIFT (sp, 0))) | |
8dd162d3 | 357 | n_look_ahead_tokens += rp->num; |
cd08e51e | 358 | else |
5dde5425 | 359 | s->consistent = 1; |
cd08e51e AD |
360 | |
361 | for (k = 0; k < sp->num; k++) | |
362 | if (!TRANSITION_IS_DISABLED (sp, k) && TRANSITION_IS_ERROR (sp, k)) | |
363 | { | |
5dde5425 | 364 | s->consistent = 0; |
cd08e51e AD |
365 | break; |
366 | } | |
3877f72b | 367 | |
8dd162d3 | 368 | return n_look_ahead_tokens; |
5edafffd AD |
369 | } |
370 | ||
7c6b64d0 | 371 | |
8dd162d3 PE |
372 | /*-----------------------------------------------------. |
373 | | Compute LA, NLA, and the look_ahead_tokens members. | | |
374 | `-----------------------------------------------------*/ | |
c0263492 AD |
375 | |
376 | static void | |
cd08e51e | 377 | initialize_LA (void) |
c0263492 | 378 | { |
5dde5425 | 379 | state_number i; |
cd08e51e | 380 | bitsetv pLA; |
c0263492 | 381 | |
8dd162d3 | 382 | /* Compute the total number of reductions requiring a look-ahead. */ |
cd08e51e AD |
383 | nLA = 0; |
384 | for (i = 0; i < nstates; i++) | |
8dd162d3 | 385 | nLA += state_look_ahead_tokens_count (states[i]); |
cd08e51e AD |
386 | /* Avoid having to special case 0. */ |
387 | if (!nLA) | |
388 | nLA = 1; | |
389 | ||
390 | pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED); | |
da2a7671 | 391 | lookback = xcalloc (nLA, sizeof *lookback); |
cd08e51e | 392 | |
8dd162d3 PE |
393 | /* Initialize the members LOOK_AHEAD_TOKENS for each state whose reductions |
394 | require look-ahead tokens. */ | |
c0263492 AD |
395 | for (i = 0; i < nstates; i++) |
396 | { | |
8dd162d3 | 397 | int count = state_look_ahead_tokens_count (states[i]); |
cd08e51e AD |
398 | if (count) |
399 | { | |
8dd162d3 | 400 | states[i]->reductions->look_ahead_tokens = pLA; |
cd08e51e AD |
401 | pLA += count; |
402 | } | |
c0263492 AD |
403 | } |
404 | } | |
405 | ||
406 | ||
8dd162d3 PE |
407 | /*----------------------------------------------. |
408 | | Output the look-ahead tokens for each state. | | |
409 | `----------------------------------------------*/ | |
7c6b64d0 AD |
410 | |
411 | static void | |
8dd162d3 | 412 | look_ahead_tokens_print (FILE *out) |
7c6b64d0 | 413 | { |
5dde5425 | 414 | state_number i; |
602bbf31 | 415 | int j, k; |
2f4f028d | 416 | fprintf (out, "Look-ahead tokens: BEGIN\n"); |
7c6b64d0 AD |
417 | for (i = 0; i < nstates; ++i) |
418 | { | |
5dde5425 | 419 | reductions *reds = states[i]->reductions; |
613f5e1a | 420 | bitset_iterator iter; |
8dd162d3 | 421 | int n_look_ahead_tokens = 0; |
cd08e51e | 422 | |
8dd162d3 | 423 | if (reds->look_ahead_tokens) |
cd08e51e | 424 | for (k = 0; k < reds->num; ++k) |
8dd162d3 PE |
425 | if (reds->look_ahead_tokens[k]) |
426 | ++n_look_ahead_tokens; | |
613f5e1a | 427 | |
8dd162d3 PE |
428 | fprintf (out, "State %d: %d look-ahead tokens\n", |
429 | i, n_look_ahead_tokens); | |
7c6b64d0 | 430 | |
8dd162d3 | 431 | if (reds->look_ahead_tokens) |
cd08e51e | 432 | for (j = 0; j < reds->num; ++j) |
8dd162d3 | 433 | BITSET_FOR_EACH (iter, reds->look_ahead_tokens[j], k, 0) |
cd08e51e | 434 | { |
427c0dda | 435 | fprintf (out, " on %d (%s) -> rule %d\n", |
cd08e51e AD |
436 | k, symbols[k]->tag, |
437 | reds->rules[j]->number); | |
438 | }; | |
7c6b64d0 | 439 | } |
2f4f028d | 440 | fprintf (out, "Look-ahead tokens: END\n"); |
7c6b64d0 AD |
441 | } |
442 | ||
720d742f AD |
443 | void |
444 | lalr (void) | |
445 | { | |
720d742f AD |
446 | initialize_LA (); |
447 | set_goto_map (); | |
448 | initialize_F (); | |
449 | build_relations (); | |
450 | compute_FOLLOWS (); | |
8dd162d3 | 451 | compute_look_ahead_tokens (); |
7c6b64d0 | 452 | |
273a74fa | 453 | if (trace_flag & trace_sets) |
8dd162d3 | 454 | look_ahead_tokens_print (stderr); |
d0fb370f | 455 | } |
3325ddc4 AD |
456 | |
457 | ||
458 | void | |
459 | lalr_free (void) | |
460 | { | |
5dde5425 | 461 | state_number s; |
3325ddc4 | 462 | for (s = 0; s < nstates; ++s) |
8dd162d3 | 463 | states[s]->reductions->look_ahead_tokens = NULL; |
3325ddc4 | 464 | bitsetv_free (LA); |
3325ddc4 | 465 | } |