]>
Commit | Line | Data |
---|---|---|
d0fb370f | 1 | /* Compute look-ahead criteria for bison, |
b0299a2e AD |
2 | Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002 |
3 | Free Software Foundation, Inc. | |
d0fb370f | 4 | |
340ef489 | 5 | This file is part of Bison, the GNU Compiler Compiler. |
d0fb370f | 6 | |
340ef489 AD |
7 | Bison is free software; you can redistribute it and/or modify |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
d0fb370f | 11 | |
340ef489 AD |
12 | Bison is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
d0fb370f | 16 | |
340ef489 AD |
17 | You should have received a copy of the GNU General Public License |
18 | along with Bison; see the file COPYING. If not, write to | |
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 | Boston, MA 02111-1307, USA. */ | |
d0fb370f RS |
21 | |
22 | ||
720d742f AD |
23 | /* Compute how to make the finite state machine deterministic; find |
24 | which rules need lookahead in each state, and which lookahead | |
340ef489 | 25 | tokens they accept. */ |
d0fb370f | 26 | |
d0fb370f | 27 | #include "system.h" |
602bbf31 | 28 | #include "bitset.h" |
914feea9 | 29 | #include "bitsetv.h" |
0e4d5753 | 30 | #include "relation.h" |
8b3df748 | 31 | #include "quotearg.h" |
62a3e4f0 AD |
32 | #include "symtab.h" |
33 | #include "gram.h" | |
7c6b64d0 | 34 | #include "reader.h" |
b2ca4022 | 35 | #include "LR0.h" |
a0f6b076 | 36 | #include "complain.h" |
720d742f | 37 | #include "lalr.h" |
3519ec76 | 38 | #include "nullable.h" |
340ef489 | 39 | #include "derives.h" |
f67d13aa | 40 | #include "getargs.h" |
d0fb370f | 41 | |
e68e0410 AD |
42 | goto_number_t *goto_map = NULL; |
43 | static goto_number_t ngotos = 0; | |
44 | state_number_t *from_state = NULL; | |
45 | state_number_t *to_state = NULL; | |
46 | ||
47 | /* Linked list of goto numbers. */ | |
48 | typedef struct goto_list_s | |
49 | { | |
50 | struct goto_list_s *next; | |
51 | goto_number_t value; | |
52 | } goto_list_t; | |
53 | ||
54 | ||
3325ddc4 AD |
55 | /* LA is a LR by NTOKENS matrix of bits. LA[l, i] is 1 if the rule |
56 | LArule[l] is applicable in the appropriate state when the next | |
57 | token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j, | |
58 | it is a conflict. */ | |
59 | ||
60 | static bitsetv LA = NULL; | |
d200e455 | 61 | size_t nLA; |
9703cc49 | 62 | |
d0fb370f | 63 | |
fe961097 | 64 | /* And for the famous F variable, which name is so descriptive that a |
ddcd5fdf | 65 | comment is hardly needed. <grin>. */ |
914feea9 | 66 | static bitsetv F = NULL; |
ddcd5fdf | 67 | |
e68e0410 AD |
68 | static goto_number_t **includes; |
69 | static goto_list_t **lookback; | |
aa2aab3c AD |
70 | |
71 | ||
720d742f | 72 | |
bb527fc2 | 73 | |
4a120d45 | 74 | static void |
d2729d44 | 75 | set_goto_map (void) |
d0fb370f | 76 | { |
d57650a5 | 77 | state_number_t state; |
e68e0410 | 78 | goto_number_t *temp_map; |
d0fb370f | 79 | |
e68e0410 AD |
80 | goto_map = XCALLOC (goto_number_t, nvars + 1) - ntokens; |
81 | temp_map = XCALLOC (goto_number_t, nvars + 1) - ntokens; | |
d0fb370f RS |
82 | |
83 | ngotos = 0; | |
7215de24 AD |
84 | for (state = 0; state < nstates; ++state) |
85 | { | |
8b752b00 | 86 | transitions_t *sp = states[state]->transitions; |
d57650a5 | 87 | int i; |
ccaf65bc | 88 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
7215de24 | 89 | { |
58a84254 PE |
90 | if (ngotos >= GOTO_NUMBER_MAX) |
91 | abort (); | |
7215de24 | 92 | ngotos++; |
ccaf65bc | 93 | goto_map[TRANSITION_SYMBOL (sp, i)]++; |
7215de24 AD |
94 | } |
95 | } | |
d0fb370f | 96 | |
d0b0fefa AD |
97 | { |
98 | int k = 0; | |
d57650a5 | 99 | int i; |
d0b0fefa AD |
100 | for (i = ntokens; i < nsyms; i++) |
101 | { | |
102 | temp_map[i] = k; | |
103 | k += goto_map[i]; | |
104 | } | |
d0fb370f | 105 | |
d0b0fefa AD |
106 | for (i = ntokens; i < nsyms; i++) |
107 | goto_map[i] = temp_map[i]; | |
d0fb370f | 108 | |
d0b0fefa AD |
109 | goto_map[nsyms] = ngotos; |
110 | temp_map[nsyms] = ngotos; | |
111 | } | |
d0fb370f | 112 | |
d57650a5 AD |
113 | from_state = XCALLOC (state_number_t, ngotos); |
114 | to_state = XCALLOC (state_number_t, ngotos); | |
d0fb370f | 115 | |
7215de24 | 116 | for (state = 0; state < nstates; ++state) |
d0fb370f | 117 | { |
8b752b00 | 118 | transitions_t *sp = states[state]->transitions; |
d57650a5 | 119 | int i; |
ccaf65bc | 120 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
d0fb370f | 121 | { |
ccaf65bc | 122 | int k = temp_map[TRANSITION_SYMBOL (sp, i)]++; |
d0b0fefa | 123 | from_state[k] = state; |
640748ee | 124 | to_state[k] = sp->states[i]->number; |
d0fb370f RS |
125 | } |
126 | } | |
127 | ||
d7913476 | 128 | XFREE (temp_map + ntokens); |
d0fb370f RS |
129 | } |
130 | ||
131 | ||
132 | ||
43591cec AD |
133 | /*----------------------------------------------------------. |
134 | | Map a state/symbol pair into its numeric representation. | | |
135 | `----------------------------------------------------------*/ | |
d0fb370f | 136 | |
4a120d45 | 137 | static int |
d57650a5 | 138 | map_goto (state_number_t state, symbol_number_t symbol) |
d0fb370f | 139 | { |
720d742f AD |
140 | int high; |
141 | int low; | |
142 | int middle; | |
d57650a5 | 143 | state_number_t s; |
d0fb370f RS |
144 | |
145 | low = goto_map[symbol]; | |
146 | high = goto_map[symbol + 1] - 1; | |
147 | ||
58a84254 | 148 | for (;;) |
d0fb370f | 149 | { |
58a84254 PE |
150 | if (high < low) |
151 | abort (); | |
d0fb370f RS |
152 | middle = (low + high) / 2; |
153 | s = from_state[middle]; | |
154 | if (s == state) | |
36281465 | 155 | return middle; |
d0fb370f RS |
156 | else if (s < state) |
157 | low = middle + 1; | |
158 | else | |
159 | high = middle - 1; | |
160 | } | |
d0fb370f RS |
161 | } |
162 | ||
163 | ||
4a120d45 | 164 | static void |
d2729d44 | 165 | initialize_F (void) |
d0fb370f | 166 | { |
e68e0410 AD |
167 | goto_number_t **reads = XCALLOC (goto_number_t *, ngotos); |
168 | goto_number_t *edge = XCALLOC (goto_number_t, ngotos + 1); | |
4d4f699c | 169 | int nedges = 0; |
d0fb370f | 170 | |
4d4f699c | 171 | int i; |
d0fb370f | 172 | |
914feea9 | 173 | F = bitsetv_create (ngotos, ntokens, BITSET_FIXED); |
d0fb370f | 174 | |
d0fb370f RS |
175 | for (i = 0; i < ngotos; i++) |
176 | { | |
d57650a5 | 177 | state_number_t stateno = to_state[i]; |
8b752b00 | 178 | transitions_t *sp = states[stateno]->transitions; |
d0fb370f | 179 | |
d954473d | 180 | int j; |
640748ee | 181 | FOR_EACH_SHIFT (sp, j) |
ccaf65bc | 182 | bitset_set (F[i], TRANSITION_SYMBOL (sp, j)); |
d0fb370f | 183 | |
ccaf65bc | 184 | for (; j < sp->num; j++) |
d954473d | 185 | { |
ccaf65bc | 186 | symbol_number_t symbol = TRANSITION_SYMBOL (sp, j); |
d954473d AD |
187 | if (nullable[symbol]) |
188 | edge[nedges++] = map_goto (stateno, symbol); | |
189 | } | |
a0f6b076 | 190 | |
d954473d AD |
191 | if (nedges) |
192 | { | |
e68e0410 | 193 | reads[i] = XCALLOC (goto_number_t, nedges + 1); |
62a3e4f0 | 194 | memcpy (reads[i], edge, nedges * sizeof (edge[0])); |
d954473d AD |
195 | reads[i][nedges] = -1; |
196 | nedges = 0; | |
d0fb370f | 197 | } |
d0fb370f RS |
198 | } |
199 | ||
0e4d5753 | 200 | relation_digraph (reads, ngotos, &F); |
d0fb370f RS |
201 | |
202 | for (i = 0; i < ngotos; i++) | |
ddcd5fdf | 203 | XFREE (reads[i]); |
d0fb370f | 204 | |
d7913476 AD |
205 | XFREE (reads); |
206 | XFREE (edge); | |
d0fb370f RS |
207 | } |
208 | ||
209 | ||
4a120d45 | 210 | static void |
bb0027a9 | 211 | add_lookback_edge (state_t *state, rule_t *rule, int gotono) |
d0fb370f | 212 | { |
cd08e51e AD |
213 | int r = state_reduction_find (state, rule); |
214 | goto_list_t *sp = XCALLOC (goto_list_t, 1); | |
215 | sp->next = lookback[(state->reductions->lookaheads - LA) + r]; | |
d0fb370f | 216 | sp->value = gotono; |
cd08e51e | 217 | lookback[(state->reductions->lookaheads - LA) + r] = sp; |
d0fb370f RS |
218 | } |
219 | ||
220 | ||
d0fb370f | 221 | |
4a120d45 | 222 | static void |
720d742f | 223 | build_relations (void) |
d0fb370f | 224 | { |
e68e0410 | 225 | goto_number_t *edge = XCALLOC (goto_number_t, ngotos + 1); |
d57650a5 | 226 | state_number_t *states1 = XCALLOC (state_number_t, ritem_longest_rhs () + 1); |
720d742f | 227 | int i; |
720d742f | 228 | |
e68e0410 | 229 | includes = XCALLOC (goto_number_t *, ngotos); |
d0fb370f RS |
230 | |
231 | for (i = 0; i < ngotos; i++) | |
232 | { | |
9887c18a | 233 | int nedges = 0; |
a49aecd5 | 234 | symbol_number_t symbol1 = states[to_state[i]]->accessing_symbol; |
bb0027a9 | 235 | rule_t **rulep; |
d0fb370f | 236 | |
bb0027a9 | 237 | for (rulep = derives[symbol1]; *rulep; rulep++) |
720d742f | 238 | { |
9887c18a | 239 | int done; |
80a69750 | 240 | int length = 1; |
62a3e4f0 | 241 | item_number_t *rp; |
29e88316 | 242 | state_t *state = states[from_state[i]]; |
b9f71f19 | 243 | states1[0] = state->number; |
d0fb370f | 244 | |
bb0027a9 | 245 | for (rp = (*rulep)->rhs; *rp >= 0; rp++) |
720d742f | 246 | { |
8b752b00 AD |
247 | state = transitions_to (state->transitions, |
248 | item_number_as_symbol_number (*rp)); | |
b9f71f19 | 249 | states1[length++] = state->number; |
720d742f AD |
250 | } |
251 | ||
dac3c910 AD |
252 | if (!state->consistent) |
253 | add_lookback_edge (state, *rulep, i); | |
720d742f AD |
254 | |
255 | length--; | |
256 | done = 0; | |
257 | while (!done) | |
258 | { | |
259 | done = 1; | |
260 | rp--; | |
261 | /* JF added rp>=ritem && I hope to god its right! */ | |
262 | if (rp >= ritem && ISVAR (*rp)) | |
263 | { | |
a49aecd5 | 264 | /* Downcasting from item_number_t to symbol_number_t. */ |
5fbb0954 | 265 | edge[nedges++] = map_goto (states1[--length], |
a49aecd5 | 266 | item_number_as_symbol_number (*rp)); |
720d742f AD |
267 | if (nullable[*rp]) |
268 | done = 0; | |
269 | } | |
270 | } | |
d0fb370f RS |
271 | } |
272 | ||
720d742f AD |
273 | if (nedges) |
274 | { | |
9887c18a | 275 | int j; |
e68e0410 | 276 | includes[i] = XCALLOC (goto_number_t, nedges + 1); |
720d742f | 277 | for (j = 0; j < nedges; j++) |
80a69750 AD |
278 | includes[i][j] = edge[j]; |
279 | includes[i][nedges] = -1; | |
720d742f | 280 | } |
d0fb370f RS |
281 | } |
282 | ||
d7913476 | 283 | XFREE (edge); |
b9f71f19 | 284 | XFREE (states1); |
9887c18a | 285 | |
0e4d5753 | 286 | relation_transpose (&includes, ngotos); |
d0fb370f RS |
287 | } |
288 | ||
289 | ||
720d742f | 290 | |
4a120d45 | 291 | static void |
720d742f | 292 | compute_FOLLOWS (void) |
d0fb370f | 293 | { |
720d742f | 294 | int i; |
d0fb370f | 295 | |
0e4d5753 | 296 | relation_digraph (includes, ngotos, &F); |
d0fb370f RS |
297 | |
298 | for (i = 0; i < ngotos; i++) | |
ddcd5fdf | 299 | XFREE (includes[i]); |
d0fb370f | 300 | |
d7913476 | 301 | XFREE (includes); |
d0fb370f RS |
302 | } |
303 | ||
304 | ||
4a120d45 | 305 | static void |
720d742f | 306 | compute_lookaheads (void) |
d0fb370f | 307 | { |
d200e455 | 308 | size_t i; |
e68e0410 | 309 | goto_list_t *sp; |
d0fb370f | 310 | |
d200e455 | 311 | for (i = 0; i < nLA; i++) |
ddcd5fdf | 312 | for (sp = lookback[i]; sp; sp = sp->next) |
0fb1ffb1 | 313 | bitset_or (LA[i], LA[i], F[sp->value]); |
d0fb370f | 314 | |
ddcd5fdf | 315 | /* Free LOOKBACK. */ |
d200e455 | 316 | for (i = 0; i < nLA; i++) |
e68e0410 | 317 | LIST_FREE (goto_list_t, lookback[i]); |
d0fb370f | 318 | |
d7913476 | 319 | XFREE (lookback); |
914feea9 | 320 | bitsetv_free (F); |
720d742f | 321 | } |
d0fb370f | 322 | |
d0fb370f | 323 | |
cd08e51e AD |
324 | /*---------------------------------------------------------------. |
325 | | Count the number of lookaheads required for STATE (NLOOKAHEADS | | |
326 | | member). | | |
327 | `---------------------------------------------------------------*/ | |
5edafffd | 328 | |
cd08e51e AD |
329 | static int |
330 | state_lookaheads_count (state_t *state) | |
5edafffd | 331 | { |
cd08e51e AD |
332 | int k; |
333 | int nlookaheads = 0; | |
334 | reductions_t *rp = state->reductions; | |
335 | transitions_t *sp = state->transitions; | |
336 | ||
337 | /* We need a lookahead either to distinguish different | |
338 | reductions (i.e., there are two or more), or to distinguish a | |
339 | reduction from a shift. Otherwise, it is straightforward, | |
340 | and the state is `consistent'. */ | |
341 | if (rp->num > 1 | |
342 | || (rp->num == 1 && sp->num && | |
343 | !TRANSITION_IS_DISABLED (sp, 0) && TRANSITION_IS_SHIFT (sp, 0))) | |
344 | nlookaheads += rp->num; | |
345 | else | |
346 | state->consistent = 1; | |
347 | ||
348 | for (k = 0; k < sp->num; k++) | |
349 | if (!TRANSITION_IS_DISABLED (sp, k) && TRANSITION_IS_ERROR (sp, k)) | |
350 | { | |
351 | state->consistent = 0; | |
352 | break; | |
353 | } | |
3877f72b | 354 | |
cd08e51e | 355 | return nlookaheads; |
5edafffd AD |
356 | } |
357 | ||
7c6b64d0 | 358 | |
cd08e51e AD |
359 | /*----------------------------------------------. |
360 | | Compute LA, NLA, and the lookaheads members. | | |
361 | `----------------------------------------------*/ | |
c0263492 AD |
362 | |
363 | static void | |
cd08e51e | 364 | initialize_LA (void) |
c0263492 | 365 | { |
d57650a5 | 366 | state_number_t i; |
cd08e51e | 367 | bitsetv pLA; |
c0263492 | 368 | |
cd08e51e AD |
369 | /* Compute the total number of reductions requiring a lookahead. */ |
370 | nLA = 0; | |
371 | for (i = 0; i < nstates; i++) | |
372 | nLA += state_lookaheads_count (states[i]); | |
373 | /* Avoid having to special case 0. */ | |
374 | if (!nLA) | |
375 | nLA = 1; | |
376 | ||
377 | pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED); | |
378 | lookback = XCALLOC (goto_list_t *, nLA); | |
379 | ||
380 | /* Initialize the members LOOKAHEADS for each state which reductions | |
381 | require lookaheads. */ | |
c0263492 AD |
382 | for (i = 0; i < nstates; i++) |
383 | { | |
cd08e51e AD |
384 | int count = state_lookaheads_count (states[i]); |
385 | if (count) | |
386 | { | |
387 | states[i]->reductions->lookaheads = pLA; | |
388 | pLA += count; | |
389 | } | |
c0263492 AD |
390 | } |
391 | } | |
392 | ||
393 | ||
7c6b64d0 AD |
394 | /*---------------------------------------. |
395 | | Output the lookaheads for each state. | | |
396 | `---------------------------------------*/ | |
397 | ||
398 | static void | |
399 | lookaheads_print (FILE *out) | |
400 | { | |
d57650a5 | 401 | state_number_t i; |
602bbf31 | 402 | int j, k; |
427c0dda | 403 | fprintf (out, "Lookaheads: BEGIN\n"); |
7c6b64d0 AD |
404 | for (i = 0; i < nstates; ++i) |
405 | { | |
cd08e51e | 406 | reductions_t *reds = states[i]->reductions; |
613f5e1a | 407 | bitset_iterator iter; |
cd08e51e AD |
408 | int nlookaheads = 0; |
409 | ||
410 | if (reds->lookaheads) | |
411 | for (k = 0; k < reds->num; ++k) | |
412 | if (reds->lookaheads[k]) | |
413 | ++nlookaheads; | |
613f5e1a | 414 | |
427c0dda | 415 | fprintf (out, "State %d: %d lookaheads\n", |
cd08e51e | 416 | i, nlookaheads); |
7c6b64d0 | 417 | |
cd08e51e AD |
418 | if (reds->lookaheads) |
419 | for (j = 0; j < reds->num; ++j) | |
420 | BITSET_FOR_EACH (iter, reds->lookaheads[j], k, 0) | |
421 | { | |
427c0dda | 422 | fprintf (out, " on %d (%s) -> rule %d\n", |
cd08e51e AD |
423 | k, symbols[k]->tag, |
424 | reds->rules[j]->number); | |
425 | }; | |
7c6b64d0 | 426 | } |
427c0dda | 427 | fprintf (out, "Lookaheads: END\n"); |
7c6b64d0 AD |
428 | } |
429 | ||
720d742f AD |
430 | void |
431 | lalr (void) | |
432 | { | |
720d742f AD |
433 | initialize_LA (); |
434 | set_goto_map (); | |
435 | initialize_F (); | |
436 | build_relations (); | |
437 | compute_FOLLOWS (); | |
438 | compute_lookaheads (); | |
7c6b64d0 | 439 | |
273a74fa | 440 | if (trace_flag & trace_sets) |
7c6b64d0 | 441 | lookaheads_print (stderr); |
d0fb370f | 442 | } |
3325ddc4 AD |
443 | |
444 | ||
445 | void | |
446 | lalr_free (void) | |
447 | { | |
448 | state_number_t s; | |
449 | for (s = 0; s < nstates; ++s) | |
cd08e51e | 450 | states[s]->reductions->lookaheads = NULL; |
3325ddc4 | 451 | bitsetv_free (LA); |
3325ddc4 | 452 | } |