]>
Commit | Line | Data |
---|---|---|
d0fb370f | 1 | /* Compute look-ahead criteria for bison, |
b0299a2e AD |
2 | Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002 |
3 | Free Software Foundation, Inc. | |
d0fb370f | 4 | |
340ef489 | 5 | This file is part of Bison, the GNU Compiler Compiler. |
d0fb370f | 6 | |
340ef489 AD |
7 | Bison is free software; you can redistribute it and/or modify |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
d0fb370f | 11 | |
340ef489 AD |
12 | Bison is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
d0fb370f | 16 | |
340ef489 AD |
17 | You should have received a copy of the GNU General Public License |
18 | along with Bison; see the file COPYING. If not, write to | |
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 | Boston, MA 02111-1307, USA. */ | |
d0fb370f RS |
21 | |
22 | ||
720d742f AD |
23 | /* Compute how to make the finite state machine deterministic; find |
24 | which rules need lookahead in each state, and which lookahead | |
340ef489 | 25 | tokens they accept. */ |
d0fb370f | 26 | |
d0fb370f | 27 | #include "system.h" |
602bbf31 | 28 | #include "bitset.h" |
914feea9 | 29 | #include "bitsetv.h" |
0e4d5753 | 30 | #include "relation.h" |
8b3df748 | 31 | #include "quotearg.h" |
62a3e4f0 AD |
32 | #include "symtab.h" |
33 | #include "gram.h" | |
7c6b64d0 | 34 | #include "reader.h" |
d0fb370f | 35 | #include "types.h" |
b2ca4022 | 36 | #include "LR0.h" |
a0f6b076 | 37 | #include "complain.h" |
720d742f | 38 | #include "lalr.h" |
3519ec76 | 39 | #include "nullable.h" |
340ef489 | 40 | #include "derives.h" |
f67d13aa | 41 | #include "getargs.h" |
d0fb370f | 42 | |
b0299a2e | 43 | rule_t **LArule = NULL; |
914feea9 | 44 | bitsetv LA = NULL; |
d200e455 | 45 | size_t nLA; |
9703cc49 | 46 | |
aa2aab3c | 47 | static int ngotos; |
602bbf31 | 48 | short *goto_map = NULL; |
d57650a5 AD |
49 | state_number_t *from_state = NULL; |
50 | state_number_t *to_state = NULL; | |
d0fb370f | 51 | |
fe961097 | 52 | /* And for the famous F variable, which name is so descriptive that a |
ddcd5fdf | 53 | comment is hardly needed. <grin>. */ |
914feea9 | 54 | static bitsetv F = NULL; |
ddcd5fdf | 55 | |
d0fb370f RS |
56 | static short **includes; |
57 | static shorts **lookback; | |
aa2aab3c AD |
58 | |
59 | ||
720d742f | 60 | |
bb527fc2 | 61 | |
4a120d45 | 62 | static void |
d2729d44 | 63 | initialize_LA (void) |
d0fb370f | 64 | { |
d57650a5 | 65 | state_number_t i; |
720d742f | 66 | int j; |
b0299a2e | 67 | rule_t **np; |
d0fb370f | 68 | |
d200e455 | 69 | /* Avoid having to special case 0. */ |
a845a697 AD |
70 | if (!nLA) |
71 | nLA = 1; | |
d0fb370f | 72 | |
914feea9 | 73 | LA = bitsetv_create (nLA, ntokens, BITSET_FIXED); |
b0299a2e | 74 | LArule = XCALLOC (rule_t *, nLA); |
a845a697 | 75 | lookback = XCALLOC (shorts *, nLA); |
d0fb370f | 76 | |
b0299a2e | 77 | np = LArule; |
d0fb370f | 78 | for (i = 0; i < nstates; i++) |
29e88316 | 79 | if (!states[i]->consistent) |
d2576365 | 80 | for (j = 0; j < states[i]->reductions->num; j++) |
b0299a2e | 81 | *np++ = &rules[states[i]->reductions->rules[j]]; |
d0fb370f RS |
82 | } |
83 | ||
84 | ||
4a120d45 | 85 | static void |
d2729d44 | 86 | set_goto_map (void) |
d0fb370f | 87 | { |
d57650a5 | 88 | state_number_t state; |
720d742f | 89 | short *temp_map; |
d0fb370f | 90 | |
d7913476 AD |
91 | goto_map = XCALLOC (short, nvars + 1) - ntokens; |
92 | temp_map = XCALLOC (short, nvars + 1) - ntokens; | |
d0fb370f RS |
93 | |
94 | ngotos = 0; | |
7215de24 AD |
95 | for (state = 0; state < nstates; ++state) |
96 | { | |
ccaf65bc | 97 | transitions_t *sp = states[state]->shifts; |
d57650a5 | 98 | int i; |
ccaf65bc | 99 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
7215de24 | 100 | { |
62a3e4f0 AD |
101 | if (ngotos == SHRT_MAX) |
102 | fatal (_("too many gotos (max %d)"), SHRT_MAX); | |
d0fb370f | 103 | |
7215de24 | 104 | ngotos++; |
ccaf65bc | 105 | goto_map[TRANSITION_SYMBOL (sp, i)]++; |
7215de24 AD |
106 | } |
107 | } | |
d0fb370f | 108 | |
d0b0fefa AD |
109 | { |
110 | int k = 0; | |
d57650a5 | 111 | int i; |
d0b0fefa AD |
112 | for (i = ntokens; i < nsyms; i++) |
113 | { | |
114 | temp_map[i] = k; | |
115 | k += goto_map[i]; | |
116 | } | |
d0fb370f | 117 | |
d0b0fefa AD |
118 | for (i = ntokens; i < nsyms; i++) |
119 | goto_map[i] = temp_map[i]; | |
d0fb370f | 120 | |
d0b0fefa AD |
121 | goto_map[nsyms] = ngotos; |
122 | temp_map[nsyms] = ngotos; | |
123 | } | |
d0fb370f | 124 | |
d57650a5 AD |
125 | from_state = XCALLOC (state_number_t, ngotos); |
126 | to_state = XCALLOC (state_number_t, ngotos); | |
d0fb370f | 127 | |
7215de24 | 128 | for (state = 0; state < nstates; ++state) |
d0fb370f | 129 | { |
ccaf65bc | 130 | transitions_t *sp = states[state]->shifts; |
d57650a5 | 131 | int i; |
ccaf65bc | 132 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) |
d0fb370f | 133 | { |
ccaf65bc | 134 | int k = temp_map[TRANSITION_SYMBOL (sp, i)]++; |
d0b0fefa | 135 | from_state[k] = state; |
ccaf65bc | 136 | to_state[k] = sp->states[i]; |
d0fb370f RS |
137 | } |
138 | } | |
139 | ||
d7913476 | 140 | XFREE (temp_map + ntokens); |
d0fb370f RS |
141 | } |
142 | ||
143 | ||
144 | ||
43591cec AD |
145 | /*----------------------------------------------------------. |
146 | | Map a state/symbol pair into its numeric representation. | | |
147 | `----------------------------------------------------------*/ | |
d0fb370f | 148 | |
4a120d45 | 149 | static int |
d57650a5 | 150 | map_goto (state_number_t state, symbol_number_t symbol) |
d0fb370f | 151 | { |
720d742f AD |
152 | int high; |
153 | int low; | |
154 | int middle; | |
d57650a5 | 155 | state_number_t s; |
d0fb370f RS |
156 | |
157 | low = goto_map[symbol]; | |
158 | high = goto_map[symbol + 1] - 1; | |
159 | ||
160 | while (low <= high) | |
161 | { | |
162 | middle = (low + high) / 2; | |
163 | s = from_state[middle]; | |
164 | if (s == state) | |
36281465 | 165 | return middle; |
d0fb370f RS |
166 | else if (s < state) |
167 | low = middle + 1; | |
168 | else | |
169 | high = middle - 1; | |
170 | } | |
171 | ||
43591cec AD |
172 | assert (0); |
173 | /* NOTREACHED */ | |
d0fb370f RS |
174 | return 0; |
175 | } | |
176 | ||
177 | ||
4a120d45 | 178 | static void |
d2729d44 | 179 | initialize_F (void) |
d0fb370f | 180 | { |
4d4f699c AD |
181 | short **reads = XCALLOC (short *, ngotos); |
182 | short *edge = XCALLOC (short, ngotos + 1); | |
183 | int nedges = 0; | |
d0fb370f | 184 | |
4d4f699c | 185 | int i; |
d0fb370f | 186 | |
914feea9 | 187 | F = bitsetv_create (ngotos, ntokens, BITSET_FIXED); |
d0fb370f | 188 | |
d0fb370f RS |
189 | for (i = 0; i < ngotos; i++) |
190 | { | |
d57650a5 | 191 | state_number_t stateno = to_state[i]; |
ccaf65bc | 192 | transitions_t *sp = states[stateno]->shifts; |
d0fb370f | 193 | |
d954473d | 194 | int j; |
ccaf65bc AD |
195 | for (j = 0; j < sp->num && TRANSITION_IS_SHIFT (sp, j); j++) |
196 | bitset_set (F[i], TRANSITION_SYMBOL (sp, j)); | |
d0fb370f | 197 | |
ccaf65bc | 198 | for (; j < sp->num; j++) |
d954473d | 199 | { |
ccaf65bc | 200 | symbol_number_t symbol = TRANSITION_SYMBOL (sp, j); |
d954473d AD |
201 | if (nullable[symbol]) |
202 | edge[nedges++] = map_goto (stateno, symbol); | |
203 | } | |
a0f6b076 | 204 | |
d954473d AD |
205 | if (nedges) |
206 | { | |
207 | reads[i] = XCALLOC (short, nedges + 1); | |
62a3e4f0 | 208 | memcpy (reads[i], edge, nedges * sizeof (edge[0])); |
d954473d AD |
209 | reads[i][nedges] = -1; |
210 | nedges = 0; | |
d0fb370f | 211 | } |
d0fb370f RS |
212 | } |
213 | ||
0e4d5753 | 214 | relation_digraph (reads, ngotos, &F); |
d0fb370f RS |
215 | |
216 | for (i = 0; i < ngotos; i++) | |
ddcd5fdf | 217 | XFREE (reads[i]); |
d0fb370f | 218 | |
d7913476 AD |
219 | XFREE (reads); |
220 | XFREE (edge); | |
d0fb370f RS |
221 | } |
222 | ||
223 | ||
4a120d45 | 224 | static void |
9222837b | 225 | add_lookback_edge (state_t *state, rule_number_t ruleno, int gotono) |
d0fb370f | 226 | { |
720d742f | 227 | int i; |
720d742f | 228 | shorts *sp; |
d0fb370f | 229 | |
dac3c910 | 230 | for (i = 0; i < state->nlookaheads; ++i) |
c0263492 | 231 | if (state->lookaheads_rule[i]->number == ruleno) |
065fbd27 | 232 | break; |
d0fb370f | 233 | |
c0263492 | 234 | assert (state->lookaheads_rule[i]->number == ruleno); |
d0fb370f | 235 | |
d7913476 | 236 | sp = XCALLOC (shorts, 1); |
c0263492 | 237 | sp->next = lookback[(state->lookaheads - LA) + i]; |
d0fb370f | 238 | sp->value = gotono; |
c0263492 | 239 | lookback[(state->lookaheads - LA) + i] = sp; |
d0fb370f RS |
240 | } |
241 | ||
242 | ||
d0fb370f | 243 | |
4a120d45 | 244 | static void |
720d742f | 245 | build_relations (void) |
d0fb370f | 246 | { |
9887c18a | 247 | short *edge = XCALLOC (short, ngotos + 1); |
d57650a5 | 248 | state_number_t *states1 = XCALLOC (state_number_t, ritem_longest_rhs () + 1); |
720d742f | 249 | int i; |
720d742f | 250 | |
d7913476 | 251 | includes = XCALLOC (short *, ngotos); |
d0fb370f RS |
252 | |
253 | for (i = 0; i < ngotos; i++) | |
254 | { | |
9887c18a | 255 | int nedges = 0; |
a49aecd5 | 256 | symbol_number_t symbol1 = states[to_state[i]]->accessing_symbol; |
9222837b | 257 | rule_number_t *rulep; |
d0fb370f | 258 | |
720d742f AD |
259 | for (rulep = derives[symbol1]; *rulep > 0; rulep++) |
260 | { | |
9887c18a | 261 | int done; |
80a69750 | 262 | int length = 1; |
62a3e4f0 | 263 | item_number_t *rp; |
29e88316 | 264 | state_t *state = states[from_state[i]]; |
b9f71f19 | 265 | states1[0] = state->number; |
d0fb370f | 266 | |
99013900 | 267 | for (rp = rules[*rulep].rhs; *rp >= 0; rp++) |
720d742f | 268 | { |
ccaf65bc | 269 | state = transitions_to (state->shifts, |
24c7d800 | 270 | item_number_as_symbol_number (*rp)); |
b9f71f19 | 271 | states1[length++] = state->number; |
720d742f AD |
272 | } |
273 | ||
dac3c910 AD |
274 | if (!state->consistent) |
275 | add_lookback_edge (state, *rulep, i); | |
720d742f AD |
276 | |
277 | length--; | |
278 | done = 0; | |
279 | while (!done) | |
280 | { | |
281 | done = 1; | |
282 | rp--; | |
283 | /* JF added rp>=ritem && I hope to god its right! */ | |
284 | if (rp >= ritem && ISVAR (*rp)) | |
285 | { | |
a49aecd5 | 286 | /* Downcasting from item_number_t to symbol_number_t. */ |
5fbb0954 | 287 | edge[nedges++] = map_goto (states1[--length], |
a49aecd5 | 288 | item_number_as_symbol_number (*rp)); |
720d742f AD |
289 | if (nullable[*rp]) |
290 | done = 0; | |
291 | } | |
292 | } | |
d0fb370f RS |
293 | } |
294 | ||
720d742f AD |
295 | if (nedges) |
296 | { | |
9887c18a | 297 | int j; |
80a69750 | 298 | includes[i] = XCALLOC (short, nedges + 1); |
720d742f | 299 | for (j = 0; j < nedges; j++) |
80a69750 AD |
300 | includes[i][j] = edge[j]; |
301 | includes[i][nedges] = -1; | |
720d742f | 302 | } |
d0fb370f RS |
303 | } |
304 | ||
d7913476 | 305 | XFREE (edge); |
b9f71f19 | 306 | XFREE (states1); |
9887c18a | 307 | |
0e4d5753 | 308 | relation_transpose (&includes, ngotos); |
d0fb370f RS |
309 | } |
310 | ||
311 | ||
720d742f | 312 | |
4a120d45 | 313 | static void |
720d742f | 314 | compute_FOLLOWS (void) |
d0fb370f | 315 | { |
720d742f | 316 | int i; |
d0fb370f | 317 | |
0e4d5753 | 318 | relation_digraph (includes, ngotos, &F); |
d0fb370f RS |
319 | |
320 | for (i = 0; i < ngotos; i++) | |
ddcd5fdf | 321 | XFREE (includes[i]); |
d0fb370f | 322 | |
d7913476 | 323 | XFREE (includes); |
d0fb370f RS |
324 | } |
325 | ||
326 | ||
4a120d45 | 327 | static void |
720d742f | 328 | compute_lookaheads (void) |
d0fb370f | 329 | { |
d200e455 | 330 | size_t i; |
720d742f | 331 | shorts *sp; |
d0fb370f | 332 | |
d200e455 | 333 | for (i = 0; i < nLA; i++) |
ddcd5fdf | 334 | for (sp = lookback[i]; sp; sp = sp->next) |
0fb1ffb1 | 335 | bitset_or (LA[i], LA[i], F[sp->value]); |
d0fb370f | 336 | |
ddcd5fdf | 337 | /* Free LOOKBACK. */ |
d200e455 | 338 | for (i = 0; i < nLA; i++) |
300f275f | 339 | LIST_FREE (shorts, lookback[i]); |
d0fb370f | 340 | |
d7913476 | 341 | XFREE (lookback); |
914feea9 | 342 | bitsetv_free (F); |
720d742f | 343 | } |
d0fb370f | 344 | |
d0fb370f | 345 | |
c0263492 AD |
346 | /*-------------------------------------------------------------. |
347 | | Count the number of lookaheads required for each state | | |
348 | | (NLOOKAHEADS member). Compute the total number of LA, NLA. | | |
349 | `-------------------------------------------------------------*/ | |
5edafffd AD |
350 | |
351 | static void | |
c0263492 | 352 | states_lookaheads_count (void) |
5edafffd | 353 | { |
d57650a5 | 354 | state_number_t i; |
d200e455 | 355 | nLA = 0; |
c0263492 AD |
356 | |
357 | /* Count */ | |
5edafffd AD |
358 | for (i = 0; i < nstates; i++) |
359 | { | |
360 | int k; | |
3877f72b | 361 | int nlookaheads = 0; |
8a731ca8 | 362 | reductions_t *rp = states[i]->reductions; |
ccaf65bc | 363 | transitions_t *sp = states[i]->shifts; |
5edafffd | 364 | |
80dac38c AD |
365 | /* We need a lookahead either to distinguish different |
366 | reductions (i.e., there are two or more), or to distinguish a | |
367 | reduction from a shift. Otherwise, it is straightforward, | |
368 | and the state is `consistent'. */ | |
d2576365 AD |
369 | if (rp->num > 1 |
370 | || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0))) | |
371 | nlookaheads += rp->num; | |
5edafffd | 372 | else |
29e88316 | 373 | states[i]->consistent = 1; |
5edafffd | 374 | |
ccaf65bc AD |
375 | for (k = 0; k < sp->num; k++) |
376 | if (TRANSITION_IS_ERROR (sp, k)) | |
5edafffd | 377 | { |
29e88316 | 378 | states[i]->consistent = 0; |
5edafffd AD |
379 | break; |
380 | } | |
3877f72b | 381 | |
29e88316 | 382 | states[i]->nlookaheads = nlookaheads; |
d200e455 | 383 | nLA += nlookaheads; |
5edafffd | 384 | } |
5edafffd AD |
385 | } |
386 | ||
7c6b64d0 | 387 | |
c0263492 AD |
388 | /*--------------------------------------. |
389 | | Initializing the lookaheads members. | | |
390 | `--------------------------------------*/ | |
391 | ||
392 | static void | |
393 | states_lookaheads_initialize (void) | |
394 | { | |
d57650a5 | 395 | state_number_t i; |
c0263492 AD |
396 | bitsetv pLA = LA; |
397 | rule_t **pLArule = LArule; | |
398 | ||
9801d40c AD |
399 | /* Initialize the members LOOKAHEADS and LOOKAHEADS_RULE for each |
400 | state. */ | |
c0263492 AD |
401 | for (i = 0; i < nstates; i++) |
402 | { | |
403 | states[i]->lookaheads = pLA; | |
404 | states[i]->lookaheads_rule = pLArule; | |
405 | pLA += states[i]->nlookaheads; | |
406 | pLArule += states[i]->nlookaheads; | |
407 | } | |
408 | } | |
409 | ||
410 | ||
7c6b64d0 AD |
411 | /*---------------------------------------. |
412 | | Output the lookaheads for each state. | | |
413 | `---------------------------------------*/ | |
414 | ||
415 | static void | |
416 | lookaheads_print (FILE *out) | |
417 | { | |
d57650a5 | 418 | state_number_t i; |
602bbf31 | 419 | int j, k; |
7c6b64d0 AD |
420 | fprintf (out, "Lookaheads: BEGIN\n"); |
421 | for (i = 0; i < nstates; ++i) | |
422 | { | |
423 | fprintf (out, "State %d: %d lookaheads\n", | |
29e88316 | 424 | i, states[i]->nlookaheads); |
7c6b64d0 | 425 | |
29e88316 | 426 | for (j = 0; j < states[i]->nlookaheads; ++j) |
574fb2d5 AD |
427 | BITSET_EXECUTE (states[i]->lookaheads[j], 0, k, |
428 | { | |
429 | fprintf (out, " on %d (%s) -> rule %d\n", | |
97650f4e | 430 | k, symbols[k]->tag, |
574fb2d5 AD |
431 | states[i]->lookaheads_rule[j]->number - 1); |
432 | }); | |
7c6b64d0 AD |
433 | } |
434 | fprintf (out, "Lookaheads: END\n"); | |
435 | } | |
436 | ||
720d742f AD |
437 | void |
438 | lalr (void) | |
439 | { | |
c0263492 | 440 | states_lookaheads_count (); |
720d742f | 441 | initialize_LA (); |
c0263492 | 442 | states_lookaheads_initialize (); |
720d742f AD |
443 | set_goto_map (); |
444 | initialize_F (); | |
445 | build_relations (); | |
446 | compute_FOLLOWS (); | |
447 | compute_lookaheads (); | |
7c6b64d0 AD |
448 | |
449 | if (trace_flag) | |
450 | lookaheads_print (stderr); | |
d0fb370f | 451 | } |