]>
Commit | Line | Data |
---|---|---|
d0fb370f | 1 | /* Compute look-ahead criteria for bison, |
3feec034 | 2 | Copyright 1984, 1986, 1989, 2000, 2001 Free Software Foundation, Inc. |
d0fb370f | 3 | |
340ef489 | 4 | This file is part of Bison, the GNU Compiler Compiler. |
d0fb370f | 5 | |
340ef489 AD |
6 | Bison is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
d0fb370f | 10 | |
340ef489 AD |
11 | Bison is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
d0fb370f | 15 | |
340ef489 AD |
16 | You should have received a copy of the GNU General Public License |
17 | along with Bison; see the file COPYING. If not, write to | |
18 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
d0fb370f RS |
20 | |
21 | ||
720d742f AD |
22 | /* Compute how to make the finite state machine deterministic; find |
23 | which rules need lookahead in each state, and which lookahead | |
340ef489 | 24 | tokens they accept. */ |
d0fb370f | 25 | |
d0fb370f | 26 | #include "system.h" |
d0fb370f | 27 | #include "types.h" |
b2ca4022 | 28 | #include "LR0.h" |
d0fb370f | 29 | #include "gram.h" |
a0f6b076 | 30 | #include "complain.h" |
720d742f | 31 | #include "lalr.h" |
3519ec76 | 32 | #include "nullable.h" |
340ef489 | 33 | #include "derives.h" |
d0fb370f | 34 | |
9703cc49 AD |
35 | /* All the decorated states, indexed by the state number. Warning: |
36 | there is a state_TABLE in LR0.c, but it is different and static. | |
37 | */ | |
38 | state_t *state_table = NULL; | |
39 | ||
d0fb370f | 40 | int tokensetsize; |
d0fb370f RS |
41 | short *LAruleno; |
42 | unsigned *LA; | |
9703cc49 | 43 | |
d0fb370f RS |
44 | short *goto_map; |
45 | short *from_state; | |
46 | short *to_state; | |
47 | ||
720d742f | 48 | extern void berror PARAMS ((const char *)); |
d0fb370f RS |
49 | |
50 | static int infinity; | |
d0fb370f | 51 | static int ngotos; |
ddcd5fdf | 52 | |
fe961097 | 53 | /* And for the famous F variable, which name is so descriptive that a |
ddcd5fdf AD |
54 | comment is hardly needed. <grin>. */ |
55 | static unsigned *F = NULL; | |
56 | #define F(Rule) (F + (Rule) * tokensetsize) | |
57 | ||
d0fb370f RS |
58 | static short **includes; |
59 | static shorts **lookback; | |
60 | static short **R; | |
61 | static short *INDEX; | |
62 | static short *VERTICES; | |
63 | static int top; | |
64 | ||
65 | ||
720d742f AD |
66 | static void |
67 | traverse (int i) | |
d0fb370f | 68 | { |
720d742f | 69 | int j; |
fe961097 | 70 | size_t k; |
720d742f | 71 | int height; |
fe961097 | 72 | size_t size = F (i + 1) - F(i); |
720d742f AD |
73 | |
74 | VERTICES[++top] = i; | |
75 | INDEX[i] = height = top; | |
76 | ||
fe961097 AD |
77 | if (R[i]) |
78 | for (j = 0; R[i][j] >= 0; ++j) | |
79 | { | |
80 | if (INDEX[R[i][j]] == 0) | |
81 | traverse (R[i][j]); | |
720d742f | 82 | |
fe961097 AD |
83 | if (INDEX[i] > INDEX[R[i][j]]) |
84 | INDEX[i] = INDEX[R[i][j]]; | |
720d742f | 85 | |
fe961097 AD |
86 | for (k = 0; k < size; ++k) |
87 | F (i)[k] |= F (R[i][j])[k]; | |
88 | } | |
720d742f AD |
89 | |
90 | if (INDEX[i] == height) | |
fe961097 AD |
91 | for (;;) |
92 | { | |
93 | j = VERTICES[top--]; | |
94 | INDEX[j] = infinity; | |
720d742f | 95 | |
fe961097 AD |
96 | if (i == j) |
97 | break; | |
720d742f | 98 | |
fe961097 AD |
99 | for (k = 0; k < size; ++k) |
100 | F (i)[k] = F (j)[k]; | |
101 | } | |
d0fb370f RS |
102 | } |
103 | ||
104 | ||
720d742f AD |
105 | static void |
106 | digraph (short **relation) | |
107 | { | |
108 | int i; | |
109 | ||
110 | infinity = ngotos + 2; | |
d7913476 AD |
111 | INDEX = XCALLOC (short, ngotos + 1); |
112 | VERTICES = XCALLOC (short, ngotos + 1); | |
720d742f AD |
113 | top = 0; |
114 | ||
115 | R = relation; | |
116 | ||
117 | for (i = 0; i < ngotos; i++) | |
118 | INDEX[i] = 0; | |
119 | ||
120 | for (i = 0; i < ngotos; i++) | |
ddcd5fdf AD |
121 | if (INDEX[i] == 0 && R[i]) |
122 | traverse (i); | |
720d742f | 123 | |
d7913476 AD |
124 | XFREE (INDEX); |
125 | XFREE (VERTICES); | |
720d742f AD |
126 | } |
127 | ||
bb527fc2 AD |
128 | |
129 | /*--------------------. | |
130 | | Build STATE_TABLE. | | |
131 | `--------------------*/ | |
132 | ||
4a120d45 | 133 | static void |
d2729d44 | 134 | set_state_table (void) |
d0fb370f | 135 | { |
f004bf6a AD |
136 | /* NSTATES + 1 because lookahead for the pseudo state number NSTATES |
137 | might be used (see conflicts.c). It is too opaque for me to | |
138 | provide a probably less hacky implementation. --akim */ | |
139 | state_table = XCALLOC (state_t, nstates + 1); | |
d0fb370f | 140 | |
90b4416b AD |
141 | { |
142 | core *sp; | |
143 | for (sp = first_state; sp; sp = sp->next) | |
144 | { | |
145 | state_table[sp->number].state = sp; | |
146 | state_table[sp->number].accessing_symbol = sp->accessing_symbol; | |
147 | } | |
148 | } | |
149 | ||
150 | { | |
151 | shifts *sp; | |
152 | for (sp = first_shift; sp; sp = sp->next) | |
153 | state_table[sp->number].shift_table = sp; | |
154 | } | |
155 | ||
156 | { | |
157 | reductions *rp; | |
158 | for (rp = first_reduction; rp; rp = rp->next) | |
159 | state_table[rp->number].reduction_table = rp; | |
160 | } | |
a845a697 AD |
161 | |
162 | /* Initializing the lookaheads members. Please note that it must be | |
163 | performed after having set some of the other members which are | |
164 | used below. Change with extreme caution. */ | |
165 | { | |
166 | int i; | |
167 | int count = 0; | |
168 | for (i = 0; i < nstates; i++) | |
169 | { | |
170 | int k; | |
171 | reductions *rp = state_table[i].reduction_table; | |
172 | shifts *sp = state_table[i].shift_table; | |
173 | ||
174 | state_table[i].lookaheads = count; | |
175 | ||
176 | if (rp | |
177 | && (rp->nreds > 1 | |
178 | || (sp && !ISVAR (state_table[sp->shifts[0]].accessing_symbol)))) | |
179 | count += rp->nreds; | |
180 | else | |
181 | state_table[i].consistent = 1; | |
182 | ||
183 | if (sp) | |
184 | for (k = 0; k < sp->nshifts; k++) | |
185 | if (state_table[sp->shifts[k]].accessing_symbol | |
186 | == error_token_number) | |
187 | { | |
188 | state_table[i].consistent = 0; | |
189 | break; | |
190 | } | |
191 | } | |
192 | state_table[nstates].lookaheads = count; | |
193 | } | |
d0fb370f RS |
194 | } |
195 | ||
196 | ||
4a120d45 | 197 | static void |
d2729d44 | 198 | initialize_LA (void) |
d0fb370f | 199 | { |
720d742f AD |
200 | int i; |
201 | int j; | |
720d742f | 202 | short *np; |
a845a697 | 203 | reductions *rp; |
d0fb370f | 204 | |
a845a697 AD |
205 | size_t nLA = state_table[nstates].lookaheads; |
206 | if (!nLA) | |
207 | nLA = 1; | |
d0fb370f | 208 | |
a845a697 AD |
209 | LA = XCALLOC (unsigned, nLA * tokensetsize); |
210 | LAruleno = XCALLOC (short, nLA); | |
211 | lookback = XCALLOC (shorts *, nLA); | |
d0fb370f RS |
212 | |
213 | np = LAruleno; | |
214 | for (i = 0; i < nstates; i++) | |
a845a697 AD |
215 | if (!state_table[i].consistent) |
216 | if ((rp = state_table[i].reduction_table)) | |
217 | for (j = 0; j < rp->nreds; j++) | |
218 | *np++ = rp->rules[j]; | |
d0fb370f RS |
219 | } |
220 | ||
221 | ||
4a120d45 | 222 | static void |
d2729d44 | 223 | set_goto_map (void) |
d0fb370f | 224 | { |
720d742f AD |
225 | shifts *sp; |
226 | int i; | |
227 | int symbol; | |
228 | int k; | |
229 | short *temp_map; | |
230 | int state2; | |
231 | int state1; | |
d0fb370f | 232 | |
d7913476 AD |
233 | goto_map = XCALLOC (short, nvars + 1) - ntokens; |
234 | temp_map = XCALLOC (short, nvars + 1) - ntokens; | |
d0fb370f RS |
235 | |
236 | ngotos = 0; | |
237 | for (sp = first_shift; sp; sp = sp->next) | |
238 | { | |
239 | for (i = sp->nshifts - 1; i >= 0; i--) | |
240 | { | |
9703cc49 | 241 | symbol = state_table[sp->shifts[i]].accessing_symbol; |
d0fb370f | 242 | |
720d742f AD |
243 | if (ISTOKEN (symbol)) |
244 | break; | |
d0fb370f RS |
245 | |
246 | if (ngotos == MAXSHORT) | |
a0f6b076 | 247 | fatal (_("too many gotos (max %d)"), MAXSHORT); |
d0fb370f RS |
248 | |
249 | ngotos++; | |
250 | goto_map[symbol]++; | |
720d742f | 251 | } |
d0fb370f RS |
252 | } |
253 | ||
254 | k = 0; | |
255 | for (i = ntokens; i < nsyms; i++) | |
256 | { | |
257 | temp_map[i] = k; | |
258 | k += goto_map[i]; | |
259 | } | |
260 | ||
261 | for (i = ntokens; i < nsyms; i++) | |
262 | goto_map[i] = temp_map[i]; | |
263 | ||
264 | goto_map[nsyms] = ngotos; | |
265 | temp_map[nsyms] = ngotos; | |
266 | ||
d7913476 AD |
267 | from_state = XCALLOC (short, ngotos); |
268 | to_state = XCALLOC (short, ngotos); | |
d0fb370f RS |
269 | |
270 | for (sp = first_shift; sp; sp = sp->next) | |
271 | { | |
272 | state1 = sp->number; | |
273 | for (i = sp->nshifts - 1; i >= 0; i--) | |
274 | { | |
275 | state2 = sp->shifts[i]; | |
9703cc49 | 276 | symbol = state_table[state2].accessing_symbol; |
d0fb370f | 277 | |
720d742f AD |
278 | if (ISTOKEN (symbol)) |
279 | break; | |
d0fb370f RS |
280 | |
281 | k = temp_map[symbol]++; | |
282 | from_state[k] = state1; | |
283 | to_state[k] = state2; | |
284 | } | |
285 | } | |
286 | ||
d7913476 | 287 | XFREE (temp_map + ntokens); |
d0fb370f RS |
288 | } |
289 | ||
290 | ||
291 | ||
43591cec AD |
292 | /*----------------------------------------------------------. |
293 | | Map a state/symbol pair into its numeric representation. | | |
294 | `----------------------------------------------------------*/ | |
d0fb370f | 295 | |
4a120d45 | 296 | static int |
d2729d44 | 297 | map_goto (int state, int symbol) |
d0fb370f | 298 | { |
720d742f AD |
299 | int high; |
300 | int low; | |
301 | int middle; | |
302 | int s; | |
d0fb370f RS |
303 | |
304 | low = goto_map[symbol]; | |
305 | high = goto_map[symbol + 1] - 1; | |
306 | ||
307 | while (low <= high) | |
308 | { | |
309 | middle = (low + high) / 2; | |
310 | s = from_state[middle]; | |
311 | if (s == state) | |
36281465 | 312 | return middle; |
d0fb370f RS |
313 | else if (s < state) |
314 | low = middle + 1; | |
315 | else | |
316 | high = middle - 1; | |
317 | } | |
318 | ||
43591cec AD |
319 | assert (0); |
320 | /* NOTREACHED */ | |
d0fb370f RS |
321 | return 0; |
322 | } | |
323 | ||
324 | ||
4a120d45 | 325 | static void |
d2729d44 | 326 | initialize_F (void) |
d0fb370f | 327 | { |
4d4f699c AD |
328 | short **reads = XCALLOC (short *, ngotos); |
329 | short *edge = XCALLOC (short, ngotos + 1); | |
330 | int nedges = 0; | |
d0fb370f | 331 | |
4d4f699c | 332 | int i; |
d0fb370f | 333 | |
4d4f699c | 334 | F = XCALLOC (unsigned, ngotos * tokensetsize); |
d0fb370f | 335 | |
d0fb370f RS |
336 | for (i = 0; i < ngotos; i++) |
337 | { | |
80a69750 AD |
338 | int stateno = to_state[i]; |
339 | shifts *sp = state_table[stateno].shift_table; | |
d0fb370f RS |
340 | |
341 | if (sp) | |
342 | { | |
4d4f699c | 343 | int j; |
80a69750 | 344 | for (j = 0; j < sp->nshifts; j++) |
d0fb370f | 345 | { |
4d4f699c | 346 | int symbol = state_table[sp->shifts[j]].accessing_symbol; |
720d742f | 347 | if (ISVAR (symbol)) |
d0fb370f | 348 | break; |
4d4f699c | 349 | SETBIT (F + i * tokensetsize, symbol); |
d0fb370f RS |
350 | } |
351 | ||
80a69750 | 352 | for (; j < sp->nshifts; j++) |
d0fb370f | 353 | { |
4d4f699c | 354 | int symbol = state_table[sp->shifts[j]].accessing_symbol; |
d0fb370f | 355 | if (nullable[symbol]) |
720d742f | 356 | edge[nedges++] = map_goto (stateno, symbol); |
d0fb370f | 357 | } |
a0f6b076 | 358 | |
d0fb370f RS |
359 | if (nedges) |
360 | { | |
4d4f699c AD |
361 | reads[i] = XCALLOC (short, nedges + 1); |
362 | shortcpy (reads[i], edge, nedges); | |
363 | reads[i][nedges] = -1; | |
d0fb370f RS |
364 | nedges = 0; |
365 | } | |
366 | } | |
d0fb370f RS |
367 | } |
368 | ||
720d742f | 369 | digraph (reads); |
d0fb370f RS |
370 | |
371 | for (i = 0; i < ngotos; i++) | |
ddcd5fdf | 372 | XFREE (reads[i]); |
d0fb370f | 373 | |
d7913476 AD |
374 | XFREE (reads); |
375 | XFREE (edge); | |
d0fb370f RS |
376 | } |
377 | ||
378 | ||
4a120d45 | 379 | static void |
d2729d44 | 380 | add_lookback_edge (int stateno, int ruleno, int gotono) |
d0fb370f | 381 | { |
720d742f AD |
382 | int i; |
383 | int k; | |
384 | int found; | |
385 | shorts *sp; | |
d0fb370f | 386 | |
f004bf6a AD |
387 | i = state_table[stateno].lookaheads; |
388 | k = state_table[stateno + 1].lookaheads; | |
d0fb370f RS |
389 | found = 0; |
390 | while (!found && i < k) | |
391 | { | |
392 | if (LAruleno[i] == ruleno) | |
393 | found = 1; | |
394 | else | |
395 | i++; | |
396 | } | |
397 | ||
340ef489 | 398 | assert (found); |
d0fb370f | 399 | |
d7913476 | 400 | sp = XCALLOC (shorts, 1); |
d0fb370f RS |
401 | sp->next = lookback[i]; |
402 | sp->value = gotono; | |
403 | lookback[i] = sp; | |
404 | } | |
405 | ||
406 | ||
9887c18a AD |
407 | /*-------------------------------------------------------------------. |
408 | | Return the transpose of R_ARG, of size N. Destroy R_ARG, as it is | | |
409 | | replaced with the result. | | |
410 | `-------------------------------------------------------------------*/ | |
411 | ||
4a120d45 | 412 | static short ** |
d2729d44 | 413 | transpose (short **R_arg, int n) |
d0fb370f | 414 | { |
720d742f AD |
415 | short **new_R; |
416 | short **temp_R; | |
417 | short *nedges; | |
720d742f | 418 | int i; |
d0fb370f | 419 | |
d7913476 | 420 | nedges = XCALLOC (short, n); |
d0fb370f RS |
421 | |
422 | for (i = 0; i < n; i++) | |
423 | { | |
80a69750 | 424 | short *sp = R_arg[i]; |
d0fb370f RS |
425 | if (sp) |
426 | { | |
427 | while (*sp >= 0) | |
428 | nedges[*sp++]++; | |
429 | } | |
430 | } | |
431 | ||
d7913476 AD |
432 | new_R = XCALLOC (short *, n); |
433 | temp_R = XCALLOC (short *, n); | |
d0fb370f RS |
434 | |
435 | for (i = 0; i < n; i++) | |
80a69750 AD |
436 | if (nedges[i] > 0) |
437 | { | |
438 | short *sp = XCALLOC (short, nedges[i] + 1); | |
439 | new_R[i] = sp; | |
440 | temp_R[i] = sp; | |
441 | sp[nedges[i]] = -1; | |
442 | } | |
d0fb370f | 443 | |
d7913476 | 444 | XFREE (nedges); |
d0fb370f RS |
445 | |
446 | for (i = 0; i < n; i++) | |
447 | { | |
80a69750 | 448 | short *sp = R_arg[i]; |
d0fb370f | 449 | if (sp) |
80a69750 AD |
450 | while (*sp >= 0) |
451 | *temp_R[*sp++]++ = i; | |
d0fb370f RS |
452 | } |
453 | ||
d7913476 | 454 | XFREE (temp_R); |
d0fb370f | 455 | |
9887c18a AD |
456 | /* Free the input: it is replaced with the result. */ |
457 | for (i = 0; i < n; i++) | |
458 | XFREE (R_arg[i]); | |
459 | XFREE (R_arg); | |
460 | ||
36281465 | 461 | return new_R; |
d0fb370f RS |
462 | } |
463 | ||
464 | ||
4a120d45 | 465 | static void |
720d742f | 466 | build_relations (void) |
d0fb370f | 467 | { |
9887c18a | 468 | short *edge = XCALLOC (short, ngotos + 1); |
c2713865 | 469 | short *states = XCALLOC (short, ritem_longest_rhs () + 1); |
720d742f | 470 | int i; |
720d742f | 471 | |
d7913476 | 472 | includes = XCALLOC (short *, ngotos); |
d0fb370f RS |
473 | |
474 | for (i = 0; i < ngotos; i++) | |
475 | { | |
9887c18a AD |
476 | int nedges = 0; |
477 | int state1 = from_state[i]; | |
478 | int symbol1 = state_table[to_state[i]].accessing_symbol; | |
479 | short *rulep; | |
d0fb370f | 480 | |
720d742f AD |
481 | for (rulep = derives[symbol1]; *rulep > 0; rulep++) |
482 | { | |
9887c18a | 483 | int done; |
80a69750 | 484 | int length = 1; |
9887c18a AD |
485 | int stateno = state1; |
486 | short *rp; | |
720d742f | 487 | states[0] = state1; |
d0fb370f | 488 | |
b2ed6e58 | 489 | for (rp = ritem + rule_table[*rulep].rhs; *rp > 0; rp++) |
720d742f | 490 | { |
80a69750 | 491 | shifts *sp = state_table[stateno].shift_table; |
9887c18a | 492 | int j; |
80a69750 | 493 | for (j = 0; j < sp->nshifts; j++) |
720d742f AD |
494 | { |
495 | stateno = sp->shifts[j]; | |
9887c18a | 496 | if (state_table[stateno].accessing_symbol == *rp) |
720d742f AD |
497 | break; |
498 | } | |
d0fb370f | 499 | |
720d742f AD |
500 | states[length++] = stateno; |
501 | } | |
502 | ||
de326cc0 | 503 | if (!state_table[stateno].consistent) |
720d742f AD |
504 | add_lookback_edge (stateno, *rulep, i); |
505 | ||
506 | length--; | |
507 | done = 0; | |
508 | while (!done) | |
509 | { | |
510 | done = 1; | |
511 | rp--; | |
512 | /* JF added rp>=ritem && I hope to god its right! */ | |
513 | if (rp >= ritem && ISVAR (*rp)) | |
514 | { | |
515 | stateno = states[--length]; | |
516 | edge[nedges++] = map_goto (stateno, *rp); | |
517 | if (nullable[*rp]) | |
518 | done = 0; | |
519 | } | |
520 | } | |
d0fb370f RS |
521 | } |
522 | ||
720d742f AD |
523 | if (nedges) |
524 | { | |
9887c18a | 525 | int j; |
80a69750 | 526 | includes[i] = XCALLOC (short, nedges + 1); |
720d742f | 527 | for (j = 0; j < nedges; j++) |
80a69750 AD |
528 | includes[i][j] = edge[j]; |
529 | includes[i][nedges] = -1; | |
720d742f | 530 | } |
d0fb370f RS |
531 | } |
532 | ||
d7913476 AD |
533 | XFREE (edge); |
534 | XFREE (states); | |
9887c18a AD |
535 | |
536 | includes = transpose (includes, ngotos); | |
d0fb370f RS |
537 | } |
538 | ||
539 | ||
720d742f | 540 | |
4a120d45 | 541 | static void |
720d742f | 542 | compute_FOLLOWS (void) |
d0fb370f | 543 | { |
720d742f | 544 | int i; |
d0fb370f | 545 | |
720d742f | 546 | digraph (includes); |
d0fb370f RS |
547 | |
548 | for (i = 0; i < ngotos; i++) | |
ddcd5fdf | 549 | XFREE (includes[i]); |
d0fb370f | 550 | |
d7913476 | 551 | XFREE (includes); |
d0fb370f RS |
552 | } |
553 | ||
554 | ||
4a120d45 | 555 | static void |
720d742f | 556 | compute_lookaheads (void) |
d0fb370f | 557 | { |
720d742f | 558 | int i; |
720d742f | 559 | shorts *sp; |
d0fb370f | 560 | |
f004bf6a | 561 | for (i = 0; i < state_table[nstates].lookaheads; i++) |
ddcd5fdf AD |
562 | for (sp = lookback[i]; sp; sp = sp->next) |
563 | { | |
9887c18a AD |
564 | int size = LA (i + 1) - LA (i); |
565 | int j; | |
566 | for (j = 0; j < size; ++j) | |
567 | LA (i)[j] |= F (sp->value)[j]; | |
ddcd5fdf | 568 | } |
d0fb370f | 569 | |
ddcd5fdf | 570 | /* Free LOOKBACK. */ |
f004bf6a | 571 | for (i = 0; i < state_table[nstates].lookaheads; i++) |
300f275f | 572 | LIST_FREE (shorts, lookback[i]); |
d0fb370f | 573 | |
d7913476 AD |
574 | XFREE (lookback); |
575 | XFREE (F); | |
720d742f | 576 | } |
d0fb370f | 577 | |
d0fb370f | 578 | |
720d742f AD |
579 | void |
580 | lalr (void) | |
581 | { | |
582 | tokensetsize = WORDSIZE (ntokens); | |
583 | ||
584 | set_state_table (); | |
720d742f AD |
585 | initialize_LA (); |
586 | set_goto_map (); | |
587 | initialize_F (); | |
588 | build_relations (); | |
589 | compute_FOLLOWS (); | |
590 | compute_lookaheads (); | |
d0fb370f | 591 | } |