]>
Commit | Line | Data |
---|---|---|
1 | /* Compute look-ahead criteria for bison, | |
2 | Copyright 1984, 1986, 1989, 2000, 2001 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of Bison, the GNU Compiler Compiler. | |
5 | ||
6 | Bison is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
10 | ||
11 | Bison is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with Bison; see the file COPYING. If not, write to | |
18 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | ||
22 | /* Compute how to make the finite state machine deterministic; find | |
23 | which rules need lookahead in each state, and which lookahead | |
24 | tokens they accept. */ | |
25 | ||
26 | #include "system.h" | |
27 | #include "types.h" | |
28 | #include "LR0.h" | |
29 | #include "gram.h" | |
30 | #include "complain.h" | |
31 | #include "lalr.h" | |
32 | #include "nullable.h" | |
33 | #include "derives.h" | |
34 | #include "getargs.h" | |
35 | ||
/* All the decorated states, indexed by the state number.  */
state_t **state_table = NULL;

/* Stride, in `unsigned' words, between consecutive rows of LA and F.
   Set by lalr () from WORDSIZE (ntokens).  */
int tokensetsize;
/* LAruleno[K] -- the rule number recorded for lookahead slot K.
   Filled by initialize_LA ().  */
short *LAruleno;
/* nLA rows of tokensetsize words each, allocated by initialize_LA ()
   and filled by compute_lookaheads ().  */
unsigned *LA;
/* Number of lookahead slots, counted by initialize_lookaheads ().  */
size_t nLA;

/* Number of gotos, counted by set_goto_map ().  */
static int ngotos;
/* goto_map[SYMBOL] -- index in FROM_STATE/TO_STATE of the first goto
   on SYMBOL.  The pointer is offset by -ntokens, so it is indexed
   with symbol numbers from ntokens up to nsyms inclusive.  */
short *goto_map;
/* from_state[G], to_state[G] -- source and target state numbers of
   goto number G.  */
short *from_state;
short *to_state;

/* One row of tokensetsize words per goto, allocated by
   initialize_F () and released by compute_lookaheads ().  The macro
   below, whose name is just as terse, yields the row of a given
   goto.  */
static unsigned *F = NULL;
#define F(Rule) (F + (Rule) * tokensetsize)

/* includes[G] -- NULL, or a -1 terminated list of goto numbers; built
   (and transposed) by build_relations ().  */
static short **includes;
/* lookback[K] -- linked list of the goto numbers attached to
   lookahead slot K by add_lookback_edge ().  */
static shorts **lookback;
56 | ||
57 | ||
/*---------------------------------------------------------------.
| digraph & traverse.                                            |
|                                                                |
| The following variables are used as common storage between the |
| two.                                                           |
`---------------------------------------------------------------*/

/* The relation being traversed: R[I] is NULL or a list of vertices
   terminated by a negative number.  */
static short **R;
/* INDEX[I] is 0 while vertex I is unvisited, its stack height while
   it is on the stack, and INFINITY once it has been popped.  */
static short *INDEX;
/* Stack of the vertices being visited; slot 0 is never used.  */
static short *VERTICES;
/* Index of the top of VERTICES.  */
static int top;
/* Marker stored in INDEX for finished vertices (ngotos + 2).  */
static int infinity;
70 | ||
71 | static void | |
72 | traverse (int i) | |
73 | { | |
74 | int j; | |
75 | size_t k; | |
76 | int height; | |
77 | size_t size = F (i + 1) - F(i); | |
78 | ||
79 | VERTICES[++top] = i; | |
80 | INDEX[i] = height = top; | |
81 | ||
82 | if (R[i]) | |
83 | for (j = 0; R[i][j] >= 0; ++j) | |
84 | { | |
85 | if (INDEX[R[i][j]] == 0) | |
86 | traverse (R[i][j]); | |
87 | ||
88 | if (INDEX[i] > INDEX[R[i][j]]) | |
89 | INDEX[i] = INDEX[R[i][j]]; | |
90 | ||
91 | for (k = 0; k < size; ++k) | |
92 | F (i)[k] |= F (R[i][j])[k]; | |
93 | } | |
94 | ||
95 | if (INDEX[i] == height) | |
96 | for (;;) | |
97 | { | |
98 | j = VERTICES[top--]; | |
99 | INDEX[j] = infinity; | |
100 | ||
101 | if (i == j) | |
102 | break; | |
103 | ||
104 | for (k = 0; k < size; ++k) | |
105 | F (j)[k] = F (i)[k]; | |
106 | } | |
107 | } | |
108 | ||
109 | ||
110 | static void | |
111 | digraph (short **relation) | |
112 | { | |
113 | int i; | |
114 | ||
115 | infinity = ngotos + 2; | |
116 | INDEX = XCALLOC (short, ngotos + 1); | |
117 | VERTICES = XCALLOC (short, ngotos + 1); | |
118 | top = 0; | |
119 | ||
120 | R = relation; | |
121 | ||
122 | for (i = 0; i < ngotos; i++) | |
123 | INDEX[i] = 0; | |
124 | ||
125 | for (i = 0; i < ngotos; i++) | |
126 | if (INDEX[i] == 0 && R[i]) | |
127 | traverse (i); | |
128 | ||
129 | XFREE (INDEX); | |
130 | XFREE (VERTICES); | |
131 | } | |
132 | ||
133 | ||
134 | static void | |
135 | initialize_LA (void) | |
136 | { | |
137 | int i; | |
138 | int j; | |
139 | short *np; | |
140 | ||
141 | /* Avoid having to special case 0. */ | |
142 | if (!nLA) | |
143 | nLA = 1; | |
144 | ||
145 | LA = XCALLOC (unsigned, nLA * tokensetsize); | |
146 | LAruleno = XCALLOC (short, nLA); | |
147 | lookback = XCALLOC (shorts *, nLA); | |
148 | ||
149 | np = LAruleno; | |
150 | for (i = 0; i < nstates; i++) | |
151 | if (!state_table[i]->consistent) | |
152 | for (j = 0; j < state_table[i]->reductions->nreds; j++) | |
153 | *np++ = state_table[i]->reductions->rules[j]; | |
154 | } | |
155 | ||
156 | ||
157 | static void | |
158 | set_goto_map (void) | |
159 | { | |
160 | int state, i; | |
161 | short *temp_map; | |
162 | ||
163 | goto_map = XCALLOC (short, nvars + 1) - ntokens; | |
164 | temp_map = XCALLOC (short, nvars + 1) - ntokens; | |
165 | ||
166 | ngotos = 0; | |
167 | for (state = 0; state < nstates; ++state) | |
168 | { | |
169 | shifts *sp = state_table[state]->shifts; | |
170 | for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i) | |
171 | { | |
172 | if (ngotos == MAXSHORT) | |
173 | fatal (_("too many gotos (max %d)"), MAXSHORT); | |
174 | ||
175 | ngotos++; | |
176 | goto_map[SHIFT_SYMBOL (sp, i)]++; | |
177 | } | |
178 | } | |
179 | ||
180 | { | |
181 | int k = 0; | |
182 | for (i = ntokens; i < nsyms; i++) | |
183 | { | |
184 | temp_map[i] = k; | |
185 | k += goto_map[i]; | |
186 | } | |
187 | ||
188 | for (i = ntokens; i < nsyms; i++) | |
189 | goto_map[i] = temp_map[i]; | |
190 | ||
191 | goto_map[nsyms] = ngotos; | |
192 | temp_map[nsyms] = ngotos; | |
193 | } | |
194 | ||
195 | from_state = XCALLOC (short, ngotos); | |
196 | to_state = XCALLOC (short, ngotos); | |
197 | ||
198 | for (state = 0; state < nstates; ++state) | |
199 | { | |
200 | shifts *sp = state_table[state]->shifts; | |
201 | for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i) | |
202 | { | |
203 | int k = temp_map[SHIFT_SYMBOL (sp, i)]++; | |
204 | from_state[k] = state; | |
205 | to_state[k] = sp->shifts[i]; | |
206 | } | |
207 | } | |
208 | ||
209 | XFREE (temp_map + ntokens); | |
210 | } | |
211 | ||
212 | ||
213 | ||
214 | /*----------------------------------------------------------. | |
215 | | Map a state/symbol pair into its numeric representation. | | |
216 | `----------------------------------------------------------*/ | |
217 | ||
218 | static int | |
219 | map_goto (int state, int symbol) | |
220 | { | |
221 | int high; | |
222 | int low; | |
223 | int middle; | |
224 | int s; | |
225 | ||
226 | low = goto_map[symbol]; | |
227 | high = goto_map[symbol + 1] - 1; | |
228 | ||
229 | while (low <= high) | |
230 | { | |
231 | middle = (low + high) / 2; | |
232 | s = from_state[middle]; | |
233 | if (s == state) | |
234 | return middle; | |
235 | else if (s < state) | |
236 | low = middle + 1; | |
237 | else | |
238 | high = middle - 1; | |
239 | } | |
240 | ||
241 | assert (0); | |
242 | /* NOTREACHED */ | |
243 | return 0; | |
244 | } | |
245 | ||
246 | ||
247 | static void | |
248 | initialize_F (void) | |
249 | { | |
250 | short **reads = XCALLOC (short *, ngotos); | |
251 | short *edge = XCALLOC (short, ngotos + 1); | |
252 | int nedges = 0; | |
253 | ||
254 | int i; | |
255 | ||
256 | F = XCALLOC (unsigned, ngotos * tokensetsize); | |
257 | ||
258 | for (i = 0; i < ngotos; i++) | |
259 | { | |
260 | int stateno = to_state[i]; | |
261 | shifts *sp = state_table[stateno]->shifts; | |
262 | ||
263 | int j; | |
264 | for (j = 0; j < sp->nshifts && SHIFT_IS_SHIFT (sp, j); j++) | |
265 | SETBIT (F (i), SHIFT_SYMBOL (sp, j)); | |
266 | ||
267 | for (; j < sp->nshifts; j++) | |
268 | { | |
269 | int symbol = SHIFT_SYMBOL (sp, j); | |
270 | if (nullable[symbol]) | |
271 | edge[nedges++] = map_goto (stateno, symbol); | |
272 | } | |
273 | ||
274 | if (nedges) | |
275 | { | |
276 | reads[i] = XCALLOC (short, nedges + 1); | |
277 | shortcpy (reads[i], edge, nedges); | |
278 | reads[i][nedges] = -1; | |
279 | nedges = 0; | |
280 | } | |
281 | } | |
282 | ||
283 | digraph (reads); | |
284 | ||
285 | for (i = 0; i < ngotos; i++) | |
286 | XFREE (reads[i]); | |
287 | ||
288 | XFREE (reads); | |
289 | XFREE (edge); | |
290 | } | |
291 | ||
292 | ||
293 | static void | |
294 | add_lookback_edge (state_t *state, int ruleno, int gotono) | |
295 | { | |
296 | int i; | |
297 | shorts *sp; | |
298 | ||
299 | for (i = 0; i < state->nlookaheads; ++i) | |
300 | if (LAruleno[state->lookaheadsp + i] == ruleno) | |
301 | break; | |
302 | ||
303 | assert (LAruleno[state->lookaheadsp + i] == ruleno); | |
304 | ||
305 | sp = XCALLOC (shorts, 1); | |
306 | sp->next = lookback[state->lookaheadsp + i]; | |
307 | sp->value = gotono; | |
308 | lookback[state->lookaheadsp + i] = sp; | |
309 | } | |
310 | ||
311 | ||
/* Print on OUT the N rows of MATRIX, one per line, each prefixed with
   its index; a NULL row is printed as empty, otherwise the entries
   are printed up to the -1 terminator.  An empty line ends the
   dump.  */

static void
matrix_print (FILE *out, short **matrix, int n)
{
  int row;

  for (row = 0; row < n; ++row)
    {
      const short *entry = matrix[row];

      fprintf (out, "%3d: ", row);
      if (entry)
        {
          while (*entry != -1)
            {
              fprintf (out, "%3d ", *entry);
              ++entry;
            }
        }
      fputc ('\n', out);
    }
  fputc ('\n', out);
}
327 | ||
328 | /*-------------------------------------------------------------------. | |
329 | | Return the transpose of R_ARG, of size N. Destroy R_ARG, as it is | | |
330 | | replaced with the result. | | |
331 | | | | |
332 | | R_ARG[I] is NULL or a -1 terminated list of numbers. | | |
333 | | | | |
334 | | RESULT[NUM] is NULL or the -1 terminated list of the I such as NUM | | |
335 | | is in R_ARG[I]. | | |
336 | `-------------------------------------------------------------------*/ | |
337 | ||
338 | static short ** | |
339 | transpose (short **R_arg, int n) | |
340 | { | |
341 | /* The result. */ | |
342 | short **new_R = XCALLOC (short *, n); | |
343 | /* END_R[I] -- next entry of NEW_R[I]. */ | |
344 | short **end_R = XCALLOC (short *, n); | |
345 | /* NEDGES[I] -- total size of NEW_R[I]. */ | |
346 | short *nedges = XCALLOC (short, n); | |
347 | int i, j; | |
348 | ||
349 | if (trace_flag) | |
350 | { | |
351 | fputs ("transpose: input\n", stderr); | |
352 | matrix_print (stderr, R_arg, n); | |
353 | } | |
354 | ||
355 | /* Count. */ | |
356 | for (i = 0; i < n; i++) | |
357 | if (R_arg[i]) | |
358 | for (j = 0; R_arg[i][j] >= 0; ++j) | |
359 | ++nedges[R_arg[i][j]]; | |
360 | ||
361 | /* Allocate. */ | |
362 | for (i = 0; i < n; i++) | |
363 | if (nedges[i] > 0) | |
364 | { | |
365 | short *sp = XCALLOC (short, nedges[i] + 1); | |
366 | sp[nedges[i]] = -1; | |
367 | new_R[i] = sp; | |
368 | end_R[i] = sp; | |
369 | } | |
370 | ||
371 | /* Store. */ | |
372 | for (i = 0; i < n; i++) | |
373 | if (R_arg[i]) | |
374 | for (j = 0; R_arg[i][j] >= 0; ++j) | |
375 | { | |
376 | *end_R[R_arg[i][j]] = i; | |
377 | ++end_R[R_arg[i][j]]; | |
378 | } | |
379 | ||
380 | free (nedges); | |
381 | free (end_R); | |
382 | ||
383 | /* Free the input: it is replaced with the result. */ | |
384 | for (i = 0; i < n; i++) | |
385 | XFREE (R_arg[i]); | |
386 | free (R_arg); | |
387 | ||
388 | if (trace_flag) | |
389 | { | |
390 | fputs ("transpose: output\n", stderr); | |
391 | matrix_print (stderr, new_R, n); | |
392 | } | |
393 | ||
394 | return new_R; | |
395 | } | |
396 | ||
397 | ||
398 | static void | |
399 | build_relations (void) | |
400 | { | |
401 | short *edge = XCALLOC (short, ngotos + 1); | |
402 | short *states = XCALLOC (short, ritem_longest_rhs () + 1); | |
403 | int i; | |
404 | ||
405 | includes = XCALLOC (short *, ngotos); | |
406 | ||
407 | for (i = 0; i < ngotos; i++) | |
408 | { | |
409 | int nedges = 0; | |
410 | int symbol1 = state_table[to_state[i]]->accessing_symbol; | |
411 | short *rulep; | |
412 | ||
413 | for (rulep = derives[symbol1]; *rulep > 0; rulep++) | |
414 | { | |
415 | int done; | |
416 | int length = 1; | |
417 | short *rp; | |
418 | state_t *state = state_table[from_state[i]]; | |
419 | states[0] = state->number; | |
420 | ||
421 | for (rp = ritem + rule_table[*rulep].rhs; *rp > 0; rp++) | |
422 | { | |
423 | shifts *sp = state->shifts; | |
424 | int j; | |
425 | for (j = 0; j < sp->nshifts; j++) | |
426 | { | |
427 | state = state_table[sp->shifts[j]]; | |
428 | if (state->accessing_symbol == *rp) | |
429 | break; | |
430 | } | |
431 | ||
432 | states[length++] = state->number; | |
433 | } | |
434 | ||
435 | if (!state->consistent) | |
436 | add_lookback_edge (state, *rulep, i); | |
437 | ||
438 | length--; | |
439 | done = 0; | |
440 | while (!done) | |
441 | { | |
442 | done = 1; | |
443 | rp--; | |
444 | /* JF added rp>=ritem && I hope to god its right! */ | |
445 | if (rp >= ritem && ISVAR (*rp)) | |
446 | { | |
447 | edge[nedges++] = map_goto (states[--length], *rp); | |
448 | if (nullable[*rp]) | |
449 | done = 0; | |
450 | } | |
451 | } | |
452 | } | |
453 | ||
454 | if (nedges) | |
455 | { | |
456 | int j; | |
457 | includes[i] = XCALLOC (short, nedges + 1); | |
458 | for (j = 0; j < nedges; j++) | |
459 | includes[i][j] = edge[j]; | |
460 | includes[i][nedges] = -1; | |
461 | } | |
462 | } | |
463 | ||
464 | XFREE (edge); | |
465 | XFREE (states); | |
466 | ||
467 | includes = transpose (includes, ngotos); | |
468 | } | |
469 | ||
470 | ||
471 | ||
472 | static void | |
473 | compute_FOLLOWS (void) | |
474 | { | |
475 | int i; | |
476 | ||
477 | digraph (includes); | |
478 | ||
479 | for (i = 0; i < ngotos; i++) | |
480 | XFREE (includes[i]); | |
481 | ||
482 | XFREE (includes); | |
483 | } | |
484 | ||
485 | ||
486 | static void | |
487 | compute_lookaheads (void) | |
488 | { | |
489 | size_t i; | |
490 | shorts *sp; | |
491 | ||
492 | for (i = 0; i < nLA; i++) | |
493 | for (sp = lookback[i]; sp; sp = sp->next) | |
494 | { | |
495 | int size = LA (i + 1) - LA (i); | |
496 | int j; | |
497 | for (j = 0; j < size; ++j) | |
498 | LA (i)[j] |= F (sp->value)[j]; | |
499 | } | |
500 | ||
501 | /* Free LOOKBACK. */ | |
502 | for (i = 0; i < nLA; i++) | |
503 | LIST_FREE (shorts, lookback[i]); | |
504 | ||
505 | XFREE (lookback); | |
506 | XFREE (F); | |
507 | } | |
508 | ||
509 | ||
510 | /*--------------------------------------. | |
511 | | Initializing the lookaheads members. | | |
512 | `--------------------------------------*/ | |
513 | ||
514 | static void | |
515 | initialize_lookaheads (void) | |
516 | { | |
517 | int i; | |
518 | nLA = 0; | |
519 | for (i = 0; i < nstates; i++) | |
520 | { | |
521 | int k; | |
522 | int nlookaheads = 0; | |
523 | reductions *rp = state_table[i]->reductions; | |
524 | shifts *sp = state_table[i]->shifts; | |
525 | ||
526 | /* We need a lookahead either to distinguish different | |
527 | reductions (i.e., there are two or more), or to distinguish a | |
528 | reduction from a shift. Otherwise, it is straightforward, | |
529 | and the state is `consistent'. */ | |
530 | if (rp->nreds > 1 | |
531 | || (rp->nreds == 1 && sp->nshifts && SHIFT_IS_SHIFT (sp, 0))) | |
532 | nlookaheads += rp->nreds; | |
533 | else | |
534 | state_table[i]->consistent = 1; | |
535 | ||
536 | for (k = 0; k < sp->nshifts; k++) | |
537 | if (SHIFT_IS_ERROR (sp, k)) | |
538 | { | |
539 | state_table[i]->consistent = 0; | |
540 | break; | |
541 | } | |
542 | ||
543 | state_table[i]->nlookaheads = nlookaheads; | |
544 | state_table[i]->lookaheadsp = nLA; | |
545 | nLA += nlookaheads; | |
546 | } | |
547 | } | |
548 | ||
/* Entry point: compute the lookaheads.  TOKENSETSIZE is set from the
   number of tokens, then the passes below run in order, each one
   relying on the tables set up by the previous ones.  */

void
lalr (void)
{
  tokensetsize = WORDSIZE (ntokens);

  /* Flag the consistent states and count the lookahead slots.  */
  initialize_lookaheads ();
  /* Allocate LA, LARULENO and LOOKBACK; fill LARULENO.  */
  initialize_LA ();
  /* Number the gotos: GOTO_MAP, FROM_STATE and TO_STATE.  */
  set_goto_map ();
  /* Allocate F, seed it, and propagate it along a first relation.  */
  initialize_F ();
  /* Build INCLUDES and LOOKBACK.  */
  build_relations ();
  /* Propagate F along INCLUDES.  */
  compute_FOLLOWS ();
  /* Combine the F rows into LA following LOOKBACK; free F.  */
  compute_lookaheads ();
}