]> git.saurik.com Git - bison.git/blob - src/lalr.c
* src/closure.c, src/derives.c, src/gram.h, src/lalr.c,
[bison.git] / src / lalr.c
1 /* Compute look-ahead criteria for bison,
2 Copyright 1984, 1986, 1989, 2000, 2001 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 /* Compute how to make the finite state machine deterministic; find
23 which rules need lookahead in each state, and which lookahead
24 tokens they accept. */
25
26 #include "system.h"
27 #include "bitset.h"
28 #include "bitsetv.h"
29 #include "reader.h"
30 #include "types.h"
31 #include "LR0.h"
32 #include "symtab.h"
33 #include "gram.h"
34 #include "complain.h"
35 #include "lalr.h"
36 #include "nullable.h"
37 #include "derives.h"
38 #include "getargs.h"
39
40 /* All the decorated states, indexed by the state number. */
41 state_t **states = NULL;
42
43 short *LAruleno = NULL;
44 bitsetv LA = NULL;
45 size_t nLA;
46
47 static int ngotos;
48 short *goto_map = NULL;
49 short *from_state = NULL;
50 short *to_state = NULL;
51
52 /* And for the famous F variable, which name is so descriptive that a
53 comment is hardly needed. <grin>. */
54 static bitsetv F = NULL;
55
56 static short **includes;
57 static shorts **lookback;
58
59
60 /*---------------------------------------------------------------.
61 | digraph & traverse. |
62 | |
63 | The following variables are used as common storage between the |
64 | two. |
65 `---------------------------------------------------------------*/
66
67 static short **R;
68 static short *INDEX;
69 static short *VERTICES;
70 static int top;
71 static int infinity;
72
73 static void
74 traverse (int i)
75 {
76 int j;
77 int height;
78
79 VERTICES[++top] = i;
80 INDEX[i] = height = top;
81
82 if (R[i])
83 for (j = 0; R[i][j] >= 0; ++j)
84 {
85 if (INDEX[R[i][j]] == 0)
86 traverse (R[i][j]);
87
88 if (INDEX[i] > INDEX[R[i][j]])
89 INDEX[i] = INDEX[R[i][j]];
90
91 bitset_or (F[i], F[i], F[R[i][j]]);
92 }
93
94 if (INDEX[i] == height)
95 for (;;)
96 {
97 j = VERTICES[top--];
98 INDEX[j] = infinity;
99
100 if (i == j)
101 break;
102
103 bitset_copy (F[j], F[i]);
104 }
105 }
106
107
108 static void
109 digraph (short **relation)
110 {
111 int i;
112
113 infinity = ngotos + 2;
114 INDEX = XCALLOC (short, ngotos + 1);
115 VERTICES = XCALLOC (short, ngotos + 1);
116 top = 0;
117
118 R = relation;
119
120 for (i = 0; i < ngotos; i++)
121 INDEX[i] = 0;
122
123 for (i = 0; i < ngotos; i++)
124 if (INDEX[i] == 0 && R[i])
125 traverse (i);
126
127 XFREE (INDEX);
128 XFREE (VERTICES);
129 }
130
131
132 static void
133 initialize_LA (void)
134 {
135 size_t i;
136 int j;
137 short *np;
138
139 /* Avoid having to special case 0. */
140 if (!nLA)
141 nLA = 1;
142
143 LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
144 LAruleno = XCALLOC (short, nLA);
145 lookback = XCALLOC (shorts *, nLA);
146
147 np = LAruleno;
148 for (i = 0; i < nstates; i++)
149 if (!states[i]->consistent)
150 for (j = 0; j < states[i]->reductions->nreds; j++)
151 *np++ = states[i]->reductions->rules[j];
152 }
153
154
155 static void
156 set_goto_map (void)
157 {
158 size_t state;
159 int i;
160 short *temp_map;
161
162 goto_map = XCALLOC (short, nvars + 1) - ntokens;
163 temp_map = XCALLOC (short, nvars + 1) - ntokens;
164
165 ngotos = 0;
166 for (state = 0; state < nstates; ++state)
167 {
168 shifts *sp = states[state]->shifts;
169 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
170 {
171 if (ngotos == MAXSHORT)
172 fatal (_("too many gotos (max %d)"), MAXSHORT);
173
174 ngotos++;
175 goto_map[SHIFT_SYMBOL (sp, i)]++;
176 }
177 }
178
179 {
180 int k = 0;
181 for (i = ntokens; i < nsyms; i++)
182 {
183 temp_map[i] = k;
184 k += goto_map[i];
185 }
186
187 for (i = ntokens; i < nsyms; i++)
188 goto_map[i] = temp_map[i];
189
190 goto_map[nsyms] = ngotos;
191 temp_map[nsyms] = ngotos;
192 }
193
194 from_state = XCALLOC (short, ngotos);
195 to_state = XCALLOC (short, ngotos);
196
197 for (state = 0; state < nstates; ++state)
198 {
199 shifts *sp = states[state]->shifts;
200 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
201 {
202 int k = temp_map[SHIFT_SYMBOL (sp, i)]++;
203 from_state[k] = state;
204 to_state[k] = sp->shifts[i];
205 }
206 }
207
208 XFREE (temp_map + ntokens);
209 }
210
211
212
213 /*----------------------------------------------------------.
214 | Map a state/symbol pair into its numeric representation. |
215 `----------------------------------------------------------*/
216
217 static int
218 map_goto (int state, int symbol)
219 {
220 int high;
221 int low;
222 int middle;
223 int s;
224
225 low = goto_map[symbol];
226 high = goto_map[symbol + 1] - 1;
227
228 while (low <= high)
229 {
230 middle = (low + high) / 2;
231 s = from_state[middle];
232 if (s == state)
233 return middle;
234 else if (s < state)
235 low = middle + 1;
236 else
237 high = middle - 1;
238 }
239
240 assert (0);
241 /* NOTREACHED */
242 return 0;
243 }
244
245
246 static void
247 initialize_F (void)
248 {
249 short **reads = XCALLOC (short *, ngotos);
250 short *edge = XCALLOC (short, ngotos + 1);
251 int nedges = 0;
252
253 int i;
254
255 F = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
256
257 for (i = 0; i < ngotos; i++)
258 {
259 int stateno = to_state[i];
260 shifts *sp = states[stateno]->shifts;
261
262 int j;
263 for (j = 0; j < sp->nshifts && SHIFT_IS_SHIFT (sp, j); j++)
264 bitset_set (F[i], SHIFT_SYMBOL (sp, j));
265
266 for (; j < sp->nshifts; j++)
267 {
268 int symbol = SHIFT_SYMBOL (sp, j);
269 if (nullable[symbol])
270 edge[nedges++] = map_goto (stateno, symbol);
271 }
272
273 if (nedges)
274 {
275 reads[i] = XCALLOC (short, nedges + 1);
276 shortcpy (reads[i], edge, nedges);
277 reads[i][nedges] = -1;
278 nedges = 0;
279 }
280 }
281
282 digraph (reads);
283
284 for (i = 0; i < ngotos; i++)
285 XFREE (reads[i]);
286
287 XFREE (reads);
288 XFREE (edge);
289 }
290
291
292 static void
293 add_lookback_edge (state_t *state, int ruleno, int gotono)
294 {
295 int i;
296 shorts *sp;
297
298 for (i = 0; i < state->nlookaheads; ++i)
299 if (LAruleno[state->lookaheadsp + i] == ruleno)
300 break;
301
302 assert (LAruleno[state->lookaheadsp + i] == ruleno);
303
304 sp = XCALLOC (shorts, 1);
305 sp->next = lookback[state->lookaheadsp + i];
306 sp->value = gotono;
307 lookback[state->lookaheadsp + i] = sp;
308 }
309
310
311 static void
312 matrix_print (FILE *out, short **matrix, int n)
313 {
314 int i, j;
315
316 for (i = 0; i < n; ++i)
317 {
318 fprintf (out, "%3d: ", i);
319 if (matrix[i])
320 for (j = 0; matrix[i][j] != -1; ++j)
321 fprintf (out, "%3d ", matrix[i][j]);
322 fputc ('\n', out);
323 }
324 fputc ('\n', out);
325 }
326
327 /*-------------------------------------------------------------------.
328 | Return the transpose of R_ARG, of size N. Destroy R_ARG, as it is |
329 | replaced with the result. |
330 | |
331 | R_ARG[I] is NULL or a -1 terminated list of numbers. |
332 | |
333 | RESULT[NUM] is NULL or the -1 terminated list of the I such as NUM |
334 | is in R_ARG[I]. |
335 `-------------------------------------------------------------------*/
336
337 static short **
338 transpose (short **R_arg, int n)
339 {
340 /* The result. */
341 short **new_R = XCALLOC (short *, n);
342 /* END_R[I] -- next entry of NEW_R[I]. */
343 short **end_R = XCALLOC (short *, n);
344 /* NEDGES[I] -- total size of NEW_R[I]. */
345 short *nedges = XCALLOC (short, n);
346 int i, j;
347
348 if (trace_flag)
349 {
350 fputs ("transpose: input\n", stderr);
351 matrix_print (stderr, R_arg, n);
352 }
353
354 /* Count. */
355 for (i = 0; i < n; i++)
356 if (R_arg[i])
357 for (j = 0; R_arg[i][j] >= 0; ++j)
358 ++nedges[R_arg[i][j]];
359
360 /* Allocate. */
361 for (i = 0; i < n; i++)
362 if (nedges[i] > 0)
363 {
364 short *sp = XCALLOC (short, nedges[i] + 1);
365 sp[nedges[i]] = -1;
366 new_R[i] = sp;
367 end_R[i] = sp;
368 }
369
370 /* Store. */
371 for (i = 0; i < n; i++)
372 if (R_arg[i])
373 for (j = 0; R_arg[i][j] >= 0; ++j)
374 {
375 *end_R[R_arg[i][j]] = i;
376 ++end_R[R_arg[i][j]];
377 }
378
379 free (nedges);
380 free (end_R);
381
382 /* Free the input: it is replaced with the result. */
383 for (i = 0; i < n; i++)
384 XFREE (R_arg[i]);
385 free (R_arg);
386
387 if (trace_flag)
388 {
389 fputs ("transpose: output\n", stderr);
390 matrix_print (stderr, new_R, n);
391 }
392
393 return new_R;
394 }
395
396
397 static void
398 build_relations (void)
399 {
400 short *edge = XCALLOC (short, ngotos + 1);
401 short *states1 = XCALLOC (short, ritem_longest_rhs () + 1);
402 int i;
403
404 includes = XCALLOC (short *, ngotos);
405
406 for (i = 0; i < ngotos; i++)
407 {
408 int nedges = 0;
409 int symbol1 = states[to_state[i]]->accessing_symbol;
410 short *rulep;
411
412 for (rulep = derives[symbol1]; *rulep > 0; rulep++)
413 {
414 int done;
415 int length = 1;
416 short *rp;
417 state_t *state = states[from_state[i]];
418 states1[0] = state->number;
419
420 for (rp = rules[*rulep].rhs; *rp >= 0; rp++)
421 {
422 shifts *sp = state->shifts;
423 int j;
424 for (j = 0; j < sp->nshifts; j++)
425 {
426 state = states[sp->shifts[j]];
427 if (state->accessing_symbol == *rp)
428 break;
429 }
430
431 states1[length++] = state->number;
432 }
433
434 if (!state->consistent)
435 add_lookback_edge (state, *rulep, i);
436
437 length--;
438 done = 0;
439 while (!done)
440 {
441 done = 1;
442 rp--;
443 /* JF added rp>=ritem && I hope to god its right! */
444 if (rp >= ritem && ISVAR (*rp))
445 {
446 edge[nedges++] = map_goto (states1[--length], *rp);
447 if (nullable[*rp])
448 done = 0;
449 }
450 }
451 }
452
453 if (nedges)
454 {
455 int j;
456 includes[i] = XCALLOC (short, nedges + 1);
457 for (j = 0; j < nedges; j++)
458 includes[i][j] = edge[j];
459 includes[i][nedges] = -1;
460 }
461 }
462
463 XFREE (edge);
464 XFREE (states1);
465
466 includes = transpose (includes, ngotos);
467 }
468
469
470
471 static void
472 compute_FOLLOWS (void)
473 {
474 int i;
475
476 digraph (includes);
477
478 for (i = 0; i < ngotos; i++)
479 XFREE (includes[i]);
480
481 XFREE (includes);
482 }
483
484
485 static void
486 compute_lookaheads (void)
487 {
488 size_t i;
489 shorts *sp;
490
491 for (i = 0; i < nLA; i++)
492 for (sp = lookback[i]; sp; sp = sp->next)
493 bitset_or (LA[i], LA[i], F[sp->value]);
494
495 /* Free LOOKBACK. */
496 for (i = 0; i < nLA; i++)
497 LIST_FREE (shorts, lookback[i]);
498
499 XFREE (lookback);
500 bitsetv_free (F);
501 }
502
503
504 /*--------------------------------------.
505 | Initializing the lookaheads members. |
506 `--------------------------------------*/
507
508 static void
509 initialize_lookaheads (void)
510 {
511 size_t i;
512 nLA = 0;
513 for (i = 0; i < nstates; i++)
514 {
515 int k;
516 int nlookaheads = 0;
517 reductions *rp = states[i]->reductions;
518 shifts *sp = states[i]->shifts;
519
520 /* We need a lookahead either to distinguish different
521 reductions (i.e., there are two or more), or to distinguish a
522 reduction from a shift. Otherwise, it is straightforward,
523 and the state is `consistent'. */
524 if (rp->nreds > 1
525 || (rp->nreds == 1 && sp->nshifts && SHIFT_IS_SHIFT (sp, 0)))
526 nlookaheads += rp->nreds;
527 else
528 states[i]->consistent = 1;
529
530 for (k = 0; k < sp->nshifts; k++)
531 if (SHIFT_IS_ERROR (sp, k))
532 {
533 states[i]->consistent = 0;
534 break;
535 }
536
537 states[i]->nlookaheads = nlookaheads;
538 states[i]->lookaheadsp = nLA;
539 nLA += nlookaheads;
540 }
541 }
542
543
544 /*---------------------------------------.
545 | Output the lookaheads for each state. |
546 `---------------------------------------*/
547
548 static void
549 lookaheads_print (FILE *out)
550 {
551 size_t i;
552 int j, k;
553 fprintf (out, "Lookaheads: BEGIN\n");
554 for (i = 0; i < nstates; ++i)
555 {
556 fprintf (out, "State %d: %d lookaheads\n",
557 i, states[i]->nlookaheads);
558
559 for (j = 0; j < states[i]->nlookaheads; ++j)
560 for (k = 0; k < ntokens; ++k)
561 if (bitset_test (LA[states[i]->lookaheadsp + j], j))
562 fprintf (out, " on %d (%s) -> rule %d\n",
563 k, symbols[k]->tag,
564 -LAruleno[states[i]->lookaheadsp + j] - 1);
565 }
566 fprintf (out, "Lookaheads: END\n");
567 }
568
569 void
570 lalr (void)
571 {
572 initialize_lookaheads ();
573 initialize_LA ();
574 set_goto_map ();
575 initialize_F ();
576 build_relations ();
577 compute_FOLLOWS ();
578 compute_lookaheads ();
579
580 if (trace_flag)
581 lookaheads_print (stderr);
582 }