]> git.saurik.com Git - bison.git/blob - src/lalr.c
* src/state.h (state_number_t, STATE_NUMBER_MAX): New.
[bison.git] / src / lalr.c
1 /* Compute look-ahead criteria for bison,
2 Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002
3 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22
23 /* Compute how to make the finite state machine deterministic; find
24 which rules need lookahead in each state, and which lookahead
25 tokens they accept. */
26
27 #include "system.h"
28 #include "bitset.h"
29 #include "bitsetv.h"
30 #include "quotearg.h"
31 #include "symtab.h"
32 #include "gram.h"
33 #include "reader.h"
34 #include "types.h"
35 #include "LR0.h"
36 #include "complain.h"
37 #include "lalr.h"
38 #include "nullable.h"
39 #include "derives.h"
40 #include "getargs.h"
41
42 /* All the decorated states, indexed by the state number. */
43 state_t **states = NULL;
44
45 rule_t **LArule = NULL;
46 bitsetv LA = NULL;
47 size_t nLA;
48
49 static int ngotos;
50 short *goto_map = NULL;
51 state_number_t *from_state = NULL;
52 state_number_t *to_state = NULL;
53
54 /* And for the famous F variable, which name is so descriptive that a
55 comment is hardly needed. <grin>. */
56 static bitsetv F = NULL;
57
58 static short **includes;
59 static shorts **lookback;
60
61
62 /*---------------------------------------------------------------.
63 | digraph & traverse. |
64 | |
65 | The following variables are used as common storage between the |
66 | two. |
67 `---------------------------------------------------------------*/
68
69 static short **R;
70 static short *INDEX;
71 static short *VERTICES;
72 static int top;
73 static int infinity;
74
75 static void
76 traverse (int i)
77 {
78 int j;
79 int height;
80
81 VERTICES[++top] = i;
82 INDEX[i] = height = top;
83
84 if (R[i])
85 for (j = 0; R[i][j] >= 0; ++j)
86 {
87 if (INDEX[R[i][j]] == 0)
88 traverse (R[i][j]);
89
90 if (INDEX[i] > INDEX[R[i][j]])
91 INDEX[i] = INDEX[R[i][j]];
92
93 bitset_or (F[i], F[i], F[R[i][j]]);
94 }
95
96 if (INDEX[i] == height)
97 for (;;)
98 {
99 j = VERTICES[top--];
100 INDEX[j] = infinity;
101
102 if (i == j)
103 break;
104
105 bitset_copy (F[j], F[i]);
106 }
107 }
108
109
110 static void
111 digraph (short **relation)
112 {
113 int i;
114
115 infinity = ngotos + 2;
116 INDEX = XCALLOC (short, ngotos + 1);
117 VERTICES = XCALLOC (short, ngotos + 1);
118 top = 0;
119
120 R = relation;
121
122 for (i = 0; i < ngotos; i++)
123 INDEX[i] = 0;
124
125 for (i = 0; i < ngotos; i++)
126 if (INDEX[i] == 0 && R[i])
127 traverse (i);
128
129 XFREE (INDEX);
130 XFREE (VERTICES);
131 }
132
133
134 static void
135 initialize_LA (void)
136 {
137 state_number_t i;
138 int j;
139 rule_t **np;
140
141 /* Avoid having to special case 0. */
142 if (!nLA)
143 nLA = 1;
144
145 LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
146 LArule = XCALLOC (rule_t *, nLA);
147 lookback = XCALLOC (shorts *, nLA);
148
149 np = LArule;
150 for (i = 0; i < nstates; i++)
151 if (!states[i]->consistent)
152 for (j = 0; j < states[i]->reductions->nreds; j++)
153 *np++ = &rules[states[i]->reductions->rules[j]];
154 }
155
156
157 static void
158 set_goto_map (void)
159 {
160 state_number_t state;
161 short *temp_map;
162
163 goto_map = XCALLOC (short, nvars + 1) - ntokens;
164 temp_map = XCALLOC (short, nvars + 1) - ntokens;
165
166 ngotos = 0;
167 for (state = 0; state < nstates; ++state)
168 {
169 shifts *sp = states[state]->shifts;
170 int i;
171 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
172 {
173 if (ngotos == SHRT_MAX)
174 fatal (_("too many gotos (max %d)"), SHRT_MAX);
175
176 ngotos++;
177 goto_map[SHIFT_SYMBOL (sp, i)]++;
178 }
179 }
180
181 {
182 int k = 0;
183 int i;
184 for (i = ntokens; i < nsyms; i++)
185 {
186 temp_map[i] = k;
187 k += goto_map[i];
188 }
189
190 for (i = ntokens; i < nsyms; i++)
191 goto_map[i] = temp_map[i];
192
193 goto_map[nsyms] = ngotos;
194 temp_map[nsyms] = ngotos;
195 }
196
197 from_state = XCALLOC (state_number_t, ngotos);
198 to_state = XCALLOC (state_number_t, ngotos);
199
200 for (state = 0; state < nstates; ++state)
201 {
202 shifts *sp = states[state]->shifts;
203 int i;
204 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
205 {
206 int k = temp_map[SHIFT_SYMBOL (sp, i)]++;
207 from_state[k] = state;
208 to_state[k] = sp->shifts[i];
209 }
210 }
211
212 XFREE (temp_map + ntokens);
213 }
214
215
216
217 /*----------------------------------------------------------.
218 | Map a state/symbol pair into its numeric representation. |
219 `----------------------------------------------------------*/
220
221 static int
222 map_goto (state_number_t state, symbol_number_t symbol)
223 {
224 int high;
225 int low;
226 int middle;
227 state_number_t s;
228
229 low = goto_map[symbol];
230 high = goto_map[symbol + 1] - 1;
231
232 while (low <= high)
233 {
234 middle = (low + high) / 2;
235 s = from_state[middle];
236 if (s == state)
237 return middle;
238 else if (s < state)
239 low = middle + 1;
240 else
241 high = middle - 1;
242 }
243
244 assert (0);
245 /* NOTREACHED */
246 return 0;
247 }
248
249
250 static void
251 initialize_F (void)
252 {
253 short **reads = XCALLOC (short *, ngotos);
254 short *edge = XCALLOC (short, ngotos + 1);
255 int nedges = 0;
256
257 int i;
258
259 F = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
260
261 for (i = 0; i < ngotos; i++)
262 {
263 state_number_t stateno = to_state[i];
264 shifts *sp = states[stateno]->shifts;
265
266 int j;
267 for (j = 0; j < sp->nshifts && SHIFT_IS_SHIFT (sp, j); j++)
268 bitset_set (F[i], SHIFT_SYMBOL (sp, j));
269
270 for (; j < sp->nshifts; j++)
271 {
272 symbol_number_t symbol = SHIFT_SYMBOL (sp, j);
273 if (nullable[symbol])
274 edge[nedges++] = map_goto (stateno, symbol);
275 }
276
277 if (nedges)
278 {
279 reads[i] = XCALLOC (short, nedges + 1);
280 memcpy (reads[i], edge, nedges * sizeof (edge[0]));
281 reads[i][nedges] = -1;
282 nedges = 0;
283 }
284 }
285
286 digraph (reads);
287
288 for (i = 0; i < ngotos; i++)
289 XFREE (reads[i]);
290
291 XFREE (reads);
292 XFREE (edge);
293 }
294
295
296 static void
297 add_lookback_edge (state_t *state, int ruleno, int gotono)
298 {
299 int i;
300 shorts *sp;
301
302 for (i = 0; i < state->nlookaheads; ++i)
303 if (state->lookaheads_rule[i]->number == ruleno)
304 break;
305
306 assert (state->lookaheads_rule[i]->number == ruleno);
307
308 sp = XCALLOC (shorts, 1);
309 sp->next = lookback[(state->lookaheads - LA) + i];
310 sp->value = gotono;
311 lookback[(state->lookaheads - LA) + i] = sp;
312 }
313
314
315 static void
316 matrix_print (FILE *out, short **matrix, int n)
317 {
318 int i, j;
319
320 for (i = 0; i < n; ++i)
321 {
322 fprintf (out, "%3d: ", i);
323 if (matrix[i])
324 for (j = 0; matrix[i][j] != -1; ++j)
325 fprintf (out, "%3d ", matrix[i][j]);
326 fputc ('\n', out);
327 }
328 fputc ('\n', out);
329 }
330
331 /*-------------------------------------------------------------------.
332 | Return the transpose of R_ARG, of size N. Destroy R_ARG, as it is |
333 | replaced with the result. |
334 | |
335 | R_ARG[I] is NULL or a -1 terminated list of numbers. |
336 | |
337 | RESULT[NUM] is NULL or the -1 terminated list of the I such as NUM |
338 | is in R_ARG[I]. |
339 `-------------------------------------------------------------------*/
340
341 static short **
342 transpose (short **R_arg, int n)
343 {
344 /* The result. */
345 short **new_R = XCALLOC (short *, n);
346 /* END_R[I] -- next entry of NEW_R[I]. */
347 short **end_R = XCALLOC (short *, n);
348 /* NEDGES[I] -- total size of NEW_R[I]. */
349 short *nedges = XCALLOC (short, n);
350 int i, j;
351
352 if (trace_flag)
353 {
354 fputs ("transpose: input\n", stderr);
355 matrix_print (stderr, R_arg, n);
356 }
357
358 /* Count. */
359 for (i = 0; i < n; i++)
360 if (R_arg[i])
361 for (j = 0; R_arg[i][j] >= 0; ++j)
362 ++nedges[R_arg[i][j]];
363
364 /* Allocate. */
365 for (i = 0; i < n; i++)
366 if (nedges[i] > 0)
367 {
368 short *sp = XCALLOC (short, nedges[i] + 1);
369 sp[nedges[i]] = -1;
370 new_R[i] = sp;
371 end_R[i] = sp;
372 }
373
374 /* Store. */
375 for (i = 0; i < n; i++)
376 if (R_arg[i])
377 for (j = 0; R_arg[i][j] >= 0; ++j)
378 {
379 *end_R[R_arg[i][j]] = i;
380 ++end_R[R_arg[i][j]];
381 }
382
383 free (nedges);
384 free (end_R);
385
386 /* Free the input: it is replaced with the result. */
387 for (i = 0; i < n; i++)
388 XFREE (R_arg[i]);
389 free (R_arg);
390
391 if (trace_flag)
392 {
393 fputs ("transpose: output\n", stderr);
394 matrix_print (stderr, new_R, n);
395 }
396
397 return new_R;
398 }
399
400
401 static void
402 build_relations (void)
403 {
404 short *edge = XCALLOC (short, ngotos + 1);
405 state_number_t *states1 = XCALLOC (state_number_t, ritem_longest_rhs () + 1);
406 int i;
407
408 includes = XCALLOC (short *, ngotos);
409
410 for (i = 0; i < ngotos; i++)
411 {
412 int nedges = 0;
413 symbol_number_t symbol1 = states[to_state[i]]->accessing_symbol;
414 short *rulep;
415
416 for (rulep = derives[symbol1]; *rulep > 0; rulep++)
417 {
418 int done;
419 int length = 1;
420 item_number_t *rp;
421 state_t *state = states[from_state[i]];
422 states1[0] = state->number;
423
424 for (rp = rules[*rulep].rhs; *rp >= 0; rp++)
425 {
426 shifts *sp = state->shifts;
427 int j;
428 for (j = 0; j < sp->nshifts; j++)
429 {
430 state = states[sp->shifts[j]];
431 if (state->accessing_symbol
432 == item_number_as_symbol_number (*rp))
433 break;
434 }
435
436 states1[length++] = state->number;
437 }
438
439 if (!state->consistent)
440 add_lookback_edge (state, *rulep, i);
441
442 length--;
443 done = 0;
444 while (!done)
445 {
446 done = 1;
447 rp--;
448 /* JF added rp>=ritem && I hope to god its right! */
449 if (rp >= ritem && ISVAR (*rp))
450 {
451 /* Downcasting from item_number_t to symbol_number_t. */
452 edge[nedges++] = map_goto (states1[--length],
453 item_number_as_symbol_number (*rp));
454 if (nullable[*rp])
455 done = 0;
456 }
457 }
458 }
459
460 if (nedges)
461 {
462 int j;
463 includes[i] = XCALLOC (short, nedges + 1);
464 for (j = 0; j < nedges; j++)
465 includes[i][j] = edge[j];
466 includes[i][nedges] = -1;
467 }
468 }
469
470 XFREE (edge);
471 XFREE (states1);
472
473 includes = transpose (includes, ngotos);
474 }
475
476
477
478 static void
479 compute_FOLLOWS (void)
480 {
481 int i;
482
483 digraph (includes);
484
485 for (i = 0; i < ngotos; i++)
486 XFREE (includes[i]);
487
488 XFREE (includes);
489 }
490
491
492 static void
493 compute_lookaheads (void)
494 {
495 size_t i;
496 shorts *sp;
497
498 for (i = 0; i < nLA; i++)
499 for (sp = lookback[i]; sp; sp = sp->next)
500 bitset_or (LA[i], LA[i], F[sp->value]);
501
502 /* Free LOOKBACK. */
503 for (i = 0; i < nLA; i++)
504 LIST_FREE (shorts, lookback[i]);
505
506 XFREE (lookback);
507 bitsetv_free (F);
508 }
509
510
511 /*-------------------------------------------------------------.
512 | Count the number of lookaheads required for each state |
513 | (NLOOKAHEADS member). Compute the total number of LA, NLA. |
514 `-------------------------------------------------------------*/
515
516 static void
517 states_lookaheads_count (void)
518 {
519 state_number_t i;
520 nLA = 0;
521
522 /* Count */
523 for (i = 0; i < nstates; i++)
524 {
525 int k;
526 int nlookaheads = 0;
527 reductions *rp = states[i]->reductions;
528 shifts *sp = states[i]->shifts;
529
530 /* We need a lookahead either to distinguish different
531 reductions (i.e., there are two or more), or to distinguish a
532 reduction from a shift. Otherwise, it is straightforward,
533 and the state is `consistent'. */
534 if (rp->nreds > 1
535 || (rp->nreds == 1 && sp->nshifts && SHIFT_IS_SHIFT (sp, 0)))
536 nlookaheads += rp->nreds;
537 else
538 states[i]->consistent = 1;
539
540 for (k = 0; k < sp->nshifts; k++)
541 if (SHIFT_IS_ERROR (sp, k))
542 {
543 states[i]->consistent = 0;
544 break;
545 }
546
547 states[i]->nlookaheads = nlookaheads;
548 nLA += nlookaheads;
549 }
550 }
551
552
553 /*--------------------------------------.
554 | Initializing the lookaheads members. |
555 `--------------------------------------*/
556
557 static void
558 states_lookaheads_initialize (void)
559 {
560 state_number_t i;
561 bitsetv pLA = LA;
562 rule_t **pLArule = LArule;
563
564 /* Initialize the members LOOKAHEADS and LOOKAHEADS_RULE for each
565 state. */
566 for (i = 0; i < nstates; i++)
567 {
568 states[i]->lookaheads = pLA;
569 states[i]->lookaheads_rule = pLArule;
570 pLA += states[i]->nlookaheads;
571 pLArule += states[i]->nlookaheads;
572 }
573 }
574
575
576 /*---------------------------------------.
577 | Output the lookaheads for each state. |
578 `---------------------------------------*/
579
580 static void
581 lookaheads_print (FILE *out)
582 {
583 state_number_t i;
584 int j, k;
585 fprintf (out, "Lookaheads: BEGIN\n");
586 for (i = 0; i < nstates; ++i)
587 {
588 fprintf (out, "State %d: %d lookaheads\n",
589 i, states[i]->nlookaheads);
590
591 for (j = 0; j < states[i]->nlookaheads; ++j)
592 for (k = 0; k < ntokens; ++k)
593 if (bitset_test (states[i]->lookaheads[j], k))
594 fprintf (out, " on %d (%s) -> rule %d\n",
595 k, symbol_tag_get (symbols[k]),
596 states[i]->lookaheads_rule[j]->number - 1);
597 }
598 fprintf (out, "Lookaheads: END\n");
599 }
600
601 void
602 lalr (void)
603 {
604 states_lookaheads_count ();
605 initialize_LA ();
606 states_lookaheads_initialize ();
607 set_goto_map ();
608 initialize_F ();
609 build_relations ();
610 compute_FOLLOWS ();
611 compute_lookaheads ();
612
613 if (trace_flag)
614 lookaheads_print (stderr);
615 }