]> git.saurik.com Git - bison.git/blame - src/lalr.c
* src/gram.h, src/gram.c (rules_rhs_length): New.
[bison.git] / src / lalr.c
CommitLineData
d0fb370f 1/* Compute look-ahead criteria for bison,
3feec034 2 Copyright 1984, 1986, 1989, 2000, 2001 Free Software Foundation, Inc.
d0fb370f 3
340ef489 4 This file is part of Bison, the GNU Compiler Compiler.
d0fb370f 5
340ef489
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
d0fb370f 10
340ef489
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
d0fb370f 15
340ef489
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
d0fb370f
RS
20
21
720d742f
AD
22/* Compute how to make the finite state machine deterministic; find
23 which rules need lookahead in each state, and which lookahead
340ef489 24 tokens they accept. */
d0fb370f 25
d0fb370f 26#include "system.h"
602bbf31 27#include "bitset.h"
914feea9 28#include "bitsetv.h"
7c6b64d0 29#include "reader.h"
d0fb370f 30#include "types.h"
b2ca4022 31#include "LR0.h"
ad949da9 32#include "symtab.h"
d0fb370f 33#include "gram.h"
a0f6b076 34#include "complain.h"
720d742f 35#include "lalr.h"
3519ec76 36#include "nullable.h"
340ef489 37#include "derives.h"
f67d13aa 38#include "getargs.h"
d0fb370f 39
d200e455 40/* All the decorated states, indexed by the state number. */
29e88316 41state_t **states = NULL;
9703cc49 42
602bbf31 43short *LAruleno = NULL;
914feea9 44bitsetv LA = NULL;
d200e455 45size_t nLA;
9703cc49 46
aa2aab3c 47static int ngotos;
602bbf31
AD
48short *goto_map = NULL;
49short *from_state = NULL;
50short *to_state = NULL;
d0fb370f 51
fe961097 52/* And for the famous F variable, which name is so descriptive that a
ddcd5fdf 53 comment is hardly needed. <grin>. */
914feea9 54static bitsetv F = NULL;
ddcd5fdf 55
d0fb370f
RS
56static short **includes;
57static shorts **lookback;
aa2aab3c
AD
58
59
60/*---------------------------------------------------------------.
61| digraph & traverse. |
62| |
63| The following variables are used as common storage between the |
64| two. |
65`---------------------------------------------------------------*/
66
d0fb370f
RS
67static short **R;
68static short *INDEX;
69static short *VERTICES;
70static int top;
aa2aab3c 71static int infinity;
d0fb370f 72
720d742f
AD
73static void
74traverse (int i)
d0fb370f 75{
720d742f 76 int j;
720d742f 77 int height;
720d742f
AD
78
79 VERTICES[++top] = i;
80 INDEX[i] = height = top;
81
fe961097
AD
82 if (R[i])
83 for (j = 0; R[i][j] >= 0; ++j)
84 {
85 if (INDEX[R[i][j]] == 0)
86 traverse (R[i][j]);
720d742f 87
fe961097
AD
88 if (INDEX[i] > INDEX[R[i][j]])
89 INDEX[i] = INDEX[R[i][j]];
720d742f 90
0fb1ffb1 91 bitset_or (F[i], F[i], F[R[i][j]]);
fe961097 92 }
720d742f
AD
93
94 if (INDEX[i] == height)
fe961097
AD
95 for (;;)
96 {
97 j = VERTICES[top--];
98 INDEX[j] = infinity;
720d742f 99
fe961097
AD
100 if (i == j)
101 break;
720d742f 102
0fb1ffb1 103 bitset_copy (F[j], F[i]);
fe961097 104 }
d0fb370f
RS
105}
106
107
720d742f
AD
108static void
109digraph (short **relation)
110{
111 int i;
112
113 infinity = ngotos + 2;
d7913476
AD
114 INDEX = XCALLOC (short, ngotos + 1);
115 VERTICES = XCALLOC (short, ngotos + 1);
720d742f
AD
116 top = 0;
117
118 R = relation;
119
120 for (i = 0; i < ngotos; i++)
121 INDEX[i] = 0;
122
123 for (i = 0; i < ngotos; i++)
ddcd5fdf
AD
124 if (INDEX[i] == 0 && R[i])
125 traverse (i);
720d742f 126
d7913476
AD
127 XFREE (INDEX);
128 XFREE (VERTICES);
720d742f
AD
129}
130
bb527fc2 131
4a120d45 132static void
d2729d44 133initialize_LA (void)
d0fb370f 134{
602bbf31 135 size_t i;
720d742f 136 int j;
720d742f 137 short *np;
d0fb370f 138
d200e455 139 /* Avoid having to special case 0. */
a845a697
AD
140 if (!nLA)
141 nLA = 1;
d0fb370f 142
914feea9 143 LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
a845a697
AD
144 LAruleno = XCALLOC (short, nLA);
145 lookback = XCALLOC (shorts *, nLA);
d0fb370f
RS
146
147 np = LAruleno;
148 for (i = 0; i < nstates; i++)
29e88316
AD
149 if (!states[i]->consistent)
150 for (j = 0; j < states[i]->reductions->nreds; j++)
151 *np++ = states[i]->reductions->rules[j];
d0fb370f
RS
152}
153
154
4a120d45 155static void
d2729d44 156set_goto_map (void)
d0fb370f 157{
602bbf31
AD
158 size_t state;
159 int i;
720d742f 160 short *temp_map;
d0fb370f 161
d7913476
AD
162 goto_map = XCALLOC (short, nvars + 1) - ntokens;
163 temp_map = XCALLOC (short, nvars + 1) - ntokens;
d0fb370f
RS
164
165 ngotos = 0;
7215de24
AD
166 for (state = 0; state < nstates; ++state)
167 {
29e88316 168 shifts *sp = states[state]->shifts;
7215de24
AD
169 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
170 {
7215de24
AD
171 if (ngotos == MAXSHORT)
172 fatal (_("too many gotos (max %d)"), MAXSHORT);
d0fb370f 173
7215de24 174 ngotos++;
13ca549a 175 goto_map[SHIFT_SYMBOL (sp, i)]++;
7215de24
AD
176 }
177 }
d0fb370f 178
d0b0fefa
AD
179 {
180 int k = 0;
181 for (i = ntokens; i < nsyms; i++)
182 {
183 temp_map[i] = k;
184 k += goto_map[i];
185 }
d0fb370f 186
d0b0fefa
AD
187 for (i = ntokens; i < nsyms; i++)
188 goto_map[i] = temp_map[i];
d0fb370f 189
d0b0fefa
AD
190 goto_map[nsyms] = ngotos;
191 temp_map[nsyms] = ngotos;
192 }
d0fb370f 193
d7913476
AD
194 from_state = XCALLOC (short, ngotos);
195 to_state = XCALLOC (short, ngotos);
d0fb370f 196
7215de24 197 for (state = 0; state < nstates; ++state)
d0fb370f 198 {
29e88316 199 shifts *sp = states[state]->shifts;
aa2aab3c 200 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
d0fb370f 201 {
13ca549a 202 int k = temp_map[SHIFT_SYMBOL (sp, i)]++;
d0b0fefa 203 from_state[k] = state;
13ca549a 204 to_state[k] = sp->shifts[i];
d0fb370f
RS
205 }
206 }
207
d7913476 208 XFREE (temp_map + ntokens);
d0fb370f
RS
209}
210
211
212
43591cec
AD
213/*----------------------------------------------------------.
214| Map a state/symbol pair into its numeric representation. |
215`----------------------------------------------------------*/
d0fb370f 216
4a120d45 217static int
d2729d44 218map_goto (int state, int symbol)
d0fb370f 219{
720d742f
AD
220 int high;
221 int low;
222 int middle;
223 int s;
d0fb370f
RS
224
225 low = goto_map[symbol];
226 high = goto_map[symbol + 1] - 1;
227
228 while (low <= high)
229 {
230 middle = (low + high) / 2;
231 s = from_state[middle];
232 if (s == state)
36281465 233 return middle;
d0fb370f
RS
234 else if (s < state)
235 low = middle + 1;
236 else
237 high = middle - 1;
238 }
239
43591cec
AD
240 assert (0);
241 /* NOTREACHED */
d0fb370f
RS
242 return 0;
243}
244
245
4a120d45 246static void
d2729d44 247initialize_F (void)
d0fb370f 248{
4d4f699c
AD
249 short **reads = XCALLOC (short *, ngotos);
250 short *edge = XCALLOC (short, ngotos + 1);
251 int nedges = 0;
d0fb370f 252
4d4f699c 253 int i;
d0fb370f 254
914feea9 255 F = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
d0fb370f 256
d0fb370f
RS
257 for (i = 0; i < ngotos; i++)
258 {
80a69750 259 int stateno = to_state[i];
29e88316 260 shifts *sp = states[stateno]->shifts;
d0fb370f 261
d954473d
AD
262 int j;
263 for (j = 0; j < sp->nshifts && SHIFT_IS_SHIFT (sp, j); j++)
0fb1ffb1 264 bitset_set (F[i], SHIFT_SYMBOL (sp, j));
d0fb370f 265
d954473d
AD
266 for (; j < sp->nshifts; j++)
267 {
13ca549a 268 int symbol = SHIFT_SYMBOL (sp, j);
d954473d
AD
269 if (nullable[symbol])
270 edge[nedges++] = map_goto (stateno, symbol);
271 }
a0f6b076 272
d954473d
AD
273 if (nedges)
274 {
275 reads[i] = XCALLOC (short, nedges + 1);
276 shortcpy (reads[i], edge, nedges);
277 reads[i][nedges] = -1;
278 nedges = 0;
d0fb370f 279 }
d0fb370f
RS
280 }
281
720d742f 282 digraph (reads);
d0fb370f
RS
283
284 for (i = 0; i < ngotos; i++)
ddcd5fdf 285 XFREE (reads[i]);
d0fb370f 286
d7913476
AD
287 XFREE (reads);
288 XFREE (edge);
d0fb370f
RS
289}
290
291
4a120d45 292static void
dac3c910 293add_lookback_edge (state_t *state, int ruleno, int gotono)
d0fb370f 294{
720d742f 295 int i;
720d742f 296 shorts *sp;
d0fb370f 297
dac3c910
AD
298 for (i = 0; i < state->nlookaheads; ++i)
299 if (LAruleno[state->lookaheadsp + i] == ruleno)
065fbd27 300 break;
d0fb370f 301
dac3c910 302 assert (LAruleno[state->lookaheadsp + i] == ruleno);
d0fb370f 303
d7913476 304 sp = XCALLOC (shorts, 1);
dac3c910 305 sp->next = lookback[state->lookaheadsp + i];
d0fb370f 306 sp->value = gotono;
dac3c910 307 lookback[state->lookaheadsp + i] = sp;
d0fb370f
RS
308}
309
310
f67d13aa
AD
311static void
312matrix_print (FILE *out, short **matrix, int n)
313{
314 int i, j;
315
316 for (i = 0; i < n; ++i)
317 {
318 fprintf (out, "%3d: ", i);
319 if (matrix[i])
320 for (j = 0; matrix[i][j] != -1; ++j)
321 fprintf (out, "%3d ", matrix[i][j]);
322 fputc ('\n', out);
323 }
324 fputc ('\n', out);
325}
326
9887c18a
AD
327/*-------------------------------------------------------------------.
328| Return the transpose of R_ARG, of size N. Destroy R_ARG, as it is |
329| replaced with the result. |
f67d13aa
AD
330| |
331| R_ARG[I] is NULL or a -1 terminated list of numbers. |
332| |
333| RESULT[NUM] is NULL or the -1 terminated list of the I such as NUM |
334| is in R_ARG[I]. |
9887c18a
AD
335`-------------------------------------------------------------------*/
336
4a120d45 337static short **
d2729d44 338transpose (short **R_arg, int n)
d0fb370f 339{
f67d13aa
AD
340 /* The result. */
341 short **new_R = XCALLOC (short *, n);
342 /* END_R[I] -- next entry of NEW_R[I]. */
343 short **end_R = XCALLOC (short *, n);
344 /* NEDGES[I] -- total size of NEW_R[I]. */
345 short *nedges = XCALLOC (short, n);
346 int i, j;
347
348 if (trace_flag)
d0fb370f 349 {
f67d13aa
AD
350 fputs ("transpose: input\n", stderr);
351 matrix_print (stderr, R_arg, n);
d0fb370f
RS
352 }
353
f67d13aa
AD
354 /* Count. */
355 for (i = 0; i < n; i++)
356 if (R_arg[i])
357 for (j = 0; R_arg[i][j] >= 0; ++j)
358 ++nedges[R_arg[i][j]];
d0fb370f 359
f67d13aa 360 /* Allocate. */
d0fb370f 361 for (i = 0; i < n; i++)
80a69750
AD
362 if (nedges[i] > 0)
363 {
364 short *sp = XCALLOC (short, nedges[i] + 1);
80a69750 365 sp[nedges[i]] = -1;
f67d13aa
AD
366 new_R[i] = sp;
367 end_R[i] = sp;
80a69750 368 }
d0fb370f 369
f67d13aa 370 /* Store. */
d0fb370f 371 for (i = 0; i < n; i++)
f67d13aa
AD
372 if (R_arg[i])
373 for (j = 0; R_arg[i][j] >= 0; ++j)
374 {
375 *end_R[R_arg[i][j]] = i;
376 ++end_R[R_arg[i][j]];
377 }
d0fb370f 378
f67d13aa
AD
379 free (nedges);
380 free (end_R);
d0fb370f 381
9887c18a
AD
382 /* Free the input: it is replaced with the result. */
383 for (i = 0; i < n; i++)
384 XFREE (R_arg[i]);
f67d13aa
AD
385 free (R_arg);
386
387 if (trace_flag)
388 {
389 fputs ("transpose: output\n", stderr);
390 matrix_print (stderr, new_R, n);
391 }
9887c18a 392
36281465 393 return new_R;
d0fb370f
RS
394}
395
396
4a120d45 397static void
720d742f 398build_relations (void)
d0fb370f 399{
9887c18a 400 short *edge = XCALLOC (short, ngotos + 1);
b9f71f19 401 short *states1 = XCALLOC (short, ritem_longest_rhs () + 1);
720d742f 402 int i;
720d742f 403
d7913476 404 includes = XCALLOC (short *, ngotos);
d0fb370f
RS
405
406 for (i = 0; i < ngotos; i++)
407 {
9887c18a 408 int nedges = 0;
29e88316 409 int symbol1 = states[to_state[i]]->accessing_symbol;
9887c18a 410 short *rulep;
d0fb370f 411
720d742f
AD
412 for (rulep = derives[symbol1]; *rulep > 0; rulep++)
413 {
9887c18a 414 int done;
80a69750 415 int length = 1;
9887c18a 416 short *rp;
29e88316 417 state_t *state = states[from_state[i]];
b9f71f19 418 states1[0] = state->number;
d0fb370f 419
99013900 420 for (rp = rules[*rulep].rhs; *rp >= 0; rp++)
720d742f 421 {
dac3c910 422 shifts *sp = state->shifts;
9887c18a 423 int j;
80a69750 424 for (j = 0; j < sp->nshifts; j++)
720d742f 425 {
29e88316 426 state = states[sp->shifts[j]];
dac3c910 427 if (state->accessing_symbol == *rp)
720d742f
AD
428 break;
429 }
d0fb370f 430
b9f71f19 431 states1[length++] = state->number;
720d742f
AD
432 }
433
dac3c910
AD
434 if (!state->consistent)
435 add_lookback_edge (state, *rulep, i);
720d742f
AD
436
437 length--;
438 done = 0;
439 while (!done)
440 {
441 done = 1;
442 rp--;
443 /* JF added rp>=ritem && I hope to god its right! */
444 if (rp >= ritem && ISVAR (*rp))
445 {
b9f71f19 446 edge[nedges++] = map_goto (states1[--length], *rp);
720d742f
AD
447 if (nullable[*rp])
448 done = 0;
449 }
450 }
d0fb370f
RS
451 }
452
720d742f
AD
453 if (nedges)
454 {
9887c18a 455 int j;
80a69750 456 includes[i] = XCALLOC (short, nedges + 1);
720d742f 457 for (j = 0; j < nedges; j++)
80a69750
AD
458 includes[i][j] = edge[j];
459 includes[i][nedges] = -1;
720d742f 460 }
d0fb370f
RS
461 }
462
d7913476 463 XFREE (edge);
b9f71f19 464 XFREE (states1);
9887c18a
AD
465
466 includes = transpose (includes, ngotos);
d0fb370f
RS
467}
468
469
720d742f 470
4a120d45 471static void
720d742f 472compute_FOLLOWS (void)
d0fb370f 473{
720d742f 474 int i;
d0fb370f 475
720d742f 476 digraph (includes);
d0fb370f
RS
477
478 for (i = 0; i < ngotos; i++)
ddcd5fdf 479 XFREE (includes[i]);
d0fb370f 480
d7913476 481 XFREE (includes);
d0fb370f
RS
482}
483
484
4a120d45 485static void
720d742f 486compute_lookaheads (void)
d0fb370f 487{
d200e455 488 size_t i;
720d742f 489 shorts *sp;
d0fb370f 490
d200e455 491 for (i = 0; i < nLA; i++)
ddcd5fdf 492 for (sp = lookback[i]; sp; sp = sp->next)
0fb1ffb1 493 bitset_or (LA[i], LA[i], F[sp->value]);
d0fb370f 494
ddcd5fdf 495 /* Free LOOKBACK. */
d200e455 496 for (i = 0; i < nLA; i++)
300f275f 497 LIST_FREE (shorts, lookback[i]);
d0fb370f 498
d7913476 499 XFREE (lookback);
914feea9 500 bitsetv_free (F);
720d742f 501}
d0fb370f 502
d0fb370f 503
5edafffd
AD
504/*--------------------------------------.
505| Initializing the lookaheads members. |
506`--------------------------------------*/
507
508static void
509initialize_lookaheads (void)
510{
602bbf31 511 size_t i;
d200e455 512 nLA = 0;
5edafffd
AD
513 for (i = 0; i < nstates; i++)
514 {
515 int k;
3877f72b 516 int nlookaheads = 0;
29e88316
AD
517 reductions *rp = states[i]->reductions;
518 shifts *sp = states[i]->shifts;
5edafffd 519
80dac38c
AD
520 /* We need a lookahead either to distinguish different
521 reductions (i.e., there are two or more), or to distinguish a
522 reduction from a shift. Otherwise, it is straightforward,
523 and the state is `consistent'. */
524 if (rp->nreds > 1
525 || (rp->nreds == 1 && sp->nshifts && SHIFT_IS_SHIFT (sp, 0)))
3877f72b 526 nlookaheads += rp->nreds;
5edafffd 527 else
29e88316 528 states[i]->consistent = 1;
5edafffd
AD
529
530 for (k = 0; k < sp->nshifts; k++)
531 if (SHIFT_IS_ERROR (sp, k))
532 {
29e88316 533 states[i]->consistent = 0;
5edafffd
AD
534 break;
535 }
3877f72b 536
29e88316
AD
537 states[i]->nlookaheads = nlookaheads;
538 states[i]->lookaheadsp = nLA;
d200e455 539 nLA += nlookaheads;
5edafffd 540 }
5edafffd
AD
541}
542
7c6b64d0
AD
543
544/*---------------------------------------.
545| Output the lookaheads for each state. |
546`---------------------------------------*/
547
548static void
549lookaheads_print (FILE *out)
550{
602bbf31
AD
551 size_t i;
552 int j, k;
7c6b64d0
AD
553 fprintf (out, "Lookaheads: BEGIN\n");
554 for (i = 0; i < nstates; ++i)
555 {
556 fprintf (out, "State %d: %d lookaheads\n",
29e88316 557 i, states[i]->nlookaheads);
7c6b64d0 558
29e88316 559 for (j = 0; j < states[i]->nlookaheads; ++j)
7c6b64d0 560 for (k = 0; k < ntokens; ++k)
602bbf31 561 if (bitset_test (LA[states[i]->lookaheadsp + j], j))
7c6b64d0 562 fprintf (out, " on %d (%s) -> rule %d\n",
ad949da9 563 k, symbols[k]->tag,
29e88316 564 -LAruleno[states[i]->lookaheadsp + j] - 1);
7c6b64d0
AD
565 }
566 fprintf (out, "Lookaheads: END\n");
567}
568
720d742f
AD
569void
570lalr (void)
571{
5edafffd 572 initialize_lookaheads ();
720d742f
AD
573 initialize_LA ();
574 set_goto_map ();
575 initialize_F ();
576 build_relations ();
577 compute_FOLLOWS ();
578 compute_lookaheads ();
7c6b64d0
AD
579
580 if (trace_flag)
581 lookaheads_print (stderr);
d0fb370f 582}