]> git.saurik.com Git - bison.git/blame - src/lalr.c
* src/L0.c, src/LR0.h (nstates): Be size_t.
[bison.git] / src / lalr.c
CommitLineData
d0fb370f 1/* Compute look-ahead criteria for bison,
3feec034 2 Copyright 1984, 1986, 1989, 2000, 2001 Free Software Foundation, Inc.
d0fb370f 3
340ef489 4 This file is part of Bison, the GNU Compiler Compiler.
d0fb370f 5
340ef489
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
d0fb370f 10
340ef489
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
d0fb370f 15
340ef489
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
d0fb370f
RS
20
21
720d742f
AD
22/* Compute how to make the finite state machine deterministic; find
23 which rules need lookahead in each state, and which lookahead
340ef489 24 tokens they accept. */
d0fb370f 25
d0fb370f 26#include "system.h"
602bbf31 27#include "bitset.h"
7c6b64d0 28#include "reader.h"
d0fb370f 29#include "types.h"
b2ca4022 30#include "LR0.h"
ad949da9 31#include "symtab.h"
d0fb370f 32#include "gram.h"
a0f6b076 33#include "complain.h"
720d742f 34#include "lalr.h"
3519ec76 35#include "nullable.h"
340ef489 36#include "derives.h"
f67d13aa 37#include "getargs.h"
d0fb370f 38
d200e455 39/* All the decorated states, indexed by the state number. */
29e88316 40state_t **states = NULL;
9703cc49 41
d0fb370f 42int tokensetsize;
602bbf31
AD
43short *LAruleno = NULL;
44bitset *LA = NULL;
d200e455 45size_t nLA;
9703cc49 46
aa2aab3c 47static int ngotos;
602bbf31
AD
48short *goto_map = NULL;
49short *from_state = NULL;
50short *to_state = NULL;
d0fb370f 51
fe961097 52/* And for the famous F variable, which name is so descriptive that a
ddcd5fdf
AD
53 comment is hardly needed. <grin>. */
54static unsigned *F = NULL;
55#define F(Rule) (F + (Rule) * tokensetsize)
56
d0fb370f
RS
57static short **includes;
58static shorts **lookback;
aa2aab3c
AD
59
60
61/*---------------------------------------------------------------.
62| digraph & traverse. |
63| |
64| The following variables are used as common storage between the |
65| two. |
66`---------------------------------------------------------------*/
67
d0fb370f
RS
68static short **R;
69static short *INDEX;
70static short *VERTICES;
71static int top;
aa2aab3c 72static int infinity;
d0fb370f 73
720d742f
AD
74static void
75traverse (int i)
d0fb370f 76{
720d742f 77 int j;
fe961097 78 size_t k;
720d742f 79 int height;
fe961097 80 size_t size = F (i + 1) - F(i);
720d742f
AD
81
82 VERTICES[++top] = i;
83 INDEX[i] = height = top;
84
fe961097
AD
85 if (R[i])
86 for (j = 0; R[i][j] >= 0; ++j)
87 {
88 if (INDEX[R[i][j]] == 0)
89 traverse (R[i][j]);
720d742f 90
fe961097
AD
91 if (INDEX[i] > INDEX[R[i][j]])
92 INDEX[i] = INDEX[R[i][j]];
720d742f 93
fe961097
AD
94 for (k = 0; k < size; ++k)
95 F (i)[k] |= F (R[i][j])[k];
96 }
720d742f
AD
97
98 if (INDEX[i] == height)
fe961097
AD
99 for (;;)
100 {
101 j = VERTICES[top--];
102 INDEX[j] = infinity;
720d742f 103
fe961097
AD
104 if (i == j)
105 break;
720d742f 106
fe961097 107 for (k = 0; k < size; ++k)
b9752825 108 F (j)[k] = F (i)[k];
fe961097 109 }
d0fb370f
RS
110}
111
112
720d742f
AD
113static void
114digraph (short **relation)
115{
116 int i;
117
118 infinity = ngotos + 2;
d7913476
AD
119 INDEX = XCALLOC (short, ngotos + 1);
120 VERTICES = XCALLOC (short, ngotos + 1);
720d742f
AD
121 top = 0;
122
123 R = relation;
124
125 for (i = 0; i < ngotos; i++)
126 INDEX[i] = 0;
127
128 for (i = 0; i < ngotos; i++)
ddcd5fdf
AD
129 if (INDEX[i] == 0 && R[i])
130 traverse (i);
720d742f 131
d7913476
AD
132 XFREE (INDEX);
133 XFREE (VERTICES);
720d742f
AD
134}
135
bb527fc2 136
4a120d45 137static void
d2729d44 138initialize_LA (void)
d0fb370f 139{
602bbf31 140 size_t i;
720d742f 141 int j;
720d742f 142 short *np;
d0fb370f 143
d200e455 144 /* Avoid having to special case 0. */
a845a697
AD
145 if (!nLA)
146 nLA = 1;
d0fb370f 147
602bbf31
AD
148 LA = XCALLOC (bitset, nLA);
149 for (i = 0; i < nLA; ++i)
150 {
151 LA[i] = bitset_create (ntokens, BITSET_FIXED);
152 bitset_zero (LA[i]);
153 }
a845a697
AD
154 LAruleno = XCALLOC (short, nLA);
155 lookback = XCALLOC (shorts *, nLA);
d0fb370f
RS
156
157 np = LAruleno;
158 for (i = 0; i < nstates; i++)
29e88316
AD
159 if (!states[i]->consistent)
160 for (j = 0; j < states[i]->reductions->nreds; j++)
161 *np++ = states[i]->reductions->rules[j];
d0fb370f
RS
162}
163
164
4a120d45 165static void
d2729d44 166set_goto_map (void)
d0fb370f 167{
602bbf31
AD
168 size_t state;
169 int i;
720d742f 170 short *temp_map;
d0fb370f 171
d7913476
AD
172 goto_map = XCALLOC (short, nvars + 1) - ntokens;
173 temp_map = XCALLOC (short, nvars + 1) - ntokens;
d0fb370f
RS
174
175 ngotos = 0;
7215de24
AD
176 for (state = 0; state < nstates; ++state)
177 {
29e88316 178 shifts *sp = states[state]->shifts;
7215de24
AD
179 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
180 {
7215de24
AD
181 if (ngotos == MAXSHORT)
182 fatal (_("too many gotos (max %d)"), MAXSHORT);
d0fb370f 183
7215de24 184 ngotos++;
13ca549a 185 goto_map[SHIFT_SYMBOL (sp, i)]++;
7215de24
AD
186 }
187 }
d0fb370f 188
d0b0fefa
AD
189 {
190 int k = 0;
191 for (i = ntokens; i < nsyms; i++)
192 {
193 temp_map[i] = k;
194 k += goto_map[i];
195 }
d0fb370f 196
d0b0fefa
AD
197 for (i = ntokens; i < nsyms; i++)
198 goto_map[i] = temp_map[i];
d0fb370f 199
d0b0fefa
AD
200 goto_map[nsyms] = ngotos;
201 temp_map[nsyms] = ngotos;
202 }
d0fb370f 203
d7913476
AD
204 from_state = XCALLOC (short, ngotos);
205 to_state = XCALLOC (short, ngotos);
d0fb370f 206
7215de24 207 for (state = 0; state < nstates; ++state)
d0fb370f 208 {
29e88316 209 shifts *sp = states[state]->shifts;
aa2aab3c 210 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
d0fb370f 211 {
13ca549a 212 int k = temp_map[SHIFT_SYMBOL (sp, i)]++;
d0b0fefa 213 from_state[k] = state;
13ca549a 214 to_state[k] = sp->shifts[i];
d0fb370f
RS
215 }
216 }
217
d7913476 218 XFREE (temp_map + ntokens);
d0fb370f
RS
219}
220
221
222
43591cec
AD
223/*----------------------------------------------------------.
224| Map a state/symbol pair into its numeric representation. |
225`----------------------------------------------------------*/
d0fb370f 226
4a120d45 227static int
d2729d44 228map_goto (int state, int symbol)
d0fb370f 229{
720d742f
AD
230 int high;
231 int low;
232 int middle;
233 int s;
d0fb370f
RS
234
235 low = goto_map[symbol];
236 high = goto_map[symbol + 1] - 1;
237
238 while (low <= high)
239 {
240 middle = (low + high) / 2;
241 s = from_state[middle];
242 if (s == state)
36281465 243 return middle;
d0fb370f
RS
244 else if (s < state)
245 low = middle + 1;
246 else
247 high = middle - 1;
248 }
249
43591cec
AD
250 assert (0);
251 /* NOTREACHED */
d0fb370f
RS
252 return 0;
253}
254
255
4a120d45 256static void
d2729d44 257initialize_F (void)
d0fb370f 258{
4d4f699c
AD
259 short **reads = XCALLOC (short *, ngotos);
260 short *edge = XCALLOC (short, ngotos + 1);
261 int nedges = 0;
d0fb370f 262
4d4f699c 263 int i;
d0fb370f 264
4d4f699c 265 F = XCALLOC (unsigned, ngotos * tokensetsize);
d0fb370f 266
d0fb370f
RS
267 for (i = 0; i < ngotos; i++)
268 {
80a69750 269 int stateno = to_state[i];
29e88316 270 shifts *sp = states[stateno]->shifts;
d0fb370f 271
d954473d
AD
272 int j;
273 for (j = 0; j < sp->nshifts && SHIFT_IS_SHIFT (sp, j); j++)
13ca549a 274 SETBIT (F (i), SHIFT_SYMBOL (sp, j));
d0fb370f 275
d954473d
AD
276 for (; j < sp->nshifts; j++)
277 {
13ca549a 278 int symbol = SHIFT_SYMBOL (sp, j);
d954473d
AD
279 if (nullable[symbol])
280 edge[nedges++] = map_goto (stateno, symbol);
281 }
a0f6b076 282
d954473d
AD
283 if (nedges)
284 {
285 reads[i] = XCALLOC (short, nedges + 1);
286 shortcpy (reads[i], edge, nedges);
287 reads[i][nedges] = -1;
288 nedges = 0;
d0fb370f 289 }
d0fb370f
RS
290 }
291
720d742f 292 digraph (reads);
d0fb370f
RS
293
294 for (i = 0; i < ngotos; i++)
ddcd5fdf 295 XFREE (reads[i]);
d0fb370f 296
d7913476
AD
297 XFREE (reads);
298 XFREE (edge);
d0fb370f
RS
299}
300
301
4a120d45 302static void
dac3c910 303add_lookback_edge (state_t *state, int ruleno, int gotono)
d0fb370f 304{
720d742f 305 int i;
720d742f 306 shorts *sp;
d0fb370f 307
dac3c910
AD
308 for (i = 0; i < state->nlookaheads; ++i)
309 if (LAruleno[state->lookaheadsp + i] == ruleno)
065fbd27 310 break;
d0fb370f 311
dac3c910 312 assert (LAruleno[state->lookaheadsp + i] == ruleno);
d0fb370f 313
d7913476 314 sp = XCALLOC (shorts, 1);
dac3c910 315 sp->next = lookback[state->lookaheadsp + i];
d0fb370f 316 sp->value = gotono;
dac3c910 317 lookback[state->lookaheadsp + i] = sp;
d0fb370f
RS
318}
319
320
f67d13aa
AD
321static void
322matrix_print (FILE *out, short **matrix, int n)
323{
324 int i, j;
325
326 for (i = 0; i < n; ++i)
327 {
328 fprintf (out, "%3d: ", i);
329 if (matrix[i])
330 for (j = 0; matrix[i][j] != -1; ++j)
331 fprintf (out, "%3d ", matrix[i][j]);
332 fputc ('\n', out);
333 }
334 fputc ('\n', out);
335}
336
9887c18a
AD
337/*-------------------------------------------------------------------.
338| Return the transpose of R_ARG, of size N. Destroy R_ARG, as it is |
339| replaced with the result. |
f67d13aa
AD
340| |
341| R_ARG[I] is NULL or a -1 terminated list of numbers. |
342| |
343| RESULT[NUM] is NULL or the -1 terminated list of the I such as NUM |
344| is in R_ARG[I]. |
9887c18a
AD
345`-------------------------------------------------------------------*/
346
4a120d45 347static short **
d2729d44 348transpose (short **R_arg, int n)
d0fb370f 349{
f67d13aa
AD
350 /* The result. */
351 short **new_R = XCALLOC (short *, n);
352 /* END_R[I] -- next entry of NEW_R[I]. */
353 short **end_R = XCALLOC (short *, n);
354 /* NEDGES[I] -- total size of NEW_R[I]. */
355 short *nedges = XCALLOC (short, n);
356 int i, j;
357
358 if (trace_flag)
d0fb370f 359 {
f67d13aa
AD
360 fputs ("transpose: input\n", stderr);
361 matrix_print (stderr, R_arg, n);
d0fb370f
RS
362 }
363
f67d13aa
AD
364 /* Count. */
365 for (i = 0; i < n; i++)
366 if (R_arg[i])
367 for (j = 0; R_arg[i][j] >= 0; ++j)
368 ++nedges[R_arg[i][j]];
d0fb370f 369
f67d13aa 370 /* Allocate. */
d0fb370f 371 for (i = 0; i < n; i++)
80a69750
AD
372 if (nedges[i] > 0)
373 {
374 short *sp = XCALLOC (short, nedges[i] + 1);
80a69750 375 sp[nedges[i]] = -1;
f67d13aa
AD
376 new_R[i] = sp;
377 end_R[i] = sp;
80a69750 378 }
d0fb370f 379
f67d13aa 380 /* Store. */
d0fb370f 381 for (i = 0; i < n; i++)
f67d13aa
AD
382 if (R_arg[i])
383 for (j = 0; R_arg[i][j] >= 0; ++j)
384 {
385 *end_R[R_arg[i][j]] = i;
386 ++end_R[R_arg[i][j]];
387 }
d0fb370f 388
f67d13aa
AD
389 free (nedges);
390 free (end_R);
d0fb370f 391
9887c18a
AD
392 /* Free the input: it is replaced with the result. */
393 for (i = 0; i < n; i++)
394 XFREE (R_arg[i]);
f67d13aa
AD
395 free (R_arg);
396
397 if (trace_flag)
398 {
399 fputs ("transpose: output\n", stderr);
400 matrix_print (stderr, new_R, n);
401 }
9887c18a 402
36281465 403 return new_R;
d0fb370f
RS
404}
405
406
4a120d45 407static void
720d742f 408build_relations (void)
d0fb370f 409{
9887c18a 410 short *edge = XCALLOC (short, ngotos + 1);
b9f71f19 411 short *states1 = XCALLOC (short, ritem_longest_rhs () + 1);
720d742f 412 int i;
720d742f 413
d7913476 414 includes = XCALLOC (short *, ngotos);
d0fb370f
RS
415
416 for (i = 0; i < ngotos; i++)
417 {
9887c18a 418 int nedges = 0;
29e88316 419 int symbol1 = states[to_state[i]]->accessing_symbol;
9887c18a 420 short *rulep;
d0fb370f 421
720d742f
AD
422 for (rulep = derives[symbol1]; *rulep > 0; rulep++)
423 {
9887c18a 424 int done;
80a69750 425 int length = 1;
9887c18a 426 short *rp;
29e88316 427 state_t *state = states[from_state[i]];
b9f71f19 428 states1[0] = state->number;
d0fb370f 429
1a2b5d37 430 for (rp = &ritem[rules[*rulep].rhs]; *rp >= 0; rp++)
720d742f 431 {
dac3c910 432 shifts *sp = state->shifts;
9887c18a 433 int j;
80a69750 434 for (j = 0; j < sp->nshifts; j++)
720d742f 435 {
29e88316 436 state = states[sp->shifts[j]];
dac3c910 437 if (state->accessing_symbol == *rp)
720d742f
AD
438 break;
439 }
d0fb370f 440
b9f71f19 441 states1[length++] = state->number;
720d742f
AD
442 }
443
dac3c910
AD
444 if (!state->consistent)
445 add_lookback_edge (state, *rulep, i);
720d742f
AD
446
447 length--;
448 done = 0;
449 while (!done)
450 {
451 done = 1;
452 rp--;
453 /* JF added rp>=ritem && I hope to god its right! */
454 if (rp >= ritem && ISVAR (*rp))
455 {
b9f71f19 456 edge[nedges++] = map_goto (states1[--length], *rp);
720d742f
AD
457 if (nullable[*rp])
458 done = 0;
459 }
460 }
d0fb370f
RS
461 }
462
720d742f
AD
463 if (nedges)
464 {
9887c18a 465 int j;
80a69750 466 includes[i] = XCALLOC (short, nedges + 1);
720d742f 467 for (j = 0; j < nedges; j++)
80a69750
AD
468 includes[i][j] = edge[j];
469 includes[i][nedges] = -1;
720d742f 470 }
d0fb370f
RS
471 }
472
d7913476 473 XFREE (edge);
b9f71f19 474 XFREE (states1);
9887c18a
AD
475
476 includes = transpose (includes, ngotos);
d0fb370f
RS
477}
478
479
720d742f 480
4a120d45 481static void
720d742f 482compute_FOLLOWS (void)
d0fb370f 483{
720d742f 484 int i;
d0fb370f 485
720d742f 486 digraph (includes);
d0fb370f
RS
487
488 for (i = 0; i < ngotos; i++)
ddcd5fdf 489 XFREE (includes[i]);
d0fb370f 490
d7913476 491 XFREE (includes);
d0fb370f
RS
492}
493
494
4a120d45 495static void
720d742f 496compute_lookaheads (void)
d0fb370f 497{
d200e455 498 size_t i;
720d742f 499 shorts *sp;
d0fb370f 500
d200e455 501 for (i = 0; i < nLA; i++)
ddcd5fdf
AD
502 for (sp = lookback[i]; sp; sp = sp->next)
503 {
9887c18a 504 int j;
602bbf31
AD
505 for (j = 0; j < ntokens; ++j)
506 if (BITISSET (F (sp->value), j))
507 bitset_set (LA[i], j);
ddcd5fdf 508 }
d0fb370f 509
ddcd5fdf 510 /* Free LOOKBACK. */
d200e455 511 for (i = 0; i < nLA; i++)
300f275f 512 LIST_FREE (shorts, lookback[i]);
d0fb370f 513
d7913476
AD
514 XFREE (lookback);
515 XFREE (F);
720d742f 516}
d0fb370f 517
d0fb370f 518
5edafffd
AD
519/*--------------------------------------.
520| Initializing the lookaheads members. |
521`--------------------------------------*/
522
523static void
524initialize_lookaheads (void)
525{
602bbf31 526 size_t i;
d200e455 527 nLA = 0;
5edafffd
AD
528 for (i = 0; i < nstates; i++)
529 {
530 int k;
3877f72b 531 int nlookaheads = 0;
29e88316
AD
532 reductions *rp = states[i]->reductions;
533 shifts *sp = states[i]->shifts;
5edafffd 534
80dac38c
AD
535 /* We need a lookahead either to distinguish different
536 reductions (i.e., there are two or more), or to distinguish a
537 reduction from a shift. Otherwise, it is straightforward,
538 and the state is `consistent'. */
539 if (rp->nreds > 1
540 || (rp->nreds == 1 && sp->nshifts && SHIFT_IS_SHIFT (sp, 0)))
3877f72b 541 nlookaheads += rp->nreds;
5edafffd 542 else
29e88316 543 states[i]->consistent = 1;
5edafffd
AD
544
545 for (k = 0; k < sp->nshifts; k++)
546 if (SHIFT_IS_ERROR (sp, k))
547 {
29e88316 548 states[i]->consistent = 0;
5edafffd
AD
549 break;
550 }
3877f72b 551
29e88316
AD
552 states[i]->nlookaheads = nlookaheads;
553 states[i]->lookaheadsp = nLA;
d200e455 554 nLA += nlookaheads;
5edafffd 555 }
5edafffd
AD
556}
557
7c6b64d0
AD
558
559/*---------------------------------------.
560| Output the lookaheads for each state. |
561`---------------------------------------*/
562
563static void
564lookaheads_print (FILE *out)
565{
602bbf31
AD
566 size_t i;
567 int j, k;
7c6b64d0
AD
568 fprintf (out, "Lookaheads: BEGIN\n");
569 for (i = 0; i < nstates; ++i)
570 {
571 fprintf (out, "State %d: %d lookaheads\n",
29e88316 572 i, states[i]->nlookaheads);
7c6b64d0 573
29e88316 574 for (j = 0; j < states[i]->nlookaheads; ++j)
7c6b64d0 575 for (k = 0; k < ntokens; ++k)
602bbf31 576 if (bitset_test (LA[states[i]->lookaheadsp + j], j))
7c6b64d0 577 fprintf (out, " on %d (%s) -> rule %d\n",
ad949da9 578 k, symbols[k]->tag,
29e88316 579 -LAruleno[states[i]->lookaheadsp + j] - 1);
7c6b64d0
AD
580 }
581 fprintf (out, "Lookaheads: END\n");
582}
583
720d742f
AD
584void
585lalr (void)
586{
587 tokensetsize = WORDSIZE (ntokens);
588
5edafffd 589 initialize_lookaheads ();
720d742f
AD
590 initialize_LA ();
591 set_goto_map ();
592 initialize_F ();
593 build_relations ();
594 compute_FOLLOWS ();
595 compute_lookaheads ();
7c6b64d0
AD
596
597 if (trace_flag)
598 lookaheads_print (stderr);
d0fb370f 599}