]> git.saurik.com Git - bison.git/blame - src/lalr.c
Let symbols have a location.
[bison.git] / src / lalr.c
CommitLineData
d0fb370f 1/* Compute look-ahead criteria for bison,
b0299a2e
AD
2 Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002
3 Free Software Foundation, Inc.
d0fb370f 4
340ef489 5 This file is part of Bison, the GNU Compiler Compiler.
d0fb370f 6
340ef489
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
d0fb370f 11
340ef489
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
d0fb370f 16
340ef489
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
d0fb370f
RS
21
22
720d742f
AD
23/* Compute how to make the finite state machine deterministic; find
24 which rules need lookahead in each state, and which lookahead
340ef489 25 tokens they accept. */
d0fb370f 26
d0fb370f 27#include "system.h"
602bbf31 28#include "bitset.h"
914feea9 29#include "bitsetv.h"
8b3df748 30#include "quotearg.h"
62a3e4f0
AD
31#include "symtab.h"
32#include "gram.h"
7c6b64d0 33#include "reader.h"
d0fb370f 34#include "types.h"
b2ca4022 35#include "LR0.h"
a0f6b076 36#include "complain.h"
720d742f 37#include "lalr.h"
3519ec76 38#include "nullable.h"
340ef489 39#include "derives.h"
f67d13aa 40#include "getargs.h"
d0fb370f 41
d200e455 42/* All the decorated states, indexed by the state number. */
29e88316 43state_t **states = NULL;
9703cc49 44
b0299a2e 45rule_t **LArule = NULL;
914feea9 46bitsetv LA = NULL;
d200e455 47size_t nLA;
9703cc49 48
aa2aab3c 49static int ngotos;
602bbf31
AD
50short *goto_map = NULL;
51short *from_state = NULL;
52short *to_state = NULL;
d0fb370f 53
fe961097 54/* And for the famous F variable, which name is so descriptive that a
ddcd5fdf 55 comment is hardly needed. <grin>. */
914feea9 56static bitsetv F = NULL;
ddcd5fdf 57
d0fb370f
RS
58static short **includes;
59static shorts **lookback;
aa2aab3c
AD
60
61
62/*---------------------------------------------------------------.
63| digraph & traverse. |
64| |
65| The following variables are used as common storage between the |
66| two. |
67`---------------------------------------------------------------*/
68
d0fb370f
RS
69static short **R;
70static short *INDEX;
71static short *VERTICES;
72static int top;
aa2aab3c 73static int infinity;
d0fb370f 74
720d742f
AD
75static void
76traverse (int i)
d0fb370f 77{
720d742f 78 int j;
720d742f 79 int height;
720d742f
AD
80
81 VERTICES[++top] = i;
82 INDEX[i] = height = top;
83
fe961097
AD
84 if (R[i])
85 for (j = 0; R[i][j] >= 0; ++j)
86 {
87 if (INDEX[R[i][j]] == 0)
88 traverse (R[i][j]);
720d742f 89
fe961097
AD
90 if (INDEX[i] > INDEX[R[i][j]])
91 INDEX[i] = INDEX[R[i][j]];
720d742f 92
0fb1ffb1 93 bitset_or (F[i], F[i], F[R[i][j]]);
fe961097 94 }
720d742f
AD
95
96 if (INDEX[i] == height)
fe961097
AD
97 for (;;)
98 {
99 j = VERTICES[top--];
100 INDEX[j] = infinity;
720d742f 101
fe961097
AD
102 if (i == j)
103 break;
720d742f 104
0fb1ffb1 105 bitset_copy (F[j], F[i]);
fe961097 106 }
d0fb370f
RS
107}
108
109
720d742f
AD
110static void
111digraph (short **relation)
112{
113 int i;
114
115 infinity = ngotos + 2;
d7913476
AD
116 INDEX = XCALLOC (short, ngotos + 1);
117 VERTICES = XCALLOC (short, ngotos + 1);
720d742f
AD
118 top = 0;
119
120 R = relation;
121
122 for (i = 0; i < ngotos; i++)
123 INDEX[i] = 0;
124
125 for (i = 0; i < ngotos; i++)
ddcd5fdf
AD
126 if (INDEX[i] == 0 && R[i])
127 traverse (i);
720d742f 128
d7913476
AD
129 XFREE (INDEX);
130 XFREE (VERTICES);
720d742f
AD
131}
132
bb527fc2 133
4a120d45 134static void
d2729d44 135initialize_LA (void)
d0fb370f 136{
602bbf31 137 size_t i;
720d742f 138 int j;
b0299a2e 139 rule_t **np;
d0fb370f 140
d200e455 141 /* Avoid having to special case 0. */
a845a697
AD
142 if (!nLA)
143 nLA = 1;
d0fb370f 144
914feea9 145 LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
b0299a2e 146 LArule = XCALLOC (rule_t *, nLA);
a845a697 147 lookback = XCALLOC (shorts *, nLA);
d0fb370f 148
b0299a2e 149 np = LArule;
d0fb370f 150 for (i = 0; i < nstates; i++)
29e88316
AD
151 if (!states[i]->consistent)
152 for (j = 0; j < states[i]->reductions->nreds; j++)
b0299a2e 153 *np++ = &rules[states[i]->reductions->rules[j]];
d0fb370f
RS
154}
155
156
4a120d45 157static void
d2729d44 158set_goto_map (void)
d0fb370f 159{
602bbf31
AD
160 size_t state;
161 int i;
720d742f 162 short *temp_map;
d0fb370f 163
d7913476
AD
164 goto_map = XCALLOC (short, nvars + 1) - ntokens;
165 temp_map = XCALLOC (short, nvars + 1) - ntokens;
d0fb370f
RS
166
167 ngotos = 0;
7215de24
AD
168 for (state = 0; state < nstates; ++state)
169 {
29e88316 170 shifts *sp = states[state]->shifts;
7215de24
AD
171 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
172 {
62a3e4f0
AD
173 if (ngotos == SHRT_MAX)
174 fatal (_("too many gotos (max %d)"), SHRT_MAX);
d0fb370f 175
7215de24 176 ngotos++;
13ca549a 177 goto_map[SHIFT_SYMBOL (sp, i)]++;
7215de24
AD
178 }
179 }
d0fb370f 180
d0b0fefa
AD
181 {
182 int k = 0;
183 for (i = ntokens; i < nsyms; i++)
184 {
185 temp_map[i] = k;
186 k += goto_map[i];
187 }
d0fb370f 188
d0b0fefa
AD
189 for (i = ntokens; i < nsyms; i++)
190 goto_map[i] = temp_map[i];
d0fb370f 191
d0b0fefa
AD
192 goto_map[nsyms] = ngotos;
193 temp_map[nsyms] = ngotos;
194 }
d0fb370f 195
d7913476
AD
196 from_state = XCALLOC (short, ngotos);
197 to_state = XCALLOC (short, ngotos);
d0fb370f 198
7215de24 199 for (state = 0; state < nstates; ++state)
d0fb370f 200 {
29e88316 201 shifts *sp = states[state]->shifts;
aa2aab3c 202 for (i = sp->nshifts - 1; i >= 0 && SHIFT_IS_GOTO (sp, i); --i)
d0fb370f 203 {
13ca549a 204 int k = temp_map[SHIFT_SYMBOL (sp, i)]++;
d0b0fefa 205 from_state[k] = state;
13ca549a 206 to_state[k] = sp->shifts[i];
d0fb370f
RS
207 }
208 }
209
d7913476 210 XFREE (temp_map + ntokens);
d0fb370f
RS
211}
212
213
214
43591cec
AD
215/*----------------------------------------------------------.
216| Map a state/symbol pair into its numeric representation. |
217`----------------------------------------------------------*/
d0fb370f 218
4a120d45 219static int
a49aecd5 220map_goto (int state, symbol_number_t symbol)
d0fb370f 221{
720d742f
AD
222 int high;
223 int low;
224 int middle;
225 int s;
d0fb370f
RS
226
227 low = goto_map[symbol];
228 high = goto_map[symbol + 1] - 1;
229
230 while (low <= high)
231 {
232 middle = (low + high) / 2;
233 s = from_state[middle];
234 if (s == state)
36281465 235 return middle;
d0fb370f
RS
236 else if (s < state)
237 low = middle + 1;
238 else
239 high = middle - 1;
240 }
241
43591cec
AD
242 assert (0);
243 /* NOTREACHED */
d0fb370f
RS
244 return 0;
245}
246
247
4a120d45 248static void
d2729d44 249initialize_F (void)
d0fb370f 250{
4d4f699c
AD
251 short **reads = XCALLOC (short *, ngotos);
252 short *edge = XCALLOC (short, ngotos + 1);
253 int nedges = 0;
d0fb370f 254
4d4f699c 255 int i;
d0fb370f 256
914feea9 257 F = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
d0fb370f 258
d0fb370f
RS
259 for (i = 0; i < ngotos; i++)
260 {
80a69750 261 int stateno = to_state[i];
29e88316 262 shifts *sp = states[stateno]->shifts;
d0fb370f 263
d954473d
AD
264 int j;
265 for (j = 0; j < sp->nshifts && SHIFT_IS_SHIFT (sp, j); j++)
0fb1ffb1 266 bitset_set (F[i], SHIFT_SYMBOL (sp, j));
d0fb370f 267
d954473d
AD
268 for (; j < sp->nshifts; j++)
269 {
a49aecd5 270 symbol_number_t symbol = SHIFT_SYMBOL (sp, j);
d954473d
AD
271 if (nullable[symbol])
272 edge[nedges++] = map_goto (stateno, symbol);
273 }
a0f6b076 274
d954473d
AD
275 if (nedges)
276 {
277 reads[i] = XCALLOC (short, nedges + 1);
62a3e4f0 278 memcpy (reads[i], edge, nedges * sizeof (edge[0]));
d954473d
AD
279 reads[i][nedges] = -1;
280 nedges = 0;
d0fb370f 281 }
d0fb370f
RS
282 }
283
720d742f 284 digraph (reads);
d0fb370f
RS
285
286 for (i = 0; i < ngotos; i++)
ddcd5fdf 287 XFREE (reads[i]);
d0fb370f 288
d7913476
AD
289 XFREE (reads);
290 XFREE (edge);
d0fb370f
RS
291}
292
293
4a120d45 294static void
dac3c910 295add_lookback_edge (state_t *state, int ruleno, int gotono)
d0fb370f 296{
720d742f 297 int i;
720d742f 298 shorts *sp;
d0fb370f 299
dac3c910 300 for (i = 0; i < state->nlookaheads; ++i)
b0299a2e 301 if (LArule[state->lookaheadsp + i]->number == ruleno)
065fbd27 302 break;
d0fb370f 303
b0299a2e 304 assert (LArule[state->lookaheadsp + i]->number == ruleno);
d0fb370f 305
d7913476 306 sp = XCALLOC (shorts, 1);
dac3c910 307 sp->next = lookback[state->lookaheadsp + i];
d0fb370f 308 sp->value = gotono;
dac3c910 309 lookback[state->lookaheadsp + i] = sp;
d0fb370f
RS
310}
311
312
f67d13aa
AD
313static void
314matrix_print (FILE *out, short **matrix, int n)
315{
316 int i, j;
317
318 for (i = 0; i < n; ++i)
319 {
320 fprintf (out, "%3d: ", i);
321 if (matrix[i])
322 for (j = 0; matrix[i][j] != -1; ++j)
323 fprintf (out, "%3d ", matrix[i][j]);
324 fputc ('\n', out);
325 }
326 fputc ('\n', out);
327}
328
9887c18a
AD
329/*-------------------------------------------------------------------.
330| Return the transpose of R_ARG, of size N. Destroy R_ARG, as it is |
331| replaced with the result. |
f67d13aa
AD
332| |
333| R_ARG[I] is NULL or a -1 terminated list of numbers. |
334| |
335| RESULT[NUM] is NULL or the -1 terminated list of the I such as NUM |
336| is in R_ARG[I]. |
9887c18a
AD
337`-------------------------------------------------------------------*/
338
4a120d45 339static short **
d2729d44 340transpose (short **R_arg, int n)
d0fb370f 341{
f67d13aa
AD
342 /* The result. */
343 short **new_R = XCALLOC (short *, n);
344 /* END_R[I] -- next entry of NEW_R[I]. */
345 short **end_R = XCALLOC (short *, n);
346 /* NEDGES[I] -- total size of NEW_R[I]. */
347 short *nedges = XCALLOC (short, n);
348 int i, j;
349
350 if (trace_flag)
d0fb370f 351 {
f67d13aa
AD
352 fputs ("transpose: input\n", stderr);
353 matrix_print (stderr, R_arg, n);
d0fb370f
RS
354 }
355
f67d13aa
AD
356 /* Count. */
357 for (i = 0; i < n; i++)
358 if (R_arg[i])
359 for (j = 0; R_arg[i][j] >= 0; ++j)
360 ++nedges[R_arg[i][j]];
d0fb370f 361
f67d13aa 362 /* Allocate. */
d0fb370f 363 for (i = 0; i < n; i++)
80a69750
AD
364 if (nedges[i] > 0)
365 {
366 short *sp = XCALLOC (short, nedges[i] + 1);
80a69750 367 sp[nedges[i]] = -1;
f67d13aa
AD
368 new_R[i] = sp;
369 end_R[i] = sp;
80a69750 370 }
d0fb370f 371
f67d13aa 372 /* Store. */
d0fb370f 373 for (i = 0; i < n; i++)
f67d13aa
AD
374 if (R_arg[i])
375 for (j = 0; R_arg[i][j] >= 0; ++j)
376 {
377 *end_R[R_arg[i][j]] = i;
378 ++end_R[R_arg[i][j]];
379 }
d0fb370f 380
f67d13aa
AD
381 free (nedges);
382 free (end_R);
d0fb370f 383
9887c18a
AD
384 /* Free the input: it is replaced with the result. */
385 for (i = 0; i < n; i++)
386 XFREE (R_arg[i]);
f67d13aa
AD
387 free (R_arg);
388
389 if (trace_flag)
390 {
391 fputs ("transpose: output\n", stderr);
392 matrix_print (stderr, new_R, n);
393 }
9887c18a 394
36281465 395 return new_R;
d0fb370f
RS
396}
397
398
4a120d45 399static void
720d742f 400build_relations (void)
d0fb370f 401{
9887c18a 402 short *edge = XCALLOC (short, ngotos + 1);
b9f71f19 403 short *states1 = XCALLOC (short, ritem_longest_rhs () + 1);
720d742f 404 int i;
720d742f 405
d7913476 406 includes = XCALLOC (short *, ngotos);
d0fb370f
RS
407
408 for (i = 0; i < ngotos; i++)
409 {
9887c18a 410 int nedges = 0;
a49aecd5 411 symbol_number_t symbol1 = states[to_state[i]]->accessing_symbol;
9887c18a 412 short *rulep;
d0fb370f 413
720d742f
AD
414 for (rulep = derives[symbol1]; *rulep > 0; rulep++)
415 {
9887c18a 416 int done;
80a69750 417 int length = 1;
62a3e4f0 418 item_number_t *rp;
29e88316 419 state_t *state = states[from_state[i]];
b9f71f19 420 states1[0] = state->number;
d0fb370f 421
99013900 422 for (rp = rules[*rulep].rhs; *rp >= 0; rp++)
720d742f 423 {
dac3c910 424 shifts *sp = state->shifts;
9887c18a 425 int j;
80a69750 426 for (j = 0; j < sp->nshifts; j++)
720d742f 427 {
29e88316 428 state = states[sp->shifts[j]];
5fbb0954 429 if (state->accessing_symbol
a49aecd5 430 == item_number_as_symbol_number (*rp))
720d742f
AD
431 break;
432 }
d0fb370f 433
b9f71f19 434 states1[length++] = state->number;
720d742f
AD
435 }
436
dac3c910
AD
437 if (!state->consistent)
438 add_lookback_edge (state, *rulep, i);
720d742f
AD
439
440 length--;
441 done = 0;
442 while (!done)
443 {
444 done = 1;
445 rp--;
446 /* JF added rp>=ritem && I hope to god its right! */
447 if (rp >= ritem && ISVAR (*rp))
448 {
a49aecd5 449 /* Downcasting from item_number_t to symbol_number_t. */
5fbb0954 450 edge[nedges++] = map_goto (states1[--length],
a49aecd5 451 item_number_as_symbol_number (*rp));
720d742f
AD
452 if (nullable[*rp])
453 done = 0;
454 }
455 }
d0fb370f
RS
456 }
457
720d742f
AD
458 if (nedges)
459 {
9887c18a 460 int j;
80a69750 461 includes[i] = XCALLOC (short, nedges + 1);
720d742f 462 for (j = 0; j < nedges; j++)
80a69750
AD
463 includes[i][j] = edge[j];
464 includes[i][nedges] = -1;
720d742f 465 }
d0fb370f
RS
466 }
467
d7913476 468 XFREE (edge);
b9f71f19 469 XFREE (states1);
9887c18a
AD
470
471 includes = transpose (includes, ngotos);
d0fb370f
RS
472}
473
474
720d742f 475
4a120d45 476static void
720d742f 477compute_FOLLOWS (void)
d0fb370f 478{
720d742f 479 int i;
d0fb370f 480
720d742f 481 digraph (includes);
d0fb370f
RS
482
483 for (i = 0; i < ngotos; i++)
ddcd5fdf 484 XFREE (includes[i]);
d0fb370f 485
d7913476 486 XFREE (includes);
d0fb370f
RS
487}
488
489
4a120d45 490static void
720d742f 491compute_lookaheads (void)
d0fb370f 492{
d200e455 493 size_t i;
720d742f 494 shorts *sp;
d0fb370f 495
d200e455 496 for (i = 0; i < nLA; i++)
ddcd5fdf 497 for (sp = lookback[i]; sp; sp = sp->next)
0fb1ffb1 498 bitset_or (LA[i], LA[i], F[sp->value]);
d0fb370f 499
ddcd5fdf 500 /* Free LOOKBACK. */
d200e455 501 for (i = 0; i < nLA; i++)
300f275f 502 LIST_FREE (shorts, lookback[i]);
d0fb370f 503
d7913476 504 XFREE (lookback);
914feea9 505 bitsetv_free (F);
720d742f 506}
d0fb370f 507
d0fb370f 508
5edafffd
AD
509/*--------------------------------------.
510| Initializing the lookaheads members. |
511`--------------------------------------*/
512
513static void
514initialize_lookaheads (void)
515{
602bbf31 516 size_t i;
d200e455 517 nLA = 0;
5edafffd
AD
518 for (i = 0; i < nstates; i++)
519 {
520 int k;
3877f72b 521 int nlookaheads = 0;
29e88316
AD
522 reductions *rp = states[i]->reductions;
523 shifts *sp = states[i]->shifts;
5edafffd 524
80dac38c
AD
525 /* We need a lookahead either to distinguish different
526 reductions (i.e., there are two or more), or to distinguish a
527 reduction from a shift. Otherwise, it is straightforward,
528 and the state is `consistent'. */
529 if (rp->nreds > 1
530 || (rp->nreds == 1 && sp->nshifts && SHIFT_IS_SHIFT (sp, 0)))
3877f72b 531 nlookaheads += rp->nreds;
5edafffd 532 else
29e88316 533 states[i]->consistent = 1;
5edafffd
AD
534
535 for (k = 0; k < sp->nshifts; k++)
536 if (SHIFT_IS_ERROR (sp, k))
537 {
29e88316 538 states[i]->consistent = 0;
5edafffd
AD
539 break;
540 }
3877f72b 541
29e88316
AD
542 states[i]->nlookaheads = nlookaheads;
543 states[i]->lookaheadsp = nLA;
d200e455 544 nLA += nlookaheads;
5edafffd 545 }
5edafffd
AD
546}
547
7c6b64d0
AD
548
549/*---------------------------------------.
550| Output the lookaheads for each state. |
551`---------------------------------------*/
552
553static void
554lookaheads_print (FILE *out)
555{
602bbf31
AD
556 size_t i;
557 int j, k;
7c6b64d0
AD
558 fprintf (out, "Lookaheads: BEGIN\n");
559 for (i = 0; i < nstates; ++i)
560 {
561 fprintf (out, "State %d: %d lookaheads\n",
29e88316 562 i, states[i]->nlookaheads);
7c6b64d0 563
29e88316 564 for (j = 0; j < states[i]->nlookaheads; ++j)
7c6b64d0 565 for (k = 0; k < ntokens; ++k)
8b3df748 566 if (bitset_test (LA[states[i]->lookaheadsp + j], k))
7c6b64d0 567 fprintf (out, " on %d (%s) -> rule %d\n",
8b3df748 568 k, quotearg_style (escape_quoting_style, symbols[k]->tag),
b0299a2e 569 LArule[states[i]->lookaheadsp + j]->number - 1);
7c6b64d0
AD
570 }
571 fprintf (out, "Lookaheads: END\n");
572}
573
720d742f
AD
574void
575lalr (void)
576{
5edafffd 577 initialize_lookaheads ();
720d742f
AD
578 initialize_LA ();
579 set_goto_map ();
580 initialize_F ();
581 build_relations ();
582 compute_FOLLOWS ();
583 compute_lookaheads ();
7c6b64d0
AD
584
585 if (trace_flag)
586 lookaheads_print (stderr);
d0fb370f 587}