]> git.saurik.com Git - bison.git/blame - src/lalr.c
Work around Java's ``code too large'' problem for parser tables.
[bison.git] / src / lalr.c
CommitLineData
742e4900 1/* Compute lookahead criteria for Bison.
5362cd5f 2
e9ad4aec 3 Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004, 2005,
75ad86ee 4 2006, 2007 Free Software Foundation, Inc.
d0fb370f 5
340ef489 6 This file is part of Bison, the GNU Compiler Compiler.
d0fb370f 7
f16b0819 8 This program is free software: you can redistribute it and/or modify
340ef489 9 it under the terms of the GNU General Public License as published by
f16b0819
PE
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
d0fb370f 12
f16b0819 13 This program is distributed in the hope that it will be useful,
340ef489
AD
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
d0fb370f 17
340ef489 18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
d0fb370f
RS
20
21
720d742f 22/* Compute how to make the finite state machine deterministic; find
742e4900 23 which rules need lookahead in each state, and which lookahead
340ef489 24 tokens they accept. */
d0fb370f 25
2cec9080 26#include <config.h>
d0fb370f 27#include "system.h"
5dde5425
PE
28
29#include <bitset.h>
30#include <bitsetv.h>
31#include <quotearg.h>
32
b2ca4022 33#include "LR0.h"
a0f6b076 34#include "complain.h"
340ef489 35#include "derives.h"
f67d13aa 36#include "getargs.h"
5dde5425
PE
37#include "gram.h"
38#include "lalr.h"
39#include "nullable.h"
40#include "reader.h"
41#include "relation.h"
42#include "symtab.h"
d0fb370f 43
4cf44c00
PE
44goto_number *goto_map;
45static goto_number ngotos;
46state_number *from_state;
47state_number *to_state;
e68e0410
AD
48
49/* Linked list of goto numbers. */
5dde5425 50typedef struct goto_list
e68e0410 51{
5dde5425
PE
52 struct goto_list *next;
53 goto_number value;
54} goto_list;
e68e0410
AD
55
56
b09f4f48 57/* LA is an NLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule
3325ddc4
AD
58 LArule[l] is applicable in the appropriate state when the next
59 token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j,
60 it is a conflict. */
61
62static bitsetv LA = NULL;
d200e455 63size_t nLA;
9703cc49 64
d0fb370f 65
fe961097 66/* And for the famous F variable, which name is so descriptive that a
ddcd5fdf 67 comment is hardly needed. <grin>. */
914feea9 68static bitsetv F = NULL;
ddcd5fdf 69
5dde5425
PE
70static goto_number **includes;
71static goto_list **lookback;
aa2aab3c
AD
72
73
720d742f 74
bb527fc2 75
4a120d45 76static void
d2729d44 77set_goto_map (void)
d0fb370f 78{
5dde5425
PE
79 state_number s;
80 goto_number *temp_map;
d0fb370f 81
da2a7671
PE
82 goto_map = xcalloc (nvars + 1, sizeof *goto_map);
83 temp_map = xnmalloc (nvars + 1, sizeof *temp_map);
d0fb370f
RS
84
85 ngotos = 0;
5dde5425 86 for (s = 0; s < nstates; ++s)
7215de24 87 {
5dde5425 88 transitions *sp = states[s]->transitions;
d57650a5 89 int i;
ccaf65bc 90 for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
7215de24 91 {
7215de24 92 ngotos++;
60e3ecc7
PE
93
94 /* Abort if (ngotos + 1) would overflow. */
4f82b42a 95 aver (ngotos != GOTO_NUMBER_MAXIMUM);
60e3ecc7 96
5362cd5f 97 goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
7215de24
AD
98 }
99 }
d0fb370f 100
d0b0fefa 101 {
60e3ecc7 102 goto_number k = 0;
d57650a5 103 int i;
d0b0fefa
AD
104 for (i = ntokens; i < nsyms; i++)
105 {
5362cd5f
PE
106 temp_map[i - ntokens] = k;
107 k += goto_map[i - ntokens];
d0b0fefa 108 }
d0fb370f 109
d0b0fefa 110 for (i = ntokens; i < nsyms; i++)
5362cd5f 111 goto_map[i - ntokens] = temp_map[i - ntokens];
d0fb370f 112
5362cd5f
PE
113 goto_map[nsyms - ntokens] = ngotos;
114 temp_map[nsyms - ntokens] = ngotos;
d0b0fefa 115 }
d0fb370f 116
da2a7671
PE
117 from_state = xcalloc (ngotos, sizeof *from_state);
118 to_state = xcalloc (ngotos, sizeof *to_state);
d0fb370f 119
5dde5425 120 for (s = 0; s < nstates; ++s)
d0fb370f 121 {
5dde5425 122 transitions *sp = states[s]->transitions;
d57650a5 123 int i;
ccaf65bc 124 for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
d0fb370f 125 {
60e3ecc7 126 goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
5dde5425 127 from_state[k] = s;
640748ee 128 to_state[k] = sp->states[i]->number;
d0fb370f
RS
129 }
130 }
131
afbb696d 132 free (temp_map);
d0fb370f
RS
133}
134
135
136
43591cec
AD
137/*----------------------------------------------------------.
138| Map a state/symbol pair into its numeric representation. |
139`----------------------------------------------------------*/
d0fb370f 140
60e3ecc7 141static goto_number
5dde5425 142map_goto (state_number s0, symbol_number sym)
d0fb370f 143{
60e3ecc7
PE
144 goto_number high;
145 goto_number low;
146 goto_number middle;
5dde5425 147 state_number s;
d0fb370f 148
5362cd5f
PE
149 low = goto_map[sym - ntokens];
150 high = goto_map[sym - ntokens + 1] - 1;
d0fb370f 151
58a84254 152 for (;;)
d0fb370f 153 {
4f82b42a 154 aver (low <= high);
d0fb370f
RS
155 middle = (low + high) / 2;
156 s = from_state[middle];
5dde5425 157 if (s == s0)
36281465 158 return middle;
5dde5425 159 else if (s < s0)
d0fb370f
RS
160 low = middle + 1;
161 else
162 high = middle - 1;
163 }
d0fb370f
RS
164}
165
166
4a120d45 167static void
d2729d44 168initialize_F (void)
d0fb370f 169{
da2a7671
PE
170 goto_number **reads = xnmalloc (ngotos, sizeof *reads);
171 goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
60e3ecc7 172 goto_number nedges = 0;
d0fb370f 173
60e3ecc7 174 goto_number i;
d0fb370f 175
914feea9 176 F = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
d0fb370f 177
d0fb370f
RS
178 for (i = 0; i < ngotos; i++)
179 {
5dde5425
PE
180 state_number stateno = to_state[i];
181 transitions *sp = states[stateno]->transitions;
d0fb370f 182
d954473d 183 int j;
640748ee 184 FOR_EACH_SHIFT (sp, j)
ccaf65bc 185 bitset_set (F[i], TRANSITION_SYMBOL (sp, j));
d0fb370f 186
ccaf65bc 187 for (; j < sp->num; j++)
d954473d 188 {
5dde5425 189 symbol_number sym = TRANSITION_SYMBOL (sp, j);
5362cd5f 190 if (nullable[sym - ntokens])
5dde5425 191 edge[nedges++] = map_goto (stateno, sym);
d954473d 192 }
a0f6b076 193
da2a7671
PE
194 if (nedges == 0)
195 reads[i] = NULL;
196 else
d954473d 197 {
da2a7671
PE
198 reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
199 memcpy (reads[i], edge, nedges * sizeof edge[0]);
60e3ecc7 200 reads[i][nedges] = END_NODE;
d954473d 201 nedges = 0;
d0fb370f 202 }
d0fb370f
RS
203 }
204
0e4d5753 205 relation_digraph (reads, ngotos, &F);
d0fb370f
RS
206
207 for (i = 0; i < ngotos; i++)
afbb696d 208 free (reads[i]);
d0fb370f 209
afbb696d
PE
210 free (reads);
211 free (edge);
d0fb370f
RS
212}
213
214
4a120d45 215static void
60e3ecc7 216add_lookback_edge (state *s, rule *r, goto_number gotono)
d0fb370f 217{
5dde5425 218 int ri = state_reduction_find (s, r);
da2a7671 219 goto_list *sp = xmalloc (sizeof *sp);
742e4900 220 sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri];
d0fb370f 221 sp->value = gotono;
742e4900 222 lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp;
d0fb370f
RS
223}
224
225
d0fb370f 226
4a120d45 227static void
720d742f 228build_relations (void)
d0fb370f 229{
da2a7671
PE
230 goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
231 state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1);
60e3ecc7 232 goto_number i;
720d742f 233
da2a7671 234 includes = xnmalloc (ngotos, sizeof *includes);
d0fb370f
RS
235
236 for (i = 0; i < ngotos; i++)
237 {
9887c18a 238 int nedges = 0;
5dde5425
PE
239 symbol_number symbol1 = states[to_state[i]]->accessing_symbol;
240 rule **rulep;
d0fb370f 241
5362cd5f 242 for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++)
720d742f 243 {
d0829076 244 bool done;
80a69750 245 int length = 1;
e9ad4aec 246 item_number const *rp;
5dde5425
PE
247 state *s = states[from_state[i]];
248 states1[0] = s->number;
d0fb370f 249
e9ad4aec 250 for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++)
720d742f 251 {
5dde5425
PE
252 s = transitions_to (s->transitions,
253 item_number_as_symbol_number (*rp));
254 states1[length++] = s->number;
720d742f
AD
255 }
256
5dde5425
PE
257 if (!s->consistent)
258 add_lookback_edge (s, *rulep, i);
720d742f
AD
259
260 length--;
d0829076 261 done = false;
720d742f
AD
262 while (!done)
263 {
d0829076 264 done = true;
fa6aa7b3 265 /* Each rhs ends in a rule number, and there is a
e9ad4aec
PE
266 sentinel before the first rhs, so it is safe to
267 decrement RP here. */
720d742f 268 rp--;
e9ad4aec 269 if (ISVAR (*rp))
720d742f 270 {
5dde5425 271 /* Downcasting from item_number to symbol_number. */
5fbb0954 272 edge[nedges++] = map_goto (states1[--length],
a49aecd5 273 item_number_as_symbol_number (*rp));
5362cd5f 274 if (nullable[*rp - ntokens])
d0829076 275 done = false;
720d742f
AD
276 }
277 }
d0fb370f
RS
278 }
279
da2a7671
PE
280 if (nedges == 0)
281 includes[i] = NULL;
282 else
720d742f 283 {
9887c18a 284 int j;
da2a7671 285 includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
720d742f 286 for (j = 0; j < nedges; j++)
80a69750 287 includes[i][j] = edge[j];
60e3ecc7 288 includes[i][nedges] = END_NODE;
720d742f 289 }
d0fb370f
RS
290 }
291
afbb696d
PE
292 free (edge);
293 free (states1);
9887c18a 294
0e4d5753 295 relation_transpose (&includes, ngotos);
d0fb370f
RS
296}
297
298
720d742f 299
4a120d45 300static void
720d742f 301compute_FOLLOWS (void)
d0fb370f 302{
60e3ecc7 303 goto_number i;
d0fb370f 304
0e4d5753 305 relation_digraph (includes, ngotos, &F);
d0fb370f
RS
306
307 for (i = 0; i < ngotos; i++)
afbb696d 308 free (includes[i]);
d0fb370f 309
afbb696d 310 free (includes);
d0fb370f
RS
311}
312
313
4a120d45 314static void
742e4900 315compute_lookahead_tokens (void)
d0fb370f 316{
d200e455 317 size_t i;
5dde5425 318 goto_list *sp;
d0fb370f 319
d200e455 320 for (i = 0; i < nLA; i++)
ddcd5fdf 321 for (sp = lookback[i]; sp; sp = sp->next)
0fb1ffb1 322 bitset_or (LA[i], LA[i], F[sp->value]);
d0fb370f 323
ddcd5fdf 324 /* Free LOOKBACK. */
d200e455 325 for (i = 0; i < nLA; i++)
5dde5425 326 LIST_FREE (goto_list, lookback[i]);
d0fb370f 327
afbb696d 328 free (lookback);
914feea9 329 bitsetv_free (F);
720d742f 330}
d0fb370f 331
d0fb370f 332
742e4900
JD
333/*----------------------------------------------------.
334| Count the number of lookahead tokens required for S |
335| (N_LOOKAHEAD_TOKENS member). |
336`----------------------------------------------------*/
5edafffd 337
cd08e51e 338static int
742e4900 339state_lookahead_tokens_count (state *s)
5edafffd 340{
742e4900 341 int n_lookahead_tokens = 0;
5dde5425
PE
342 reductions *rp = s->reductions;
343 transitions *sp = s->transitions;
cd08e51e 344
742e4900 345 /* We need a lookahead either to distinguish different
cd08e51e
AD
346 reductions (i.e., there are two or more), or to distinguish a
347 reduction from a shift. Otherwise, it is straightforward,
2a6b783d
JD
348 and the state is `consistent'. There is no need to check that
349 transition 0 hasn't been disabled before checking if it is a
350 shift since transitions are only disabled during conflict
351 resolution, and that hasn't happened yet. */
352 aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0));
cd08e51e 353 if (rp->num > 1
2a6b783d 354 || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0)))
742e4900 355 n_lookahead_tokens += rp->num;
cd08e51e 356 else
5dde5425 357 s->consistent = 1;
cd08e51e 358
742e4900 359 return n_lookahead_tokens;
5edafffd
AD
360}
361
7c6b64d0 362
742e4900
JD
363/*----------------------------------------------------.
364| Compute LA, NLA, and the lookahead_tokens members. |
365`----------------------------------------------------*/
c0263492
AD
366
367static void
cd08e51e 368initialize_LA (void)
c0263492 369{
5dde5425 370 state_number i;
cd08e51e 371 bitsetv pLA;
c0263492 372
742e4900 373 /* Compute the total number of reductions requiring a lookahead. */
cd08e51e
AD
374 nLA = 0;
375 for (i = 0; i < nstates; i++)
742e4900 376 nLA += state_lookahead_tokens_count (states[i]);
cd08e51e
AD
377 /* Avoid having to special case 0. */
378 if (!nLA)
379 nLA = 1;
380
381 pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
da2a7671 382 lookback = xcalloc (nLA, sizeof *lookback);
cd08e51e 383
742e4900
JD
384 /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions
385 require lookahead tokens. */
c0263492
AD
386 for (i = 0; i < nstates; i++)
387 {
742e4900 388 int count = state_lookahead_tokens_count (states[i]);
cd08e51e
AD
389 if (count)
390 {
742e4900 391 states[i]->reductions->lookahead_tokens = pLA;
cd08e51e
AD
392 pLA += count;
393 }
c0263492
AD
394 }
395}
396
397
742e4900
JD
398/*---------------------------------------------.
399| Output the lookahead tokens for each state. |
400`---------------------------------------------*/
7c6b64d0
AD
401
402static void
742e4900 403lookahead_tokens_print (FILE *out)
7c6b64d0 404{
5dde5425 405 state_number i;
602bbf31 406 int j, k;
742e4900 407 fprintf (out, "Lookahead tokens: BEGIN\n");
7c6b64d0
AD
408 for (i = 0; i < nstates; ++i)
409 {
5dde5425 410 reductions *reds = states[i]->reductions;
613f5e1a 411 bitset_iterator iter;
742e4900 412 int n_lookahead_tokens = 0;
cd08e51e 413
742e4900 414 if (reds->lookahead_tokens)
cd08e51e 415 for (k = 0; k < reds->num; ++k)
742e4900
JD
416 if (reds->lookahead_tokens[k])
417 ++n_lookahead_tokens;
613f5e1a 418
742e4900
JD
419 fprintf (out, "State %d: %d lookahead tokens\n",
420 i, n_lookahead_tokens);
7c6b64d0 421
742e4900 422 if (reds->lookahead_tokens)
cd08e51e 423 for (j = 0; j < reds->num; ++j)
742e4900 424 BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0)
cd08e51e 425 {
427c0dda 426 fprintf (out, " on %d (%s) -> rule %d\n",
cd08e51e
AD
427 k, symbols[k]->tag,
428 reds->rules[j]->number);
429 };
7c6b64d0 430 }
742e4900 431 fprintf (out, "Lookahead tokens: END\n");
7c6b64d0
AD
432}
433
720d742f
AD
434void
435lalr (void)
436{
720d742f
AD
437 initialize_LA ();
438 set_goto_map ();
439 initialize_F ();
440 build_relations ();
441 compute_FOLLOWS ();
742e4900 442 compute_lookahead_tokens ();
7c6b64d0 443
273a74fa 444 if (trace_flag & trace_sets)
742e4900 445 lookahead_tokens_print (stderr);
d0fb370f 446}
3325ddc4
AD
447
448
5967f0cf
JD
449void
450lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
451{
452 goto_number ngotos_reachable = 0;
453 symbol_number nonterminal = 0;
454 aver (nsyms == nvars + ntokens);
14462c2b
JD
455 {
456 goto_number i;
457 for (i = 0; i < ngotos; ++i)
458 {
459 while (i == goto_map[nonterminal])
460 goto_map[nonterminal++] = ngotos_reachable;
461 /* If old_to_new[from_state[i]] = nstates_old, remove this goto
462 entry. */
463 if (old_to_new[from_state[i]] != nstates_old)
464 {
465 /* from_state[i] is not removed, so it and thus to_state[i] are
466 reachable, so to_state[i] != nstates_old. */
467 aver (old_to_new[to_state[i]] != nstates_old);
468 from_state[ngotos_reachable] = old_to_new[from_state[i]];
469 to_state[ngotos_reachable] = old_to_new[to_state[i]];
470 ++ngotos_reachable;
471 }
472 }
473 }
5967f0cf
JD
474 while (nonterminal <= nvars)
475 {
476 aver (ngotos == goto_map[nonterminal]);
477 goto_map[nonterminal++] = ngotos_reachable;
478 }
479 ngotos = ngotos_reachable;
480}
481
482
3325ddc4
AD
483void
484lalr_free (void)
485{
5dde5425 486 state_number s;
3325ddc4 487 for (s = 0; s < nstates; ++s)
742e4900 488 states[s]->reductions->lookahead_tokens = NULL;
3325ddc4 489 bitsetv_free (LA);
3325ddc4 490}