]> git.saurik.com Git - bison.git/blame - src/lalr.c
* lib/.cvsignore: Add charset.alias.
[bison.git] / src / lalr.c
CommitLineData
742e4900 1/* Compute lookahead criteria for Bison.
5362cd5f 2
e9ad4aec 3 Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004, 2005,
75ad86ee 4 2006, 2007 Free Software Foundation, Inc.
d0fb370f 5
340ef489 6 This file is part of Bison, the GNU Compiler Compiler.
d0fb370f 7
340ef489
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
d0fb370f 12
340ef489
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
d0fb370f 17
340ef489
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
0fb669f9
PE
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
d0fb370f
RS
22
23
720d742f 24/* Compute how to make the finite state machine deterministic; find
742e4900 25 which rules need lookahead in each state, and which lookahead
340ef489 26 tokens they accept. */
d0fb370f 27
2cec9080 28#include <config.h>
d0fb370f 29#include "system.h"
5dde5425
PE
30
31#include <bitset.h>
32#include <bitsetv.h>
33#include <quotearg.h>
34
b2ca4022 35#include "LR0.h"
a0f6b076 36#include "complain.h"
340ef489 37#include "derives.h"
f67d13aa 38#include "getargs.h"
5dde5425
PE
39#include "gram.h"
40#include "lalr.h"
41#include "nullable.h"
42#include "reader.h"
43#include "relation.h"
44#include "symtab.h"
d0fb370f 45
4cf44c00
PE
46goto_number *goto_map;
47static goto_number ngotos;
48state_number *from_state;
49state_number *to_state;
e68e0410
AD
50
51/* Linked list of goto numbers. */
5dde5425 52typedef struct goto_list
e68e0410 53{
5dde5425
PE
54 struct goto_list *next;
55 goto_number value;
56} goto_list;
e68e0410
AD
57
58
b09f4f48 59/* LA is an NLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule
3325ddc4
AD
60 LArule[l] is applicable in the appropriate state when the next
61 token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j,
62 it is a conflict. */
63
64static bitsetv LA = NULL;
d200e455 65size_t nLA;
9703cc49 66
d0fb370f 67
fe961097 68/* And for the famous F variable, which name is so descriptive that a
ddcd5fdf 69 comment is hardly needed. <grin>. */
914feea9 70static bitsetv F = NULL;
ddcd5fdf 71
5dde5425
PE
72static goto_number **includes;
73static goto_list **lookback;
aa2aab3c
AD
74
75
720d742f 76
bb527fc2 77
4a120d45 78static void
d2729d44 79set_goto_map (void)
d0fb370f 80{
5dde5425
PE
81 state_number s;
82 goto_number *temp_map;
d0fb370f 83
da2a7671
PE
84 goto_map = xcalloc (nvars + 1, sizeof *goto_map);
85 temp_map = xnmalloc (nvars + 1, sizeof *temp_map);
d0fb370f
RS
86
87 ngotos = 0;
5dde5425 88 for (s = 0; s < nstates; ++s)
7215de24 89 {
5dde5425 90 transitions *sp = states[s]->transitions;
d57650a5 91 int i;
ccaf65bc 92 for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
7215de24 93 {
7215de24 94 ngotos++;
60e3ecc7
PE
95
96 /* Abort if (ngotos + 1) would overflow. */
4f82b42a 97 aver (ngotos != GOTO_NUMBER_MAXIMUM);
60e3ecc7 98
5362cd5f 99 goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
7215de24
AD
100 }
101 }
d0fb370f 102
d0b0fefa 103 {
60e3ecc7 104 goto_number k = 0;
d57650a5 105 int i;
d0b0fefa
AD
106 for (i = ntokens; i < nsyms; i++)
107 {
5362cd5f
PE
108 temp_map[i - ntokens] = k;
109 k += goto_map[i - ntokens];
d0b0fefa 110 }
d0fb370f 111
d0b0fefa 112 for (i = ntokens; i < nsyms; i++)
5362cd5f 113 goto_map[i - ntokens] = temp_map[i - ntokens];
d0fb370f 114
5362cd5f
PE
115 goto_map[nsyms - ntokens] = ngotos;
116 temp_map[nsyms - ntokens] = ngotos;
d0b0fefa 117 }
d0fb370f 118
da2a7671
PE
119 from_state = xcalloc (ngotos, sizeof *from_state);
120 to_state = xcalloc (ngotos, sizeof *to_state);
d0fb370f 121
5dde5425 122 for (s = 0; s < nstates; ++s)
d0fb370f 123 {
5dde5425 124 transitions *sp = states[s]->transitions;
d57650a5 125 int i;
ccaf65bc 126 for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
d0fb370f 127 {
60e3ecc7 128 goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
5dde5425 129 from_state[k] = s;
640748ee 130 to_state[k] = sp->states[i]->number;
d0fb370f
RS
131 }
132 }
133
afbb696d 134 free (temp_map);
d0fb370f
RS
135}
136
137
138
43591cec
AD
139/*----------------------------------------------------------.
140| Map a state/symbol pair into its numeric representation. |
141`----------------------------------------------------------*/
d0fb370f 142
60e3ecc7 143static goto_number
5dde5425 144map_goto (state_number s0, symbol_number sym)
d0fb370f 145{
60e3ecc7
PE
146 goto_number high;
147 goto_number low;
148 goto_number middle;
5dde5425 149 state_number s;
d0fb370f 150
5362cd5f
PE
151 low = goto_map[sym - ntokens];
152 high = goto_map[sym - ntokens + 1] - 1;
d0fb370f 153
58a84254 154 for (;;)
d0fb370f 155 {
4f82b42a 156 aver (low <= high);
d0fb370f
RS
157 middle = (low + high) / 2;
158 s = from_state[middle];
5dde5425 159 if (s == s0)
36281465 160 return middle;
5dde5425 161 else if (s < s0)
d0fb370f
RS
162 low = middle + 1;
163 else
164 high = middle - 1;
165 }
d0fb370f
RS
166}
167
168
4a120d45 169static void
d2729d44 170initialize_F (void)
d0fb370f 171{
da2a7671
PE
172 goto_number **reads = xnmalloc (ngotos, sizeof *reads);
173 goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
60e3ecc7 174 goto_number nedges = 0;
d0fb370f 175
60e3ecc7 176 goto_number i;
d0fb370f 177
914feea9 178 F = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
d0fb370f 179
d0fb370f
RS
180 for (i = 0; i < ngotos; i++)
181 {
5dde5425
PE
182 state_number stateno = to_state[i];
183 transitions *sp = states[stateno]->transitions;
d0fb370f 184
d954473d 185 int j;
640748ee 186 FOR_EACH_SHIFT (sp, j)
ccaf65bc 187 bitset_set (F[i], TRANSITION_SYMBOL (sp, j));
d0fb370f 188
ccaf65bc 189 for (; j < sp->num; j++)
d954473d 190 {
5dde5425 191 symbol_number sym = TRANSITION_SYMBOL (sp, j);
5362cd5f 192 if (nullable[sym - ntokens])
5dde5425 193 edge[nedges++] = map_goto (stateno, sym);
d954473d 194 }
a0f6b076 195
da2a7671
PE
196 if (nedges == 0)
197 reads[i] = NULL;
198 else
d954473d 199 {
da2a7671
PE
200 reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
201 memcpy (reads[i], edge, nedges * sizeof edge[0]);
60e3ecc7 202 reads[i][nedges] = END_NODE;
d954473d 203 nedges = 0;
d0fb370f 204 }
d0fb370f
RS
205 }
206
0e4d5753 207 relation_digraph (reads, ngotos, &F);
d0fb370f
RS
208
209 for (i = 0; i < ngotos; i++)
afbb696d 210 free (reads[i]);
d0fb370f 211
afbb696d
PE
212 free (reads);
213 free (edge);
d0fb370f
RS
214}
215
216
4a120d45 217static void
60e3ecc7 218add_lookback_edge (state *s, rule *r, goto_number gotono)
d0fb370f 219{
5dde5425 220 int ri = state_reduction_find (s, r);
da2a7671 221 goto_list *sp = xmalloc (sizeof *sp);
742e4900 222 sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri];
d0fb370f 223 sp->value = gotono;
742e4900 224 lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp;
d0fb370f
RS
225}
226
227
d0fb370f 228
4a120d45 229static void
720d742f 230build_relations (void)
d0fb370f 231{
da2a7671
PE
232 goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
233 state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1);
60e3ecc7 234 goto_number i;
720d742f 235
da2a7671 236 includes = xnmalloc (ngotos, sizeof *includes);
d0fb370f
RS
237
238 for (i = 0; i < ngotos; i++)
239 {
9887c18a 240 int nedges = 0;
5dde5425
PE
241 symbol_number symbol1 = states[to_state[i]]->accessing_symbol;
242 rule **rulep;
d0fb370f 243
5362cd5f 244 for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++)
720d742f 245 {
d0829076 246 bool done;
80a69750 247 int length = 1;
e9ad4aec 248 item_number const *rp;
5dde5425
PE
249 state *s = states[from_state[i]];
250 states1[0] = s->number;
d0fb370f 251
e9ad4aec 252 for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++)
720d742f 253 {
5dde5425
PE
254 s = transitions_to (s->transitions,
255 item_number_as_symbol_number (*rp));
256 states1[length++] = s->number;
720d742f
AD
257 }
258
5dde5425
PE
259 if (!s->consistent)
260 add_lookback_edge (s, *rulep, i);
720d742f
AD
261
262 length--;
d0829076 263 done = false;
720d742f
AD
264 while (!done)
265 {
d0829076 266 done = true;
e9ad4aec
PE
267 /* Each rhs ends in an item number, and there is a
268 sentinel before the first rhs, so it is safe to
269 decrement RP here. */
720d742f 270 rp--;
e9ad4aec 271 if (ISVAR (*rp))
720d742f 272 {
5dde5425 273 /* Downcasting from item_number to symbol_number. */
5fbb0954 274 edge[nedges++] = map_goto (states1[--length],
a49aecd5 275 item_number_as_symbol_number (*rp));
5362cd5f 276 if (nullable[*rp - ntokens])
d0829076 277 done = false;
720d742f
AD
278 }
279 }
d0fb370f
RS
280 }
281
da2a7671
PE
282 if (nedges == 0)
283 includes[i] = NULL;
284 else
720d742f 285 {
9887c18a 286 int j;
da2a7671 287 includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
720d742f 288 for (j = 0; j < nedges; j++)
80a69750 289 includes[i][j] = edge[j];
60e3ecc7 290 includes[i][nedges] = END_NODE;
720d742f 291 }
d0fb370f
RS
292 }
293
afbb696d
PE
294 free (edge);
295 free (states1);
9887c18a 296
0e4d5753 297 relation_transpose (&includes, ngotos);
d0fb370f
RS
298}
299
300
720d742f 301
4a120d45 302static void
720d742f 303compute_FOLLOWS (void)
d0fb370f 304{
60e3ecc7 305 goto_number i;
d0fb370f 306
0e4d5753 307 relation_digraph (includes, ngotos, &F);
d0fb370f
RS
308
309 for (i = 0; i < ngotos; i++)
afbb696d 310 free (includes[i]);
d0fb370f 311
afbb696d 312 free (includes);
d0fb370f
RS
313}
314
315
4a120d45 316static void
742e4900 317compute_lookahead_tokens (void)
d0fb370f 318{
d200e455 319 size_t i;
5dde5425 320 goto_list *sp;
d0fb370f 321
d200e455 322 for (i = 0; i < nLA; i++)
ddcd5fdf 323 for (sp = lookback[i]; sp; sp = sp->next)
0fb1ffb1 324 bitset_or (LA[i], LA[i], F[sp->value]);
d0fb370f 325
ddcd5fdf 326 /* Free LOOKBACK. */
d200e455 327 for (i = 0; i < nLA; i++)
5dde5425 328 LIST_FREE (goto_list, lookback[i]);
d0fb370f 329
afbb696d 330 free (lookback);
914feea9 331 bitsetv_free (F);
720d742f 332}
d0fb370f 333
d0fb370f 334
742e4900
JD
335/*----------------------------------------------------.
336| Count the number of lookahead tokens required for S |
337| (N_LOOKAHEAD_TOKENS member). |
338`----------------------------------------------------*/
5edafffd 339
cd08e51e 340static int
742e4900 341state_lookahead_tokens_count (state *s)
5edafffd 342{
742e4900 343 int n_lookahead_tokens = 0;
5dde5425
PE
344 reductions *rp = s->reductions;
345 transitions *sp = s->transitions;
cd08e51e 346
742e4900 347 /* We need a lookahead either to distinguish different
cd08e51e
AD
348 reductions (i.e., there are two or more), or to distinguish a
349 reduction from a shift. Otherwise, it is straightforward,
2a6b783d
JD
350 and the state is `consistent'. There is no need to check that
351 transition 0 hasn't been disabled before checking if it is a
352 shift since transitions are only disabled during conflict
353 resolution, and that hasn't happened yet. */
354 aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0));
cd08e51e 355 if (rp->num > 1
2a6b783d 356 || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0)))
742e4900 357 n_lookahead_tokens += rp->num;
cd08e51e 358 else
5dde5425 359 s->consistent = 1;
cd08e51e 360
742e4900 361 return n_lookahead_tokens;
5edafffd
AD
362}
363
7c6b64d0 364
742e4900
JD
365/*----------------------------------------------------.
366| Compute LA, NLA, and the lookahead_tokens members. |
367`----------------------------------------------------*/
c0263492
AD
368
369static void
cd08e51e 370initialize_LA (void)
c0263492 371{
5dde5425 372 state_number i;
cd08e51e 373 bitsetv pLA;
c0263492 374
742e4900 375 /* Compute the total number of reductions requiring a lookahead. */
cd08e51e
AD
376 nLA = 0;
377 for (i = 0; i < nstates; i++)
742e4900 378 nLA += state_lookahead_tokens_count (states[i]);
cd08e51e
AD
379 /* Avoid having to special case 0. */
380 if (!nLA)
381 nLA = 1;
382
383 pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
da2a7671 384 lookback = xcalloc (nLA, sizeof *lookback);
cd08e51e 385
742e4900
JD
386 /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions
387 require lookahead tokens. */
c0263492
AD
388 for (i = 0; i < nstates; i++)
389 {
742e4900 390 int count = state_lookahead_tokens_count (states[i]);
cd08e51e
AD
391 if (count)
392 {
742e4900 393 states[i]->reductions->lookahead_tokens = pLA;
cd08e51e
AD
394 pLA += count;
395 }
c0263492
AD
396 }
397}
398
399
742e4900
JD
400/*---------------------------------------------.
401| Output the lookahead tokens for each state. |
402`---------------------------------------------*/
7c6b64d0
AD
403
404static void
742e4900 405lookahead_tokens_print (FILE *out)
7c6b64d0 406{
5dde5425 407 state_number i;
602bbf31 408 int j, k;
742e4900 409 fprintf (out, "Lookahead tokens: BEGIN\n");
7c6b64d0
AD
410 for (i = 0; i < nstates; ++i)
411 {
5dde5425 412 reductions *reds = states[i]->reductions;
613f5e1a 413 bitset_iterator iter;
742e4900 414 int n_lookahead_tokens = 0;
cd08e51e 415
742e4900 416 if (reds->lookahead_tokens)
cd08e51e 417 for (k = 0; k < reds->num; ++k)
742e4900
JD
418 if (reds->lookahead_tokens[k])
419 ++n_lookahead_tokens;
613f5e1a 420
742e4900
JD
421 fprintf (out, "State %d: %d lookahead tokens\n",
422 i, n_lookahead_tokens);
7c6b64d0 423
742e4900 424 if (reds->lookahead_tokens)
cd08e51e 425 for (j = 0; j < reds->num; ++j)
742e4900 426 BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0)
cd08e51e 427 {
427c0dda 428 fprintf (out, " on %d (%s) -> rule %d\n",
cd08e51e
AD
429 k, symbols[k]->tag,
430 reds->rules[j]->number);
431 };
7c6b64d0 432 }
742e4900 433 fprintf (out, "Lookahead tokens: END\n");
7c6b64d0
AD
434}
435
720d742f
AD
436void
437lalr (void)
438{
720d742f
AD
439 initialize_LA ();
440 set_goto_map ();
441 initialize_F ();
442 build_relations ();
443 compute_FOLLOWS ();
742e4900 444 compute_lookahead_tokens ();
7c6b64d0 445
273a74fa 446 if (trace_flag & trace_sets)
742e4900 447 lookahead_tokens_print (stderr);
d0fb370f 448}
3325ddc4
AD
449
450
5967f0cf
JD
451void
452lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
453{
454 goto_number ngotos_reachable = 0;
455 symbol_number nonterminal = 0;
456 aver (nsyms == nvars + ntokens);
14462c2b
JD
457 {
458 goto_number i;
459 for (i = 0; i < ngotos; ++i)
460 {
461 while (i == goto_map[nonterminal])
462 goto_map[nonterminal++] = ngotos_reachable;
463 /* If old_to_new[from_state[i]] = nstates_old, remove this goto
464 entry. */
465 if (old_to_new[from_state[i]] != nstates_old)
466 {
467 /* from_state[i] is not removed, so it and thus to_state[i] are
468 reachable, so to_state[i] != nstates_old. */
469 aver (old_to_new[to_state[i]] != nstates_old);
470 from_state[ngotos_reachable] = old_to_new[from_state[i]];
471 to_state[ngotos_reachable] = old_to_new[to_state[i]];
472 ++ngotos_reachable;
473 }
474 }
475 }
5967f0cf
JD
476 while (nonterminal <= nvars)
477 {
478 aver (ngotos == goto_map[nonterminal]);
479 goto_map[nonterminal++] = ngotos_reachable;
480 }
481 ngotos = ngotos_reachable;
482}
483
484
3325ddc4
AD
485void
486lalr_free (void)
487{
5dde5425 488 state_number s;
3325ddc4 489 for (s = 0; s < nstates; ++s)
742e4900 490 states[s]->reductions->lookahead_tokens = NULL;
3325ddc4 491 bitsetv_free (LA);
3325ddc4 492}