]> git.saurik.com Git - bison.git/blame - src/lalr.c
tests: improve the generic yylex implementation.
[bison.git] / src / lalr.c
CommitLineData
742e4900 1/* Compute lookahead criteria for Bison.
5362cd5f 2
c932d613 3 Copyright (C) 1984, 1986, 1989, 2000-2012 Free Software Foundation,
ea0a7676 4 Inc.
d0fb370f 5
340ef489 6 This file is part of Bison, the GNU Compiler Compiler.
d0fb370f 7
f16b0819 8 This program is free software: you can redistribute it and/or modify
340ef489 9 it under the terms of the GNU General Public License as published by
f16b0819
PE
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
d0fb370f 12
f16b0819 13 This program is distributed in the hope that it will be useful,
340ef489
AD
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
d0fb370f 17
340ef489 18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
d0fb370f
RS
20
21
1c4ad777 22/* Find which rules need lookahead in each state, and which lookahead
340ef489 23 tokens they accept. */
d0fb370f 24
2cec9080 25#include <config.h>
d0fb370f 26#include "system.h"
5dde5425
PE
27
28#include <bitset.h>
29#include <bitsetv.h>
5dde5425 30
b2ca4022 31#include "LR0.h"
a0f6b076 32#include "complain.h"
340ef489 33#include "derives.h"
f67d13aa 34#include "getargs.h"
5dde5425
PE
35#include "gram.h"
36#include "lalr.h"
23ec25b7 37#include "muscle-tab.h"
5dde5425
PE
38#include "nullable.h"
39#include "reader.h"
40#include "relation.h"
41#include "symtab.h"
d0fb370f 42
4cf44c00 43goto_number *goto_map;
f805dfcb 44goto_number ngotos;
4cf44c00
PE
45state_number *from_state;
46state_number *to_state;
f805dfcb 47bitsetv goto_follows = NULL;
e68e0410
AD
48
49/* Linked list of goto numbers. */
5dde5425 50typedef struct goto_list
e68e0410 51{
5dde5425
PE
52 struct goto_list *next;
53 goto_number value;
54} goto_list;
e68e0410
AD
55
56
b09f4f48 57/* LA is an NLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule
3325ddc4
AD
58 LArule[l] is applicable in the appropriate state when the next
59 token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j,
60 it is a conflict. */
61
62static bitsetv LA = NULL;
d200e455 63size_t nLA;
9703cc49 64
d0fb370f 65
5dde5425
PE
66static goto_number **includes;
67static goto_list **lookback;
aa2aab3c
AD
68
69
720d742f 70
bb527fc2 71
f805dfcb 72void
d2729d44 73set_goto_map (void)
d0fb370f 74{
5dde5425
PE
75 state_number s;
76 goto_number *temp_map;
d0fb370f 77
da2a7671
PE
78 goto_map = xcalloc (nvars + 1, sizeof *goto_map);
79 temp_map = xnmalloc (nvars + 1, sizeof *temp_map);
d0fb370f
RS
80
81 ngotos = 0;
5dde5425 82 for (s = 0; s < nstates; ++s)
7215de24 83 {
5dde5425 84 transitions *sp = states[s]->transitions;
d57650a5 85 int i;
ccaf65bc 86 for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
7215de24 87 {
7215de24 88 ngotos++;
60e3ecc7
PE
89
90 /* Abort if (ngotos + 1) would overflow. */
4f82b42a 91 aver (ngotos != GOTO_NUMBER_MAXIMUM);
60e3ecc7 92
5362cd5f 93 goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
7215de24
AD
94 }
95 }
d0fb370f 96
d0b0fefa 97 {
60e3ecc7 98 goto_number k = 0;
d57650a5 99 int i;
d0b0fefa
AD
100 for (i = ntokens; i < nsyms; i++)
101 {
5362cd5f
PE
102 temp_map[i - ntokens] = k;
103 k += goto_map[i - ntokens];
d0b0fefa 104 }
d0fb370f 105
d0b0fefa 106 for (i = ntokens; i < nsyms; i++)
5362cd5f 107 goto_map[i - ntokens] = temp_map[i - ntokens];
d0fb370f 108
5362cd5f
PE
109 goto_map[nsyms - ntokens] = ngotos;
110 temp_map[nsyms - ntokens] = ngotos;
d0b0fefa 111 }
d0fb370f 112
da2a7671
PE
113 from_state = xcalloc (ngotos, sizeof *from_state);
114 to_state = xcalloc (ngotos, sizeof *to_state);
d0fb370f 115
5dde5425 116 for (s = 0; s < nstates; ++s)
d0fb370f 117 {
5dde5425 118 transitions *sp = states[s]->transitions;
d57650a5 119 int i;
ccaf65bc 120 for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
d0fb370f 121 {
60e3ecc7 122 goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
5dde5425 123 from_state[k] = s;
640748ee 124 to_state[k] = sp->states[i]->number;
d0fb370f
RS
125 }
126 }
127
afbb696d 128 free (temp_map);
d0fb370f
RS
129}
130
131
f805dfcb 132goto_number
5dde5425 133map_goto (state_number s0, symbol_number sym)
d0fb370f 134{
60e3ecc7
PE
135 goto_number high;
136 goto_number low;
137 goto_number middle;
5dde5425 138 state_number s;
d0fb370f 139
5362cd5f
PE
140 low = goto_map[sym - ntokens];
141 high = goto_map[sym - ntokens + 1] - 1;
d0fb370f 142
58a84254 143 for (;;)
d0fb370f 144 {
4f82b42a 145 aver (low <= high);
d0fb370f
RS
146 middle = (low + high) / 2;
147 s = from_state[middle];
5dde5425 148 if (s == s0)
36281465 149 return middle;
5dde5425 150 else if (s < s0)
d0fb370f
RS
151 low = middle + 1;
152 else
153 high = middle - 1;
154 }
d0fb370f
RS
155}
156
157
4a120d45 158static void
d2729d44 159initialize_F (void)
d0fb370f 160{
da2a7671
PE
161 goto_number **reads = xnmalloc (ngotos, sizeof *reads);
162 goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
60e3ecc7 163 goto_number nedges = 0;
d0fb370f 164
60e3ecc7 165 goto_number i;
d0fb370f 166
f805dfcb 167 goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
d0fb370f 168
d0fb370f
RS
169 for (i = 0; i < ngotos; i++)
170 {
5dde5425
PE
171 state_number stateno = to_state[i];
172 transitions *sp = states[stateno]->transitions;
d0fb370f 173
d954473d 174 int j;
640748ee 175 FOR_EACH_SHIFT (sp, j)
f805dfcb 176 bitset_set (goto_follows[i], TRANSITION_SYMBOL (sp, j));
d0fb370f 177
ccaf65bc 178 for (; j < sp->num; j++)
d954473d 179 {
5dde5425 180 symbol_number sym = TRANSITION_SYMBOL (sp, j);
5362cd5f 181 if (nullable[sym - ntokens])
5dde5425 182 edge[nedges++] = map_goto (stateno, sym);
d954473d 183 }
a0f6b076 184
da2a7671
PE
185 if (nedges == 0)
186 reads[i] = NULL;
187 else
d954473d 188 {
da2a7671
PE
189 reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
190 memcpy (reads[i], edge, nedges * sizeof edge[0]);
60e3ecc7 191 reads[i][nedges] = END_NODE;
d954473d 192 nedges = 0;
d0fb370f 193 }
d0fb370f
RS
194 }
195
f805dfcb 196 relation_digraph (reads, ngotos, &goto_follows);
d0fb370f
RS
197
198 for (i = 0; i < ngotos; i++)
afbb696d 199 free (reads[i]);
d0fb370f 200
afbb696d
PE
201 free (reads);
202 free (edge);
d0fb370f
RS
203}
204
205
4a120d45 206static void
60e3ecc7 207add_lookback_edge (state *s, rule *r, goto_number gotono)
d0fb370f 208{
5dde5425 209 int ri = state_reduction_find (s, r);
da2a7671 210 goto_list *sp = xmalloc (sizeof *sp);
742e4900 211 sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri];
d0fb370f 212 sp->value = gotono;
742e4900 213 lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp;
d0fb370f
RS
214}
215
216
d0fb370f 217
4a120d45 218static void
720d742f 219build_relations (void)
d0fb370f 220{
da2a7671
PE
221 goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
222 state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1);
60e3ecc7 223 goto_number i;
720d742f 224
da2a7671 225 includes = xnmalloc (ngotos, sizeof *includes);
d0fb370f
RS
226
227 for (i = 0; i < ngotos; i++)
228 {
9887c18a 229 int nedges = 0;
5dde5425
PE
230 symbol_number symbol1 = states[to_state[i]]->accessing_symbol;
231 rule **rulep;
d0fb370f 232
5362cd5f 233 for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++)
720d742f 234 {
d0829076 235 bool done;
80a69750 236 int length = 1;
e9ad4aec 237 item_number const *rp;
5dde5425
PE
238 state *s = states[from_state[i]];
239 states1[0] = s->number;
d0fb370f 240
e9ad4aec 241 for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++)
720d742f 242 {
5dde5425
PE
243 s = transitions_to (s->transitions,
244 item_number_as_symbol_number (*rp));
245 states1[length++] = s->number;
720d742f
AD
246 }
247
5dde5425
PE
248 if (!s->consistent)
249 add_lookback_edge (s, *rulep, i);
720d742f
AD
250
251 length--;
d0829076 252 done = false;
720d742f
AD
253 while (!done)
254 {
d0829076 255 done = true;
fa6aa7b3 256 /* Each rhs ends in a rule number, and there is a
f805dfcb 257 sentinel (ritem[-1]=0) before the first rhs, so it is safe to
e9ad4aec 258 decrement RP here. */
720d742f 259 rp--;
e9ad4aec 260 if (ISVAR (*rp))
720d742f 261 {
5dde5425 262 /* Downcasting from item_number to symbol_number. */
5fbb0954 263 edge[nedges++] = map_goto (states1[--length],
a49aecd5 264 item_number_as_symbol_number (*rp));
5362cd5f 265 if (nullable[*rp - ntokens])
d0829076 266 done = false;
720d742f
AD
267 }
268 }
d0fb370f
RS
269 }
270
da2a7671
PE
271 if (nedges == 0)
272 includes[i] = NULL;
273 else
720d742f 274 {
9887c18a 275 int j;
da2a7671 276 includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
720d742f 277 for (j = 0; j < nedges; j++)
80a69750 278 includes[i][j] = edge[j];
60e3ecc7 279 includes[i][nedges] = END_NODE;
720d742f 280 }
d0fb370f
RS
281 }
282
afbb696d
PE
283 free (edge);
284 free (states1);
9887c18a 285
0e4d5753 286 relation_transpose (&includes, ngotos);
d0fb370f
RS
287}
288
289
720d742f 290
4a120d45 291static void
720d742f 292compute_FOLLOWS (void)
d0fb370f 293{
60e3ecc7 294 goto_number i;
d0fb370f 295
f805dfcb 296 relation_digraph (includes, ngotos, &goto_follows);
d0fb370f
RS
297
298 for (i = 0; i < ngotos; i++)
afbb696d 299 free (includes[i]);
d0fb370f 300
afbb696d 301 free (includes);
d0fb370f
RS
302}
303
304
4a120d45 305static void
742e4900 306compute_lookahead_tokens (void)
d0fb370f 307{
d200e455 308 size_t i;
5dde5425 309 goto_list *sp;
d0fb370f 310
d200e455 311 for (i = 0; i < nLA; i++)
ddcd5fdf 312 for (sp = lookback[i]; sp; sp = sp->next)
f805dfcb 313 bitset_or (LA[i], LA[i], goto_follows[sp->value]);
d0fb370f 314
ddcd5fdf 315 /* Free LOOKBACK. */
d200e455 316 for (i = 0; i < nLA; i++)
5dde5425 317 LIST_FREE (goto_list, lookback[i]);
d0fb370f 318
afbb696d 319 free (lookback);
720d742f 320}
d0fb370f 321
d0fb370f 322
742e4900
JD
323/*----------------------------------------------------.
324| Count the number of lookahead tokens required for S |
325| (N_LOOKAHEAD_TOKENS member). |
326`----------------------------------------------------*/
5edafffd 327
cd08e51e 328static int
620b5727 329state_lookahead_tokens_count (state *s, bool default_reduction_only_for_accept)
5edafffd 330{
742e4900 331 int n_lookahead_tokens = 0;
5dde5425
PE
332 reductions *rp = s->reductions;
333 transitions *sp = s->transitions;
cd08e51e 334
03c07b03
JD
335 /* Transitions are only disabled during conflict resolution, and that
336 hasn't happened yet, so there should be no need to check that
337 transition 0 hasn't been disabled before checking if it is a shift.
338 However, this check was performed at one time, so we leave it as an
339 aver. */
2a6b783d 340 aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0));
03c07b03
JD
341
342 /* We need a lookahead either to distinguish different reductions
343 (i.e., there are two or more), or to distinguish a reduction from a
344 shift. Otherwise, it is straightforward, and the state is
2de160e1
JD
345 `consistent'. However, do not treat a state with any reductions as
346 consistent unless it is the accepting state (because there is never
347 a lookahead token that makes sense there, and so no lookahead token
348 should be read) if the user has otherwise disabled default
349 reductions. */
cd08e51e 350 if (rp->num > 1
03c07b03
JD
351 || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0))
352 || (rp->num == 1 && rp->rules[0]->number != 0
620b5727 353 && default_reduction_only_for_accept))
742e4900 354 n_lookahead_tokens += rp->num;
cd08e51e 355 else
5dde5425 356 s->consistent = 1;
cd08e51e 357
742e4900 358 return n_lookahead_tokens;
5edafffd
AD
359}
360
7c6b64d0 361
742e4900
JD
362/*----------------------------------------------------.
363| Compute LA, NLA, and the lookahead_tokens members. |
364`----------------------------------------------------*/
c0263492 365
f805dfcb 366void
cd08e51e 367initialize_LA (void)
c0263492 368{
5dde5425 369 state_number i;
cd08e51e 370 bitsetv pLA;
620b5727 371 bool default_reduction_only_for_accept;
03c07b03 372 {
620b5727 373 char *default_reductions =
1d0f55cc 374 muscle_percent_define_get ("lr.default-reductions");
620b5727
JD
375 default_reduction_only_for_accept =
376 0 == strcmp (default_reductions, "accepting");
377 free (default_reductions);
03c07b03 378 }
c0263492 379
742e4900 380 /* Compute the total number of reductions requiring a lookahead. */
cd08e51e
AD
381 nLA = 0;
382 for (i = 0; i < nstates; i++)
03c07b03 383 nLA +=
620b5727
JD
384 state_lookahead_tokens_count (states[i],
385 default_reduction_only_for_accept);
cd08e51e
AD
386 /* Avoid having to special case 0. */
387 if (!nLA)
388 nLA = 1;
389
390 pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
cd08e51e 391
742e4900
JD
392 /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions
393 require lookahead tokens. */
c0263492
AD
394 for (i = 0; i < nstates; i++)
395 {
03c07b03 396 int count =
620b5727
JD
397 state_lookahead_tokens_count (states[i],
398 default_reduction_only_for_accept);
cd08e51e
AD
399 if (count)
400 {
742e4900 401 states[i]->reductions->lookahead_tokens = pLA;
cd08e51e
AD
402 pLA += count;
403 }
c0263492
AD
404 }
405}
406
407
742e4900
JD
408/*---------------------------------------------.
409| Output the lookahead tokens for each state. |
410`---------------------------------------------*/
7c6b64d0
AD
411
412static void
742e4900 413lookahead_tokens_print (FILE *out)
7c6b64d0 414{
5dde5425 415 state_number i;
602bbf31 416 int j, k;
742e4900 417 fprintf (out, "Lookahead tokens: BEGIN\n");
7c6b64d0
AD
418 for (i = 0; i < nstates; ++i)
419 {
5dde5425 420 reductions *reds = states[i]->reductions;
613f5e1a 421 bitset_iterator iter;
742e4900 422 int n_lookahead_tokens = 0;
cd08e51e 423
742e4900 424 if (reds->lookahead_tokens)
cd08e51e 425 for (k = 0; k < reds->num; ++k)
742e4900
JD
426 if (reds->lookahead_tokens[k])
427 ++n_lookahead_tokens;
613f5e1a 428
742e4900
JD
429 fprintf (out, "State %d: %d lookahead tokens\n",
430 i, n_lookahead_tokens);
7c6b64d0 431
742e4900 432 if (reds->lookahead_tokens)
cd08e51e 433 for (j = 0; j < reds->num; ++j)
742e4900 434 BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0)
cd08e51e 435 {
427c0dda 436 fprintf (out, " on %d (%s) -> rule %d\n",
cd08e51e
AD
437 k, symbols[k]->tag,
438 reds->rules[j]->number);
439 };
7c6b64d0 440 }
742e4900 441 fprintf (out, "Lookahead tokens: END\n");
7c6b64d0
AD
442}
443
720d742f
AD
444void
445lalr (void)
446{
720d742f
AD
447 initialize_LA ();
448 set_goto_map ();
449 initialize_F ();
f805dfcb 450 lookback = xcalloc (nLA, sizeof *lookback);
720d742f
AD
451 build_relations ();
452 compute_FOLLOWS ();
742e4900 453 compute_lookahead_tokens ();
7c6b64d0 454
273a74fa 455 if (trace_flag & trace_sets)
742e4900 456 lookahead_tokens_print (stderr);
d0fb370f 457}
3325ddc4
AD
458
459
5967f0cf
JD
460void
461lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
462{
463 goto_number ngotos_reachable = 0;
464 symbol_number nonterminal = 0;
465 aver (nsyms == nvars + ntokens);
14462c2b
JD
466 {
467 goto_number i;
468 for (i = 0; i < ngotos; ++i)
469 {
470 while (i == goto_map[nonterminal])
471 goto_map[nonterminal++] = ngotos_reachable;
472 /* If old_to_new[from_state[i]] = nstates_old, remove this goto
473 entry. */
474 if (old_to_new[from_state[i]] != nstates_old)
475 {
476 /* from_state[i] is not removed, so it and thus to_state[i] are
477 reachable, so to_state[i] != nstates_old. */
478 aver (old_to_new[to_state[i]] != nstates_old);
479 from_state[ngotos_reachable] = old_to_new[from_state[i]];
480 to_state[ngotos_reachable] = old_to_new[to_state[i]];
481 ++ngotos_reachable;
482 }
483 }
484 }
5967f0cf
JD
485 while (nonterminal <= nvars)
486 {
487 aver (ngotos == goto_map[nonterminal]);
488 goto_map[nonterminal++] = ngotos_reachable;
489 }
490 ngotos = ngotos_reachable;
491}
492
493
3325ddc4
AD
494void
495lalr_free (void)
496{
5dde5425 497 state_number s;
3325ddc4 498 for (s = 0; s < nstates; ++s)
742e4900 499 states[s]->reductions->lookahead_tokens = NULL;
3325ddc4 500 bitsetv_free (LA);
3325ddc4 501}