]>
Commit | Line | Data |
---|---|---|
1 | /* Compute lookahead criteria for Bison. | |
2 | ||
3 | Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004, 2005, | |
4 | 2006, 2007, 2008, 2009 Free Software Foundation, Inc. | |
5 | ||
6 | This file is part of Bison, the GNU Compiler Compiler. | |
7 | ||
8 | This program is free software: you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation, either version 3 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | This program is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | ||
22 | /* Compute how to make the finite state machine deterministic; find | |
23 | which rules need lookahead in each state, and which lookahead | |
24 | tokens they accept. */ | |
25 | ||
26 | #include <config.h> | |
27 | #include "system.h" | |
28 | ||
29 | #include <bitset.h> | |
30 | #include <bitsetv.h> | |
31 | #include <quotearg.h> | |
32 | ||
33 | #include "LR0.h" | |
34 | #include "complain.h" | |
35 | #include "derives.h" | |
36 | #include "getargs.h" | |
37 | #include "gram.h" | |
38 | #include "lalr.h" | |
39 | #include "muscle-tab.h" | |
40 | #include "nullable.h" | |
41 | #include "reader.h" | |
42 | #include "relation.h" | |
43 | #include "symtab.h" | |
44 | ||
45 | goto_number *goto_map; | |
46 | goto_number ngotos; | |
47 | state_number *from_state; | |
48 | state_number *to_state; | |
49 | bitsetv goto_follows = NULL; | |
50 | ||
51 | /* Linked list of goto numbers. */ | |
52 | typedef struct goto_list | |
53 | { | |
54 | struct goto_list *next; | |
55 | goto_number value; | |
56 | } goto_list; | |
57 | ||
58 | ||
59 | /* LA is an NLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule | |
60 | LArule[l] is applicable in the appropriate state when the next | |
61 | token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j, | |
62 | it is a conflict. */ | |
63 | ||
64 | static bitsetv LA = NULL; | |
65 | size_t nLA; | |
66 | ||
67 | ||
68 | static goto_number **includes; | |
69 | static goto_list **lookback; | |
70 | ||
71 | ||
72 | ||
73 | ||
74 | void | |
75 | set_goto_map (void) | |
76 | { | |
77 | state_number s; | |
78 | goto_number *temp_map; | |
79 | ||
80 | goto_map = xcalloc (nvars + 1, sizeof *goto_map); | |
81 | temp_map = xnmalloc (nvars + 1, sizeof *temp_map); | |
82 | ||
83 | ngotos = 0; | |
84 | for (s = 0; s < nstates; ++s) | |
85 | { | |
86 | transitions *sp = states[s]->transitions; | |
87 | int i; | |
88 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) | |
89 | { | |
90 | ngotos++; | |
91 | ||
92 | /* Abort if (ngotos + 1) would overflow. */ | |
93 | aver (ngotos != GOTO_NUMBER_MAXIMUM); | |
94 | ||
95 | goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; | |
96 | } | |
97 | } | |
98 | ||
99 | { | |
100 | goto_number k = 0; | |
101 | int i; | |
102 | for (i = ntokens; i < nsyms; i++) | |
103 | { | |
104 | temp_map[i - ntokens] = k; | |
105 | k += goto_map[i - ntokens]; | |
106 | } | |
107 | ||
108 | for (i = ntokens; i < nsyms; i++) | |
109 | goto_map[i - ntokens] = temp_map[i - ntokens]; | |
110 | ||
111 | goto_map[nsyms - ntokens] = ngotos; | |
112 | temp_map[nsyms - ntokens] = ngotos; | |
113 | } | |
114 | ||
115 | from_state = xcalloc (ngotos, sizeof *from_state); | |
116 | to_state = xcalloc (ngotos, sizeof *to_state); | |
117 | ||
118 | for (s = 0; s < nstates; ++s) | |
119 | { | |
120 | transitions *sp = states[s]->transitions; | |
121 | int i; | |
122 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) | |
123 | { | |
124 | goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; | |
125 | from_state[k] = s; | |
126 | to_state[k] = sp->states[i]->number; | |
127 | } | |
128 | } | |
129 | ||
130 | free (temp_map); | |
131 | } | |
132 | ||
133 | ||
134 | goto_number | |
135 | map_goto (state_number s0, symbol_number sym) | |
136 | { | |
137 | goto_number high; | |
138 | goto_number low; | |
139 | goto_number middle; | |
140 | state_number s; | |
141 | ||
142 | low = goto_map[sym - ntokens]; | |
143 | high = goto_map[sym - ntokens + 1] - 1; | |
144 | ||
145 | for (;;) | |
146 | { | |
147 | aver (low <= high); | |
148 | middle = (low + high) / 2; | |
149 | s = from_state[middle]; | |
150 | if (s == s0) | |
151 | return middle; | |
152 | else if (s < s0) | |
153 | low = middle + 1; | |
154 | else | |
155 | high = middle - 1; | |
156 | } | |
157 | } | |
158 | ||
159 | ||
160 | static void | |
161 | initialize_F (void) | |
162 | { | |
163 | goto_number **reads = xnmalloc (ngotos, sizeof *reads); | |
164 | goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); | |
165 | goto_number nedges = 0; | |
166 | ||
167 | goto_number i; | |
168 | ||
169 | goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED); | |
170 | ||
171 | for (i = 0; i < ngotos; i++) | |
172 | { | |
173 | state_number stateno = to_state[i]; | |
174 | transitions *sp = states[stateno]->transitions; | |
175 | ||
176 | int j; | |
177 | FOR_EACH_SHIFT (sp, j) | |
178 | bitset_set (goto_follows[i], TRANSITION_SYMBOL (sp, j)); | |
179 | ||
180 | for (; j < sp->num; j++) | |
181 | { | |
182 | symbol_number sym = TRANSITION_SYMBOL (sp, j); | |
183 | if (nullable[sym - ntokens]) | |
184 | edge[nedges++] = map_goto (stateno, sym); | |
185 | } | |
186 | ||
187 | if (nedges == 0) | |
188 | reads[i] = NULL; | |
189 | else | |
190 | { | |
191 | reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]); | |
192 | memcpy (reads[i], edge, nedges * sizeof edge[0]); | |
193 | reads[i][nedges] = END_NODE; | |
194 | nedges = 0; | |
195 | } | |
196 | } | |
197 | ||
198 | relation_digraph (reads, ngotos, &goto_follows); | |
199 | ||
200 | for (i = 0; i < ngotos; i++) | |
201 | free (reads[i]); | |
202 | ||
203 | free (reads); | |
204 | free (edge); | |
205 | } | |
206 | ||
207 | ||
208 | static void | |
209 | add_lookback_edge (state *s, rule *r, goto_number gotono) | |
210 | { | |
211 | int ri = state_reduction_find (s, r); | |
212 | goto_list *sp = xmalloc (sizeof *sp); | |
213 | sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri]; | |
214 | sp->value = gotono; | |
215 | lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp; | |
216 | } | |
217 | ||
218 | ||
219 | ||
220 | static void | |
221 | build_relations (void) | |
222 | { | |
223 | goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge); | |
224 | state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1); | |
225 | goto_number i; | |
226 | ||
227 | includes = xnmalloc (ngotos, sizeof *includes); | |
228 | ||
229 | for (i = 0; i < ngotos; i++) | |
230 | { | |
231 | int nedges = 0; | |
232 | symbol_number symbol1 = states[to_state[i]]->accessing_symbol; | |
233 | rule **rulep; | |
234 | ||
235 | for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++) | |
236 | { | |
237 | bool done; | |
238 | int length = 1; | |
239 | item_number const *rp; | |
240 | state *s = states[from_state[i]]; | |
241 | states1[0] = s->number; | |
242 | ||
243 | for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++) | |
244 | { | |
245 | s = transitions_to (s->transitions, | |
246 | item_number_as_symbol_number (*rp)); | |
247 | states1[length++] = s->number; | |
248 | } | |
249 | ||
250 | if (!s->consistent) | |
251 | add_lookback_edge (s, *rulep, i); | |
252 | ||
253 | length--; | |
254 | done = false; | |
255 | while (!done) | |
256 | { | |
257 | done = true; | |
258 | /* Each rhs ends in a rule number, and there is a | |
259 | sentinel (ritem[-1]=0) before the first rhs, so it is safe to | |
260 | decrement RP here. */ | |
261 | rp--; | |
262 | if (ISVAR (*rp)) | |
263 | { | |
264 | /* Downcasting from item_number to symbol_number. */ | |
265 | edge[nedges++] = map_goto (states1[--length], | |
266 | item_number_as_symbol_number (*rp)); | |
267 | if (nullable[*rp - ntokens]) | |
268 | done = false; | |
269 | } | |
270 | } | |
271 | } | |
272 | ||
273 | if (nedges == 0) | |
274 | includes[i] = NULL; | |
275 | else | |
276 | { | |
277 | int j; | |
278 | includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]); | |
279 | for (j = 0; j < nedges; j++) | |
280 | includes[i][j] = edge[j]; | |
281 | includes[i][nedges] = END_NODE; | |
282 | } | |
283 | } | |
284 | ||
285 | free (edge); | |
286 | free (states1); | |
287 | ||
288 | relation_transpose (&includes, ngotos); | |
289 | } | |
290 | ||
291 | ||
292 | ||
293 | static void | |
294 | compute_FOLLOWS (void) | |
295 | { | |
296 | goto_number i; | |
297 | ||
298 | relation_digraph (includes, ngotos, &goto_follows); | |
299 | ||
300 | for (i = 0; i < ngotos; i++) | |
301 | free (includes[i]); | |
302 | ||
303 | free (includes); | |
304 | } | |
305 | ||
306 | ||
307 | static void | |
308 | compute_lookahead_tokens (void) | |
309 | { | |
310 | size_t i; | |
311 | goto_list *sp; | |
312 | ||
313 | for (i = 0; i < nLA; i++) | |
314 | for (sp = lookback[i]; sp; sp = sp->next) | |
315 | bitset_or (LA[i], LA[i], goto_follows[sp->value]); | |
316 | ||
317 | /* Free LOOKBACK. */ | |
318 | for (i = 0; i < nLA; i++) | |
319 | LIST_FREE (goto_list, lookback[i]); | |
320 | ||
321 | free (lookback); | |
322 | } | |
323 | ||
324 | ||
325 | /*----------------------------------------------------. | |
326 | | Count the number of lookahead tokens required for S | | |
327 | | (N_LOOKAHEAD_TOKENS member). | | |
328 | `----------------------------------------------------*/ | |
329 | ||
330 | static int | |
331 | state_lookahead_tokens_count (state *s, bool default_reduction_only_for_accept) | |
332 | { | |
333 | int n_lookahead_tokens = 0; | |
334 | reductions *rp = s->reductions; | |
335 | transitions *sp = s->transitions; | |
336 | ||
337 | /* Transitions are only disabled during conflict resolution, and that | |
338 | hasn't happened yet, so there should be no need to check that | |
339 | transition 0 hasn't been disabled before checking if it is a shift. | |
340 | However, this check was performed at one time, so we leave it as an | |
341 | aver. */ | |
342 | aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0)); | |
343 | ||
344 | /* We need a lookahead either to distinguish different reductions | |
345 | (i.e., there are two or more), or to distinguish a reduction from a | |
346 | shift. Otherwise, it is straightforward, and the state is | |
347 | `consistent'. However, do not treat a state with any reductions as | |
348 | consistent unless it is the accepting state (because there is never | |
349 | a lookahead token that makes sense there, and so no lookahead token | |
350 | should be read) if the user has otherwise disabled default | |
351 | reductions. */ | |
352 | if (rp->num > 1 | |
353 | || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0)) | |
354 | || (rp->num == 1 && rp->rules[0]->number != 0 | |
355 | && default_reduction_only_for_accept)) | |
356 | n_lookahead_tokens += rp->num; | |
357 | else | |
358 | s->consistent = 1; | |
359 | ||
360 | return n_lookahead_tokens; | |
361 | } | |
362 | ||
363 | ||
364 | /*----------------------------------------------------. | |
365 | | Compute LA, NLA, and the lookahead_tokens members. | | |
366 | `----------------------------------------------------*/ | |
367 | ||
368 | void | |
369 | initialize_LA (void) | |
370 | { | |
371 | state_number i; | |
372 | bitsetv pLA; | |
373 | bool default_reduction_only_for_accept; | |
374 | { | |
375 | char *default_reductions = | |
376 | muscle_percent_define_get ("lr.default-reductions"); | |
377 | default_reduction_only_for_accept = | |
378 | 0 == strcmp (default_reductions, "accepting"); | |
379 | free (default_reductions); | |
380 | } | |
381 | ||
382 | /* Compute the total number of reductions requiring a lookahead. */ | |
383 | nLA = 0; | |
384 | for (i = 0; i < nstates; i++) | |
385 | nLA += | |
386 | state_lookahead_tokens_count (states[i], | |
387 | default_reduction_only_for_accept); | |
388 | /* Avoid having to special case 0. */ | |
389 | if (!nLA) | |
390 | nLA = 1; | |
391 | ||
392 | pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED); | |
393 | ||
394 | /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions | |
395 | require lookahead tokens. */ | |
396 | for (i = 0; i < nstates; i++) | |
397 | { | |
398 | int count = | |
399 | state_lookahead_tokens_count (states[i], | |
400 | default_reduction_only_for_accept); | |
401 | if (count) | |
402 | { | |
403 | states[i]->reductions->lookahead_tokens = pLA; | |
404 | pLA += count; | |
405 | } | |
406 | } | |
407 | } | |
408 | ||
409 | ||
410 | /*---------------------------------------------. | |
411 | | Output the lookahead tokens for each state. | | |
412 | `---------------------------------------------*/ | |
413 | ||
414 | static void | |
415 | lookahead_tokens_print (FILE *out) | |
416 | { | |
417 | state_number i; | |
418 | int j, k; | |
419 | fprintf (out, "Lookahead tokens: BEGIN\n"); | |
420 | for (i = 0; i < nstates; ++i) | |
421 | { | |
422 | reductions *reds = states[i]->reductions; | |
423 | bitset_iterator iter; | |
424 | int n_lookahead_tokens = 0; | |
425 | ||
426 | if (reds->lookahead_tokens) | |
427 | for (k = 0; k < reds->num; ++k) | |
428 | if (reds->lookahead_tokens[k]) | |
429 | ++n_lookahead_tokens; | |
430 | ||
431 | fprintf (out, "State %d: %d lookahead tokens\n", | |
432 | i, n_lookahead_tokens); | |
433 | ||
434 | if (reds->lookahead_tokens) | |
435 | for (j = 0; j < reds->num; ++j) | |
436 | BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0) | |
437 | { | |
438 | fprintf (out, " on %d (%s) -> rule %d\n", | |
439 | k, symbols[k]->tag, | |
440 | reds->rules[j]->number); | |
441 | }; | |
442 | } | |
443 | fprintf (out, "Lookahead tokens: END\n"); | |
444 | } | |
445 | ||
446 | void | |
447 | lalr (void) | |
448 | { | |
449 | initialize_LA (); | |
450 | set_goto_map (); | |
451 | initialize_F (); | |
452 | lookback = xcalloc (nLA, sizeof *lookback); | |
453 | build_relations (); | |
454 | compute_FOLLOWS (); | |
455 | compute_lookahead_tokens (); | |
456 | ||
457 | if (trace_flag & trace_sets) | |
458 | lookahead_tokens_print (stderr); | |
459 | } | |
460 | ||
461 | ||
462 | void | |
463 | lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old) | |
464 | { | |
465 | goto_number ngotos_reachable = 0; | |
466 | symbol_number nonterminal = 0; | |
467 | aver (nsyms == nvars + ntokens); | |
468 | { | |
469 | goto_number i; | |
470 | for (i = 0; i < ngotos; ++i) | |
471 | { | |
472 | while (i == goto_map[nonterminal]) | |
473 | goto_map[nonterminal++] = ngotos_reachable; | |
474 | /* If old_to_new[from_state[i]] = nstates_old, remove this goto | |
475 | entry. */ | |
476 | if (old_to_new[from_state[i]] != nstates_old) | |
477 | { | |
478 | /* from_state[i] is not removed, so it and thus to_state[i] are | |
479 | reachable, so to_state[i] != nstates_old. */ | |
480 | aver (old_to_new[to_state[i]] != nstates_old); | |
481 | from_state[ngotos_reachable] = old_to_new[from_state[i]]; | |
482 | to_state[ngotos_reachable] = old_to_new[to_state[i]]; | |
483 | ++ngotos_reachable; | |
484 | } | |
485 | } | |
486 | } | |
487 | while (nonterminal <= nvars) | |
488 | { | |
489 | aver (ngotos == goto_map[nonterminal]); | |
490 | goto_map[nonterminal++] = ngotos_reachable; | |
491 | } | |
492 | ngotos = ngotos_reachable; | |
493 | } | |
494 | ||
495 | ||
496 | void | |
497 | lalr_free (void) | |
498 | { | |
499 | state_number s; | |
500 | for (s = 0; s < nstates; ++s) | |
501 | states[s]->reductions->lookahead_tokens = NULL; | |
502 | bitsetv_free (LA); | |
503 | } |