]>
Commit | Line | Data |
---|---|---|
1 | /* Compute look-ahead criteria for Bison. | |
2 | ||
3 | Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004 | |
4 | Free Software Foundation, Inc. | |
5 | ||
6 | This file is part of Bison, the GNU Compiler Compiler. | |
7 | ||
8 | Bison is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2, or (at your option) | |
11 | any later version. | |
12 | ||
13 | Bison is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with Bison; see the file COPYING. If not, write to | |
20 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 | Boston, MA 02111-1307, USA. */ | |
22 | ||
23 | ||
24 | /* Compute how to make the finite state machine deterministic; find | |
25 | which rules need look-ahead in each state, and which look-ahead | |
26 | tokens they accept. */ | |
27 | ||
28 | #include "system.h" | |
29 | ||
30 | #include <bitset.h> | |
31 | #include <bitsetv.h> | |
32 | #include <quotearg.h> | |
33 | ||
34 | #include "LR0.h" | |
35 | #include "complain.h" | |
36 | #include "derives.h" | |
37 | #include "getargs.h" | |
38 | #include "gram.h" | |
39 | #include "lalr.h" | |
40 | #include "nullable.h" | |
41 | #include "reader.h" | |
42 | #include "relation.h" | |
43 | #include "symtab.h" | |
44 | ||
45 | goto_number *goto_map; | |
46 | static goto_number ngotos; | |
47 | state_number *from_state; | |
48 | state_number *to_state; | |
49 | ||
50 | /* Linked list of goto numbers. */ | |
51 | typedef struct goto_list | |
52 | { | |
53 | struct goto_list *next; | |
54 | goto_number value; | |
55 | } goto_list; | |
56 | ||
57 | ||
58 | /* LA is a LR by NTOKENS matrix of bits. LA[l, i] is 1 if the rule | |
59 | LArule[l] is applicable in the appropriate state when the next | |
60 | token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j, | |
61 | it is a conflict. */ | |
62 | ||
63 | static bitsetv LA = NULL; | |
64 | size_t nLA; | |
65 | ||
66 | ||
67 | /* And for the famous F variable, which name is so descriptive that a | |
68 | comment is hardly needed. <grin>. */ | |
69 | static bitsetv F = NULL; | |
70 | ||
71 | static goto_number **includes; | |
72 | static goto_list **lookback; | |
73 | ||
74 | ||
75 | ||
76 | ||
77 | static void | |
78 | set_goto_map (void) | |
79 | { | |
80 | state_number s; | |
81 | goto_number *temp_map; | |
82 | ||
83 | CALLOC (goto_map, nvars + 1); | |
84 | CALLOC (temp_map, nvars + 1); | |
85 | ||
86 | ngotos = 0; | |
87 | for (s = 0; s < nstates; ++s) | |
88 | { | |
89 | transitions *sp = states[s]->transitions; | |
90 | int i; | |
91 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) | |
92 | { | |
93 | ngotos++; | |
94 | ||
95 | /* Abort if (ngotos + 1) would overflow. */ | |
96 | if (ngotos == GOTO_NUMBER_MAXIMUM) | |
97 | abort (); | |
98 | ||
99 | goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; | |
100 | } | |
101 | } | |
102 | ||
103 | { | |
104 | goto_number k = 0; | |
105 | int i; | |
106 | for (i = ntokens; i < nsyms; i++) | |
107 | { | |
108 | temp_map[i - ntokens] = k; | |
109 | k += goto_map[i - ntokens]; | |
110 | } | |
111 | ||
112 | for (i = ntokens; i < nsyms; i++) | |
113 | goto_map[i - ntokens] = temp_map[i - ntokens]; | |
114 | ||
115 | goto_map[nsyms - ntokens] = ngotos; | |
116 | temp_map[nsyms - ntokens] = ngotos; | |
117 | } | |
118 | ||
119 | CALLOC (from_state, ngotos); | |
120 | CALLOC (to_state, ngotos); | |
121 | ||
122 | for (s = 0; s < nstates; ++s) | |
123 | { | |
124 | transitions *sp = states[s]->transitions; | |
125 | int i; | |
126 | for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i) | |
127 | { | |
128 | goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++; | |
129 | from_state[k] = s; | |
130 | to_state[k] = sp->states[i]->number; | |
131 | } | |
132 | } | |
133 | ||
134 | free (temp_map); | |
135 | } | |
136 | ||
137 | ||
138 | ||
139 | /*----------------------------------------------------------. | |
140 | | Map a state/symbol pair into its numeric representation. | | |
141 | `----------------------------------------------------------*/ | |
142 | ||
143 | static goto_number | |
144 | map_goto (state_number s0, symbol_number sym) | |
145 | { | |
146 | goto_number high; | |
147 | goto_number low; | |
148 | goto_number middle; | |
149 | state_number s; | |
150 | ||
151 | low = goto_map[sym - ntokens]; | |
152 | high = goto_map[sym - ntokens + 1] - 1; | |
153 | ||
154 | for (;;) | |
155 | { | |
156 | if (high < low) | |
157 | abort (); | |
158 | middle = (low + high) / 2; | |
159 | s = from_state[middle]; | |
160 | if (s == s0) | |
161 | return middle; | |
162 | else if (s < s0) | |
163 | low = middle + 1; | |
164 | else | |
165 | high = middle - 1; | |
166 | } | |
167 | } | |
168 | ||
169 | ||
170 | static void | |
171 | initialize_F (void) | |
172 | { | |
173 | goto_number **reads = CALLOC (reads, ngotos); | |
174 | goto_number *edge = CALLOC (edge, ngotos + 1); | |
175 | goto_number nedges = 0; | |
176 | ||
177 | goto_number i; | |
178 | ||
179 | F = bitsetv_create (ngotos, ntokens, BITSET_FIXED); | |
180 | ||
181 | for (i = 0; i < ngotos; i++) | |
182 | { | |
183 | state_number stateno = to_state[i]; | |
184 | transitions *sp = states[stateno]->transitions; | |
185 | ||
186 | int j; | |
187 | FOR_EACH_SHIFT (sp, j) | |
188 | bitset_set (F[i], TRANSITION_SYMBOL (sp, j)); | |
189 | ||
190 | for (; j < sp->num; j++) | |
191 | { | |
192 | symbol_number sym = TRANSITION_SYMBOL (sp, j); | |
193 | if (nullable[sym - ntokens]) | |
194 | edge[nedges++] = map_goto (stateno, sym); | |
195 | } | |
196 | ||
197 | if (nedges) | |
198 | { | |
199 | CALLOC (reads[i], nedges + 1); | |
200 | memcpy (reads[i], edge, nedges * sizeof (edge[0])); | |
201 | reads[i][nedges] = END_NODE; | |
202 | nedges = 0; | |
203 | } | |
204 | } | |
205 | ||
206 | relation_digraph (reads, ngotos, &F); | |
207 | ||
208 | for (i = 0; i < ngotos; i++) | |
209 | free (reads[i]); | |
210 | ||
211 | free (reads); | |
212 | free (edge); | |
213 | } | |
214 | ||
215 | ||
216 | static void | |
217 | add_lookback_edge (state *s, rule *r, goto_number gotono) | |
218 | { | |
219 | int ri = state_reduction_find (s, r); | |
220 | goto_list *sp = MALLOC (sp, 1); | |
221 | sp->next = lookback[(s->reductions->look_ahead_tokens - LA) + ri]; | |
222 | sp->value = gotono; | |
223 | lookback[(s->reductions->look_ahead_tokens - LA) + ri] = sp; | |
224 | } | |
225 | ||
226 | ||
227 | ||
228 | static void | |
229 | build_relations (void) | |
230 | { | |
231 | goto_number *edge = CALLOC (edge, ngotos + 1); | |
232 | state_number *states1 = CALLOC (states1, ritem_longest_rhs () + 1); | |
233 | goto_number i; | |
234 | ||
235 | CALLOC (includes, ngotos); | |
236 | ||
237 | for (i = 0; i < ngotos; i++) | |
238 | { | |
239 | int nedges = 0; | |
240 | symbol_number symbol1 = states[to_state[i]]->accessing_symbol; | |
241 | rule **rulep; | |
242 | ||
243 | for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++) | |
244 | { | |
245 | bool done; | |
246 | int length = 1; | |
247 | item_number *rp; | |
248 | state *s = states[from_state[i]]; | |
249 | states1[0] = s->number; | |
250 | ||
251 | for (rp = (*rulep)->rhs; *rp >= 0; rp++) | |
252 | { | |
253 | s = transitions_to (s->transitions, | |
254 | item_number_as_symbol_number (*rp)); | |
255 | states1[length++] = s->number; | |
256 | } | |
257 | ||
258 | if (!s->consistent) | |
259 | add_lookback_edge (s, *rulep, i); | |
260 | ||
261 | length--; | |
262 | done = false; | |
263 | while (!done) | |
264 | { | |
265 | done = true; | |
266 | rp--; | |
267 | /* JF added rp>=ritem && I hope to god its right! */ | |
268 | if (rp >= ritem && ISVAR (*rp)) | |
269 | { | |
270 | /* Downcasting from item_number to symbol_number. */ | |
271 | edge[nedges++] = map_goto (states1[--length], | |
272 | item_number_as_symbol_number (*rp)); | |
273 | if (nullable[*rp - ntokens]) | |
274 | done = false; | |
275 | } | |
276 | } | |
277 | } | |
278 | ||
279 | if (nedges) | |
280 | { | |
281 | int j; | |
282 | CALLOC (includes[i], nedges + 1); | |
283 | for (j = 0; j < nedges; j++) | |
284 | includes[i][j] = edge[j]; | |
285 | includes[i][nedges] = END_NODE; | |
286 | } | |
287 | } | |
288 | ||
289 | free (edge); | |
290 | free (states1); | |
291 | ||
292 | relation_transpose (&includes, ngotos); | |
293 | } | |
294 | ||
295 | ||
296 | ||
297 | static void | |
298 | compute_FOLLOWS (void) | |
299 | { | |
300 | goto_number i; | |
301 | ||
302 | relation_digraph (includes, ngotos, &F); | |
303 | ||
304 | for (i = 0; i < ngotos; i++) | |
305 | free (includes[i]); | |
306 | ||
307 | free (includes); | |
308 | } | |
309 | ||
310 | ||
311 | static void | |
312 | compute_look_ahead_tokens (void) | |
313 | { | |
314 | size_t i; | |
315 | goto_list *sp; | |
316 | ||
317 | for (i = 0; i < nLA; i++) | |
318 | for (sp = lookback[i]; sp; sp = sp->next) | |
319 | bitset_or (LA[i], LA[i], F[sp->value]); | |
320 | ||
321 | /* Free LOOKBACK. */ | |
322 | for (i = 0; i < nLA; i++) | |
323 | LIST_FREE (goto_list, lookback[i]); | |
324 | ||
325 | free (lookback); | |
326 | bitsetv_free (F); | |
327 | } | |
328 | ||
329 | ||
330 | /*-----------------------------------------------------. | |
331 | | Count the number of look-ahead tokens required for S | | |
332 | | (N_LOOK_AHEAD_TOKENS member). | | |
333 | `-----------------------------------------------------*/ | |
334 | ||
335 | static int | |
336 | state_look_ahead_tokens_count (state *s) | |
337 | { | |
338 | int k; | |
339 | int n_look_ahead_tokens = 0; | |
340 | reductions *rp = s->reductions; | |
341 | transitions *sp = s->transitions; | |
342 | ||
343 | /* We need a look-ahead either to distinguish different | |
344 | reductions (i.e., there are two or more), or to distinguish a | |
345 | reduction from a shift. Otherwise, it is straightforward, | |
346 | and the state is `consistent'. */ | |
347 | if (rp->num > 1 | |
348 | || (rp->num == 1 && sp->num && | |
349 | !TRANSITION_IS_DISABLED (sp, 0) && TRANSITION_IS_SHIFT (sp, 0))) | |
350 | n_look_ahead_tokens += rp->num; | |
351 | else | |
352 | s->consistent = 1; | |
353 | ||
354 | for (k = 0; k < sp->num; k++) | |
355 | if (!TRANSITION_IS_DISABLED (sp, k) && TRANSITION_IS_ERROR (sp, k)) | |
356 | { | |
357 | s->consistent = 0; | |
358 | break; | |
359 | } | |
360 | ||
361 | return n_look_ahead_tokens; | |
362 | } | |
363 | ||
364 | ||
365 | /*-----------------------------------------------------. | |
366 | | Compute LA, NLA, and the look_ahead_tokens members. | | |
367 | `-----------------------------------------------------*/ | |
368 | ||
369 | static void | |
370 | initialize_LA (void) | |
371 | { | |
372 | state_number i; | |
373 | bitsetv pLA; | |
374 | ||
375 | /* Compute the total number of reductions requiring a look-ahead. */ | |
376 | nLA = 0; | |
377 | for (i = 0; i < nstates; i++) | |
378 | nLA += state_look_ahead_tokens_count (states[i]); | |
379 | /* Avoid having to special case 0. */ | |
380 | if (!nLA) | |
381 | nLA = 1; | |
382 | ||
383 | pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED); | |
384 | CALLOC (lookback, nLA); | |
385 | ||
386 | /* Initialize the members LOOK_AHEAD_TOKENS for each state whose reductions | |
387 | require look-ahead tokens. */ | |
388 | for (i = 0; i < nstates; i++) | |
389 | { | |
390 | int count = state_look_ahead_tokens_count (states[i]); | |
391 | if (count) | |
392 | { | |
393 | states[i]->reductions->look_ahead_tokens = pLA; | |
394 | pLA += count; | |
395 | } | |
396 | } | |
397 | } | |
398 | ||
399 | ||
400 | /*----------------------------------------------. | |
401 | | Output the look-ahead tokens for each state. | | |
402 | `----------------------------------------------*/ | |
403 | ||
404 | static void | |
405 | look_ahead_tokens_print (FILE *out) | |
406 | { | |
407 | state_number i; | |
408 | int j, k; | |
409 | fprintf (out, "Look-ahead tokens: BEGIN\n"); | |
410 | for (i = 0; i < nstates; ++i) | |
411 | { | |
412 | reductions *reds = states[i]->reductions; | |
413 | bitset_iterator iter; | |
414 | int n_look_ahead_tokens = 0; | |
415 | ||
416 | if (reds->look_ahead_tokens) | |
417 | for (k = 0; k < reds->num; ++k) | |
418 | if (reds->look_ahead_tokens[k]) | |
419 | ++n_look_ahead_tokens; | |
420 | ||
421 | fprintf (out, "State %d: %d look-ahead tokens\n", | |
422 | i, n_look_ahead_tokens); | |
423 | ||
424 | if (reds->look_ahead_tokens) | |
425 | for (j = 0; j < reds->num; ++j) | |
426 | BITSET_FOR_EACH (iter, reds->look_ahead_tokens[j], k, 0) | |
427 | { | |
428 | fprintf (out, " on %d (%s) -> rule %d\n", | |
429 | k, symbols[k]->tag, | |
430 | reds->rules[j]->number); | |
431 | }; | |
432 | } | |
433 | fprintf (out, "Look-ahead tokens: END\n"); | |
434 | } | |
435 | ||
436 | void | |
437 | lalr (void) | |
438 | { | |
439 | initialize_LA (); | |
440 | set_goto_map (); | |
441 | initialize_F (); | |
442 | build_relations (); | |
443 | compute_FOLLOWS (); | |
444 | compute_look_ahead_tokens (); | |
445 | ||
446 | if (trace_flag & trace_sets) | |
447 | look_ahead_tokens_print (stderr); | |
448 | } | |
449 | ||
450 | ||
451 | void | |
452 | lalr_free (void) | |
453 | { | |
454 | state_number s; | |
455 | for (s = 0; s < nstates; ++s) | |
456 | states[s]->reductions->look_ahead_tokens = NULL; | |
457 | bitsetv_free (LA); | |
458 | } |