]>
Commit | Line | Data |
---|---|---|
d0fb370f RS |
1 | /* Compute look-ahead criteria for bison, |
2 | Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of Bison, the GNU Compiler Compiler. | |
5 | ||
6 | Bison is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
10 | ||
11 | Bison is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with Bison; see the file COPYING. If not, write to | |
18 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
19 | ||
20 | ||
21 | /* Compute how to make the finite state machine deterministic; | |
22 | find which rules need lookahead in each state, and which lookahead tokens they accept. | |
23 | ||
24 | lalr(), the entry point, builds these data structures: | |
25 | ||
26 | goto_map, from_state and to_state | |
27 | record each shift transition which accepts a variable (a nonterminal). | |
28 | ngotos is the number of such transitions. | |
29 | from_state[t] is the state number which a transition leads from | |
30 | and to_state[t] is the state number it leads to. | |
31 | All the transitions that accept a particular variable are grouped together and | |
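32 | goto_map[i] is the index in from_state and to_state of the first of them, where i is the variable's symbol number (the goto_map pointer is biased by -ntokens so that it can be indexed by symbol number directly). | |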
33 | ||
34 | consistent[s] is nonzero if no lookahead is needed to decide what to do in state s. | |
35 | ||
36 | LAruleno is a vector which records the rules that need lookahead in various states. | |
37 | The elements of LAruleno that apply to state s are those from | |
38 | lookaheads[s] through lookaheads[s+1]-1. | |
39 | Each element of LAruleno is a rule number. | |
40 | ||
41 | If lr is the length of LAruleno, then a number from 0 to lr-1 | |
42 | can specify both a rule and a state where the rule might be applied. | |
43 | ||
44 | LA is a lr by ntokens matrix of bits. | |
45 | LA[l, i] is 1 if the rule LAruleno[l] is applicable in the appropriate state | |
46 | when the next token is symbol i. | |
47 | If LA[l, i] and LA[m, i] are both 1 for two different rows l != m that belong to the same state, it is a conflict. | |
48 | */ | |
49 | ||
50 | #include <stdio.h> | |
51 | #include "system.h" | |
52 | #include "machine.h" | |
53 | #include "types.h" | |
54 | #include "state.h" | |
55 | #include "new.h" | |
56 | #include "gram.h" | |
57 | ||
58 | ||
59 | extern short **derives; | |
60 | extern char *nullable; | |
61 | ||
62 | ||
63 | int tokensetsize; | |
64 | short *lookaheads; | |
65 | short *LAruleno; | |
66 | unsigned *LA; | |
67 | short *accessing_symbol; | |
68 | char *consistent; | |
69 | core **state_table; | |
70 | shifts **shift_table; | |
71 | reductions **reduction_table; | |
72 | short *goto_map; | |
73 | short *from_state; | |
74 | short *to_state; | |
75 | ||
76 | short **transpose(); | |
77 | void set_state_table(); | |
78 | void set_accessing_symbol(); | |
79 | void set_shift_table(); | |
80 | void set_reduction_table(); | |
81 | void set_maxrhs(); | |
82 | void initialize_LA(); | |
83 | void set_goto_map(); | |
84 | void initialize_F(); | |
85 | void build_relations(); | |
86 | void add_lookback_edge(); | |
87 | void compute_FOLLOWS(); | |
88 | void compute_lookaheads(); | |
89 | void digraph(); | |
90 | void traverse(); | |
91 | ||
92 | extern void toomany(); | |
93 | extern void berror(); | |
94 | ||
95 | static int infinity; | |
96 | static int maxrhs; | |
97 | static int ngotos; | |
98 | static unsigned *F; | |
99 | static short **includes; | |
100 | static shorts **lookback; | |
101 | static short **R; | |
102 | static short *INDEX; | |
103 | static short *VERTICES; | |
104 | static int top; | |
105 | ||
106 | ||
107 | void | |
108 | lalr() | |
109 | { | |
110 | tokensetsize = WORDSIZE(ntokens); | |
111 | ||
112 | set_state_table(); | |
113 | set_accessing_symbol(); | |
114 | set_shift_table(); | |
115 | set_reduction_table(); | |
116 | set_maxrhs(); | |
117 | initialize_LA(); | |
118 | set_goto_map(); | |
119 | initialize_F(); | |
120 | build_relations(); | |
121 | compute_FOLLOWS(); | |
122 | compute_lookaheads(); | |
123 | } | |
124 | ||
125 | ||
126 | void | |
127 | set_state_table() | |
128 | { | |
129 | register core *sp; | |
130 | ||
131 | state_table = NEW2(nstates, core *); | |
132 | ||
133 | for (sp = first_state; sp; sp = sp->next) | |
134 | state_table[sp->number] = sp; | |
135 | } | |
136 | ||
137 | ||
138 | void | |
139 | set_accessing_symbol() | |
140 | { | |
141 | register core *sp; | |
142 | ||
143 | accessing_symbol = NEW2(nstates, short); | |
144 | ||
145 | for (sp = first_state; sp; sp = sp->next) | |
146 | accessing_symbol[sp->number] = sp->accessing_symbol; | |
147 | } | |
148 | ||
149 | ||
150 | void | |
151 | set_shift_table() | |
152 | { | |
153 | register shifts *sp; | |
154 | ||
155 | shift_table = NEW2(nstates, shifts *); | |
156 | ||
157 | for (sp = first_shift; sp; sp = sp->next) | |
158 | shift_table[sp->number] = sp; | |
159 | } | |
160 | ||
161 | ||
162 | void | |
163 | set_reduction_table() | |
164 | { | |
165 | register reductions *rp; | |
166 | ||
167 | reduction_table = NEW2(nstates, reductions *); | |
168 | ||
169 | for (rp = first_reduction; rp; rp = rp->next) | |
170 | reduction_table[rp->number] = rp; | |
171 | } | |
172 | ||
173 | ||
174 | void | |
175 | set_maxrhs() | |
176 | { | |
177 | register short *itemp; | |
178 | register int length; | |
179 | register int max; | |
180 | ||
181 | length = 0; | |
182 | max = 0; | |
183 | for (itemp = ritem; *itemp; itemp++) | |
184 | { | |
185 | if (*itemp > 0) | |
186 | { | |
187 | length++; | |
188 | } | |
189 | else | |
190 | { | |
191 | if (length > max) max = length; | |
192 | length = 0; | |
193 | } | |
194 | } | |
195 | ||
196 | maxrhs = max; | |
197 | } | |
198 | ||
199 | ||
200 | void | |
201 | initialize_LA() | |
202 | { | |
203 | register int i; | |
204 | register int j; | |
205 | register int count; | |
206 | register reductions *rp; | |
207 | register shifts *sp; | |
208 | register short *np; | |
209 | ||
210 | consistent = NEW2(nstates, char); | |
211 | lookaheads = NEW2(nstates + 1, short); | |
212 | ||
213 | count = 0; | |
214 | for (i = 0; i < nstates; i++) | |
215 | { | |
216 | register int k; | |
217 | ||
218 | lookaheads[i] = count; | |
219 | ||
220 | rp = reduction_table[i]; | |
221 | sp = shift_table[i]; | |
222 | if (rp && (rp->nreds > 1 | |
223 | || (sp && ! ISVAR(accessing_symbol[sp->shifts[0]])))) | |
224 | count += rp->nreds; | |
225 | else | |
226 | consistent[i] = 1; | |
227 | ||
228 | if (sp) | |
229 | for (k = 0; k < sp->nshifts; k++) | |
230 | { | |
231 | if (accessing_symbol[sp->shifts[k]] == error_token_number) | |
232 | { | |
233 | consistent[i] = 0; | |
234 | break; | |
235 | } | |
236 | } | |
237 | } | |
238 | ||
239 | lookaheads[nstates] = count; | |
240 | ||
241 | if (count == 0) | |
242 | { | |
243 | LA = NEW2(1 * tokensetsize, unsigned); | |
244 | LAruleno = NEW2(1, short); | |
245 | lookback = NEW2(1, shorts *); | |
246 | } | |
247 | else | |
248 | { | |
249 | LA = NEW2(count * tokensetsize, unsigned); | |
250 | LAruleno = NEW2(count, short); | |
251 | lookback = NEW2(count, shorts *); | |
252 | } | |
253 | ||
254 | np = LAruleno; | |
255 | for (i = 0; i < nstates; i++) | |
256 | { | |
257 | if (!consistent[i]) | |
258 | { | |
259 | if (rp = reduction_table[i]) | |
260 | for (j = 0; j < rp->nreds; j++) | |
261 | *np++ = rp->rules[j]; | |
262 | } | |
263 | } | |
264 | } | |
265 | ||
266 | ||
267 | void | |
268 | set_goto_map() | |
269 | { | |
270 | register shifts *sp; | |
271 | register int i; | |
272 | register int symbol; | |
273 | register int k; | |
274 | register short *temp_map; | |
275 | register int state2; | |
276 | register int state1; | |
277 | ||
278 | goto_map = NEW2(nvars + 1, short) - ntokens; | |
279 | temp_map = NEW2(nvars + 1, short) - ntokens; | |
280 | ||
281 | ngotos = 0; | |
282 | for (sp = first_shift; sp; sp = sp->next) | |
283 | { | |
284 | for (i = sp->nshifts - 1; i >= 0; i--) | |
285 | { | |
286 | symbol = accessing_symbol[sp->shifts[i]]; | |
287 | ||
288 | if (ISTOKEN(symbol)) break; | |
289 | ||
290 | if (ngotos == MAXSHORT) | |
291 | toomany("gotos"); | |
292 | ||
293 | ngotos++; | |
294 | goto_map[symbol]++; | |
295 | } | |
296 | } | |
297 | ||
298 | k = 0; | |
299 | for (i = ntokens; i < nsyms; i++) | |
300 | { | |
301 | temp_map[i] = k; | |
302 | k += goto_map[i]; | |
303 | } | |
304 | ||
305 | for (i = ntokens; i < nsyms; i++) | |
306 | goto_map[i] = temp_map[i]; | |
307 | ||
308 | goto_map[nsyms] = ngotos; | |
309 | temp_map[nsyms] = ngotos; | |
310 | ||
311 | from_state = NEW2(ngotos, short); | |
312 | to_state = NEW2(ngotos, short); | |
313 | ||
314 | for (sp = first_shift; sp; sp = sp->next) | |
315 | { | |
316 | state1 = sp->number; | |
317 | for (i = sp->nshifts - 1; i >= 0; i--) | |
318 | { | |
319 | state2 = sp->shifts[i]; | |
320 | symbol = accessing_symbol[state2]; | |
321 | ||
322 | if (ISTOKEN(symbol)) break; | |
323 | ||
324 | k = temp_map[symbol]++; | |
325 | from_state[k] = state1; | |
326 | to_state[k] = state2; | |
327 | } | |
328 | } | |
329 | ||
330 | FREE(temp_map + ntokens); | |
331 | } | |
332 | ||
333 | ||
334 | ||
335 | /* Map_goto maps a state/symbol pair into its numeric representation. */ | |
336 | ||
337 | int | |
338 | map_goto(state, symbol) | |
339 | int state; | |
340 | int symbol; | |
341 | { | |
342 | register int high; | |
343 | register int low; | |
344 | register int middle; | |
345 | register int s; | |
346 | ||
347 | low = goto_map[symbol]; | |
348 | high = goto_map[symbol + 1] - 1; | |
349 | ||
350 | while (low <= high) | |
351 | { | |
352 | middle = (low + high) / 2; | |
353 | s = from_state[middle]; | |
354 | if (s == state) | |
355 | return (middle); | |
356 | else if (s < state) | |
357 | low = middle + 1; | |
358 | else | |
359 | high = middle - 1; | |
360 | } | |
361 | ||
362 | berror("map_goto"); | |
363 | /* NOTREACHED */ | |
364 | return 0; | |
365 | } | |
366 | ||
367 | ||
368 | void | |
369 | initialize_F() | |
370 | { | |
371 | register int i; | |
372 | register int j; | |
373 | register int k; | |
374 | register shifts *sp; | |
375 | register short *edge; | |
376 | register unsigned *rowp; | |
377 | register short *rp; | |
378 | register short **reads; | |
379 | register int nedges; | |
380 | register int stateno; | |
381 | register int symbol; | |
382 | register int nwords; | |
383 | ||
384 | nwords = ngotos * tokensetsize; | |
385 | F = NEW2(nwords, unsigned); | |
386 | ||
387 | reads = NEW2(ngotos, short *); | |
388 | edge = NEW2(ngotos + 1, short); | |
389 | nedges = 0; | |
390 | ||
391 | rowp = F; | |
392 | for (i = 0; i < ngotos; i++) | |
393 | { | |
394 | stateno = to_state[i]; | |
395 | sp = shift_table[stateno]; | |
396 | ||
397 | if (sp) | |
398 | { | |
399 | k = sp->nshifts; | |
400 | ||
401 | for (j = 0; j < k; j++) | |
402 | { | |
403 | symbol = accessing_symbol[sp->shifts[j]]; | |
404 | if (ISVAR(symbol)) | |
405 | break; | |
406 | SETBIT(rowp, symbol); | |
407 | } | |
408 | ||
409 | for (; j < k; j++) | |
410 | { | |
411 | symbol = accessing_symbol[sp->shifts[j]]; | |
412 | if (nullable[symbol]) | |
413 | edge[nedges++] = map_goto(stateno, symbol); | |
414 | } | |
415 | ||
416 | if (nedges) | |
417 | { | |
418 | reads[i] = rp = NEW2(nedges + 1, short); | |
419 | ||
420 | for (j = 0; j < nedges; j++) | |
421 | rp[j] = edge[j]; | |
422 | ||
423 | rp[nedges] = -1; | |
424 | nedges = 0; | |
425 | } | |
426 | } | |
427 | ||
428 | rowp += tokensetsize; | |
429 | } | |
430 | ||
431 | digraph(reads); | |
432 | ||
433 | for (i = 0; i < ngotos; i++) | |
434 | { | |
435 | if (reads[i]) | |
436 | FREE(reads[i]); | |
437 | } | |
438 | ||
439 | FREE(reads); | |
440 | FREE(edge); | |
441 | } | |
442 | ||
443 | ||
444 | void | |
445 | build_relations() | |
446 | { | |
447 | register int i; | |
448 | register int j; | |
449 | register int k; | |
450 | register short *rulep; | |
451 | register short *rp; | |
452 | register shifts *sp; | |
453 | register int length; | |
454 | register int nedges; | |
455 | register int done; | |
456 | register int state1; | |
457 | register int stateno; | |
458 | register int symbol1; | |
459 | register int symbol2; | |
460 | register short *shortp; | |
461 | register short *edge; | |
462 | register short *states; | |
463 | register short **new_includes; | |
464 | ||
465 | includes = NEW2(ngotos, short *); | |
466 | edge = NEW2(ngotos + 1, short); | |
467 | states = NEW2(maxrhs + 1, short); | |
468 | ||
469 | for (i = 0; i < ngotos; i++) | |
470 | { | |
471 | nedges = 0; | |
472 | state1 = from_state[i]; | |
473 | symbol1 = accessing_symbol[to_state[i]]; | |
474 | ||
475 | for (rulep = derives[symbol1]; *rulep > 0; rulep++) | |
476 | { | |
477 | length = 1; | |
478 | states[0] = state1; | |
479 | stateno = state1; | |
480 | ||
481 | for (rp = ritem + rrhs[*rulep]; *rp > 0; rp++) | |
482 | { | |
483 | symbol2 = *rp; | |
484 | sp = shift_table[stateno]; | |
485 | k = sp->nshifts; | |
486 | ||
487 | for (j = 0; j < k; j++) | |
488 | { | |
489 | stateno = sp->shifts[j]; | |
490 | if (accessing_symbol[stateno] == symbol2) break; | |
491 | } | |
492 | ||
493 | states[length++] = stateno; | |
494 | } | |
495 | ||
496 | if (!consistent[stateno]) | |
497 | add_lookback_edge(stateno, *rulep, i); | |
498 | ||
499 | length--; | |
500 | done = 0; | |
501 | while (!done) | |
502 | { | |
503 | done = 1; | |
504 | rp--; | |
505 | /* JF added rp>=ritem && I hope to god its right! */ | |
506 | if (rp>=ritem && ISVAR(*rp)) | |
507 | { | |
508 | stateno = states[--length]; | |
509 | edge[nedges++] = map_goto(stateno, *rp); | |
510 | if (nullable[*rp]) done = 0; | |
511 | } | |
512 | } | |
513 | } | |
514 | ||
515 | if (nedges) | |
516 | { | |
517 | includes[i] = shortp = NEW2(nedges + 1, short); | |
518 | for (j = 0; j < nedges; j++) | |
519 | shortp[j] = edge[j]; | |
520 | shortp[nedges] = -1; | |
521 | } | |
522 | } | |
523 | ||
524 | new_includes = transpose(includes, ngotos); | |
525 | ||
526 | for (i = 0; i < ngotos; i++) | |
527 | if (includes[i]) | |
528 | FREE(includes[i]); | |
529 | ||
530 | FREE(includes); | |
531 | ||
532 | includes = new_includes; | |
533 | ||
534 | FREE(edge); | |
535 | FREE(states); | |
536 | } | |
537 | ||
538 | ||
539 | void | |
540 | add_lookback_edge(stateno, ruleno, gotono) | |
541 | int stateno; | |
542 | int ruleno; | |
543 | int gotono; | |
544 | { | |
545 | register int i; | |
546 | register int k; | |
547 | register int found; | |
548 | register shorts *sp; | |
549 | ||
550 | i = lookaheads[stateno]; | |
551 | k = lookaheads[stateno + 1]; | |
552 | found = 0; | |
553 | while (!found && i < k) | |
554 | { | |
555 | if (LAruleno[i] == ruleno) | |
556 | found = 1; | |
557 | else | |
558 | i++; | |
559 | } | |
560 | ||
561 | if (found == 0) | |
562 | berror("add_lookback_edge"); | |
563 | ||
564 | sp = NEW(shorts); | |
565 | sp->next = lookback[i]; | |
566 | sp->value = gotono; | |
567 | lookback[i] = sp; | |
568 | } | |
569 | ||
570 | ||
571 | ||
572 | short ** | |
573 | transpose(R_arg, n) | |
574 | short **R_arg; | |
575 | int n; | |
576 | { | |
577 | register short **new_R; | |
578 | register short **temp_R; | |
579 | register short *nedges; | |
580 | register short *sp; | |
581 | register int i; | |
582 | register int k; | |
583 | ||
584 | nedges = NEW2(n, short); | |
585 | ||
586 | for (i = 0; i < n; i++) | |
587 | { | |
588 | sp = R_arg[i]; | |
589 | if (sp) | |
590 | { | |
591 | while (*sp >= 0) | |
592 | nedges[*sp++]++; | |
593 | } | |
594 | } | |
595 | ||
596 | new_R = NEW2(n, short *); | |
597 | temp_R = NEW2(n, short *); | |
598 | ||
599 | for (i = 0; i < n; i++) | |
600 | { | |
601 | k = nedges[i]; | |
602 | if (k > 0) | |
603 | { | |
604 | sp = NEW2(k + 1, short); | |
605 | new_R[i] = sp; | |
606 | temp_R[i] = sp; | |
607 | sp[k] = -1; | |
608 | } | |
609 | } | |
610 | ||
611 | FREE(nedges); | |
612 | ||
613 | for (i = 0; i < n; i++) | |
614 | { | |
615 | sp = R_arg[i]; | |
616 | if (sp) | |
617 | { | |
618 | while (*sp >= 0) | |
619 | *temp_R[*sp++]++ = i; | |
620 | } | |
621 | } | |
622 | ||
623 | FREE(temp_R); | |
624 | ||
625 | return (new_R); | |
626 | } | |
627 | ||
628 | ||
629 | void | |
630 | compute_FOLLOWS() | |
631 | { | |
632 | register int i; | |
633 | ||
634 | digraph(includes); | |
635 | ||
636 | for (i = 0; i < ngotos; i++) | |
637 | { | |
638 | if (includes[i]) FREE(includes[i]); | |
639 | } | |
640 | ||
641 | FREE(includes); | |
642 | } | |
643 | ||
644 | ||
645 | void | |
646 | compute_lookaheads() | |
647 | { | |
648 | register int i; | |
649 | register int n; | |
650 | register unsigned *fp1; | |
651 | register unsigned *fp2; | |
652 | register unsigned *fp3; | |
653 | register shorts *sp; | |
654 | register unsigned *rowp; | |
655 | /* register short *rulep; JF unused */ | |
656 | /* register int count; JF unused */ | |
657 | register shorts *sptmp;/* JF */ | |
658 | ||
659 | rowp = LA; | |
660 | n = lookaheads[nstates]; | |
661 | for (i = 0; i < n; i++) | |
662 | { | |
663 | fp3 = rowp + tokensetsize; | |
664 | for (sp = lookback[i]; sp; sp = sp->next) | |
665 | { | |
666 | fp1 = rowp; | |
667 | fp2 = F + tokensetsize * sp->value; | |
668 | while (fp1 < fp3) | |
669 | *fp1++ |= *fp2++; | |
670 | } | |
671 | ||
672 | rowp = fp3; | |
673 | } | |
674 | ||
675 | for (i = 0; i < n; i++) | |
676 | {/* JF removed ref to freed storage */ | |
677 | for (sp = lookback[i]; sp; sp = sptmp) { | |
678 | sptmp=sp->next; | |
679 | FREE(sp); | |
680 | } | |
681 | } | |
682 | ||
683 | FREE(lookback); | |
684 | FREE(F); | |
685 | } | |
686 | ||
687 | ||
688 | void | |
689 | digraph(relation) | |
690 | short **relation; | |
691 | { | |
692 | register int i; | |
693 | ||
694 | infinity = ngotos + 2; | |
695 | INDEX = NEW2(ngotos + 1, short); | |
696 | VERTICES = NEW2(ngotos + 1, short); | |
697 | top = 0; | |
698 | ||
699 | R = relation; | |
700 | ||
701 | for (i = 0; i < ngotos; i++) | |
702 | INDEX[i] = 0; | |
703 | ||
704 | for (i = 0; i < ngotos; i++) | |
705 | { | |
706 | if (INDEX[i] == 0 && R[i]) | |
707 | traverse(i); | |
708 | } | |
709 | ||
710 | FREE(INDEX); | |
711 | FREE(VERTICES); | |
712 | } | |
713 | ||
714 | ||
715 | void | |
716 | traverse(i) | |
717 | register int i; | |
718 | { | |
719 | register unsigned *fp1; | |
720 | register unsigned *fp2; | |
721 | register unsigned *fp3; | |
722 | register int j; | |
723 | register short *rp; | |
724 | ||
725 | int height; | |
726 | unsigned *base; | |
727 | ||
728 | VERTICES[++top] = i; | |
729 | INDEX[i] = height = top; | |
730 | ||
731 | base = F + i * tokensetsize; | |
732 | fp3 = base + tokensetsize; | |
733 | ||
734 | rp = R[i]; | |
735 | if (rp) | |
736 | { | |
737 | while ((j = *rp++) >= 0) | |
738 | { | |
739 | if (INDEX[j] == 0) | |
740 | traverse(j); | |
741 | ||
742 | if (INDEX[i] > INDEX[j]) | |
743 | INDEX[i] = INDEX[j]; | |
744 | ||
745 | fp1 = base; | |
746 | fp2 = F + j * tokensetsize; | |
747 | ||
748 | while (fp1 < fp3) | |
749 | *fp1++ |= *fp2++; | |
750 | } | |
751 | } | |
752 | ||
753 | if (INDEX[i] == height) | |
754 | { | |
755 | for (;;) | |
756 | { | |
757 | j = VERTICES[top--]; | |
758 | INDEX[j] = infinity; | |
759 | ||
760 | if (i == j) | |
761 | break; | |
762 | ||
763 | fp1 = base; | |
764 | fp2 = F + j * tokensetsize; | |
765 | ||
766 | while (fp1 < fp3) | |
767 | *fp2++ = *fp1++; | |
768 | } | |
769 | } | |
770 | } |