]> git.saurik.com Git - bison.git/blob - src/lalr.c
#
[bison.git] / src / lalr.c
1 /* Compute look-ahead criteria for bison,
2 Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21 /* Compute how to make the finite state machine deterministic;
22 find which rules need lookahead in each state, and which lookahead tokens they accept.
23
24 lalr(), the entry point, builds these data structures:
25
26 goto_map, from_state and to_state
27 record each shift transition which accepts a variable (a nonterminal).
28 ngotos is the number of such transitions.
29 from_state[t] is the state number which a transition leads from
30 and to_state[t] is the state number it leads to.
31 All the transitions that accept a particular variable are grouped together and
32 goto_map[i - ntokens] is the index in from_state and to_state of the first of them.
33
34 consistent[s] is nonzero if no lookahead is needed to decide what to do in state s.
35
36 LAruleno is a vector which records the rules that need lookahead in various states.
37 The elements of LAruleno that apply to state s are those from
38 lookaheads[s] through lookaheads[s+1]-1.
39 Each element of LAruleno is a rule number.
40
41 If lr is the length of LAruleno, then a number from 0 to lr-1
42 can specify both a rule and a state where the rule might be applied.
43
44 LA is a lr by ntokens matrix of bits.
45 LA[l, i] is 1 if the rule LAruleno[l] is applicable in the appropriate state
46 when the next token is symbol i.
47 If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
48 */
49
50 #include <stdio.h>
51 #include "system.h"
52 #include "machine.h"
53 #include "types.h"
54 #include "state.h"
55 #include "alloc.h"
56 #include "gram.h"
57
58
59 extern short **derives;
60 extern char *nullable;
61
62
63 int tokensetsize;
64 short *lookaheads;
65 short *LAruleno;
66 unsigned *LA;
67 short *accessing_symbol;
68 char *consistent;
69 core **state_table;
70 shifts **shift_table;
71 reductions **reduction_table;
72 short *goto_map;
73 short *from_state;
74 short *to_state;
75
76 short **transpose();
77 void set_state_table();
78 void set_accessing_symbol();
79 void set_shift_table();
80 void set_reduction_table();
81 void set_maxrhs();
82 void initialize_LA();
83 void set_goto_map();
84 void initialize_F();
85 void build_relations();
86 void add_lookback_edge();
87 void compute_FOLLOWS();
88 void compute_lookaheads();
89 void digraph();
90 void traverse();
91
92 extern void toomany();
93 extern void berror();
94
95 static int infinity;
96 static int maxrhs;
97 static int ngotos;
98 static unsigned *F;
99 static short **includes;
100 static shorts **lookback;
101 static short **R;
102 static short *INDEX;
103 static short *VERTICES;
104 static int top;
105
106
107 void
108 lalr()
109 {
110 tokensetsize = WORDSIZE(ntokens);
111
112 set_state_table();
113 set_accessing_symbol();
114 set_shift_table();
115 set_reduction_table();
116 set_maxrhs();
117 initialize_LA();
118 set_goto_map();
119 initialize_F();
120 build_relations();
121 compute_FOLLOWS();
122 compute_lookaheads();
123 }
124
125
126 void
127 set_state_table()
128 {
129 register core *sp;
130
131 state_table = NEW2(nstates, core *);
132
133 for (sp = first_state; sp; sp = sp->next)
134 state_table[sp->number] = sp;
135 }
136
137
138 void
139 set_accessing_symbol()
140 {
141 register core *sp;
142
143 accessing_symbol = NEW2(nstates, short);
144
145 for (sp = first_state; sp; sp = sp->next)
146 accessing_symbol[sp->number] = sp->accessing_symbol;
147 }
148
149
150 void
151 set_shift_table()
152 {
153 register shifts *sp;
154
155 shift_table = NEW2(nstates, shifts *);
156
157 for (sp = first_shift; sp; sp = sp->next)
158 shift_table[sp->number] = sp;
159 }
160
161
162 void
163 set_reduction_table()
164 {
165 register reductions *rp;
166
167 reduction_table = NEW2(nstates, reductions *);
168
169 for (rp = first_reduction; rp; rp = rp->next)
170 reduction_table[rp->number] = rp;
171 }
172
173
174 void
175 set_maxrhs()
176 {
177 register short *itemp;
178 register int length;
179 register int max;
180
181 length = 0;
182 max = 0;
183 for (itemp = ritem; *itemp; itemp++)
184 {
185 if (*itemp > 0)
186 {
187 length++;
188 }
189 else
190 {
191 if (length > max) max = length;
192 length = 0;
193 }
194 }
195
196 maxrhs = max;
197 }
198
199
200 void
201 initialize_LA()
202 {
203 register int i;
204 register int j;
205 register int count;
206 register reductions *rp;
207 register shifts *sp;
208 register short *np;
209
210 consistent = NEW2(nstates, char);
211 lookaheads = NEW2(nstates + 1, short);
212
213 count = 0;
214 for (i = 0; i < nstates; i++)
215 {
216 register int k;
217
218 lookaheads[i] = count;
219
220 rp = reduction_table[i];
221 sp = shift_table[i];
222 if (rp && (rp->nreds > 1
223 || (sp && ! ISVAR(accessing_symbol[sp->shifts[0]]))))
224 count += rp->nreds;
225 else
226 consistent[i] = 1;
227
228 if (sp)
229 for (k = 0; k < sp->nshifts; k++)
230 {
231 if (accessing_symbol[sp->shifts[k]] == error_token_number)
232 {
233 consistent[i] = 0;
234 break;
235 }
236 }
237 }
238
239 lookaheads[nstates] = count;
240
241 if (count == 0)
242 {
243 LA = NEW2(1 * tokensetsize, unsigned);
244 LAruleno = NEW2(1, short);
245 lookback = NEW2(1, shorts *);
246 }
247 else
248 {
249 LA = NEW2(count * tokensetsize, unsigned);
250 LAruleno = NEW2(count, short);
251 lookback = NEW2(count, shorts *);
252 }
253
254 np = LAruleno;
255 for (i = 0; i < nstates; i++)
256 {
257 if (!consistent[i])
258 {
259 if (rp = reduction_table[i])
260 for (j = 0; j < rp->nreds; j++)
261 *np++ = rp->rules[j];
262 }
263 }
264 }
265
266
267 void
268 set_goto_map()
269 {
270 register shifts *sp;
271 register int i;
272 register int symbol;
273 register int k;
274 register short *temp_map;
275 register int state2;
276 register int state1;
277
278 goto_map = NEW2(nvars + 1, short) - ntokens;
279 temp_map = NEW2(nvars + 1, short) - ntokens;
280
281 ngotos = 0;
282 for (sp = first_shift; sp; sp = sp->next)
283 {
284 for (i = sp->nshifts - 1; i >= 0; i--)
285 {
286 symbol = accessing_symbol[sp->shifts[i]];
287
288 if (ISTOKEN(symbol)) break;
289
290 if (ngotos == MAXSHORT)
291 toomany(_("gotos"));
292
293 ngotos++;
294 goto_map[symbol]++;
295 }
296 }
297
298 k = 0;
299 for (i = ntokens; i < nsyms; i++)
300 {
301 temp_map[i] = k;
302 k += goto_map[i];
303 }
304
305 for (i = ntokens; i < nsyms; i++)
306 goto_map[i] = temp_map[i];
307
308 goto_map[nsyms] = ngotos;
309 temp_map[nsyms] = ngotos;
310
311 from_state = NEW2(ngotos, short);
312 to_state = NEW2(ngotos, short);
313
314 for (sp = first_shift; sp; sp = sp->next)
315 {
316 state1 = sp->number;
317 for (i = sp->nshifts - 1; i >= 0; i--)
318 {
319 state2 = sp->shifts[i];
320 symbol = accessing_symbol[state2];
321
322 if (ISTOKEN(symbol)) break;
323
324 k = temp_map[symbol]++;
325 from_state[k] = state1;
326 to_state[k] = state2;
327 }
328 }
329
330 FREE(temp_map + ntokens);
331 }
332
333
334
335 /* Map_goto maps a state/symbol pair into its numeric representation. */
336
337 int
338 map_goto(state, symbol)
339 int state;
340 int symbol;
341 {
342 register int high;
343 register int low;
344 register int middle;
345 register int s;
346
347 low = goto_map[symbol];
348 high = goto_map[symbol + 1] - 1;
349
350 while (low <= high)
351 {
352 middle = (low + high) / 2;
353 s = from_state[middle];
354 if (s == state)
355 return (middle);
356 else if (s < state)
357 low = middle + 1;
358 else
359 high = middle - 1;
360 }
361
362 berror("map_goto");
363 /* NOTREACHED */
364 return 0;
365 }
366
367
368 void
369 initialize_F()
370 {
371 register int i;
372 register int j;
373 register int k;
374 register shifts *sp;
375 register short *edge;
376 register unsigned *rowp;
377 register short *rp;
378 register short **reads;
379 register int nedges;
380 register int stateno;
381 register int symbol;
382 register int nwords;
383
384 nwords = ngotos * tokensetsize;
385 F = NEW2(nwords, unsigned);
386
387 reads = NEW2(ngotos, short *);
388 edge = NEW2(ngotos + 1, short);
389 nedges = 0;
390
391 rowp = F;
392 for (i = 0; i < ngotos; i++)
393 {
394 stateno = to_state[i];
395 sp = shift_table[stateno];
396
397 if (sp)
398 {
399 k = sp->nshifts;
400
401 for (j = 0; j < k; j++)
402 {
403 symbol = accessing_symbol[sp->shifts[j]];
404 if (ISVAR(symbol))
405 break;
406 SETBIT(rowp, symbol);
407 }
408
409 for (; j < k; j++)
410 {
411 symbol = accessing_symbol[sp->shifts[j]];
412 if (nullable[symbol])
413 edge[nedges++] = map_goto(stateno, symbol);
414 }
415
416 if (nedges)
417 {
418 reads[i] = rp = NEW2(nedges + 1, short);
419
420 for (j = 0; j < nedges; j++)
421 rp[j] = edge[j];
422
423 rp[nedges] = -1;
424 nedges = 0;
425 }
426 }
427
428 rowp += tokensetsize;
429 }
430
431 digraph(reads);
432
433 for (i = 0; i < ngotos; i++)
434 {
435 if (reads[i])
436 FREE(reads[i]);
437 }
438
439 FREE(reads);
440 FREE(edge);
441 }
442
443
444 void
445 build_relations()
446 {
447 register int i;
448 register int j;
449 register int k;
450 register short *rulep;
451 register short *rp;
452 register shifts *sp;
453 register int length;
454 register int nedges;
455 register int done;
456 register int state1;
457 register int stateno;
458 register int symbol1;
459 register int symbol2;
460 register short *shortp;
461 register short *edge;
462 register short *states;
463 register short **new_includes;
464
465 includes = NEW2(ngotos, short *);
466 edge = NEW2(ngotos + 1, short);
467 states = NEW2(maxrhs + 1, short);
468
469 for (i = 0; i < ngotos; i++)
470 {
471 nedges = 0;
472 state1 = from_state[i];
473 symbol1 = accessing_symbol[to_state[i]];
474
475 for (rulep = derives[symbol1]; *rulep > 0; rulep++)
476 {
477 length = 1;
478 states[0] = state1;
479 stateno = state1;
480
481 for (rp = ritem + rrhs[*rulep]; *rp > 0; rp++)
482 {
483 symbol2 = *rp;
484 sp = shift_table[stateno];
485 k = sp->nshifts;
486
487 for (j = 0; j < k; j++)
488 {
489 stateno = sp->shifts[j];
490 if (accessing_symbol[stateno] == symbol2) break;
491 }
492
493 states[length++] = stateno;
494 }
495
496 if (!consistent[stateno])
497 add_lookback_edge(stateno, *rulep, i);
498
499 length--;
500 done = 0;
501 while (!done)
502 {
503 done = 1;
504 rp--;
505 /* JF added rp>=ritem && I hope to god its right! */
506 if (rp>=ritem && ISVAR(*rp))
507 {
508 stateno = states[--length];
509 edge[nedges++] = map_goto(stateno, *rp);
510 if (nullable[*rp]) done = 0;
511 }
512 }
513 }
514
515 if (nedges)
516 {
517 includes[i] = shortp = NEW2(nedges + 1, short);
518 for (j = 0; j < nedges; j++)
519 shortp[j] = edge[j];
520 shortp[nedges] = -1;
521 }
522 }
523
524 new_includes = transpose(includes, ngotos);
525
526 for (i = 0; i < ngotos; i++)
527 if (includes[i])
528 FREE(includes[i]);
529
530 FREE(includes);
531
532 includes = new_includes;
533
534 FREE(edge);
535 FREE(states);
536 }
537
538
539 void
540 add_lookback_edge(stateno, ruleno, gotono)
541 int stateno;
542 int ruleno;
543 int gotono;
544 {
545 register int i;
546 register int k;
547 register int found;
548 register shorts *sp;
549
550 i = lookaheads[stateno];
551 k = lookaheads[stateno + 1];
552 found = 0;
553 while (!found && i < k)
554 {
555 if (LAruleno[i] == ruleno)
556 found = 1;
557 else
558 i++;
559 }
560
561 if (found == 0)
562 berror("add_lookback_edge");
563
564 sp = NEW(shorts);
565 sp->next = lookback[i];
566 sp->value = gotono;
567 lookback[i] = sp;
568 }
569
570
571
572 short **
573 transpose(R_arg, n)
574 short **R_arg;
575 int n;
576 {
577 register short **new_R;
578 register short **temp_R;
579 register short *nedges;
580 register short *sp;
581 register int i;
582 register int k;
583
584 nedges = NEW2(n, short);
585
586 for (i = 0; i < n; i++)
587 {
588 sp = R_arg[i];
589 if (sp)
590 {
591 while (*sp >= 0)
592 nedges[*sp++]++;
593 }
594 }
595
596 new_R = NEW2(n, short *);
597 temp_R = NEW2(n, short *);
598
599 for (i = 0; i < n; i++)
600 {
601 k = nedges[i];
602 if (k > 0)
603 {
604 sp = NEW2(k + 1, short);
605 new_R[i] = sp;
606 temp_R[i] = sp;
607 sp[k] = -1;
608 }
609 }
610
611 FREE(nedges);
612
613 for (i = 0; i < n; i++)
614 {
615 sp = R_arg[i];
616 if (sp)
617 {
618 while (*sp >= 0)
619 *temp_R[*sp++]++ = i;
620 }
621 }
622
623 FREE(temp_R);
624
625 return (new_R);
626 }
627
628
629 void
630 compute_FOLLOWS()
631 {
632 register int i;
633
634 digraph(includes);
635
636 for (i = 0; i < ngotos; i++)
637 {
638 if (includes[i]) FREE(includes[i]);
639 }
640
641 FREE(includes);
642 }
643
644
645 void
646 compute_lookaheads()
647 {
648 register int i;
649 register int n;
650 register unsigned *fp1;
651 register unsigned *fp2;
652 register unsigned *fp3;
653 register shorts *sp;
654 register unsigned *rowp;
655 /* register short *rulep; JF unused */
656 /* register int count; JF unused */
657 register shorts *sptmp;/* JF */
658
659 rowp = LA;
660 n = lookaheads[nstates];
661 for (i = 0; i < n; i++)
662 {
663 fp3 = rowp + tokensetsize;
664 for (sp = lookback[i]; sp; sp = sp->next)
665 {
666 fp1 = rowp;
667 fp2 = F + tokensetsize * sp->value;
668 while (fp1 < fp3)
669 *fp1++ |= *fp2++;
670 }
671
672 rowp = fp3;
673 }
674
675 for (i = 0; i < n; i++)
676 {/* JF removed ref to freed storage */
677 for (sp = lookback[i]; sp; sp = sptmp) {
678 sptmp=sp->next;
679 FREE(sp);
680 }
681 }
682
683 FREE(lookback);
684 FREE(F);
685 }
686
687
688 void
689 digraph(relation)
690 short **relation;
691 {
692 register int i;
693
694 infinity = ngotos + 2;
695 INDEX = NEW2(ngotos + 1, short);
696 VERTICES = NEW2(ngotos + 1, short);
697 top = 0;
698
699 R = relation;
700
701 for (i = 0; i < ngotos; i++)
702 INDEX[i] = 0;
703
704 for (i = 0; i < ngotos; i++)
705 {
706 if (INDEX[i] == 0 && R[i])
707 traverse(i);
708 }
709
710 FREE(INDEX);
711 FREE(VERTICES);
712 }
713
714
715 void
716 traverse(i)
717 register int i;
718 {
719 register unsigned *fp1;
720 register unsigned *fp2;
721 register unsigned *fp3;
722 register int j;
723 register short *rp;
724
725 int height;
726 unsigned *base;
727
728 VERTICES[++top] = i;
729 INDEX[i] = height = top;
730
731 base = F + i * tokensetsize;
732 fp3 = base + tokensetsize;
733
734 rp = R[i];
735 if (rp)
736 {
737 while ((j = *rp++) >= 0)
738 {
739 if (INDEX[j] == 0)
740 traverse(j);
741
742 if (INDEX[i] > INDEX[j])
743 INDEX[i] = INDEX[j];
744
745 fp1 = base;
746 fp2 = F + j * tokensetsize;
747
748 while (fp1 < fp3)
749 *fp1++ |= *fp2++;
750 }
751 }
752
753 if (INDEX[i] == height)
754 {
755 for (;;)
756 {
757 j = VERTICES[top--];
758 INDEX[j] = infinity;
759
760 if (i == j)
761 break;
762
763 fp1 = base;
764 fp2 = F + j * tokensetsize;
765
766 while (fp1 < fp3)
767 *fp2++ = *fp1++;
768 }
769 }
770 }