]> git.saurik.com Git - bison.git/blob - src/lalr.c
Some checks for premature EOF.
[bison.git] / src / lalr.c
1 /* Compute look-ahead criteria for bison,
2 Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21 /* Compute how to make the finite state machine deterministic;
22 find which rules need lookahead in each state, and which lookahead tokens they accept.
23
24 lalr(), the entry point, builds these data structures:
25
26 goto_map, from_state and to_state
27 record each shift transition which accepts a variable (a nonterminal).
28 ngotos is the number of such transitions.
29 from_state[t] is the state number which a transition leads from
30 and to_state[t] is the state number it leads to.
31 All the transitions that accept a particular variable are grouped together and
32 goto_map[i - ntokens] is the index in from_state and to_state of the first of them.
33
34 consistent[s] is nonzero if no lookahead is needed to decide what to do in state s.
35
36 LAruleno is a vector which records the rules that need lookahead in various states.
37 The elements of LAruleno that apply to state s are those from
38 lookaheads[s] through lookaheads[s+1]-1.
39 Each element of LAruleno is a rule number.
40
41 If lr is the length of LAruleno, then a number from 0 to lr-1
42 can specify both a rule and a state where the rule might be applied.
43
44 LA is a lr by ntokens matrix of bits.
45 LA[l, i] is 1 if the rule LAruleno[l] is applicable in the appropriate state
46 when the next token is symbol i.
47 If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
48 */
49
50 #include <stdio.h>
51 #include "system.h"
52 #include "machine.h"
53 #include "types.h"
54 #include "state.h"
55 #include "alloc.h"
56 #include "gram.h"
57
58
59 extern short **derives;
60 extern char *nullable;
61
62
63 int tokensetsize;
64 short *lookaheads;
65 short *LAruleno;
66 unsigned *LA;
67 short *accessing_symbol;
68 char *consistent;
69 core **state_table;
70 shifts **shift_table;
71 reductions **reduction_table;
72 short *goto_map;
73 short *from_state;
74 short *to_state;
75
76 void lalr PARAMS((void));
77 short **transpose PARAMS((short **, int));
78 void set_state_table PARAMS((void));
79 void set_accessing_symbol PARAMS((void));
80 void set_shift_table PARAMS((void));
81 void set_reduction_table PARAMS((void));
82 void set_maxrhs PARAMS((void));
83 void initialize_LA PARAMS((void));
84 void set_goto_map PARAMS((void));
85 int map_goto PARAMS((int, int));
86 void initialize_F PARAMS((void));
87 void build_relations PARAMS((void));
88 void add_lookback_edge PARAMS((int, int, int));
89 void compute_FOLLOWS PARAMS((void));
90 void compute_lookaheads PARAMS((void));
91 void digraph PARAMS((short **));
92 void traverse PARAMS((register int));
93
94 extern void toomany PARAMS((char *));
95 extern void berror PARAMS((char *));
96
97 static int infinity;
98 static int maxrhs;
99 static int ngotos;
100 static unsigned *F;
101 static short **includes;
102 static shorts **lookback;
103 static short **R;
104 static short *INDEX;
105 static short *VERTICES;
106 static int top;
107
108
109 void
110 lalr (void)
111 {
112 tokensetsize = WORDSIZE(ntokens);
113
114 set_state_table();
115 set_accessing_symbol();
116 set_shift_table();
117 set_reduction_table();
118 set_maxrhs();
119 initialize_LA();
120 set_goto_map();
121 initialize_F();
122 build_relations();
123 compute_FOLLOWS();
124 compute_lookaheads();
125 }
126
127
128 void
129 set_state_table (void)
130 {
131 register core *sp;
132
133 state_table = NEW2(nstates, core *);
134
135 for (sp = first_state; sp; sp = sp->next)
136 state_table[sp->number] = sp;
137 }
138
139
140 void
141 set_accessing_symbol (void)
142 {
143 register core *sp;
144
145 accessing_symbol = NEW2(nstates, short);
146
147 for (sp = first_state; sp; sp = sp->next)
148 accessing_symbol[sp->number] = sp->accessing_symbol;
149 }
150
151
152 void
153 set_shift_table (void)
154 {
155 register shifts *sp;
156
157 shift_table = NEW2(nstates, shifts *);
158
159 for (sp = first_shift; sp; sp = sp->next)
160 shift_table[sp->number] = sp;
161 }
162
163
164 void
165 set_reduction_table (void)
166 {
167 register reductions *rp;
168
169 reduction_table = NEW2(nstates, reductions *);
170
171 for (rp = first_reduction; rp; rp = rp->next)
172 reduction_table[rp->number] = rp;
173 }
174
175
176 void
177 set_maxrhs (void)
178 {
179 register short *itemp;
180 register int length;
181 register int max;
182
183 length = 0;
184 max = 0;
185 for (itemp = ritem; *itemp; itemp++)
186 {
187 if (*itemp > 0)
188 {
189 length++;
190 }
191 else
192 {
193 if (length > max) max = length;
194 length = 0;
195 }
196 }
197
198 maxrhs = max;
199 }
200
201
202 void
203 initialize_LA (void)
204 {
205 register int i;
206 register int j;
207 register int count;
208 register reductions *rp;
209 register shifts *sp;
210 register short *np;
211
212 consistent = NEW2(nstates, char);
213 lookaheads = NEW2(nstates + 1, short);
214
215 count = 0;
216 for (i = 0; i < nstates; i++)
217 {
218 register int k;
219
220 lookaheads[i] = count;
221
222 rp = reduction_table[i];
223 sp = shift_table[i];
224 if (rp && (rp->nreds > 1
225 || (sp && ! ISVAR(accessing_symbol[sp->shifts[0]]))))
226 count += rp->nreds;
227 else
228 consistent[i] = 1;
229
230 if (sp)
231 for (k = 0; k < sp->nshifts; k++)
232 {
233 if (accessing_symbol[sp->shifts[k]] == error_token_number)
234 {
235 consistent[i] = 0;
236 break;
237 }
238 }
239 }
240
241 lookaheads[nstates] = count;
242
243 if (count == 0)
244 {
245 LA = NEW2(1 * tokensetsize, unsigned);
246 LAruleno = NEW2(1, short);
247 lookback = NEW2(1, shorts *);
248 }
249 else
250 {
251 LA = NEW2(count * tokensetsize, unsigned);
252 LAruleno = NEW2(count, short);
253 lookback = NEW2(count, shorts *);
254 }
255
256 np = LAruleno;
257 for (i = 0; i < nstates; i++)
258 {
259 if (!consistent[i])
260 {
261 if ((rp = reduction_table[i]))
262 for (j = 0; j < rp->nreds; j++)
263 *np++ = rp->rules[j];
264 }
265 }
266 }
267
268
269 void
270 set_goto_map (void)
271 {
272 register shifts *sp;
273 register int i;
274 register int symbol;
275 register int k;
276 register short *temp_map;
277 register int state2;
278 register int state1;
279
280 goto_map = NEW2(nvars + 1, short) - ntokens;
281 temp_map = NEW2(nvars + 1, short) - ntokens;
282
283 ngotos = 0;
284 for (sp = first_shift; sp; sp = sp->next)
285 {
286 for (i = sp->nshifts - 1; i >= 0; i--)
287 {
288 symbol = accessing_symbol[sp->shifts[i]];
289
290 if (ISTOKEN(symbol)) break;
291
292 if (ngotos == MAXSHORT)
293 toomany(_("gotos"));
294
295 ngotos++;
296 goto_map[symbol]++;
297 }
298 }
299
300 k = 0;
301 for (i = ntokens; i < nsyms; i++)
302 {
303 temp_map[i] = k;
304 k += goto_map[i];
305 }
306
307 for (i = ntokens; i < nsyms; i++)
308 goto_map[i] = temp_map[i];
309
310 goto_map[nsyms] = ngotos;
311 temp_map[nsyms] = ngotos;
312
313 from_state = NEW2(ngotos, short);
314 to_state = NEW2(ngotos, short);
315
316 for (sp = first_shift; sp; sp = sp->next)
317 {
318 state1 = sp->number;
319 for (i = sp->nshifts - 1; i >= 0; i--)
320 {
321 state2 = sp->shifts[i];
322 symbol = accessing_symbol[state2];
323
324 if (ISTOKEN(symbol)) break;
325
326 k = temp_map[symbol]++;
327 from_state[k] = state1;
328 to_state[k] = state2;
329 }
330 }
331
332 FREE(temp_map + ntokens);
333 }
334
335
336
337 /* Map_goto maps a state/symbol pair into its numeric representation. */
338
339 int
340 map_goto (int state, int symbol)
341 {
342 register int high;
343 register int low;
344 register int middle;
345 register int s;
346
347 low = goto_map[symbol];
348 high = goto_map[symbol + 1] - 1;
349
350 while (low <= high)
351 {
352 middle = (low + high) / 2;
353 s = from_state[middle];
354 if (s == state)
355 return (middle);
356 else if (s < state)
357 low = middle + 1;
358 else
359 high = middle - 1;
360 }
361
362 berror("map_goto");
363 /* NOTREACHED */
364 return 0;
365 }
366
367
368 void
369 initialize_F (void)
370 {
371 register int i;
372 register int j;
373 register int k;
374 register shifts *sp;
375 register short *edge;
376 register unsigned *rowp;
377 register short *rp;
378 register short **reads;
379 register int nedges;
380 register int stateno;
381 register int symbol;
382 register int nwords;
383
384 nwords = ngotos * tokensetsize;
385 F = NEW2(nwords, unsigned);
386
387 reads = NEW2(ngotos, short *);
388 edge = NEW2(ngotos + 1, short);
389 nedges = 0;
390
391 rowp = F;
392 for (i = 0; i < ngotos; i++)
393 {
394 stateno = to_state[i];
395 sp = shift_table[stateno];
396
397 if (sp)
398 {
399 k = sp->nshifts;
400
401 for (j = 0; j < k; j++)
402 {
403 symbol = accessing_symbol[sp->shifts[j]];
404 if (ISVAR(symbol))
405 break;
406 SETBIT(rowp, symbol);
407 }
408
409 for (; j < k; j++)
410 {
411 symbol = accessing_symbol[sp->shifts[j]];
412 if (nullable[symbol])
413 edge[nedges++] = map_goto(stateno, symbol);
414 }
415
416 if (nedges)
417 {
418 reads[i] = rp = NEW2(nedges + 1, short);
419
420 for (j = 0; j < nedges; j++)
421 rp[j] = edge[j];
422
423 rp[nedges] = -1;
424 nedges = 0;
425 }
426 }
427
428 rowp += tokensetsize;
429 }
430
431 digraph(reads);
432
433 for (i = 0; i < ngotos; i++)
434 {
435 if (reads[i])
436 FREE(reads[i]);
437 }
438
439 FREE(reads);
440 FREE(edge);
441 }
442
443
444 void
445 build_relations (void)
446 {
447 register int i;
448 register int j;
449 register int k;
450 register short *rulep;
451 register short *rp;
452 register shifts *sp;
453 register int length;
454 register int nedges;
455 register int done;
456 register int state1;
457 register int stateno;
458 register int symbol1;
459 register int symbol2;
460 register short *shortp;
461 register short *edge;
462 register short *states;
463 register short **new_includes;
464
465 includes = NEW2(ngotos, short *);
466 edge = NEW2(ngotos + 1, short);
467 states = NEW2(maxrhs + 1, short);
468
469 for (i = 0; i < ngotos; i++)
470 {
471 nedges = 0;
472 state1 = from_state[i];
473 symbol1 = accessing_symbol[to_state[i]];
474
475 for (rulep = derives[symbol1]; *rulep > 0; rulep++)
476 {
477 length = 1;
478 states[0] = state1;
479 stateno = state1;
480
481 for (rp = ritem + rrhs[*rulep]; *rp > 0; rp++)
482 {
483 symbol2 = *rp;
484 sp = shift_table[stateno];
485 k = sp->nshifts;
486
487 for (j = 0; j < k; j++)
488 {
489 stateno = sp->shifts[j];
490 if (accessing_symbol[stateno] == symbol2) break;
491 }
492
493 states[length++] = stateno;
494 }
495
496 if (!consistent[stateno])
497 add_lookback_edge(stateno, *rulep, i);
498
499 length--;
500 done = 0;
501 while (!done)
502 {
503 done = 1;
504 rp--;
505 /* JF added rp>=ritem && I hope to god its right! */
506 if (rp>=ritem && ISVAR(*rp))
507 {
508 stateno = states[--length];
509 edge[nedges++] = map_goto(stateno, *rp);
510 if (nullable[*rp]) done = 0;
511 }
512 }
513 }
514
515 if (nedges)
516 {
517 includes[i] = shortp = NEW2(nedges + 1, short);
518 for (j = 0; j < nedges; j++)
519 shortp[j] = edge[j];
520 shortp[nedges] = -1;
521 }
522 }
523
524 new_includes = transpose(includes, ngotos);
525
526 for (i = 0; i < ngotos; i++)
527 if (includes[i])
528 FREE(includes[i]);
529
530 FREE(includes);
531
532 includes = new_includes;
533
534 FREE(edge);
535 FREE(states);
536 }
537
538
539 void
540 add_lookback_edge (int stateno, int ruleno, int gotono)
541 {
542 register int i;
543 register int k;
544 register int found;
545 register shorts *sp;
546
547 i = lookaheads[stateno];
548 k = lookaheads[stateno + 1];
549 found = 0;
550 while (!found && i < k)
551 {
552 if (LAruleno[i] == ruleno)
553 found = 1;
554 else
555 i++;
556 }
557
558 if (found == 0)
559 berror("add_lookback_edge");
560
561 sp = NEW(shorts);
562 sp->next = lookback[i];
563 sp->value = gotono;
564 lookback[i] = sp;
565 }
566
567
568
569 short **
570 transpose (short **R_arg, int n)
571 {
572 register short **new_R;
573 register short **temp_R;
574 register short *nedges;
575 register short *sp;
576 register int i;
577 register int k;
578
579 nedges = NEW2(n, short);
580
581 for (i = 0; i < n; i++)
582 {
583 sp = R_arg[i];
584 if (sp)
585 {
586 while (*sp >= 0)
587 nedges[*sp++]++;
588 }
589 }
590
591 new_R = NEW2(n, short *);
592 temp_R = NEW2(n, short *);
593
594 for (i = 0; i < n; i++)
595 {
596 k = nedges[i];
597 if (k > 0)
598 {
599 sp = NEW2(k + 1, short);
600 new_R[i] = sp;
601 temp_R[i] = sp;
602 sp[k] = -1;
603 }
604 }
605
606 FREE(nedges);
607
608 for (i = 0; i < n; i++)
609 {
610 sp = R_arg[i];
611 if (sp)
612 {
613 while (*sp >= 0)
614 *temp_R[*sp++]++ = i;
615 }
616 }
617
618 FREE(temp_R);
619
620 return (new_R);
621 }
622
623
624 void
625 compute_FOLLOWS (void)
626 {
627 register int i;
628
629 digraph(includes);
630
631 for (i = 0; i < ngotos; i++)
632 {
633 if (includes[i]) FREE(includes[i]);
634 }
635
636 FREE(includes);
637 }
638
639
640 void
641 compute_lookaheads (void)
642 {
643 register int i;
644 register int n;
645 register unsigned *fp1;
646 register unsigned *fp2;
647 register unsigned *fp3;
648 register shorts *sp;
649 register unsigned *rowp;
650 /* register short *rulep; JF unused */
651 /* register int count; JF unused */
652 register shorts *sptmp;/* JF */
653
654 rowp = LA;
655 n = lookaheads[nstates];
656 for (i = 0; i < n; i++)
657 {
658 fp3 = rowp + tokensetsize;
659 for (sp = lookback[i]; sp; sp = sp->next)
660 {
661 fp1 = rowp;
662 fp2 = F + tokensetsize * sp->value;
663 while (fp1 < fp3)
664 *fp1++ |= *fp2++;
665 }
666
667 rowp = fp3;
668 }
669
670 for (i = 0; i < n; i++)
671 {/* JF removed ref to freed storage */
672 for (sp = lookback[i]; sp; sp = sptmp) {
673 sptmp=sp->next;
674 FREE(sp);
675 }
676 }
677
678 FREE(lookback);
679 FREE(F);
680 }
681
682
683 void
684 digraph (short **relation)
685 {
686 register int i;
687
688 infinity = ngotos + 2;
689 INDEX = NEW2(ngotos + 1, short);
690 VERTICES = NEW2(ngotos + 1, short);
691 top = 0;
692
693 R = relation;
694
695 for (i = 0; i < ngotos; i++)
696 INDEX[i] = 0;
697
698 for (i = 0; i < ngotos; i++)
699 {
700 if (INDEX[i] == 0 && R[i])
701 traverse(i);
702 }
703
704 FREE(INDEX);
705 FREE(VERTICES);
706 }
707
708
709 void
710 traverse (register int i)
711 {
712 register unsigned *fp1;
713 register unsigned *fp2;
714 register unsigned *fp3;
715 register int j;
716 register short *rp;
717
718 int height;
719 unsigned *base;
720
721 VERTICES[++top] = i;
722 INDEX[i] = height = top;
723
724 base = F + i * tokensetsize;
725 fp3 = base + tokensetsize;
726
727 rp = R[i];
728 if (rp)
729 {
730 while ((j = *rp++) >= 0)
731 {
732 if (INDEX[j] == 0)
733 traverse(j);
734
735 if (INDEX[i] > INDEX[j])
736 INDEX[i] = INDEX[j];
737
738 fp1 = base;
739 fp2 = F + j * tokensetsize;
740
741 while (fp1 < fp3)
742 *fp1++ |= *fp2++;
743 }
744 }
745
746 if (INDEX[i] == height)
747 {
748 for (;;)
749 {
750 j = VERTICES[top--];
751 INDEX[j] = infinity;
752
753 if (i == j)
754 break;
755
756 fp1 = base;
757 fp2 = F + j * tokensetsize;
758
759 while (fp1 < fp3)
760 *fp2++ = *fp1++;
761 }
762 }
763 }