src/lalr.c

   1 /* Compute look-ahead criteria for bison,
   2    Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
   3
   4 This file is part of Bison, the GNU Compiler Compiler.
   5
   6 Bison is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 Bison is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Bison; see the file COPYING.  If not, write to
  18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  19
  20
  21 /* Compute how to make the finite state machine deterministic;
  22  find which rules need lookahead in each state, and which lookahead tokens they accept.
  23
  24 lalr(), the entry point, builds these data structures:
  25
  26 goto_map, from_state and to_state
  27  record each shift transition which accepts a variable (a nonterminal).
  28 ngotos is the number of such transitions.
  29 from_state[t] is the state number which a transition leads from
  30 and to_state[t] is the state number it leads to.
  31 All the transitions that accept a particular variable are grouped together and
  32 goto_map[i - ntokens] is the index in from_state and to_state of the first of them.
  33
  34 consistent[s] is nonzero if no lookahead is needed to decide what to do in state s.
  35
  36 LAruleno is a vector which records the rules that need lookahead in various states.
  37 The elements of LAruleno that apply to state s are those from
  38  lookaheads[s] through lookaheads[s+1]-1.
  39 Each element of LAruleno is a rule number.
  40
  41 If lr is the length of LAruleno, then a number from 0 to lr-1
  42 can specify both a rule and a state where the rule might be applied.
  43
   44 LA is an lr by ntokens matrix of bits.
  45 LA[l, i] is 1 if the rule LAruleno[l] is applicable in the appropriate state
  46  when the next token is symbol i.
  47 If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
  48 */
  49
  50 #include <stdio.h>
  51 #include "system.h"
  52 #include "machine.h"
  53 #include "types.h"
  54 #include "state.h"
  55 #include "new.h"
  56 #include "gram.h"
  57
  58
  59 extern short **derives;
  60 extern char *nullable;
  61
  62
  63 int tokensetsize;
  64 short *lookaheads;
  65 short *LAruleno;
  66 unsigned *LA;
  67 short *accessing_symbol;
  68 char *consistent;
  69 core **state_table;
  70 shifts **shift_table;
  71 reductions **reduction_table;
  72 short *goto_map;
  73 short *from_state;
  74 short *to_state;
  75
  76 short **transpose();
  77 void set_state_table();
  78 void set_accessing_symbol();
  79 void set_shift_table();
  80 void set_reduction_table();
  81 void set_maxrhs();
  82 void initialize_LA();
  83 void set_goto_map();
  84 void initialize_F();
  85 void build_relations();
  86 void add_lookback_edge();
  87 void compute_FOLLOWS();
  88 void compute_lookaheads();
  89 void digraph();
  90 void traverse();
  91
  92 extern void toomany();
  93 extern void berror();
  94
  95 static int infinity;
  96 static int maxrhs;
  97 static int ngotos;
  98 static unsigned *F;
  99 static short **includes;
 100 static shorts **lookback;
 101 static short **R;
 102 static short *INDEX;
 103 static short *VERTICES;
 104 static int top;
 105
 106
 107 void
 108 lalr()
 109 {
 110   tokensetsize = WORDSIZE(ntokens);
 111
 112   set_state_table();
 113   set_accessing_symbol();
 114   set_shift_table();
 115   set_reduction_table();
 116   set_maxrhs();
 117   initialize_LA();
 118   set_goto_map();
 119   initialize_F();
 120   build_relations();
 121   compute_FOLLOWS();
 122   compute_lookaheads();
 123 }
 124
 125
 126 void
 127 set_state_table()
 128 {
 129   register core *sp;
 130
 131   state_table = NEW2(nstates, core *);
 132
 133   for (sp = first_state; sp; sp = sp->next)
 134     state_table[sp->number] = sp;
 135 }
 136
 137
 138 void
 139 set_accessing_symbol()
 140 {
 141   register core *sp;
 142
 143   accessing_symbol = NEW2(nstates, short);
 144
 145   for (sp = first_state; sp; sp = sp->next)
 146     accessing_symbol[sp->number] = sp->accessing_symbol;
 147 }
 148
 149
 150 void
 151 set_shift_table()
 152 {
 153   register shifts *sp;
 154
 155   shift_table = NEW2(nstates, shifts *);
 156
 157   for (sp = first_shift; sp; sp = sp->next)
 158     shift_table[sp->number] = sp;
 159 }
 160
 161
 162 void
 163 set_reduction_table()
 164 {
 165   register reductions *rp;
 166
 167   reduction_table = NEW2(nstates, reductions *);
 168
 169   for (rp = first_reduction; rp; rp = rp->next)
 170     reduction_table[rp->number] = rp;
 171 }
 172
 173
 174 void
 175 set_maxrhs()
 176 {
 177   register short *itemp;
 178   register int length;
 179   register int max;
 180
 181   length = 0;
 182   max = 0;
 183   for (itemp = ritem; *itemp; itemp++)
 184     {
 185       if (*itemp > 0)
 186         {
 187           length++;
 188         }
 189       else
 190         {
 191           if (length > max) max = length;
 192           length = 0;
 193         }
 194     }
 195
 196   maxrhs = max;
 197 }
 198
 199
 200 void
 201 initialize_LA()
 202 {
 203   register int i;
 204   register int j;
 205   register int count;
 206   register reductions *rp;
 207   register shifts *sp;
 208   register short *np;
 209
 210   consistent = NEW2(nstates, char);
 211   lookaheads = NEW2(nstates + 1, short);
 212
 213   count = 0;
 214   for (i = 0; i < nstates; i++)
 215     {
 216       register int k;
 217
 218       lookaheads[i] = count;
 219
 220       rp = reduction_table[i];
 221       sp = shift_table[i];
 222       if (rp && (rp->nreds > 1
 223           || (sp && ! ISVAR(accessing_symbol[sp->shifts[0]]))))
 224         count += rp->nreds;
 225       else
 226         consistent[i] = 1;
 227
 228       if (sp)
 229         for (k = 0; k < sp->nshifts; k++)
 230           {
 231             if (accessing_symbol[sp->shifts[k]] == error_token_number)
 232               {
 233                 consistent[i] = 0;
 234                 break;
 235               }
 236           }
 237     }
 238
 239   lookaheads[nstates] = count;
 240
 241   if (count == 0)
 242     {
 243       LA = NEW2(1 * tokensetsize, unsigned);
 244       LAruleno = NEW2(1, short);
 245       lookback = NEW2(1, shorts *);
 246     }
 247   else
 248     {
 249       LA = NEW2(count * tokensetsize, unsigned);
 250       LAruleno = NEW2(count, short);
 251       lookback = NEW2(count, shorts *);
 252     }
 253
 254   np = LAruleno;
 255   for (i = 0; i < nstates; i++)
 256     {
 257       if (!consistent[i])
 258         {
 259           if (rp = reduction_table[i])
 260             for (j = 0; j < rp->nreds; j++)
 261               *np++ = rp->rules[j];
 262         }
 263     }
 264 }
 265
 266
 267 void
 268 set_goto_map()
 269 {
 270   register shifts *sp;
 271   register int i;
 272   register int symbol;
 273   register int k;
 274   register short *temp_map;
 275   register int state2;
 276   register int state1;
 277
 278   goto_map = NEW2(nvars + 1, short) - ntokens;
 279   temp_map = NEW2(nvars + 1, short) - ntokens;
 280
 281   ngotos = 0;
 282   for (sp = first_shift; sp; sp = sp->next)
 283     {
 284       for (i = sp->nshifts - 1; i >= 0; i--)
 285         {
 286           symbol = accessing_symbol[sp->shifts[i]];
 287
 288           if (ISTOKEN(symbol)) break;
 289
 290           if (ngotos == MAXSHORT)
 291             toomany("gotos");
 292
 293           ngotos++;
 294           goto_map[symbol]++;
 295         }
 296     }
 297
 298   k = 0;
 299   for (i = ntokens; i < nsyms; i++)
 300     {
 301       temp_map[i] = k;
 302       k += goto_map[i];
 303     }
 304
 305   for (i = ntokens; i < nsyms; i++)
 306     goto_map[i] = temp_map[i];
 307
 308   goto_map[nsyms] = ngotos;
 309   temp_map[nsyms] = ngotos;
 310
 311   from_state = NEW2(ngotos, short);
 312   to_state = NEW2(ngotos, short);
 313
 314   for (sp = first_shift; sp; sp = sp->next)
 315     {
 316       state1 = sp->number;
 317       for (i = sp->nshifts - 1; i >= 0; i--)
 318         {
 319           state2 = sp->shifts[i];
 320           symbol = accessing_symbol[state2];
 321
 322           if (ISTOKEN(symbol)) break;
 323
 324           k = temp_map[symbol]++;
 325           from_state[k] = state1;
 326           to_state[k] = state2;
 327         }
 328     }
 329
 330   FREE(temp_map + ntokens);
 331 }
 332
 333
 334
 335 /*  Map_goto maps a state/symbol pair into its numeric representation.  */
 336
 337 int
 338 map_goto(state, symbol)
 339 int state;
 340 int symbol;
 341 {
 342   register int high;
 343   register int low;
 344   register int middle;
 345   register int s;
 346
 347   low = goto_map[symbol];
 348   high = goto_map[symbol + 1] - 1;
 349
 350   while (low <= high)
 351     {
 352       middle = (low + high) / 2;
 353       s = from_state[middle];
 354       if (s == state)
 355         return (middle);
 356       else if (s < state)
 357         low = middle + 1;
 358       else
 359         high = middle - 1;
 360     }
 361
 362   berror("map_goto");
 363 /* NOTREACHED */
 364   return 0;
 365 }
 366
 367
 368 void
 369 initialize_F()
 370 {
 371   register int i;
 372   register int j;
 373   register int k;
 374   register shifts *sp;
 375   register short *edge;
 376   register unsigned *rowp;
 377   register short *rp;
 378   register short **reads;
 379   register int nedges;
 380   register int stateno;
 381   register int symbol;
 382   register int nwords;
 383
 384   nwords = ngotos * tokensetsize;
 385   F = NEW2(nwords, unsigned);
 386
 387   reads = NEW2(ngotos, short *);
 388   edge = NEW2(ngotos + 1, short);
 389   nedges = 0;
 390
 391   rowp = F;
 392   for (i = 0; i < ngotos; i++)
 393     {
 394       stateno = to_state[i];
 395       sp = shift_table[stateno];
 396
 397       if (sp)
 398         {
 399           k = sp->nshifts;
 400
 401           for (j = 0; j < k; j++)
 402             {
 403               symbol = accessing_symbol[sp->shifts[j]];
 404               if (ISVAR(symbol))
 405                 break;
 406               SETBIT(rowp, symbol);
 407             }
 408
 409           for (; j < k; j++)
 410             {
 411               symbol = accessing_symbol[sp->shifts[j]];
 412               if (nullable[symbol])
 413                 edge[nedges++] = map_goto(stateno, symbol);
 414             }
 415
 416           if (nedges)
 417             {
 418               reads[i] = rp = NEW2(nedges + 1, short);
 419
 420               for (j = 0; j < nedges; j++)
 421                 rp[j] = edge[j];
 422
 423               rp[nedges] = -1;
 424               nedges = 0;
 425             }
 426         }
 427
 428       rowp += tokensetsize;
 429     }
 430
 431   digraph(reads);
 432
 433   for (i = 0; i < ngotos; i++)
 434     {
 435       if (reads[i])
 436         FREE(reads[i]);
 437     }
 438
 439   FREE(reads);
 440   FREE(edge);
 441 }
 442
 443
 444 void
 445 build_relations()
 446 {
 447   register int i;
 448   register int j;
 449   register int k;
 450   register short *rulep;
 451   register short *rp;
 452   register shifts *sp;
 453   register int length;
 454   register int nedges;
 455   register int done;
 456   register int state1;
 457   register int stateno;
 458   register int symbol1;
 459   register int symbol2;
 460   register short *shortp;
 461   register short *edge;
 462   register short *states;
 463   register short **new_includes;
 464
 465   includes = NEW2(ngotos, short *);
 466   edge = NEW2(ngotos + 1, short);
 467   states = NEW2(maxrhs + 1, short);
 468
 469   for (i = 0; i < ngotos; i++)
 470     {
 471       nedges = 0;
 472       state1 = from_state[i];
 473       symbol1 = accessing_symbol[to_state[i]];
 474
 475       for (rulep = derives[symbol1]; *rulep > 0; rulep++)
 476         {
 477           length = 1;
 478           states[0] = state1;
 479           stateno = state1;
 480
 481           for (rp = ritem + rrhs[*rulep]; *rp > 0; rp++)
 482             {
 483               symbol2 = *rp;
 484               sp = shift_table[stateno];
 485               k = sp->nshifts;
 486
 487               for (j = 0; j < k; j++)
 488                 {
 489                   stateno = sp->shifts[j];
 490                   if (accessing_symbol[stateno] == symbol2) break;
 491                 }
 492
 493               states[length++] = stateno;
 494             }
 495
 496           if (!consistent[stateno])
 497             add_lookback_edge(stateno, *rulep, i);
 498
 499           length--;
 500           done = 0;
 501           while (!done)
 502             {
 503               done = 1;
 504               rp--;
 505                         /* JF added rp>=ritem &&   I hope to god its right! */
 506               if (rp>=ritem && ISVAR(*rp))
 507                 {
 508                   stateno = states[--length];
 509                   edge[nedges++] = map_goto(stateno, *rp);
 510                   if (nullable[*rp]) done = 0;
 511                 }
 512             }
 513         }
 514
 515       if (nedges)
 516         {
 517           includes[i] = shortp = NEW2(nedges + 1, short);
 518           for (j = 0; j < nedges; j++)
 519             shortp[j] = edge[j];
 520           shortp[nedges] = -1;
 521         }
 522     }
 523
 524   new_includes = transpose(includes, ngotos);
 525
 526   for (i = 0; i < ngotos; i++)
 527     if (includes[i])
 528       FREE(includes[i]);
 529
 530   FREE(includes);
 531
 532   includes = new_includes;
 533
 534   FREE(edge);
 535   FREE(states);
 536 }
 537
 538
 539 void
 540 add_lookback_edge(stateno, ruleno, gotono)
 541 int stateno;
 542 int ruleno;
 543 int gotono;
 544 {
 545   register int i;
 546   register int k;
 547   register int found;
 548   register shorts *sp;
 549
 550   i = lookaheads[stateno];
 551   k = lookaheads[stateno + 1];
 552   found = 0;
 553   while (!found && i < k)
 554     {
 555       if (LAruleno[i] == ruleno)
 556         found = 1;
 557       else
 558         i++;
 559     }
 560
 561   if (found == 0)
 562     berror("add_lookback_edge");
 563
 564   sp = NEW(shorts);
 565   sp->next = lookback[i];
 566   sp->value = gotono;
 567   lookback[i] = sp;
 568 }
 569
 570
 571
 572 short **
 573 transpose(R_arg, n)
 574 short **R_arg;
 575 int n;
 576 {
 577   register short **new_R;
 578   register short **temp_R;
 579   register short *nedges;
 580   register short *sp;
 581   register int i;
 582   register int k;
 583
 584   nedges = NEW2(n, short);
 585
 586   for (i = 0; i < n; i++)
 587     {
 588       sp = R_arg[i];
 589       if (sp)
 590         {
 591           while (*sp >= 0)
 592             nedges[*sp++]++;
 593         }
 594     }
 595
 596   new_R = NEW2(n, short *);
 597   temp_R = NEW2(n, short *);
 598
 599   for (i = 0; i < n; i++)
 600     {
 601       k = nedges[i];
 602       if (k > 0)
 603         {
 604           sp = NEW2(k + 1, short);
 605           new_R[i] = sp;
 606           temp_R[i] = sp;
 607           sp[k] = -1;
 608         }
 609     }
 610
 611   FREE(nedges);
 612
 613   for (i = 0; i < n; i++)
 614     {
 615       sp = R_arg[i];
 616       if (sp)
 617         {
 618           while (*sp >= 0)
 619             *temp_R[*sp++]++ = i;
 620         }
 621     }
 622
 623   FREE(temp_R);
 624
 625   return (new_R);
 626 }
 627
 628
 629 void
 630 compute_FOLLOWS()
 631 {
 632   register int i;
 633
 634   digraph(includes);
 635
 636   for (i = 0; i < ngotos; i++)
 637     {
 638       if (includes[i]) FREE(includes[i]);
 639     }
 640
 641   FREE(includes);
 642 }
 643
 644
 645 void
 646 compute_lookaheads()
 647 {
 648   register int i;
 649   register int n;
 650   register unsigned *fp1;
 651   register unsigned *fp2;
 652   register unsigned *fp3;
 653   register shorts *sp;
 654   register unsigned *rowp;
 655 /*   register short *rulep; JF unused */
 656 /*  register int count; JF unused */
 657   register shorts *sptmp;/* JF */
 658
 659   rowp = LA;
 660   n = lookaheads[nstates];
 661   for (i = 0; i < n; i++)
 662     {
 663       fp3 = rowp + tokensetsize;
 664       for (sp = lookback[i]; sp; sp = sp->next)
 665         {
 666           fp1 = rowp;
 667           fp2 = F + tokensetsize * sp->value;
 668           while (fp1 < fp3)
 669             *fp1++ |= *fp2++;
 670         }
 671
 672       rowp = fp3;
 673     }
 674
 675   for (i = 0; i < n; i++)
 676     {/* JF removed ref to freed storage */
 677       for (sp = lookback[i]; sp; sp = sptmp) {
 678         sptmp=sp->next;
 679         FREE(sp);
 680       }
 681     }
 682
 683   FREE(lookback);
 684   FREE(F);
 685 }
 686
 687
 688 void
 689 digraph(relation)
 690 short **relation;
 691 {
 692   register int i;
 693
 694   infinity = ngotos + 2;
 695   INDEX = NEW2(ngotos + 1, short);
 696   VERTICES = NEW2(ngotos + 1, short);
 697   top = 0;
 698
 699   R = relation;
 700
 701   for (i = 0; i < ngotos; i++)
 702     INDEX[i] = 0;
 703
 704   for (i = 0; i < ngotos; i++)
 705     {
 706       if (INDEX[i] == 0 && R[i])
 707         traverse(i);
 708     }
 709
 710   FREE(INDEX);
 711   FREE(VERTICES);
 712 }
 713
 714
 715 void
 716 traverse(i)
 717 register int i;
 718 {
 719   register unsigned *fp1;
 720   register unsigned *fp2;
 721   register unsigned *fp3;
 722   register int j;
 723   register short *rp;
 724
 725   int height;
 726   unsigned *base;
 727
 728   VERTICES[++top] = i;
 729   INDEX[i] = height = top;
 730
 731   base = F + i * tokensetsize;
 732   fp3 = base + tokensetsize;
 733
 734   rp = R[i];
 735   if (rp)
 736     {
 737       while ((j = *rp++) >= 0)
 738         {
 739           if (INDEX[j] == 0)
 740             traverse(j);
 741
 742           if (INDEX[i] > INDEX[j])
 743             INDEX[i] = INDEX[j];
 744
 745           fp1 = base;
 746           fp2 = F + j * tokensetsize;
 747
 748           while (fp1 < fp3)
 749             *fp1++ |= *fp2++;
 750         }
 751     }
 752
 753   if (INDEX[i] == height)
 754     {
 755       for (;;)
 756         {
 757           j = VERTICES[top--];
 758           INDEX[j] = infinity;
 759
 760           if (i == j)
 761             break;
 762
 763           fp1 = base;
 764           fp2 = F + j * tokensetsize;
 765
 766           while (fp1 < fp3)
 767             *fp2++ = *fp1++;
 768         }
 769     }
 770 }