src/lalr.c

   1 /* Compute look-ahead criteria for bison,
   2    Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
   3
   4 This file is part of Bison, the GNU Compiler Compiler.
   5
   6 Bison is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 Bison is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Bison; see the file COPYING.  If not, write to
  18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  19
  20
  21 /* Compute how to make the finite state machine deterministic;
  22  find which rules need lookahead in each state, and which lookahead tokens they accept.
  23
  24 lalr(), the entry point, builds these data structures:
  25
  26 goto_map, from_state and to_state
  27  record each shift transition which accepts a variable (a nonterminal).
  28 ngotos is the number of such transitions.
  29 from_state[t] is the state number which a transition leads from
  30 and to_state[t] is the state number it leads to.
  31 All the transitions that accept a particular variable are grouped together and
  32 goto_map[i - ntokens] is the index in from_state and to_state of the first of them.
  33
  34 consistent[s] is nonzero if no lookahead is needed to decide what to do in state s.
  35
  36 LAruleno is a vector which records the rules that need lookahead in various states.
  37 The elements of LAruleno that apply to state s are those from
  38  lookaheads[s] through lookaheads[s+1]-1.
  39 Each element of LAruleno is a rule number.
  40
  41 If lr is the length of LAruleno, then a number from 0 to lr-1
  42 can specify both a rule and a state where the rule might be applied.
  43
  44 LA is a lr by ntokens matrix of bits.
  45 LA[l, i] is 1 if the rule LAruleno[l] is applicable in the appropriate state
  46  when the next token is symbol i.
  47 If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
  48 */
  49
  50 #include <stdio.h>
  51 #include "system.h"
  52 #include "machine.h"
  53 #include "types.h"
  54 #include "state.h"
  55 #include "alloc.h"
  56 #include "gram.h"
  57
  58
  59 extern short **derives;
  60 extern char *nullable;
  61
  62
  63 int tokensetsize;
  64 short *lookaheads;
  65 short *LAruleno;
  66 unsigned *LA;
  67 short *accessing_symbol;
  68 char *consistent;
  69 core **state_table;
  70 shifts **shift_table;
  71 reductions **reduction_table;
  72 short *goto_map;
  73 short *from_state;
  74 short *to_state;
  75
  76 void lalr PARAMS((void));
  77 short **transpose PARAMS((short **, int));
  78 void set_state_table PARAMS((void));
  79 void set_accessing_symbol PARAMS((void));
  80 void set_shift_table PARAMS((void));
  81 void set_reduction_table PARAMS((void));
  82 void set_maxrhs PARAMS((void));
  83 void initialize_LA PARAMS((void));
  84 void set_goto_map PARAMS((void));
  85 int map_goto PARAMS((int, int));
  86 void initialize_F PARAMS((void));
  87 void build_relations PARAMS((void));
  88 void add_lookback_edge PARAMS((int, int, int));
  89 void compute_FOLLOWS PARAMS((void));
  90 void compute_lookaheads PARAMS((void));
  91 void digraph PARAMS((short **));
  92 void traverse PARAMS((register int));
  93
  94 extern void toomany PARAMS((char *));
  95 extern void berror PARAMS((char *));
  96
  97 static int infinity;
  98 static int maxrhs;
  99 static int ngotos;
 100 static unsigned *F;
 101 static short **includes;
 102 static shorts **lookback;
 103 static short **R;
 104 static short *INDEX;
 105 static short *VERTICES;
 106 static int top;
 107
 108
 109 void
 110 lalr (void)
 111 {
 112   tokensetsize = WORDSIZE(ntokens);
 113
 114   set_state_table();
 115   set_accessing_symbol();
 116   set_shift_table();
 117   set_reduction_table();
 118   set_maxrhs();
 119   initialize_LA();
 120   set_goto_map();
 121   initialize_F();
 122   build_relations();
 123   compute_FOLLOWS();
 124   compute_lookaheads();
 125 }
 126
 127
 128 void
 129 set_state_table (void)
 130 {
 131   register core *sp;
 132
 133   state_table = NEW2(nstates, core *);
 134
 135   for (sp = first_state; sp; sp = sp->next)
 136     state_table[sp->number] = sp;
 137 }
 138
 139
 140 void
 141 set_accessing_symbol (void)
 142 {
 143   register core *sp;
 144
 145   accessing_symbol = NEW2(nstates, short);
 146
 147   for (sp = first_state; sp; sp = sp->next)
 148     accessing_symbol[sp->number] = sp->accessing_symbol;
 149 }
 150
 151
 152 void
 153 set_shift_table (void)
 154 {
 155   register shifts *sp;
 156
 157   shift_table = NEW2(nstates, shifts *);
 158
 159   for (sp = first_shift; sp; sp = sp->next)
 160     shift_table[sp->number] = sp;
 161 }
 162
 163
 164 void
 165 set_reduction_table (void)
 166 {
 167   register reductions *rp;
 168
 169   reduction_table = NEW2(nstates, reductions *);
 170
 171   for (rp = first_reduction; rp; rp = rp->next)
 172     reduction_table[rp->number] = rp;
 173 }
 174
 175
 176 void
 177 set_maxrhs (void)
 178 {
 179   register short *itemp;
 180   register int length;
 181   register int max;
 182
 183   length = 0;
 184   max = 0;
 185   for (itemp = ritem; *itemp; itemp++)
 186     {
 187       if (*itemp > 0)
 188         {
 189           length++;
 190         }
 191       else
 192         {
 193           if (length > max) max = length;
 194           length = 0;
 195         }
 196     }
 197
 198   maxrhs = max;
 199 }
 200
 201
 202 void
 203 initialize_LA (void)
 204 {
 205   register int i;
 206   register int j;
 207   register int count;
 208   register reductions *rp;
 209   register shifts *sp;
 210   register short *np;
 211
 212   consistent = NEW2(nstates, char);
 213   lookaheads = NEW2(nstates + 1, short);
 214
 215   count = 0;
 216   for (i = 0; i < nstates; i++)
 217     {
 218       register int k;
 219
 220       lookaheads[i] = count;
 221
 222       rp = reduction_table[i];
 223       sp = shift_table[i];
 224       if (rp && (rp->nreds > 1
 225           || (sp && ! ISVAR(accessing_symbol[sp->shifts[0]]))))
 226         count += rp->nreds;
 227       else
 228         consistent[i] = 1;
 229
 230       if (sp)
 231         for (k = 0; k < sp->nshifts; k++)
 232           {
 233             if (accessing_symbol[sp->shifts[k]] == error_token_number)
 234               {
 235                 consistent[i] = 0;
 236                 break;
 237               }
 238           }
 239     }
 240
 241   lookaheads[nstates] = count;
 242
 243   if (count == 0)
 244     {
 245       LA = NEW2(1 * tokensetsize, unsigned);
 246       LAruleno = NEW2(1, short);
 247       lookback = NEW2(1, shorts *);
 248     }
 249   else
 250     {
 251       LA = NEW2(count * tokensetsize, unsigned);
 252       LAruleno = NEW2(count, short);
 253       lookback = NEW2(count, shorts *);
 254     }
 255
 256   np = LAruleno;
 257   for (i = 0; i < nstates; i++)
 258     {
 259       if (!consistent[i])
 260         {
 261           if ((rp = reduction_table[i]))
 262             for (j = 0; j < rp->nreds; j++)
 263               *np++ = rp->rules[j];
 264         }
 265     }
 266 }
 267
 268
 269 void
 270 set_goto_map (void)
 271 {
 272   register shifts *sp;
 273   register int i;
 274   register int symbol;
 275   register int k;
 276   register short *temp_map;
 277   register int state2;
 278   register int state1;
 279
 280   goto_map = NEW2(nvars + 1, short) - ntokens;
 281   temp_map = NEW2(nvars + 1, short) - ntokens;
 282
 283   ngotos = 0;
 284   for (sp = first_shift; sp; sp = sp->next)
 285     {
 286       for (i = sp->nshifts - 1; i >= 0; i--)
 287         {
 288           symbol = accessing_symbol[sp->shifts[i]];
 289
 290           if (ISTOKEN(symbol)) break;
 291
 292           if (ngotos == MAXSHORT)
 293             toomany(_("gotos"));
 294
 295           ngotos++;
 296           goto_map[symbol]++;
 297         }
 298     }
 299
 300   k = 0;
 301   for (i = ntokens; i < nsyms; i++)
 302     {
 303       temp_map[i] = k;
 304       k += goto_map[i];
 305     }
 306
 307   for (i = ntokens; i < nsyms; i++)
 308     goto_map[i] = temp_map[i];
 309
 310   goto_map[nsyms] = ngotos;
 311   temp_map[nsyms] = ngotos;
 312
 313   from_state = NEW2(ngotos, short);
 314   to_state = NEW2(ngotos, short);
 315
 316   for (sp = first_shift; sp; sp = sp->next)
 317     {
 318       state1 = sp->number;
 319       for (i = sp->nshifts - 1; i >= 0; i--)
 320         {
 321           state2 = sp->shifts[i];
 322           symbol = accessing_symbol[state2];
 323
 324           if (ISTOKEN(symbol)) break;
 325
 326           k = temp_map[symbol]++;
 327           from_state[k] = state1;
 328           to_state[k] = state2;
 329         }
 330     }
 331
 332   FREE(temp_map + ntokens);
 333 }
 334
 335
 336
 337 /*  Map_goto maps a state/symbol pair into its numeric representation.  */
 338
 339 int
 340 map_goto (int state, int symbol)
 341 {
 342   register int high;
 343   register int low;
 344   register int middle;
 345   register int s;
 346
 347   low = goto_map[symbol];
 348   high = goto_map[symbol + 1] - 1;
 349
 350   while (low <= high)
 351     {
 352       middle = (low + high) / 2;
 353       s = from_state[middle];
 354       if (s == state)
 355         return (middle);
 356       else if (s < state)
 357         low = middle + 1;
 358       else
 359         high = middle - 1;
 360     }
 361
 362   berror("map_goto");
 363 /* NOTREACHED */
 364   return 0;
 365 }
 366
 367
 368 void
 369 initialize_F (void)
 370 {
 371   register int i;
 372   register int j;
 373   register int k;
 374   register shifts *sp;
 375   register short *edge;
 376   register unsigned *rowp;
 377   register short *rp;
 378   register short **reads;
 379   register int nedges;
 380   register int stateno;
 381   register int symbol;
 382   register int nwords;
 383
 384   nwords = ngotos * tokensetsize;
 385   F = NEW2(nwords, unsigned);
 386
 387   reads = NEW2(ngotos, short *);
 388   edge = NEW2(ngotos + 1, short);
 389   nedges = 0;
 390
 391   rowp = F;
 392   for (i = 0; i < ngotos; i++)
 393     {
 394       stateno = to_state[i];
 395       sp = shift_table[stateno];
 396
 397       if (sp)
 398         {
 399           k = sp->nshifts;
 400
 401           for (j = 0; j < k; j++)
 402             {
 403               symbol = accessing_symbol[sp->shifts[j]];
 404               if (ISVAR(symbol))
 405                 break;
 406               SETBIT(rowp, symbol);
 407             }
 408
 409           for (; j < k; j++)
 410             {
 411               symbol = accessing_symbol[sp->shifts[j]];
 412               if (nullable[symbol])
 413                 edge[nedges++] = map_goto(stateno, symbol);
 414             }
 415
 416           if (nedges)
 417             {
 418               reads[i] = rp = NEW2(nedges + 1, short);
 419
 420               for (j = 0; j < nedges; j++)
 421                 rp[j] = edge[j];
 422
 423               rp[nedges] = -1;
 424               nedges = 0;
 425             }
 426         }
 427
 428       rowp += tokensetsize;
 429     }
 430
 431   digraph(reads);
 432
 433   for (i = 0; i < ngotos; i++)
 434     {
 435       if (reads[i])
 436         FREE(reads[i]);
 437     }
 438
 439   FREE(reads);
 440   FREE(edge);
 441 }
 442
 443
 444 void
 445 build_relations (void)
 446 {
 447   register int i;
 448   register int j;
 449   register int k;
 450   register short *rulep;
 451   register short *rp;
 452   register shifts *sp;
 453   register int length;
 454   register int nedges;
 455   register int done;
 456   register int state1;
 457   register int stateno;
 458   register int symbol1;
 459   register int symbol2;
 460   register short *shortp;
 461   register short *edge;
 462   register short *states;
 463   register short **new_includes;
 464
 465   includes = NEW2(ngotos, short *);
 466   edge = NEW2(ngotos + 1, short);
 467   states = NEW2(maxrhs + 1, short);
 468
 469   for (i = 0; i < ngotos; i++)
 470     {
 471       nedges = 0;
 472       state1 = from_state[i];
 473       symbol1 = accessing_symbol[to_state[i]];
 474
 475       for (rulep = derives[symbol1]; *rulep > 0; rulep++)
 476         {
 477           length = 1;
 478           states[0] = state1;
 479           stateno = state1;
 480
 481           for (rp = ritem + rrhs[*rulep]; *rp > 0; rp++)
 482             {
 483               symbol2 = *rp;
 484               sp = shift_table[stateno];
 485               k = sp->nshifts;
 486
 487               for (j = 0; j < k; j++)
 488                 {
 489                   stateno = sp->shifts[j];
 490                   if (accessing_symbol[stateno] == symbol2) break;
 491                 }
 492
 493               states[length++] = stateno;
 494             }
 495
 496           if (!consistent[stateno])
 497             add_lookback_edge(stateno, *rulep, i);
 498
 499           length--;
 500           done = 0;
 501           while (!done)
 502             {
 503               done = 1;
 504               rp--;
 505                         /* JF added rp>=ritem &&   I hope to god its right! */
 506               if (rp>=ritem && ISVAR(*rp))
 507                 {
 508                   stateno = states[--length];
 509                   edge[nedges++] = map_goto(stateno, *rp);
 510                   if (nullable[*rp]) done = 0;
 511                 }
 512             }
 513         }
 514
 515       if (nedges)
 516         {
 517           includes[i] = shortp = NEW2(nedges + 1, short);
 518           for (j = 0; j < nedges; j++)
 519             shortp[j] = edge[j];
 520           shortp[nedges] = -1;
 521         }
 522     }
 523
 524   new_includes = transpose(includes, ngotos);
 525
 526   for (i = 0; i < ngotos; i++)
 527     if (includes[i])
 528       FREE(includes[i]);
 529
 530   FREE(includes);
 531
 532   includes = new_includes;
 533
 534   FREE(edge);
 535   FREE(states);
 536 }
 537
 538
 539 void
 540 add_lookback_edge (int stateno, int ruleno, int gotono)
 541 {
 542   register int i;
 543   register int k;
 544   register int found;
 545   register shorts *sp;
 546
 547   i = lookaheads[stateno];
 548   k = lookaheads[stateno + 1];
 549   found = 0;
 550   while (!found && i < k)
 551     {
 552       if (LAruleno[i] == ruleno)
 553         found = 1;
 554       else
 555         i++;
 556     }
 557
 558   if (found == 0)
 559     berror("add_lookback_edge");
 560
 561   sp = NEW(shorts);
 562   sp->next = lookback[i];
 563   sp->value = gotono;
 564   lookback[i] = sp;
 565 }
 566
 567
 568
 569 short **
 570 transpose (short **R_arg, int n)
 571 {
 572   register short **new_R;
 573   register short **temp_R;
 574   register short *nedges;
 575   register short *sp;
 576   register int i;
 577   register int k;
 578
 579   nedges = NEW2(n, short);
 580
 581   for (i = 0; i < n; i++)
 582     {
 583       sp = R_arg[i];
 584       if (sp)
 585         {
 586           while (*sp >= 0)
 587             nedges[*sp++]++;
 588         }
 589     }
 590
 591   new_R = NEW2(n, short *);
 592   temp_R = NEW2(n, short *);
 593
 594   for (i = 0; i < n; i++)
 595     {
 596       k = nedges[i];
 597       if (k > 0)
 598         {
 599           sp = NEW2(k + 1, short);
 600           new_R[i] = sp;
 601           temp_R[i] = sp;
 602           sp[k] = -1;
 603         }
 604     }
 605
 606   FREE(nedges);
 607
 608   for (i = 0; i < n; i++)
 609     {
 610       sp = R_arg[i];
 611       if (sp)
 612         {
 613           while (*sp >= 0)
 614             *temp_R[*sp++]++ = i;
 615         }
 616     }
 617
 618   FREE(temp_R);
 619
 620   return (new_R);
 621 }
 622
 623
 624 void
 625 compute_FOLLOWS (void)
 626 {
 627   register int i;
 628
 629   digraph(includes);
 630
 631   for (i = 0; i < ngotos; i++)
 632     {
 633       if (includes[i]) FREE(includes[i]);
 634     }
 635
 636   FREE(includes);
 637 }
 638
 639
 640 void
 641 compute_lookaheads (void)
 642 {
 643   register int i;
 644   register int n;
 645   register unsigned *fp1;
 646   register unsigned *fp2;
 647   register unsigned *fp3;
 648   register shorts *sp;
 649   register unsigned *rowp;
 650 /*   register short *rulep; JF unused */
 651 /*  register int count; JF unused */
 652   register shorts *sptmp;/* JF */
 653
 654   rowp = LA;
 655   n = lookaheads[nstates];
 656   for (i = 0; i < n; i++)
 657     {
 658       fp3 = rowp + tokensetsize;
 659       for (sp = lookback[i]; sp; sp = sp->next)
 660         {
 661           fp1 = rowp;
 662           fp2 = F + tokensetsize * sp->value;
 663           while (fp1 < fp3)
 664             *fp1++ |= *fp2++;
 665         }
 666
 667       rowp = fp3;
 668     }
 669
 670   for (i = 0; i < n; i++)
 671     {/* JF removed ref to freed storage */
 672       for (sp = lookback[i]; sp; sp = sptmp) {
 673         sptmp=sp->next;
 674         FREE(sp);
 675       }
 676     }
 677
 678   FREE(lookback);
 679   FREE(F);
 680 }
 681
 682
 683 void
 684 digraph (short **relation)
 685 {
 686   register int i;
 687
 688   infinity = ngotos + 2;
 689   INDEX = NEW2(ngotos + 1, short);
 690   VERTICES = NEW2(ngotos + 1, short);
 691   top = 0;
 692
 693   R = relation;
 694
 695   for (i = 0; i < ngotos; i++)
 696     INDEX[i] = 0;
 697
 698   for (i = 0; i < ngotos; i++)
 699     {
 700       if (INDEX[i] == 0 && R[i])
 701         traverse(i);
 702     }
 703
 704   FREE(INDEX);
 705   FREE(VERTICES);
 706 }
 707
 708
 709 void
 710 traverse (register int i)
 711 {
 712   register unsigned *fp1;
 713   register unsigned *fp2;
 714   register unsigned *fp3;
 715   register int j;
 716   register short *rp;
 717
 718   int height;
 719   unsigned *base;
 720
 721   VERTICES[++top] = i;
 722   INDEX[i] = height = top;
 723
 724   base = F + i * tokensetsize;
 725   fp3 = base + tokensetsize;
 726
 727   rp = R[i];
 728   if (rp)
 729     {
 730       while ((j = *rp++) >= 0)
 731         {
 732           if (INDEX[j] == 0)
 733             traverse(j);
 734
 735           if (INDEX[i] > INDEX[j])
 736             INDEX[i] = INDEX[j];
 737
 738           fp1 = base;
 739           fp2 = F + j * tokensetsize;
 740
 741           while (fp1 < fp3)
 742             *fp1++ |= *fp2++;
 743         }
 744     }
 745
 746   if (INDEX[i] == height)
 747     {
 748       for (;;)
 749         {
 750           j = VERTICES[top--];
 751           INDEX[j] = infinity;
 752
 753           if (i == j)
 754             break;
 755
 756           fp1 = base;
 757           fp2 = F + j * tokensetsize;
 758
 759           while (fp1 < fp3)
 760             *fp2++ = *fp1++;
 761         }
 762     }
 763 }