src/LR0.c

   1 /* Generate the nondeterministic finite state machine for bison,
   2    Copyright 1984, 1986, 1989, 2000, 2001  Free Software Foundation, Inc.
   3
   4    This file is part of Bison, the GNU Compiler Compiler.
   5
   6    Bison is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    Bison is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with Bison; see the file COPYING.  If not, write to
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19    Boston, MA 02111-1307, USA.  */
  20
  21
  22 /* See comments in state.h for the data structures that represent it.
  23    The entry point is generate_states.  */
  24
  25 #include "system.h"
  26 #include "getargs.h"
  27 #include "reader.h"
  28 #include "gram.h"
  29 #include "state.h"
  30 #include "complain.h"
  31 #include "closure.h"
  32 #include "LR0.h"
  33 #include "lalr.h"
  34 #include "reduce.h"
  35
/* Number of states created so far; also the number the next new
   state will receive.  */
int nstates;
/* Number of the termination state; set by insert_accepting_state.  */
int final_state;
/* Head of the list of states, chained through their `next' member.  */
state_t *first_state = NULL;
/* Head of the list of shifts records, chained through `next'.  */
shifts *first_shift = NULL;

/* The state generate_states is currently processing.  */
static state_t *this_state = NULL;
/* Tail of the list of states; new states are appended after it.  */
static state_t *last_state = NULL;
/* Tail of the list of shifts records.  */
static shifts *last_shift = NULL;

/* Number of valid entries in SHIFT_SYMBOL for the current state.  */
static int nshifts;
/* Symbols that can be shifted from the current state; filled by
   new_itemsets and sorted by append_states.  */
static short *shift_symbol = NULL;

/* Scratch vector of the rule numbers reducible in the current state
   (see save_reductions).  */
static short *redset = NULL;
/* Scratch vector of the state numbers reached by the shifts of the
   current state (see append_states).  */
static short *shiftset = NULL;

/* KERNEL_BASE[symbol] points into KERNEL_ITEMS at the vector of items
   active after shifting `symbol'; KERNEL_SIZE[symbol] is the number
   of items currently stored there.  */
static short **kernel_base = NULL;
static int *kernel_size = NULL;
static short *kernel_items = NULL;

/* hash table for states, to recognize equivalent ones.  */

/* Number of buckets in STATE_HASH.  */
#define STATE_HASH_SIZE 1009
static state_t **state_hash = NULL;
  59
  60 \f
  61 static void
  62 allocate_itemsets (void)
  63 {
  64   int i;
  65
  66   /* Count the number of occurrences of all the symbols in RITEMS.
  67      Note that useless productions (hence useless nonterminals) are
  68      browsed too, hence we need to allocate room for _all_ the
  69      symbols.  */
  70   int count = 0;
  71   short *symbol_count = XCALLOC (short, nsyms + nuseless_nonterminals);
  72
  73   for (i = 0; ritem[i]; ++i)
  74     if (ritem[i] > 0)
  75       {
  76         count++;
  77         symbol_count[ritem[i]]++;
  78       }
  79
  80   /* See comments before new_itemsets.  All the vectors of items
  81      live inside KERNEL_ITEMS.  The number of active items after
  82      some symbol cannot be more than the number of times that symbol
  83      appears as an item, which is symbol_count[symbol].
  84      We allocate that much space for each symbol.  */
  85
  86   kernel_base = XCALLOC (short *, nsyms);
  87   if (count)
  88     kernel_items = XCALLOC (short, count);
  89
  90   count = 0;
  91   for (i = 0; i < nsyms; i++)
  92     {
  93       kernel_base[i] = kernel_items + count;
  94       count += symbol_count[i];
  95     }
  96
  97   free (symbol_count);
  98   kernel_size = XCALLOC (int, nsyms);
  99 }
 100
 101
 102 static void
 103 allocate_storage (void)
 104 {
 105   allocate_itemsets ();
 106
 107   shiftset = XCALLOC (short, nsyms);
 108   redset = XCALLOC (short, nrules + 1);
 109   state_hash = XCALLOC (state_t *, STATE_HASH_SIZE);
 110 }
 111
 112
 113 static void
 114 free_storage (void)
 115 {
 116   free (shift_symbol);
 117   free (redset);
 118   free (shiftset);
 119   free (kernel_base);
 120   free (kernel_size);
 121   XFREE (kernel_items);
 122   free (state_hash);
 123 }
 124
 125
 126
 127
 128 /*----------------------------------------------------------------.
 129 | Find which symbols can be shifted in the current state, and for |
 130 | each one record which items would be active after that shift.   |
 131 | Uses the contents of itemset.                                   |
 132 |                                                                 |
 133 | shift_symbol is set to a vector of the symbols that can be      |
 134 | shifted.  For each symbol in the grammar, kernel_base[symbol]   |
 135 | points to a vector of item numbers activated if that symbol is  |
 136 | shifted, and kernel_size[symbol] is their numbers.              |
 137 `----------------------------------------------------------------*/
 138
 139 static void
 140 new_itemsets (void)
 141 {
 142   int i;
 143
 144   if (trace_flag)
 145     fprintf (stderr, "Entering new_itemsets, state = %d\n",
 146              this_state->number);
 147
 148   for (i = 0; i < nsyms; i++)
 149     kernel_size[i] = 0;
 150
 151   shift_symbol = XCALLOC (short, nsyms);
 152   nshifts = 0;
 153
 154   for (i = 0; i < nitemset; ++i)
 155     {
 156       int symbol = ritem[itemset[i]];
 157       if (symbol > 0)
 158         {
 159           if (!kernel_size[symbol])
 160             {
 161               shift_symbol[nshifts] = symbol;
 162               nshifts++;
 163             }
 164
 165           kernel_base[symbol][kernel_size[symbol]] = itemset[i] + 1;
 166           kernel_size[symbol]++;
 167         }
 168     }
 169 }
 170
 171
 172
 173 /*-----------------------------------------------------------------.
 174 | Subroutine of get_state.  Create a new state for those items, if |
 175 | necessary.                                                       |
 176 `-----------------------------------------------------------------*/
 177
 178 static state_t *
 179 new_state (int symbol)
 180 {
 181   state_t *p;
 182
 183   if (trace_flag)
 184     fprintf (stderr, "Entering new_state, state = %d, symbol = %d (%s)\n",
 185              this_state->number, symbol, tags[symbol]);
 186
 187   if (nstates >= MAXSHORT)
 188     fatal (_("too many states (max %d)"), MAXSHORT);
 189
 190   p = STATE_ALLOC (kernel_size[symbol]);
 191   p->accessing_symbol = symbol;
 192   p->number = nstates;
 193   p->nitems = kernel_size[symbol];
 194
 195   shortcpy (p->items, kernel_base[symbol], kernel_size[symbol]);
 196
 197   last_state->next = p;
 198   last_state = p;
 199   nstates++;
 200
 201   return p;
 202 }
 203
 204
 205 /*--------------------------------------------------------------.
 206 | Find the state number for the state we would get to (from the |
 207 | current state) by shifting symbol.  Create a new state if no  |
 208 | equivalent one exists already.  Used by append_states.        |
 209 `--------------------------------------------------------------*/
 210
 211 static int
 212 get_state (int symbol)
 213 {
 214   int key;
 215   int i;
 216   state_t *sp;
 217
 218   if (trace_flag)
 219     fprintf (stderr, "Entering get_state, state = %d, symbol = %d (%s)\n",
 220              this_state->number, symbol, tags[symbol]);
 221
 222   /* Add up the target state's active item numbers to get a hash key.
 223      */
 224   key = 0;
 225   for (i = 0; i < kernel_size[symbol]; ++i)
 226     key += kernel_base[symbol][i];
 227   key = key % STATE_HASH_SIZE;
 228   sp = state_hash[key];
 229
 230   if (sp)
 231     {
 232       int found = 0;
 233       while (!found)
 234         {
 235           if (sp->nitems == kernel_size[symbol])
 236             {
 237               found = 1;
 238               for (i = 0; i < kernel_size[symbol]; ++i)
 239                 if (kernel_base[symbol][i] != sp->items[i])
 240                   found = 0;
 241             }
 242
 243           if (!found)
 244             {
 245               if (sp->link)
 246                 {
 247                   sp = sp->link;
 248                 }
 249               else              /* bucket exhausted and no match */
 250                 {
 251                   sp = sp->link = new_state (symbol);
 252                   found = 1;
 253                 }
 254             }
 255         }
 256     }
 257   else                          /* bucket is empty */
 258     {
 259       state_hash[key] = sp = new_state (symbol);
 260     }
 261
 262   if (trace_flag)
 263     fprintf (stderr, "Exiting get_state => %d\n", sp->number);
 264
 265   return sp->number;
 266 }
 267
 268 /*------------------------------------------------------------------.
 269 | Use the information computed by new_itemsets to find the state    |
 270 | numbers reached by each shift transition from the current state.  |
 271 |                                                                   |
 272 | shiftset is set up as a vector of state numbers of those states.  |
 273 `------------------------------------------------------------------*/
 274
 275 static void
 276 append_states (void)
 277 {
 278   int i;
 279   int j;
 280   int symbol;
 281
 282   if (trace_flag)
 283     fprintf (stderr, "Entering append_states, state = %d\n",
 284              this_state->number);
 285
 286   /* first sort shift_symbol into increasing order */
 287
 288   for (i = 1; i < nshifts; i++)
 289     {
 290       symbol = shift_symbol[i];
 291       j = i;
 292       while (j > 0 && shift_symbol[j - 1] > symbol)
 293         {
 294           shift_symbol[j] = shift_symbol[j - 1];
 295           j--;
 296         }
 297       shift_symbol[j] = symbol;
 298     }
 299
 300   for (i = 0; i < nshifts; i++)
 301     shiftset[i] = get_state (shift_symbol[i]);
 302 }
 303
 304
 305 static void
 306 new_states (void)
 307 {
 308   first_state = last_state = this_state = STATE_ALLOC (0);
 309   nstates = 1;
 310 }
 311
 312
 313 /*------------------------------------------------------------.
 314 | Save the NSHIFTS of SHIFTSET into the current linked list.  |
 315 `------------------------------------------------------------*/
 316
 317 static void
 318 save_shifts (void)
 319 {
 320   shifts *p = shifts_new (nshifts);
 321
 322   p->number = this_state->number;
 323
 324   shortcpy (p->shifts, shiftset, nshifts);
 325
 326   if (last_shift)
 327     last_shift->next = p;
 328   else
 329     first_shift = p;
 330   last_shift = p;
 331 }
 332
 333
 334 /*------------------------------------------------------------------.
 335 | Subroutine of augment_automaton.  Create the next-to-final state, |
 336 | to which a shift has already been made in the initial state.      |
 337 |                                                                   |
 338 | The task of this state consists in shifting (actually, it's a     |
 339 | goto, but shifts and gotos are both stored in SHIFTS) the start   |
 340 | symbols, hence the name.                                          |
 341 `------------------------------------------------------------------*/
 342
 343 static void
 344 insert_start_shifting_state (void)
 345 {
 346   state_t *statep;
 347   shifts *sp;
 348
 349   statep = STATE_ALLOC (0);
 350   statep->number = nstates;
 351
 352   /* The distinctive feature of this state from the
 353      eof_shifting_state, is that it is labeled as post-start-symbol
 354      shifting.  I fail to understand why this state, and the
 355      post-start-start can't be merged into one.  But it does fail if
 356      you try. --akim */
 357   statep->accessing_symbol = start_symbol;
 358
 359   last_state->next = statep;
 360   last_state = statep;
 361
 362   /* Make a shift from this state to (what will be) the final state.  */
 363   sp = shifts_new (1);
 364   sp->number = nstates++;
 365   sp->shifts[0] = nstates;
 366
 367   last_shift->next = sp;
 368   last_shift = sp;
 369 }
 370
 371
 372 /*-----------------------------------------------------------------.
 373 | Subroutine of augment_automaton.  Create the final state, which  |
 374 | shifts `0', the end of file.  The initial state shifts the start |
 375 | symbol, and goes to here.                                        |
 376 `-----------------------------------------------------------------*/
 377
 378 static void
 379 insert_eof_shifting_state (void)
 380 {
 381   state_t *statep;
 382   shifts *sp;
 383
 384   /* Make the final state--the one that follows a shift from the
 385      next-to-final state.
 386      The symbol for that shift is 0 (end-of-file).  */
 387   statep = STATE_ALLOC (0);
 388   statep->number = nstates;
 389
 390   last_state->next = statep;
 391   last_state = statep;
 392
 393   /* Make the shift from the final state to the termination state.  */
 394   sp = shifts_new (1);
 395   sp->number = nstates++;
 396   sp->shifts[0] = nstates;
 397
 398   last_shift->next = sp;
 399   last_shift = sp;
 400 }
 401
 402
 403 /*---------------------------------------------------------------.
 404 | Subroutine of augment_automaton.  Create the accepting state.  |
 405 `---------------------------------------------------------------*/
 406
 407 static void
 408 insert_accepting_state (void)
 409 {
 410   state_t *statep;
 411
 412    /* Note that the variable `final_state' refers to what we sometimes
 413       call the termination state.  */
 414   final_state = nstates;
 415
 416   /* Make the termination state.  */
 417   statep = STATE_ALLOC (0);
 418   statep->number = nstates++;
 419   last_state->next = statep;
 420   last_state = statep;
 421 }
 422
 423
 424
 425
 426
 427 /*------------------------------------------------------------------.
 428 | Make sure that the initial state has a shift that accepts the     |
 429 | grammar's start symbol and goes to the next-to-final state, which |
 430 | has a shift going to the final state, which has a shift to the    |
 431 | termination state.  Create such states and shifts if they don't   |
 432 | happen to exist already.                                          |
 433 `------------------------------------------------------------------*/
 434
 435 static void
 436 augment_automaton (void)
 437 {
 438   if (!first_shift->nshifts)
 439     {
 440       /* There are no shifts for any state.  Make one shift, from the
 441          initial state to the next-to-final state.  */
 442
 443       shifts *sp = shifts_new (1);
 444       sp->shifts[0] = nstates;
 445
 446       /* Initialize the chain of shifts with sp.  */
 447       first_shift = sp;
 448       last_shift = sp;
 449
 450       /* Create the next-to-final state, with shift to
 451          what will be the final state.  */
 452       insert_start_shifting_state ();
 453     }
 454   else if (first_shift->number == 0)
 455     {
 456       state_t *statep = first_state->next;
 457       shifts *sp = first_shift;
 458       shifts *sp1 = NULL;
 459       /* The states reached by shifts from FIRST_STATE are numbered
 460          1..(SP->NSHIFTS).  Look for one reached by START_SYMBOL.  */
 461       while (statep->accessing_symbol != start_symbol
 462              && statep->number < sp->nshifts)
 463         statep = statep->next;
 464
 465       if (statep->accessing_symbol == start_symbol)
 466         {
 467           /* We already have a next-to-final state.
 468              Make sure it has a shift to what will be the final state.  */
 469           while (sp && sp->number < statep->number)
 470             {
 471               sp1 = sp;
 472               sp = sp->next;
 473             }
 474
 475           if (sp && sp->number == statep->number)
 476             {
 477               int i;
 478               shifts *sp2 = shifts_new (sp->nshifts + 1);
 479               sp2->number = statep->number;
 480               sp2->shifts[0] = nstates;
 481               for (i = sp->nshifts; i > 0; i--)
 482                 sp2->shifts[i] = sp->shifts[i - 1];
 483
 484               /* Patch sp2 into the chain of shifts in place of sp,
 485                  following sp1.  */
 486               sp2->next = sp->next;
 487               sp1->next = sp2;
 488               if (sp == last_shift)
 489                 last_shift = sp2;
 490               XFREE (sp);
 491             }
 492           else
 493             {
 494               shifts *sp2 = shifts_new (1);
 495               sp2->number = statep->number;
 496               sp2->shifts[0] = nstates;
 497
 498               /* Patch sp2 into the chain of shifts between sp1 and sp.  */
 499               sp2->next = sp;
 500               sp1->next = sp2;
 501               if (sp == 0)
 502                 last_shift = sp2;
 503             }
 504         }
 505       else
 506         {
 507           int i, k;
 508           shifts *sp2;
 509           sp = first_shift;
 510
 511           /* There is no next-to-final state as yet.  */
 512           /* Add one more shift in first_shift,
 513              going to the next-to-final state (yet to be made).  */
 514           sp2 = shifts_new (sp->nshifts + 1);
 515
 516           /* Stick this shift into the vector at the proper place.  */
 517           statep = first_state->next;
 518           for (k = 0, i = 0; i < sp->nshifts; k++, i++)
 519             {
 520               if (statep->accessing_symbol > start_symbol && i == k)
 521                 sp2->shifts[k++] = nstates;
 522               sp2->shifts[k] = sp->shifts[i];
 523               statep = statep->next;
 524             }
 525           if (i == k)
 526             sp2->shifts[k++] = nstates;
 527
 528           /* Patch sp2 into the chain of shifts
 529              in place of sp, at the beginning.  */
 530           sp2->next = sp->next;
 531           first_shift = sp2;
 532           if (last_shift == sp)
 533             last_shift = sp2;
 534
 535           XFREE (sp);
 536
 537           /* Create the next-to-final state, with shift to
 538              what will be the final state.  */
 539           insert_start_shifting_state ();
 540         }
 541     }
 542   else
 543     {
 544       /* The initial state didn't even have any shifts.
 545          Give it one shift, to the next-to-final state.  */
 546       shifts *sp = shifts_new (1);
 547       sp->shifts[0] = nstates;
 548
 549       /* Patch sp into the chain of shifts at the beginning.  */
 550       sp->next = first_shift;
 551       first_shift = sp;
 552
 553       /* Create the next-to-final state, with shift to
 554          what will be the final state.  */
 555       insert_start_shifting_state ();
 556     }
 557
 558   insert_eof_shifting_state ();
 559   insert_accepting_state ();
 560 }
 561
 562
 563 /*----------------------------------------------------------------.
 564 | Find which rules can be used for reduction transitions from the |
 565 | current state and make a reductions structure for the state to  |
 566 | record their rule numbers.                                      |
 567 `----------------------------------------------------------------*/
 568
 569 static void
 570 save_reductions (void)
 571 {
 572   int count;
 573   int i;
 574
 575   /* Find and count the active items that represent ends of rules. */
 576
 577   count = 0;
 578   for (i = 0; i < nitemset; ++i)
 579     {
 580       int item = ritem[itemset[i]];
 581       if (item < 0)
 582         redset[count++] = -item;
 583     }
 584
 585   /* Make a reductions structure and copy the data into it.  */
 586
 587   if (count)
 588     {
 589       reductions *p = REDUCTIONS_ALLOC (count);
 590       p->nreds = count;
 591       shortcpy (p->rules, redset, count);
 592
 593       this_state->reductions = p;
 594     }
 595 }
 596
 597 \f
 598 /*-------------------------------------------------------------------.
 599 | Compute the nondeterministic finite state machine (see state.h for |
 600 | details) from the grammar.                                         |
 601 `-------------------------------------------------------------------*/
 602
 603 void
 604 generate_states (void)
 605 {
 606   allocate_storage ();
 607   new_closure (nitems);
 608   new_states ();
 609
 610   while (this_state)
 611     {
 612       if (trace_flag)
 613         fprintf (stderr, "Processing state %d (reached by %s)\n",
 614                  this_state->number, tags[this_state->accessing_symbol]);
 615       /* Set up ruleset and itemset for the transitions out of this
 616          state.  ruleset gets a 1 bit for each rule that could reduce
 617          now.  itemset gets a vector of all the items that could be
 618          accepted next.  */
 619       closure (this_state->items, this_state->nitems);
 620       /* record the reductions allowed out of this state */
 621       save_reductions ();
 622       /* find the itemsets of the states that shifts can reach */
 623       new_itemsets ();
 624       /* find or create the core structures for those states */
 625       append_states ();
 626
 627       /* create the shifts structures for the shifts to those states,
 628          now that the state numbers transitioning to are known */
 629       save_shifts ();
 630
 631       /* states are queued when they are created; process them all */
 632       this_state = this_state->next;
 633     }
 634
 635   /* discard various storage */
 636   free_closure ();
 637   free_storage ();
 638
 639   /* set up initial and final states as parser wants them */
 640   augment_automaton ();
 641 }