src/LR0.c

/* Generate the LR(0) parser states for Bison.

   Copyright (C) 1984, 1986, 1989, 2000-2002, 2004-2012 Free Software
   Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */


/* See comments in state.h for the data structures that represent it.
   The entry point is generate_states.  */

#include <config.h>
#include "system.h"

#include <bitset.h>

#include "LR0.h"
#include "closure.h"
#include "complain.h"
#include "getargs.h"
#include "gram.h"
#include "lalr.h"
#include "reader.h"
#include "reduce.h"
#include "state.h"
#include "symtab.h"

/* A cell of the singly linked list of states.  state_list_append
   adds cells at the tail, so the list is in order of creation.  */
typedef struct state_list
{
  /* The following cell, or NULL for the last one.  */
  struct state_list *next;
  /* The state held by this cell.  */
  state *state;
} state_list;

/* Head and tail of the list of states.  Both are NULL while the list
   is empty; set_states frees the cells and resets both to NULL.  */
static state_list *first_state = NULL;
static state_list *last_state = NULL;


/*-----------------------------------------------------------------.
| Build a new state from SYM and the CORE_SIZE items of CORE, and  |
| append it to the list of states so that generate_states later    |
| computes its transitions.  Return the new state.                 |
`-----------------------------------------------------------------*/

static state *
state_list_append (symbol_number sym, size_t core_size, item_number *core)
{
  state_list *cell = xmalloc (sizeof *cell);
  state *res = state_new (sym, core_size, core);

  if (trace_flag & trace_automaton)
    fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
             nstates, sym, symbols[sym]->tag);

  cell->state = res;
  cell->next = NULL;

  /* Hook the cell after the current tail, if any...  */
  if (last_state)
    last_state->next = cell;
  /* ... make it the head if there is none yet...  */
  if (!first_state)
    first_state = cell;
  /* ... and in all cases it is the new tail.  */
  last_state = cell;

  return res;
}

/* Number of symbols recorded in SHIFT_SYMBOL by new_itemsets.  */
static int nshifts;
/* The symbols that can be shifted, as found by new_itemsets and then
   sorted by append_states.  Room for NSYMS entries.  */
static symbol_number *shift_symbol;

/* Scratch vector in which save_reductions collects rules.  Room for
   NRULES entries.  */
static rule **redset;
/* Scratch vector in which append_states stores the state returned by
   get_state for each shift.  Room for NSYMS entries.  */
static state **shiftset;

/* For each symbol SYM, KERNEL_BASE[SYM] points inside KERNEL_ITEMS at
   the vector of items activated by shifting SYM, and KERNEL_SIZE[SYM]
   is the number of items currently stored there (see
   new_itemsets).  */
static item_number **kernel_base;
static int *kernel_size;
/* Backing storage shared by all the KERNEL_BASE vectors.  */
static item_number *kernel_items;

\f
/*-----------------------------------------------------------------.
| Allocate KERNEL_BASE, KERNEL_ITEMS and KERNEL_SIZE.  Each symbol |
| gets in KERNEL_ITEMS as many slots as it has occurrences in the  |
| right-hand sides of the rules.                                   |
`-----------------------------------------------------------------*/

static void
allocate_itemsets (void)
{
  /* OCCURRENCES[S] is the number of times S appears in a right-hand
     side.  It is sized for the useless nonterminals too, as the
     rules browsed below may mention them.  */
  size_t *occurrences = xcalloc (nsyms + nuseless_nonterminals,
                                 sizeof *occurrences);
  /* Total number of symbol occurrences, all rules included.  */
  size_t total = 0;
  /* Offset in KERNEL_ITEMS of the vector of the current symbol.  */
  size_t offset = 0;
  rule_number rno;
  symbol_number sym;

  for (rno = 0; rno < nrules; ++rno)
    {
      /* A right-hand side ends with a negative item.  */
      item_number *ip = rules[rno].rhs;
      while (0 <= *ip)
        {
          occurrences[*ip] += 1;
          total += 1;
          ++ip;
        }
    }

  /* All the vectors of items live inside KERNEL_ITEMS.  The number
     of items active after shifting some symbol cannot exceed the
     number of occurrences of that symbol, so that is how much room
     each symbol is given.  */
  kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
  kernel_items = xnmalloc (total, sizeof *kernel_items);

  for (sym = 0; sym < nsyms; ++sym)
    {
      kernel_base[sym] = kernel_items + offset;
      offset += occurrences[sym];
    }

  free (occurrences);
  kernel_size = xnmalloc (nsyms, sizeof *kernel_size);
}


/*-----------------------------------------------------------------.
| Allocate the working storage used while computing the states.    |
`-----------------------------------------------------------------*/

static void
allocate_storage (void)
{
  /* The kernel vectors.  */
  allocate_itemsets ();

  /* The per-state scratch vectors: at most one shift per symbol, at
     most one reduction per rule.  */
  shift_symbol = xnmalloc (nsyms, sizeof *shift_symbol);
  shiftset = xnmalloc (nsyms, sizeof *shiftset);
  redset = xnmalloc (nrules, sizeof *redset);

  /* The table used by state_hash_lookup.  */
  state_hash_new ();
}


/*-----------------------------------------------------------------.
| Release everything allocate_storage set up.                      |
`-----------------------------------------------------------------*/

static void
free_storage (void)
{
  /* The kernel vectors from allocate_itemsets.  */
  free (kernel_items);
  free (kernel_size);
  free (kernel_base);

  /* The per-state scratch vectors.  */
  free (shiftset);
  free (redset);
  free (shift_symbol);

  state_hash_free ();
}


/*-----------------------------------------------------------------.
| For the items currently in ITEMSET, find which symbols can be    |
| shifted, and for each of them which items become active after    |
| the shift.  S is only used for tracing.                          |
|                                                                  |
| On return SHIFT_SYMBOL holds the NSHIFTS shiftable symbols in    |
| order of first appearance.  ITEMSET is sorted on item index in   |
| RITEM; each KERNEL_BASE[SYM] receives its KERNEL_SIZE[SYM]       |
| items in that same order.                                        |
`-----------------------------------------------------------------*/

static void
new_itemsets (state *s)
{
  size_t pos;

  if (trace_flag & trace_automaton)
    fprintf (stderr, "Entering new_itemsets, state = %d\n", s->number);

  /* Start afresh: no shift, and every kernel vector empty.  */
  nshifts = 0;
  memset (kernel_size, 0, nsyms * sizeof *kernel_size);

  for (pos = 0; pos < nitemset; ++pos)
    {
      item_number next = ritem[itemset[pos]];
      symbol_number sym;

      /* Nothing to shift when the item is not followed by a
         symbol.  */
      if (!item_number_is_symbol_number (next))
        continue;

      sym = item_number_as_symbol_number (next);

      /* First item seen for SYM: one more shiftable symbol.  */
      if (kernel_size[sym] == 0)
        {
          shift_symbol[nshifts] = sym;
          nshifts += 1;
        }

      /* After the shift, the active item is the following one.  */
      kernel_base[sym][kernel_size[sym]] = itemset[pos] + 1;
      kernel_size[sym] += 1;
    }
}


/*-----------------------------------------------------------------.
| Return the state reached by shifting SYM: the state that         |
| state_hash_lookup finds for the CORE_SIZE items of CORE or,      |
| failing that, a new one queued with state_list_append.           |
`-----------------------------------------------------------------*/

static state *
get_state (symbol_number sym, size_t core_size, item_number *core)
{
  state *res = NULL;

  if (trace_flag & trace_automaton)
    fprintf (stderr, "Entering get_state, symbol = %d (%s)\n",
             sym, symbols[sym]->tag);

  /* Reuse the state if the lookup finds one, otherwise create and
     queue it.  */
  res = state_hash_lookup (core_size, core);
  if (res == NULL)
    res = state_list_append (sym, core_size, core);

  if (trace_flag & trace_automaton)
    fprintf (stderr, "Exiting get_state => %d\n", res->number);

  return res;
}

/*-----------------------------------------------------------------.
| Sort SHIFT_SYMBOL, then store in SHIFTSET, in that order, the    |
| state reached by each shift (see new_itemsets).  S is only used  |
| for tracing.                                                     |
`-----------------------------------------------------------------*/

static void
append_states (state *s)
{
  int n;

  if (trace_flag & trace_automaton)
    fprintf (stderr, "Entering append_states, state = %d\n", s->number);

  /* Insertion sort of SHIFT_SYMBOL, in increasing order: move each
     symbol down to its place among the already sorted ones.  */
  for (n = 1; n < nshifts; ++n)
    {
      symbol_number key = shift_symbol[n];
      int hole = n;
      while (0 < hole && key < shift_symbol[hole - 1])
        {
          shift_symbol[hole] = shift_symbol[hole - 1];
          --hole;
        }
      shift_symbol[hole] = key;
    }

  /* One target state per shiftable symbol, found or created from
     the kernel that new_itemsets prepared for that symbol.  */
  for (n = 0; n < nshifts; ++n)
    {
      symbol_number sym = shift_symbol[n];
      shiftset[n] = get_state (sym, kernel_size[sym], kernel_base[sym]);
    }
}


/*-----------------------------------------------------------------.
| Collect in REDSET the rules whose end is reached by an item of   |
| ITEMSET, and record them as the reductions of S.  The state      |
| that reduces rule 0 becomes FINAL_STATE.                         |
`-----------------------------------------------------------------*/

static void
save_reductions (state *s)
{
  int nreds = 0;
  size_t pos;

  for (pos = 0; pos < nitemset; ++pos)
    {
      item_number item = ritem[itemset[pos]];
      rule_number r;

      /* Only the items that stand for the end of a rule matter.  */
      if (!item_number_is_rule_number (item))
        continue;

      r = item_number_as_rule_number (item);
      redset[nreds] = &rules[r];
      nreds += 1;

      if (r == 0)
        {
          /* Reducing rule 0 means accepting; a single state may do
             that.  */
          aver (!final_state);
          final_state = s;
        }
    }

  /* Store the NREDS rules found into S.  */
  state_reductions_set (s, nreds, redset);
}

\f
/*-----------------------------------------------------------------.
| Build STATES, indexed by state number, from the list of states,  |
| and free the list.                                               |
`-----------------------------------------------------------------*/

static void
set_states (void)
{
  state_list *cell;
  state_list *next;

  states = xcalloc (nstates, sizeof *states);

  for (cell = first_state; cell; cell = next)
    {
      state *s = cell->state;

      /* Read the link now: the cell is freed below.  */
      next = cell->next;

      /* Not optimal, but it keeps the rest of the code simple: give
         every state transitions and reductions members, empty ones
         if need be.  It is too soon for errs, which are computed
         later (by set_conflicts).  */
      if (!s->transitions)
        state_transitions_set (s, 0, 0);
      if (!s->reductions)
        state_reductions_set (s, 0, 0);

      states[s->number] = s;

      free (cell);
    }

  /* The list is gone.  */
  first_state = NULL;
  last_state = NULL;
}


/*-----------------------------------------------------------------.
| Compute the LR(0) parser states (see state.h for details) from   |
| the grammar.                                                     |
`-----------------------------------------------------------------*/

void
generate_states (void)
{
  /* The core of the initial state: the single item number 0.  */
  item_number initial_core = 0;
  state_list *cell = NULL;

  allocate_storage ();
  new_closure (nritems);

  /* Queue the initial state.  */
  state_list_append (0, 1, &initial_core);

  /* Walk the list of states.  Processing a state may append new
     states at the tail, which is why the link to the next cell is
     only followed once the state is done.  */
  cell = first_state;
  while (cell)
    {
      state *s = cell->state;

      if (trace_flag & trace_automaton)
        fprintf (stderr, "Processing state %d (reached by %s)\n",
                 s->number,
                 symbols[s->accessing_symbol]->tag);

      /* Fill ITEMSET from the items of S; the steps below read
         it.  */
      closure (s->items, s->nitems);
      /* The reductions of S.  */
      save_reductions (s);
      /* The kernels of the states S can shift to...  */
      new_itemsets (s);
      /* ... and the states themselves, existing or new.  */
      append_states (s);

      /* The target states are now known: record the shifts.  */
      state_transitions_set (s, nshifts, shiftset);

      cell = cell->next;
    }

  /* The working storage is no longer needed.  */
  free_closure ();
  free_storage ();

  /* Turn the list of states into STATES.  */
  set_states ();
}
Commit	Line	Data
	1	/* Generate the LR(0) parser states for Bison.
	2
	3	Copyright (C) 1984, 1986, 1989, 2000-2002, 2004-2012 Free Software
	4	Foundation, Inc.
	5
	6	This file is part of Bison, the GNU Compiler Compiler.
	7
	8	This program is free software: you can redistribute it and/or modify
	9	it under the terms of the GNU General Public License as published by
	10	the Free Software Foundation, either version 3 of the License, or
	11	(at your option) any later version.
	12
	13	This program is distributed in the hope that it will be useful,
	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	GNU General Public License for more details.
	17
	18	You should have received a copy of the GNU General Public License
	19	along with this program. If not, see <http://www.gnu.org/licenses/>. */
	20
	21
	22	/* See comments in state.h for the data structures that represent it.
	23	The entry point is generate_states. */
	24
	25	#include <config.h>
	26	#include "system.h"
	27
	28	#include <bitset.h>
	29
	30	#include "LR0.h"
	31	#include "closure.h"
	32	#include "complain.h"
	33	#include "getargs.h"
	34	#include "gram.h"
	35	#include "lalr.h"
	36	#include "reader.h"
	37	#include "reduce.h"
	38	#include "state.h"
	39	#include "symtab.h"
	40
	41	typedef struct state_list
	42	{
	43	struct state_list *next;
	44	state *state;
	45	} state_list;
	46
	47	static state_list *first_state = NULL;
	48	static state_list *last_state = NULL;
	49
	50
	51	/*------------------------------------------------------------------.
	52	\| A state was just discovered from another state. Queue it for \|
	53	\| later examination, in order to find its transitions. Return it. \|
	54	`------------------------------------------------------------------*/
	55
	56	static state *
	57	state_list_append (symbol_number sym, size_t core_size, item_number *core)
	58	{
	59	state_list *node = xmalloc (sizeof *node);
	60	state *s = state_new (sym, core_size, core);
	61
	62	if (trace_flag & trace_automaton)
	63	fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
	64	nstates, sym, symbols[sym]->tag);
	65
	66	node->next = NULL;
	67	node->state = s;
	68
	69	if (!first_state)
	70	first_state = node;
	71	if (last_state)
	72	last_state->next = node;
	73	last_state = node;
	74
	75	return s;
	76	}
	77
	78	static int nshifts;
	79	static symbol_number *shift_symbol;
	80
	81	static rule **redset;
	82	static state **shiftset;
	83
	84	static item_number **kernel_base;
	85	static int *kernel_size;
	86	static item_number *kernel_items;
	87
	88	\f
	89	static void
	90	allocate_itemsets (void)
	91	{
	92	symbol_number i;
	93	rule_number r;
	94	item_number *rhsp;
	95
	96	/* Count the number of occurrences of all the symbols in RITEMS.
	97	Note that useless productions (hence useless nonterminals) are
	98	browsed too, hence we need to allocate room for _all_ the
	99	symbols. */
	100	size_t count = 0;
	101	size_t *symbol_count = xcalloc (nsyms + nuseless_nonterminals,
	102	sizeof *symbol_count);
	103
	104	for (r = 0; r < nrules; ++r)
	105	for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
	106	{
	107	count++;
	108	symbol_count[*rhsp]++;
	109	}
	110
	111	/* See comments before new_itemsets. All the vectors of items
	112	live inside KERNEL_ITEMS. The number of active items after
	113	some symbol S cannot be more than the number of times that S
	114	appears as an item, which is SYMBOL_COUNT[S].
	115	We allocate that much space for each symbol. */
	116
	117	kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
	118	kernel_items = xnmalloc (count, sizeof *kernel_items);
	119
	120	count = 0;
	121	for (i = 0; i < nsyms; i++)
	122	{
	123	kernel_base[i] = kernel_items + count;
	124	count += symbol_count[i];
	125	}
	126
	127	free (symbol_count);
	128	kernel_size = xnmalloc (nsyms, sizeof *kernel_size);
	129	}
	130
	131
	132	static void
	133	allocate_storage (void)
	134	{
	135	allocate_itemsets ();
	136
	137	shiftset = xnmalloc (nsyms, sizeof *shiftset);
	138	redset = xnmalloc (nrules, sizeof *redset);
	139	state_hash_new ();
	140	shift_symbol = xnmalloc (nsyms, sizeof *shift_symbol);
	141	}
	142
	143
	144	static void
	145	free_storage (void)
	146	{
	147	free (shift_symbol);
	148	free (redset);
	149	free (shiftset);
	150	free (kernel_base);
	151	free (kernel_size);
	152	free (kernel_items);
	153	state_hash_free ();
	154	}
	155
	156
	157
	158
	159	/*---------------------------------------------------------------.
	160	\| Find which symbols can be shifted in S, and for each one \|
	161	\| record which items would be active after that shift. Uses the \|
	162	\| contents of itemset. \|
	163	\| \|
	164	\| shift_symbol is set to a vector of the symbols that can be \|
	165	\| shifted. For each symbol in the grammar, kernel_base[symbol] \|
	166	\| points to a vector of item numbers activated if that symbol is \|
	167	\| shifted, and kernel_size[symbol] is their numbers. \|
	168	\| \|
	169	\| itemset is sorted on item index in ritem, which is sorted on \|
	170	\| rule number. Compute each kernel_base[symbol] with the same \|
	171	\| sort. \|
	172	`---------------------------------------------------------------*/
	173
	174	static void
	175	new_itemsets (state *s)
	176	{
	177	size_t i;
	178
	179	if (trace_flag & trace_automaton)
	180	fprintf (stderr, "Entering new_itemsets, state = %d\n", s->number);
	181
	182	memset (kernel_size, 0, nsyms * sizeof *kernel_size);
	183
	184	nshifts = 0;
	185
	186	for (i = 0; i < nitemset; ++i)
	187	if (item_number_is_symbol_number (ritem[itemset[i]]))
	188	{
	189	symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]);
	190	if (!kernel_size[sym])
	191	{
	192	shift_symbol[nshifts] = sym;
	193	nshifts++;
	194	}
	195
	196	kernel_base[sym][kernel_size[sym]] = itemset[i] + 1;
	197	kernel_size[sym]++;
	198	}
	199	}
	200
	201
	202
	203	/*--------------------------------------------------------------.
	204	\| Find the state we would get to (from the current state) by \|
	205	\| shifting SYM. Create a new state if no equivalent one exists \|
	206	\| already. Used by append_states. \|
	207	`--------------------------------------------------------------*/
	208
	209	static state *
	210	get_state (symbol_number sym, size_t core_size, item_number *core)
	211	{
	212	state *s;
	213
	214	if (trace_flag & trace_automaton)
	215	fprintf (stderr, "Entering get_state, symbol = %d (%s)\n",
	216	sym, symbols[sym]->tag);
	217
	218	s = state_hash_lookup (core_size, core);
	219	if (!s)
	220	s = state_list_append (sym, core_size, core);
	221
	222	if (trace_flag & trace_automaton)
	223	fprintf (stderr, "Exiting get_state => %d\n", s->number);
	224
	225	return s;
	226	}
	227
	228	/*---------------------------------------------------------------.
	229	\| Use the information computed by new_itemsets to find the state \|
	230	\| numbers reached by each shift transition from S. \|
	231	\| \|
	232	\| SHIFTSET is set up as a vector of those states. \|
	233	`---------------------------------------------------------------*/
	234
	235	static void
	236	append_states (state *s)
	237	{
	238	int i;
	239
	240	if (trace_flag & trace_automaton)
	241	fprintf (stderr, "Entering append_states, state = %d\n", s->number);
	242
	243	/* First sort shift_symbol into increasing order. */
	244
	245	for (i = 1; i < nshifts; i++)
	246	{
	247	symbol_number sym = shift_symbol[i];
	248	int j;
	249	for (j = i; 0 < j && sym < shift_symbol[j - 1]; j--)
	250	shift_symbol[j] = shift_symbol[j - 1];
	251	shift_symbol[j] = sym;
	252	}
	253
	254	for (i = 0; i < nshifts; i++)
	255	{
	256	symbol_number sym = shift_symbol[i];
	257	shiftset[i] = get_state (sym, kernel_size[sym], kernel_base[sym]);
	258	}
	259	}
	260
	261
	262	/*----------------------------------------------------------------.
	263	\| Find which rules can be used for reduction transitions from the \|
	264	\| current state and make a reductions structure for the state to \|
	265	\| record their rule numbers. \|
	266	`----------------------------------------------------------------*/
	267
	268	static void
	269	save_reductions (state *s)
	270	{
	271	int count = 0;
	272	size_t i;
	273
	274	/* Find and count the active items that represent ends of rules. */
	275	for (i = 0; i < nitemset; ++i)
	276	{
	277	item_number item = ritem[itemset[i]];
	278	if (item_number_is_rule_number (item))
	279	{
	280	rule_number r = item_number_as_rule_number (item);
	281	redset[count++] = &rules[r];
	282	if (r == 0)
	283	{
	284	/* This is "reduce 0", i.e., accept. */
	285	aver (!final_state);
	286	final_state = s;
	287	}
	288	}
	289	}
	290
	291	/* Make a reductions structure and copy the data into it. */
	292	state_reductions_set (s, count, redset);
	293	}
	294
	295	\f
	296	/*---------------.
	297	\| Build STATES. \|
	298	`---------------*/
	299
	300	static void
	301	set_states (void)
	302	{
	303	states = xcalloc (nstates, sizeof *states);
	304
	305	while (first_state)
	306	{
	307	state_list *this = first_state;
	308
	309	/* Pessimization, but simplification of the code: make sure all
	310	the states have valid transitions and reductions members,
	311	even if reduced to 0. It is too soon for errs, which are
	312	computed later, but set_conflicts. */
	313	state *s = this->state;
	314	if (!s->transitions)
	315	state_transitions_set (s, 0, 0);
	316	if (!s->reductions)
	317	state_reductions_set (s, 0, 0);
	318
	319	states[s->number] = s;
	320
	321	first_state = this->next;
	322	free (this);
	323	}
	324	first_state = NULL;
	325	last_state = NULL;
	326	}
	327
	328
	329	/*-------------------------------------------------------------------.
	330	\| Compute the LR(0) parser states (see state.h for details) from the \|
	331	\| grammar. \|
	332	`-------------------------------------------------------------------*/
	333
	334	void
	335	generate_states (void)
	336	{
	337	item_number initial_core = 0;
	338	state_list *list = NULL;
	339	allocate_storage ();
	340	new_closure (nritems);
	341
	342	/* Create the initial state. The 0 at the lhs is the index of the
	343	item of this initial rule. */
	344	state_list_append (0, 1, &initial_core);
	345
	346	/* States are queued when they are created; process them all. */
	347	for (list = first_state; list; list = list->next)
	348	{
	349	state *s = list->state;
	350	if (trace_flag & trace_automaton)
	351	fprintf (stderr, "Processing state %d (reached by %s)\n",
	352	s->number,
	353	symbols[s->accessing_symbol]->tag);
	354	/* Set up itemset for the transitions out of this state. itemset gets a
	355	vector of all the items that could be accepted next. */
	356	closure (s->items, s->nitems);
	357	/* Record the reductions allowed out of this state. */
	358	save_reductions (s);
	359	/* Find the itemsets of the states that shifts can reach. */
	360	new_itemsets (s);
	361	/* Find or create the core structures for those states. */
	362	append_states (s);
	363
	364	/* Create the shifts structures for the shifts to those states,
	365	now that the state numbers transitioning to are known. */
	366	state_transitions_set (s, nshifts, shiftset);
	367	}
	368
	369	/* discard various storage */
	370	free_closure ();
	371	free_storage ();
	372
	373	/* Set up STATES. */
	374	set_states ();
	375	}