src/LR0.c

/* Generate the nondeterministic finite state machine for Bison.

   Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2004, 2005 Free
   Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */


/* See comments in state.h for the data structures that represent it.
   The entry point is generate_states.  */

#include <config.h>
#include "system.h"

#include <bitset.h>
#include <quotearg.h>

#include "LR0.h"
#include "closure.h"
#include "complain.h"
#include "getargs.h"
#include "gram.h"
#include "gram.h"
#include "lalr.h"
#include "reader.h"
#include "reduce.h"
#include "state.h"
#include "symtab.h"

typedef struct state_list
{
  struct state_list *next;
  state *state;
} state_list;

static state_list *first_state = NULL;
static state_list *last_state = NULL;


/*------------------------------------------------------------------.
| A state was just discovered from another state.  Queue it for     |
| later examination, in order to find its transitions.  Return it.  |
`------------------------------------------------------------------*/

static state *
state_list_append (symbol_number sym, size_t core_size, item_number *core)
{
  state_list *node = xmalloc (sizeof *node);
  state *s = state_new (sym, core_size, core);

  if (trace_flag & trace_automaton)
    fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
	     nstates, sym, symbols[sym]->tag);

  node->next = NULL;
  node->state = s;

  if (!first_state)
    first_state = node;
  if (last_state)
    last_state->next = node;
  last_state = node;

  return s;
}

static int nshifts;
static symbol_number *shift_symbol;

static rule **redset;
static state **shiftset;

static item_number **kernel_base;
static int *kernel_size;
static item_number *kernel_items;

\f
static void
allocate_itemsets (void)
{
  symbol_number i;
  rule_number r;
  item_number *rhsp;

  /* Count the number of occurrences of all the symbols in RITEMS.
     Note that useless productions (hence useless nonterminals) are
     browsed too, hence we need to allocate room for _all_ the
     symbols.  */
  size_t count = 0;
  size_t *symbol_count = xcalloc (nsyms + nuseless_nonterminals,
				  sizeof *symbol_count);

  for (r = 0; r < nrules; ++r)
    for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
      {
	count++;
	symbol_count[*rhsp]++;
      }

  /* See comments before new_itemsets.  All the vectors of items
     live inside KERNEL_ITEMS.  The number of active items after
     some symbol S cannot be more than the number of times that S
     appears as an item, which is SYMBOL_COUNT[S].
     We allocate that much space for each symbol.  */

  kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
  kernel_items = xnmalloc (count, sizeof *kernel_items);

  count = 0;
  for (i = 0; i < nsyms; i++)
    {
      kernel_base[i] = kernel_items + count;
      count += symbol_count[i];
    }

  free (symbol_count);
  kernel_size = xnmalloc (nsyms, sizeof *kernel_size);
}


static void
allocate_storage (void)
{
  allocate_itemsets ();

  shiftset = xnmalloc (nsyms, sizeof *shiftset);
  redset = xnmalloc (nrules, sizeof *redset);
  state_hash_new ();
  shift_symbol = xnmalloc (nsyms, sizeof *shift_symbol);
}


static void
free_storage (void)
{
  free (shift_symbol);
  free (redset);
  free (shiftset);
  free (kernel_base);
  free (kernel_size);
  free (kernel_items);
  state_hash_free ();
}


/*---------------------------------------------------------------.
| Find which symbols can be shifted in S, and for each one       |
| record which items would be active after that shift.  Uses the |
| contents of itemset.                                           |
|                                                                |
| shift_symbol is set to a vector of the symbols that can be     |
| shifted.  For each symbol in the grammar, kernel_base[symbol]  |
| points to a vector of item numbers activated if that symbol is |
| shifted, and kernel_size[symbol] is their numbers.             |
`---------------------------------------------------------------*/

static void
new_itemsets (state *s)
{
  size_t i;

  if (trace_flag & trace_automaton)
    fprintf (stderr, "Entering new_itemsets, state = %d\n", s->number);

  memset (kernel_size, 0, nsyms * sizeof *kernel_size);

  nshifts = 0;

  for (i = 0; i < nritemset; ++i)
    if (ritem[itemset[i]] >= 0)
      {
	symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]);
	if (!kernel_size[sym])
	  {
	    shift_symbol[nshifts] = sym;
	    nshifts++;
	  }

	kernel_base[sym][kernel_size[sym]] = itemset[i] + 1;
	kernel_size[sym]++;
      }
}


/*--------------------------------------------------------------.
| Find the state we would get to (from the current state) by    |
| shifting SYM.  Create a new state if no equivalent one exists |
| already.  Used by append_states.                              |
`--------------------------------------------------------------*/

static state *
get_state (symbol_number sym, size_t core_size, item_number *core)
{
  state *s;

  if (trace_flag & trace_automaton)
    fprintf (stderr, "Entering get_state, symbol = %d (%s)\n",
	     sym, symbols[sym]->tag);

  s = state_hash_lookup (core_size, core);
  if (!s)
    s = state_list_append (sym, core_size, core);

  if (trace_flag & trace_automaton)
    fprintf (stderr, "Exiting get_state => %d\n", s->number);

  return s;
}

/*---------------------------------------------------------------.
| Use the information computed by new_itemsets to find the state |
| numbers reached by each shift transition from S.		 |
|                                                                |
| SHIFTSET is set up as a vector of those states.                |
`---------------------------------------------------------------*/

static void
append_states (state *s)
{
  int i;

  if (trace_flag & trace_automaton)
    fprintf (stderr, "Entering append_states, state = %d\n", s->number);

  /* First sort shift_symbol into increasing order.  */

  for (i = 1; i < nshifts; i++)
    {
      symbol_number sym = shift_symbol[i];
      int j;
      for (j = i; 0 < j && sym < shift_symbol[j - 1]; j--)
	shift_symbol[j] = shift_symbol[j - 1];
      shift_symbol[j] = sym;
    }

  for (i = 0; i < nshifts; i++)
    {
      symbol_number sym = shift_symbol[i];
      shiftset[i] = get_state (sym, kernel_size[sym], kernel_base[sym]);
    }
}


/*----------------------------------------------------------------.
| Find which rules can be used for reduction transitions from the |
| current state and make a reductions structure for the state to  |
| record their rule numbers.                                      |
`----------------------------------------------------------------*/

static void
save_reductions (state *s)
{
  int count = 0;
  size_t i;

  /* Find and count the active items that represent ends of rules. */
  for (i = 0; i < nritemset; ++i)
    {
      item_number item = ritem[itemset[i]];
      if (item_number_is_rule_number (item))
	{
	  rule_number r = item_number_as_rule_number (item);
	  redset[count++] = &rules[r];
	  if (r == 0)
	    {
	      /* This is "reduce 0", i.e., accept. */
	      assert (!final_state);
	      final_state = s;
	    }
	}
    }

  /* Make a reductions structure and copy the data into it.  */
  state_reductions_set (s, count, redset);
}

\f
/*---------------.
| Build STATES.  |
`---------------*/

static void
set_states (void)
{
  states = xcalloc (nstates, sizeof *states);

  while (first_state)
    {
      state_list *this = first_state;

      /* Pessimization, but simplification of the code: make sure all
	 the states have valid transitions and reductions members,
	 even if reduced to 0.  It is too soon for errs, which are
	 computed later, but set_conflicts.  */
      state *s = this->state;
      if (!s->transitions)
	state_transitions_set (s, 0, 0);
      if (!s->reductions)
	state_reductions_set (s, 0, 0);

      states[s->number] = s;

      first_state = this->next;
      free (this);
    }
  first_state = NULL;
  last_state = NULL;
}


/*-------------------------------------------------------------------.
| Compute the nondeterministic finite state machine (see state.h for |
| details) from the grammar.                                         |
`-------------------------------------------------------------------*/

void
generate_states (void)
{
  item_number initial_core = 0;
  state_list *list = NULL;
  allocate_storage ();
  new_closure (nritems);

  /* Create the initial state.  The 0 at the lhs is the index of the
     item of this initial rule.  */
  state_list_append (0, 1, &initial_core);

  /* States are queued when they are created; process them all.  */
  for (list = first_state; list; list = list->next)
    {
      state *s = list->state;
      if (trace_flag & trace_automaton)
	fprintf (stderr, "Processing state %d (reached by %s)\n",
		 s->number,
		 symbols[s->accessing_symbol]->tag);
      /* Set up ruleset and itemset for the transitions out of this
         state.  ruleset gets a 1 bit for each rule that could reduce
         now.  itemset gets a vector of all the items that could be
         accepted next.  */
      closure (s->items, s->nitems);
      /* Record the reductions allowed out of this state.  */
      save_reductions (s);
      /* Find the itemsets of the states that shifts can reach.  */
      new_itemsets (s);
      /* Find or create the core structures for those states.  */
      append_states (s);

      /* Create the shifts structures for the shifts to those states,
	 now that the state numbers transitioning to are known.  */
      state_transitions_set (s, nshifts, shiftset);
    }

  /* discard various storage */
  free_closure ();
  free_storage ();

  /* Set up STATES. */
  set_states ();
}
Commit	Line	Data
	1	/* Generate the nondeterministic finite state machine for Bison.
	2
	3	Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2004, 2005 Free
	4	Software Foundation, Inc.
	5
	6	This file is part of Bison, the GNU Compiler Compiler.
	7
	8	Bison is free software; you can redistribute it and/or modify
	9	it under the terms of the GNU General Public License as published by
	10	the Free Software Foundation; either version 2, or (at your option)
	11	any later version.
	12
	13	Bison is distributed in the hope that it will be useful,
	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	GNU General Public License for more details.
	17
	18	You should have received a copy of the GNU General Public License
	19	along with Bison; see the file COPYING. If not, write to
	20	the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	21	Boston, MA 02110-1301, USA. */
	22
	23
	24	/* See comments in state.h for the data structures that represent it.
	25	The entry point is generate_states. */
	26
	27	#include <config.h>
	28	#include "system.h"
	29
	30	#include <bitset.h>
	31	#include <quotearg.h>
	32
	33	#include "LR0.h"
	34	#include "closure.h"
	35	#include "complain.h"
	36	#include "getargs.h"
	37	#include "gram.h"
	38	#include "gram.h"
	39	#include "lalr.h"
	40	#include "reader.h"
	41	#include "reduce.h"
	42	#include "state.h"
	43	#include "symtab.h"
	44
	45	typedef struct state_list
	46	{
	47	struct state_list *next;
	48	state *state;
	49	} state_list;
	50
	51	static state_list *first_state = NULL;
	52	static state_list *last_state = NULL;
	53
	54
	55	/*------------------------------------------------------------------.
	56	\| A state was just discovered from another state. Queue it for \|
	57	\| later examination, in order to find its transitions. Return it. \|
	58	`------------------------------------------------------------------*/
	59
	60	static state *
	61	state_list_append (symbol_number sym, size_t core_size, item_number *core)
	62	{
	63	state_list node = xmalloc (sizeof node);
	64	state *s = state_new (sym, core_size, core);
	65
	66	if (trace_flag & trace_automaton)
	67	fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
	68	nstates, sym, symbols[sym]->tag);
	69
	70	node->next = NULL;
	71	node->state = s;
	72
	73	if (!first_state)
	74	first_state = node;
	75	if (last_state)
	76	last_state->next = node;
	77	last_state = node;
	78
	79	return s;
	80	}
	81
	82	static int nshifts;
	83	static symbol_number *shift_symbol;
	84
	85	static rule **redset;
	86	static state **shiftset;
	87
	88	static item_number **kernel_base;
	89	static int *kernel_size;
	90	static item_number *kernel_items;
	91
	92	\f
	93	static void
	94	allocate_itemsets (void)
	95	{
	96	symbol_number i;
	97	rule_number r;
	98	item_number *rhsp;
	99
	100	/* Count the number of occurrences of all the symbols in RITEMS.
	101	Note that useless productions (hence useless nonterminals) are
	102	browsed too, hence we need to allocate room for _all_ the
	103	symbols. */
	104	size_t count = 0;
	105	size_t *symbol_count = xcalloc (nsyms + nuseless_nonterminals,
	106	sizeof *symbol_count);
	107
	108	for (r = 0; r < nrules; ++r)
	109	for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
	110	{
	111	count++;
	112	symbol_count[*rhsp]++;
	113	}
	114
	115	/* See comments before new_itemsets. All the vectors of items
	116	live inside KERNEL_ITEMS. The number of active items after
	117	some symbol S cannot be more than the number of times that S
	118	appears as an item, which is SYMBOL_COUNT[S].
	119	We allocate that much space for each symbol. */
	120
	121	kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
	122	kernel_items = xnmalloc (count, sizeof *kernel_items);
	123
	124	count = 0;
	125	for (i = 0; i < nsyms; i++)
	126	{
	127	kernel_base[i] = kernel_items + count;
	128	count += symbol_count[i];
	129	}
	130
	131	free (symbol_count);
	132	kernel_size = xnmalloc (nsyms, sizeof *kernel_size);
	133	}
	134
	135
	136	static void
	137	allocate_storage (void)
	138	{
	139	allocate_itemsets ();
	140
	141	shiftset = xnmalloc (nsyms, sizeof *shiftset);
	142	redset = xnmalloc (nrules, sizeof *redset);
	143	state_hash_new ();
	144	shift_symbol = xnmalloc (nsyms, sizeof *shift_symbol);
	145	}
	146
	147
	148	static void
	149	free_storage (void)
	150	{
	151	free (shift_symbol);
	152	free (redset);
	153	free (shiftset);
	154	free (kernel_base);
	155	free (kernel_size);
	156	free (kernel_items);
	157	state_hash_free ();
	158	}
	159
	160
	161
	162
	163	/*---------------------------------------------------------------.
	164	\| Find which symbols can be shifted in S, and for each one \|
	165	\| record which items would be active after that shift. Uses the \|
	166	\| contents of itemset. \|
	167	\| \|
	168	\| shift_symbol is set to a vector of the symbols that can be \|
	169	\| shifted. For each symbol in the grammar, kernel_base[symbol] \|
	170	\| points to a vector of item numbers activated if that symbol is \|
	171	\| shifted, and kernel_size[symbol] is their numbers. \|
	172	`---------------------------------------------------------------*/
	173
	174	static void
	175	new_itemsets (state *s)
	176	{
	177	size_t i;
	178
	179	if (trace_flag & trace_automaton)
	180	fprintf (stderr, "Entering new_itemsets, state = %d\n", s->number);
	181
	182	memset (kernel_size, 0, nsyms * sizeof *kernel_size);
	183
	184	nshifts = 0;
	185
	186	for (i = 0; i < nritemset; ++i)
	187	if (ritem[itemset[i]] >= 0)
	188	{
	189	symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]);
	190	if (!kernel_size[sym])
	191	{
	192	shift_symbol[nshifts] = sym;
	193	nshifts++;
	194	}
	195
	196	kernel_base[sym][kernel_size[sym]] = itemset[i] + 1;
	197	kernel_size[sym]++;
	198	}
	199	}
	200
	201
	202
	203	/*--------------------------------------------------------------.
	204	\| Find the state we would get to (from the current state) by \|
	205	\| shifting SYM. Create a new state if no equivalent one exists \|
	206	\| already. Used by append_states. \|
	207	`--------------------------------------------------------------*/
	208
	209	static state *
	210	get_state (symbol_number sym, size_t core_size, item_number *core)
	211	{
	212	state *s;
	213
	214	if (trace_flag & trace_automaton)
	215	fprintf (stderr, "Entering get_state, symbol = %d (%s)\n",
	216	sym, symbols[sym]->tag);
	217
	218	s = state_hash_lookup (core_size, core);
	219	if (!s)
	220	s = state_list_append (sym, core_size, core);
	221
	222	if (trace_flag & trace_automaton)
	223	fprintf (stderr, "Exiting get_state => %d\n", s->number);
	224
	225	return s;
	226	}
	227
	228	/*---------------------------------------------------------------.
	229	\| Use the information computed by new_itemsets to find the state \|
	230	\| numbers reached by each shift transition from S. \|
	231	\| \|
	232	\| SHIFTSET is set up as a vector of those states. \|
	233	`---------------------------------------------------------------*/
	234
	235	static void
	236	append_states (state *s)
	237	{
	238	int i;
	239
	240	if (trace_flag & trace_automaton)
	241	fprintf (stderr, "Entering append_states, state = %d\n", s->number);
	242
	243	/* First sort shift_symbol into increasing order. */
	244
	245	for (i = 1; i < nshifts; i++)
	246	{
	247	symbol_number sym = shift_symbol[i];
	248	int j;
	249	for (j = i; 0 < j && sym < shift_symbol[j - 1]; j--)
	250	shift_symbol[j] = shift_symbol[j - 1];
	251	shift_symbol[j] = sym;
	252	}
	253
	254	for (i = 0; i < nshifts; i++)
	255	{
	256	symbol_number sym = shift_symbol[i];
	257	shiftset[i] = get_state (sym, kernel_size[sym], kernel_base[sym]);
	258	}
	259	}
	260
	261
	262	/*----------------------------------------------------------------.
	263	\| Find which rules can be used for reduction transitions from the \|
	264	\| current state and make a reductions structure for the state to \|
	265	\| record their rule numbers. \|
	266	`----------------------------------------------------------------*/
	267
	268	static void
	269	save_reductions (state *s)
	270	{
	271	int count = 0;
	272	size_t i;
	273
	274	/* Find and count the active items that represent ends of rules. */
	275	for (i = 0; i < nritemset; ++i)
	276	{
	277	item_number item = ritem[itemset[i]];
	278	if (item_number_is_rule_number (item))
	279	{
	280	rule_number r = item_number_as_rule_number (item);
	281	redset[count++] = &rules[r];
	282	if (r == 0)
	283	{
	284	/* This is "reduce 0", i.e., accept. */
	285	assert (!final_state);
	286	final_state = s;
	287	}
	288	}
	289	}
	290
	291	/* Make a reductions structure and copy the data into it. */
	292	state_reductions_set (s, count, redset);
	293	}
	294
	295	\f
	296	/*---------------.
	297	\| Build STATES. \|
	298	`---------------*/
	299
	300	static void
	301	set_states (void)
	302	{
	303	states = xcalloc (nstates, sizeof *states);
	304
	305	while (first_state)
	306	{
	307	state_list *this = first_state;
	308
	309	/* Pessimization, but simplification of the code: make sure all
	310	the states have valid transitions and reductions members,
	311	even if reduced to 0. It is too soon for errs, which are
	312	computed later, but set_conflicts. */
	313	state *s = this->state;
	314	if (!s->transitions)
	315	state_transitions_set (s, 0, 0);
	316	if (!s->reductions)
	317	state_reductions_set (s, 0, 0);
	318
	319	states[s->number] = s;
	320
	321	first_state = this->next;
	322	free (this);
	323	}
	324	first_state = NULL;
	325	last_state = NULL;
	326	}
	327
	328
	329	/*-------------------------------------------------------------------.
	330	\| Compute the nondeterministic finite state machine (see state.h for \|
	331	\| details) from the grammar. \|
	332	`-------------------------------------------------------------------*/
	333
	334	void
	335	generate_states (void)
	336	{
	337	item_number initial_core = 0;
	338	state_list *list = NULL;
	339	allocate_storage ();
	340	new_closure (nritems);
	341
	342	/* Create the initial state. The 0 at the lhs is the index of the
	343	item of this initial rule. */
	344	state_list_append (0, 1, &initial_core);
	345
	346	/* States are queued when they are created; process them all. */
	347	for (list = first_state; list; list = list->next)
	348	{
	349	state *s = list->state;
	350	if (trace_flag & trace_automaton)
	351	fprintf (stderr, "Processing state %d (reached by %s)\n",
	352	s->number,
	353	symbols[s->accessing_symbol]->tag);
	354	/* Set up ruleset and itemset for the transitions out of this
	355	state. ruleset gets a 1 bit for each rule that could reduce
	356	now. itemset gets a vector of all the items that could be
	357	accepted next. */
	358	closure (s->items, s->nitems);
	359	/* Record the reductions allowed out of this state. */
	360	save_reductions (s);
	361	/* Find the itemsets of the states that shifts can reach. */
	362	new_itemsets (s);
	363	/* Find or create the core structures for those states. */
	364	append_states (s);
	365
	366	/* Create the shifts structures for the shifts to those states,
	367	now that the state numbers transitioning to are known. */
	368	state_transitions_set (s, nshifts, shiftset);
	369	}
	370
	371	/* discard various storage */
	372	free_closure ();
	373	free_storage ();
	374
	375	/* Set up STATES. */
	376	set_states ();
	377	}