src/tables.c

/* Output the generated parsing program for Bison.

   Copyright (C) 1984, 1986, 1989, 1992, 2000-2006, 2009-2010 Free
   Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include "system.h"

#include <bitsetv.h>
#include <quotearg.h>

#include "complain.h"
#include "conflicts.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "lalr.h"
#include "muscle-tab.h"
#include "reader.h"
#include "symtab.h"
#include "tables.h"

/* Several tables are indexed both by state and nonterminal numbers.
   We call such an index a `vector'; i.e., a vector is either a state
   or a nonterminal number.

   Of course vector_number_t ought to be wide enough to contain
   state_number and symbol_number.  */
typedef int vector_number;

#if 0 /* Not currently used.  */
static inline vector_number
state_number_to_vector_number (state_number s)
{
  return s;
}
#endif

static inline vector_number
symbol_number_to_vector_number (symbol_number sym)
{
  return state_number_as_int (nstates) + sym - ntokens;
}

int nvectors;


/* FROMS and TOS are indexed by vector_number.

   If VECTOR is a nonterminal, (FROMS[VECTOR], TOS[VECTOR]) form an
   array of state numbers of the non defaulted GOTO on VECTOR.

   If VECTOR is a state, TOS[VECTOR] is the array of actions to do on
   the (array of) symbols FROMS[VECTOR].

   In both cases, TALLY[VECTOR] is the size of the arrays
   FROMS[VECTOR], TOS[VECTOR]; and WIDTH[VECTOR] =
   (FROMS[VECTOR][SIZE] - FROMS[VECTOR][0] + 1) where SIZE =
   TALLY[VECTOR].

   FROMS therefore contains symbol_number and action_number,
   TOS state_number and action_number,
   TALLY sizes,
   WIDTH differences of FROMS.

   Let base_number be the type of FROMS, TOS, and WIDTH.  */
#define BASE_MAXIMUM INT_MAX
#define BASE_MINIMUM INT_MIN

static base_number **froms;
static base_number **tos;
static unsigned int **conflict_tos;
static int *tally;
static base_number *width;


/* For a given state, N = ACTROW[SYMBOL]:

   If N = 0, stands for `run the default action'.
   If N = MIN, stands for `raise a syntax error'.
   If N > 0, stands for `shift SYMBOL and go to n'.
   If N < 0, stands for `reduce -N'.  */
typedef int action_number;
#define ACTION_NUMBER_MINIMUM INT_MIN

static action_number *actrow;

/* FROMS and TOS are reordered to be compressed.  ORDER[VECTOR] is the
   new vector number of VECTOR.  We skip `empty' vectors (i.e.,
   TALLY[VECTOR] = 0), and call these `entries'.  */
static vector_number *order;
static int nentries;

base_number *base = NULL;
/* A distinguished value of BASE, negative infinite.  During the
   computation equals to BASE_MINIMUM, later mapped to BASE_NINF to
   keep parser tables small.  */
base_number base_ninf = 0;
static base_number *pos = NULL;

static unsigned int *conflrow;
unsigned int *conflict_table;
unsigned int *conflict_list;
int conflict_list_cnt;
static int conflict_list_free;

/* TABLE_SIZE is the allocated size of both TABLE and CHECK.  We start
   with more or less the original hard-coded value (which was
   SHRT_MAX).  */
static int table_size = 32768;
base_number *table;
base_number *check;
/* The value used in TABLE to denote explicit syntax errors
   (%nonassoc), a negative infinite.  First defaults to ACTION_NUMBER_MINIMUM,
   but in order to keep small tables, renumbered as TABLE_ERROR, which
   is the smallest (non error) value minus 1.  */
base_number table_ninf = 0;
static int lowzero;
int high;

state_number *yydefgoto;
rule_number *yydefact;

/*----------------------------------------------------------------.
| If TABLE (and CHECK) appear to be small to be addressed at      |
| DESIRED, grow them.  Note that TABLE[DESIRED] is to be used, so |
| the desired size is at least DESIRED + 1.                       |
`----------------------------------------------------------------*/

static void
table_grow (int desired)
{
  int old_size = table_size;

  while (table_size <= desired)
    table_size *= 2;

  if (trace_flag & trace_resource)
    fprintf (stderr, "growing table and check from: %d to %d\n",
	     old_size, table_size);

  table = xnrealloc (table, table_size, sizeof *table);
  conflict_table = xnrealloc (conflict_table, table_size,
			      sizeof *conflict_table);
  check = xnrealloc (check, table_size, sizeof *check);

  for (/* Nothing. */; old_size < table_size; ++old_size)
    {
      table[old_size] = 0;
      conflict_table[old_size] = 0;
      check[old_size] = -1;
    }
}


/*-------------------------------------------------------------------.
| For GLR parsers, for each conflicted token in S, as indicated      |
| by non-zero entries in CONFLROW, create a list of possible	     |
| reductions that are alternatives to the shift or reduction	     |
| currently recorded for that token in S.  Store the alternative     |
| reductions followed by a 0 in CONFLICT_LIST, updating		     |
| CONFLICT_LIST_CNT, and storing an index to the start of the list   |
| back into CONFLROW.						     |
`-------------------------------------------------------------------*/

static void
conflict_row (state *s)
{
  int i, j;
  reductions *reds = s->reductions;

  if (!nondeterministic_parser)
    return;

  for (j = 0; j < ntokens; j += 1)
    if (conflrow[j])
      {
	conflrow[j] = conflict_list_cnt;

	/* Find all reductions for token J, and record all that do not
	   match ACTROW[J].  */
	for (i = 0; i < reds->num; i += 1)
	  if (bitset_test (reds->lookahead_tokens[i], j)
	      && (actrow[j]
		  != rule_number_as_item_number (reds->rules[i]->number)))
	    {
	      aver (0 < conflict_list_free);
	      conflict_list[conflict_list_cnt] = reds->rules[i]->number + 1;
	      conflict_list_cnt += 1;
	      conflict_list_free -= 1;
	    }

	/* Leave a 0 at the end.  */
	aver (0 < conflict_list_free);
	conflict_list[conflict_list_cnt] = 0;
	conflict_list_cnt += 1;
	conflict_list_free -= 1;
      }
}


/*------------------------------------------------------------------.
| Decide what to do for each type of token if seen as the           |
| lookahead in specified state.  The value returned is used as the  |
| default action (yydefact) for the state.  In addition, ACTROW is  |
| filled with what to do for each kind of token, index by symbol    |
| number, with zero meaning do the default action.  The value       |
| ACTION_NUMBER_MINIMUM, a very negative number, means this	    |
| situation is an error.  The parser recognizes this value	    |
| specially.							    |
|                                                                   |
| This is where conflicts are resolved.  The loop over lookahead    |
| rules considered lower-numbered rules last, and the last rule     |
| considered that likes a token gets to handle it.                  |
|                                                                   |
| For GLR parsers, also sets CONFLROW[SYM] to an index into         |
| CONFLICT_LIST iff there is an unresolved conflict (s/r or r/r)    |
| with symbol SYM. The default reduction is not used for a symbol   |
| that has any such conflicts.                                      |
`------------------------------------------------------------------*/

static rule *
action_row (state *s)
{
  int i;
  rule *default_reduction = NULL;
  reductions *reds = s->reductions;
  transitions *trans = s->transitions;
  errs *errp = s->errs;
  /* Set to nonzero to inhibit having any default reduction.  */
  bool nodefault = false;
  bool conflicted = false;

  for (i = 0; i < ntokens; i++)
    actrow[i] = conflrow[i] = 0;

  if (reds->lookahead_tokens)
    {
      int j;
      bitset_iterator biter;
      /* loop over all the rules available here which require
	 lookahead (in reverse order to give precedence to the first
	 rule) */
      for (i = reds->num - 1; i >= 0; --i)
	/* and find each token which the rule finds acceptable
	   to come next */
	BITSET_FOR_EACH (biter, reds->lookahead_tokens[i], j, 0)
	{
	  /* and record this rule as the rule to use if that
	     token follows.  */
	  if (actrow[j] != 0)
	    {
	      conflicted = true;
	      conflrow[j] = 1;
	    }
	  actrow[j] = rule_number_as_item_number (reds->rules[i]->number);
	}
    }

  /* Now see which tokens are allowed for shifts in this state.  For
     them, record the shift as the thing to do.  So shift is preferred
     to reduce.  */
  FOR_EACH_SHIFT (trans, i)
    {
      symbol_number sym = TRANSITION_SYMBOL (trans, i);
      state *shift_state = trans->states[i];

      if (actrow[sym] != 0)
	{
	  conflicted = true;
	  conflrow[sym] = 1;
	}
      actrow[sym] = state_number_as_int (shift_state->number);

      /* Do not use any default reduction if there is a shift for
	 error */
      if (sym == errtoken->number)
	nodefault = true;
    }

  /* See which tokens are an explicit error in this state (due to
     %nonassoc).  For them, record ACTION_NUMBER_MINIMUM as the
     action.  */
  for (i = 0; i < errp->num; i++)
    {
      symbol *sym = errp->symbols[i];
      actrow[sym->number] = ACTION_NUMBER_MINIMUM;
    }

  /* Turn off default reductions where requested by the user.  See
     state_lookahead_tokens_count in lalr.c to understand when states are
     labeled as consistent.  */
  {
    char *default_reductions =
      muscle_percent_define_get ("lr.default-reductions");
    if (0 != strcmp (default_reductions, "all") && !s->consistent)
      nodefault = true;
    free (default_reductions);
  }

  /* Now find the most common reduction and make it the default action
     for this state.  */

  if (reds->num >= 1 && !nodefault)
    {
      if (s->consistent)
	default_reduction = reds->rules[0];
      else
	{
	  int max = 0;
	  for (i = 0; i < reds->num; i++)
	    {
	      int count = 0;
	      rule *r = reds->rules[i];
	      symbol_number j;

	      for (j = 0; j < ntokens; j++)
		if (actrow[j] == rule_number_as_item_number (r->number))
		  count++;

	      if (count > max)
		{
		  max = count;
		  default_reduction = r;
		}
	    }

	  /* GLR parsers need space for conflict lists, so we can't
	     default conflicted entries.  For non-conflicted entries
	     or as long as we are not building a GLR parser,
	     actions that match the default are replaced with zero,
	     which means "use the default". */

	  if (max > 0)
	    {
	      int j;
	      for (j = 0; j < ntokens; j++)
		if (actrow[j]
                    == rule_number_as_item_number (default_reduction->number)
		    && ! (nondeterministic_parser && conflrow[j]))
		  actrow[j] = 0;
	    }
	}
    }

  /* If have no default reduction, the default is an error.
     So replace any action which says "error" with "use default".  */

  if (!default_reduction)
    for (i = 0; i < ntokens; i++)
      if (actrow[i] == ACTION_NUMBER_MINIMUM)
	actrow[i] = 0;

  if (conflicted)
    conflict_row (s);

  return default_reduction;
}


/*----------------------------------------.
| Set FROMS, TOS, TALLY and WIDTH for S.  |
`----------------------------------------*/

static void
save_row (state_number s)
{
  symbol_number i;
  int count;
  base_number *sp;
  base_number *sp1;
  base_number *sp2;
  unsigned int *sp3;

  /* Number of non default actions in S.  */
  count = 0;
  for (i = 0; i < ntokens; i++)
    if (actrow[i] != 0)
      count++;

  if (count == 0)
    return;

  /* Allocate non defaulted actions.  */
  froms[s] = sp = sp1 = xnmalloc (count, sizeof *sp1);
  tos[s] = sp2 = xnmalloc (count, sizeof *sp2);
  conflict_tos[s] = sp3 =
    nondeterministic_parser ? xnmalloc (count, sizeof *sp3) : NULL;

  /* Store non defaulted actions.  */
  for (i = 0; i < ntokens; i++)
    if (actrow[i] != 0)
      {
	*sp1++ = i;
	*sp2++ = actrow[i];
	if (nondeterministic_parser)
	  *sp3++ = conflrow[i];
      }

  tally[s] = count;
  width[s] = sp1[-1] - sp[0] + 1;
}


/*------------------------------------------------------------------.
| Figure out the actions for the specified state, indexed by        |
| lookahead token type.                                             |
|                                                                   |
| The YYDEFACT table is output now.  The detailed info is saved for |
| putting into YYTABLE later.                                       |
`------------------------------------------------------------------*/

static void
token_actions (void)
{
  state_number i;
  symbol_number j;
  rule_number r;

  int nconflict = nondeterministic_parser ? conflicts_total_count () : 0;

  yydefact = xnmalloc (nstates, sizeof *yydefact);

  actrow = xnmalloc (ntokens, sizeof *actrow);
  conflrow = xnmalloc (ntokens, sizeof *conflrow);

  conflict_list = xnmalloc (1 + 2 * nconflict, sizeof *conflict_list);
  conflict_list_free = 2 * nconflict;
  conflict_list_cnt = 1;

  /* Find the rules which are reduced.  */
  if (!nondeterministic_parser)
    for (r = 0; r < nrules; ++r)
      rules[r].useful = false;

  for (i = 0; i < nstates; ++i)
    {
      rule *default_reduction = action_row (states[i]);
      yydefact[i] = default_reduction ? default_reduction->number + 1 : 0;
      save_row (i);

      /* Now that the parser was computed, we can find which rules are
	 really reduced, and which are not because of SR or RR
	 conflicts.  */
      if (!nondeterministic_parser)
	{
	  for (j = 0; j < ntokens; ++j)
	    if (actrow[j] < 0 && actrow[j] != ACTION_NUMBER_MINIMUM)
	      rules[item_number_as_rule_number (actrow[j])].useful = true;
	  if (yydefact[i])
	    rules[yydefact[i] - 1].useful = true;
	}
    }

  free (actrow);
  free (conflrow);
}


/*------------------------------------------------------------------.
| Compute FROMS[VECTOR], TOS[VECTOR], TALLY[VECTOR], WIDTH[VECTOR], |
| i.e., the information related to non defaulted GOTO on the nterm  |
| SYM.                                                              |
|                                                                   |
| DEFAULT_STATE is the principal destination on SYM, i.e., the      |
| default GOTO destination on SYM.                                  |
`------------------------------------------------------------------*/

static void
save_column (symbol_number sym, state_number default_state)
{
  goto_number i;
  base_number *sp;
  base_number *sp1;
  base_number *sp2;
  int count;
  vector_number symno = symbol_number_to_vector_number (sym);

  goto_number begin = goto_map[sym - ntokens];
  goto_number end = goto_map[sym - ntokens + 1];

  /* Number of non default GOTO.  */
  count = 0;
  for (i = begin; i < end; i++)
    if (to_state[i] != default_state)
      count++;

  if (count == 0)
    return;

  /* Allocate room for non defaulted gotos.  */
  froms[symno] = sp = sp1 = xnmalloc (count, sizeof *sp1);
  tos[symno] = sp2 = xnmalloc (count, sizeof *sp2);

  /* Store the state numbers of the non defaulted gotos.  */
  for (i = begin; i < end; i++)
    if (to_state[i] != default_state)
      {
	*sp1++ = from_state[i];
	*sp2++ = to_state[i];
      }

  tally[symno] = count;
  width[symno] = sp1[-1] - sp[0] + 1;
}


/*-------------------------------------------------------------.
| Return `the' most common destination GOTO on SYM (a nterm).  |
`-------------------------------------------------------------*/

static state_number
default_goto (symbol_number sym, size_t state_count[])
{
  state_number s;
  goto_number i;
  goto_number m = goto_map[sym - ntokens];
  goto_number n = goto_map[sym - ntokens + 1];
  state_number default_state = -1;
  size_t max = 0;

  if (m == n)
    return -1;

  for (s = 0; s < nstates; s++)
    state_count[s] = 0;

  for (i = m; i < n; i++)
    state_count[to_state[i]]++;

  for (s = 0; s < nstates; s++)
    if (state_count[s] > max)
      {
	max = state_count[s];
	default_state = s;
      }

  return default_state;
}


/*-------------------------------------------------------------------.
| Figure out what to do after reducing with each rule, depending on  |
| the saved state from before the beginning of parsing the data that |
| matched this rule.                                                 |
|                                                                    |
| The YYDEFGOTO table is output now.  The detailed info is saved for |
| putting into YYTABLE later.                                        |
`-------------------------------------------------------------------*/

static void
goto_actions (void)
{
  symbol_number i;
  size_t *state_count = xnmalloc (nstates, sizeof *state_count);
  yydefgoto = xnmalloc (nvars, sizeof *yydefgoto);

  /* For a given nterm I, STATE_COUNT[S] is the number of times there
     is a GOTO to S on I.  */
  for (i = ntokens; i < nsyms; ++i)
    {
      state_number default_state = default_goto (i, state_count);
      save_column (i, default_state);
      yydefgoto[i - ntokens] = default_state;
    }
  free (state_count);
}


/*------------------------------------------------------------------.
| Compute ORDER, a reordering of vectors, in order to decide how to |
| pack the actions and gotos information into yytable.              |
`------------------------------------------------------------------*/

static void
sort_actions (void)
{
  int i;

  nentries = 0;

  for (i = 0; i < nvectors; i++)
    if (tally[i] > 0)
      {
	int k;
	int t = tally[i];
	int w = width[i];
	int j = nentries - 1;

	while (j >= 0 && (width[order[j]] < w))
	  j--;

	while (j >= 0 && (width[order[j]] == w) && (tally[order[j]] < t))
	  j--;

	for (k = nentries - 1; k > j; k--)
	  order[k + 1] = order[k];

	order[j + 1] = i;
	nentries++;
      }
}


/* If VECTOR is a state which actions (reflected by FROMS, TOS, TALLY
   and WIDTH of VECTOR) are common to a previous state, return this
   state number.

   In any other case, return -1.  */

static state_number
matching_state (vector_number vector)
{
  vector_number i = order[vector];
  int t;
  int w;
  int prev;

  /* If VECTOR is a nterm, return -1.  */
  if (nstates <= i)
    return -1;

  t = tally[i];
  w = width[i];

  /* If VECTOR has GLR conflicts, return -1 */
  if (conflict_tos[i] != NULL)
    {
      int j;
      for (j = 0; j < t; j += 1)
	if (conflict_tos[i][j] != 0)
	  return -1;
    }

  for (prev = vector - 1; prev >= 0; prev--)
    {
      vector_number j = order[prev];
      int k;
      int match = 1;

      /* Given how ORDER was computed, if the WIDTH or TALLY is
	 different, there cannot be a matching state.  */
      if (width[j] != w || tally[j] != t)
	return -1;

      for (k = 0; match && k < t; k++)
	if (tos[j][k] != tos[i][k] || froms[j][k] != froms[i][k]
	    || (conflict_tos[j] != NULL && conflict_tos[j][k] != 0))
	  match = 0;

      if (match)
	return j;
    }

  return -1;
}


static base_number
pack_vector (vector_number vector)
{
  vector_number i = order[vector];
  int j;
  int t = tally[i];
  int loc = 0;
  base_number *from = froms[i];
  base_number *to = tos[i];
  unsigned int *conflict_to = conflict_tos[i];

  aver (t != 0);

  for (j = lowzero - from[0]; ; j++)
    {
      int k;
      bool ok = true;

      aver (j < table_size);

      for (k = 0; ok && k < t; k++)
	{
	  loc = j + state_number_as_int (from[k]);
	  if (table_size <= loc)
	    table_grow (loc);

	  if (table[loc] != 0)
	    ok = false;
	}

      for (k = 0; ok && k < vector; k++)
	if (pos[k] == j)
	  ok = false;

      if (ok)
	{
	  for (k = 0; k < t; k++)
	    {
	      loc = j + from[k];
	      table[loc] = to[k];
	      if (nondeterministic_parser && conflict_to != NULL)
		conflict_table[loc] = conflict_to[k];
	      check[loc] = from[k];
	    }

	  while (table[lowzero] != 0)
	    lowzero++;

	  if (loc > high)
	    high = loc;

	  aver (BASE_MINIMUM <= j && j <= BASE_MAXIMUM);
	  return j;
	}
    }
}


/*-------------------------------------------------------------.
| Remap the negative infinite in TAB from NINF to the greatest |
| possible smallest value.  Return it.                         |
|                                                              |
| In most case this allows us to use shorts instead of ints in |
| parsers.                                                     |
`-------------------------------------------------------------*/

static base_number
table_ninf_remap (base_number tab[], int size, base_number ninf)
{
  base_number res = 0;
  int i;

  for (i = 0; i < size; i++)
    if (tab[i] < res && tab[i] != ninf)
      res = tab[i];

  --res;

  for (i = 0; i < size; i++)
    if (tab[i] == ninf)
      tab[i] = res;

  return res;
}

static void
pack_table (void)
{
  int i;

  base = xnmalloc (nvectors, sizeof *base);
  pos = xnmalloc (nentries, sizeof *pos);
  table = xcalloc (table_size, sizeof *table);
  conflict_table = xcalloc (table_size, sizeof *conflict_table);
  check = xnmalloc (table_size, sizeof *check);

  lowzero = 0;
  high = 0;

  for (i = 0; i < nvectors; i++)
    base[i] = BASE_MINIMUM;

  for (i = 0; i < table_size; i++)
    check[i] = -1;

  for (i = 0; i < nentries; i++)
    {
      state_number s = matching_state (i);
      base_number place;

      if (s < 0)
	/* A new set of state actions, or a nonterminal.  */
	place = pack_vector (i);
      else
	/* Action of I were already coded for S.  */
	place = base[s];

      pos[i] = place;
      base[order[i]] = place;
    }

  /* Use the greatest possible negative infinites.  */
  base_ninf = table_ninf_remap (base, nvectors, BASE_MINIMUM);
  table_ninf = table_ninf_remap (table, high + 1, ACTION_NUMBER_MINIMUM);

  free (pos);
}

\f

/*-----------------------------------------------------------------.
| Compute and output yydefact, yydefgoto, yypact, yypgoto, yytable |
| and yycheck.                                                     |
`-----------------------------------------------------------------*/

void
tables_generate (void)
{
  int i;

  /* This is a poor way to make sure the sizes are properly
     correlated.  In particular the signedness is not taken into
     account.  But it's not useless.  */
  verify (sizeof nstates <= sizeof nvectors
	  && sizeof nvars <= sizeof nvectors);

  nvectors = state_number_as_int (nstates) + nvars;

  froms = xcalloc (nvectors, sizeof *froms);
  tos = xcalloc (nvectors, sizeof *tos);
  conflict_tos = xcalloc (nvectors, sizeof *conflict_tos);
  tally = xcalloc (nvectors, sizeof *tally);
  width = xnmalloc (nvectors, sizeof *width);

  token_actions ();

  goto_actions ();
  free (goto_map);
  free (from_state);
  free (to_state);

  order = xcalloc (nvectors, sizeof *order);
  sort_actions ();
  pack_table ();
  free (order);

  free (tally);
  free (width);

  for (i = 0; i < nvectors; i++)
    {
      free (froms[i]);
      free (tos[i]);
      free (conflict_tos[i]);
    }

  free (froms);
  free (tos);
  free (conflict_tos);
}


/*-------------------------.
| Free the parser tables.  |
`-------------------------*/

void
tables_free (void)
{
  free (base);
  free (conflict_table);
  free (conflict_list);
  free (table);
  free (check);
  free (yydefgoto);
  free (yydefact);
}