src/reader.c

/* Input parser for Bison

   Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
   2005, 2006 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

#include <config.h>
#include "system.h"
#include <assert.h>

#include <quotearg.h>

#include "complain.h"
#include "conflicts.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "muscle_tab.h"
#include "reader.h"
#include "symlist.h"
#include "symtab.h"
#include "scan-gram.h"
#include "scan-code.h"

static void check_and_convert_grammar (void);

/* Head of the list of symbols holding the rules read so far.  */
static symbol_list *grammar = NULL;
/* Whether grammar_start_symbol_set has already recorded a start
   symbol.  */
static bool start_flag = false;
/* Head of the list of merge functions, filled in by
   get_merge_function and released by free_merger_functions.  */
merger_list *merge_functions;

/* Was %union seen?  */
bool union_seen = false;

/* Was a tag seen?  */
bool tag_seen = false;

/* Should rules have a default precedence?  When true, packgram gives
   each rule the precedence of the last token of its rhs.  */
bool default_prec = true;
\f
/* Record SYM, declared at LOC, as the start symbol of the grammar.
   Only the first call takes effect: once a start symbol has been
   recorded, any further call is reported as an error at LOC and
   leaves the recorded symbol untouched.  */

void
grammar_start_symbol_set (symbol *sym, location loc)
{
  if (start_flag)
    {
      complain_at (loc, _("multiple %s declarations"), "%start");
      return;
    }

  startsymbol = sym;
  startsymbol_location = loc;
  start_flag = true;
}


/* There are two prologues: one before the first %union and one
   after.  Append PROLOGUE, read at LOC, to post_prologue_obstack
   when POST is true and to pre_prologue_obstack otherwise.  The text
   is preceded by a b4_syncline invocation built from LOC's starting
   line and quoted file name.  */

void
prologue_augment (const char *prologue, location loc, bool post)
{
  struct obstack *target;

  if (post)
    target = &post_prologue_obstack;
  else
    target = &pre_prologue_obstack;

  obstack_fgrow1 (target, "]b4_syncline(%d, [[", loc.start.line);
  /* FIXME: Protection of M4 characters missing here.  See
     output.c:escaped_output.  */
  MUSCLE_OBSTACK_SGROW (target,
                        quotearg_style (c_quoting_style, loc.start.file));
  obstack_sgrow (target, "]])[\n");
  obstack_sgrow (target, prologue);
}

\f

/* Return the 1-based index, within the merge_functions list, of the
   merging function named NAME.  If no such function is recorded yet,
   append a new entry for it at the end of the list and return the
   index of that new entry.  Return 0 without touching the list when
   the parser being generated is not a GLR parser.  */

static int
get_merge_function (uniqstr name)
{
  merger_list **link;
  int index = 1;

  if (! glr_parser)
    return 0;

  /* LINK always designates the pointer through which the entry
     numbered INDEX is (or would be) reached.  */
  for (link = &merge_functions; *link; link = &(*link)->next)
    {
      if (UNIQSTR_EQ (name, (*link)->name))
        return index;
      index += 1;
    }

  {
    merger_list *fresh = xmalloc (sizeof *fresh);
    fresh->name = uniqstr_new (name);
    /* After all symbol type declarations have been parsed, packgram
       invokes record_merge_function_type to set the type.  */
    fresh->type = NULL;
    fresh->next = NULL;
    *link = fresh;
  }
  return index;
}

/* For the existing merging function with index MERGER, record TYPE
   as its result type, as required by the lhs of the rule whose
   %merge declaration is at DECLARATION_LOC.  A null TYPE is recorded
   as the empty string.  Nothing is done when MERGER is not positive.
   If a different type was already recorded, complain at
   DECLARATION_LOC and at the earlier declaration; the new type and
   location are recorded regardless.  */

static void
record_merge_function_type (int merger, uniqstr type, location declaration_loc)
{
  merger_list *entry;
  int position;
  uniqstr wanted;

  if (merger <= 0)
    return;

  wanted = type ? type : uniqstr_new ("");

  /* Walk to the MERGER-th entry (indices start at 1).  */
  entry = merge_functions;
  position = 1;
  while (entry != NULL && position != merger)
    {
      entry = entry->next;
      position += 1;
    }
  assert (entry != NULL && position == merger);

  if (entry->type != NULL && !UNIQSTR_EQ (entry->type, wanted))
    {
      complain_at (declaration_loc,
                   _("result type clash on merge function `%s': <%s> != <%s>"),
                   entry->name, wanted, entry->type);
      complain_at (entry->type_declaration_location,
                   _("previous declaration"));
    }

  entry->type = uniqstr_new (wanted);
  entry->type_declaration_location = declaration_loc;
}

/* Free all merge-function definitions.

   Every node of the merge_functions list is released with free; the
   name and type strings the nodes refer to are left untouched.  The
   global list head is reset to NULL first, so that it never
   designates freed memory: calling this function again, or
   registering a merge function afterwards, is then harmless.  */

void
free_merger_functions (void)
{
  merger_list *node = merge_functions;

  /* Detach the list before releasing its nodes.  */
  merge_functions = NULL;

  while (node)
    {
      merger_list *following = node->next;
      free (node);
      node = following;
    }
}

\f
/*-------------------------------------------------------------------.
| Parse the input grammar into a single symbol_list structure.  Each |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| All actions are copied out, labelled by the rule number they apply |
| to.                                                                |
`-------------------------------------------------------------------*/

/* The node that currently ends GRAMMAR, or NULL while GRAMMAR is
   still empty.  */
static symbol_list *grammar_end = NULL;

/* Append a new node for SYM, located at LOC, at the end of GRAMMAR.
   A null SYM marks the end of a rule and is not counted as an
   item.  */
static void
grammar_symbol_append (symbol *sym, location loc)
{
  symbol_list *node = symbol_list_sym_new (sym, loc);

  /* Only actual symbols count; the end-of-rule marker does not.  */
  if (sym != NULL)
    nritems += 1;

  /* The very first node also becomes the head of the list.  */
  if (grammar_end == NULL)
    grammar = node;
  else
    grammar_end->next = node;

  grammar_end = node;
}

/* The rule currently being defined, and the previous rule.
   CURRENT_RULE points to the first LHS of the current rule, while
   PREVIOUS_RULE_END points to the *end* of the previous rule (NULL).

   More precisely, grammar_current_rule_begin saves in
   PREVIOUS_RULE_END the node that ended GRAMMAR just before the lhs
   of the current rule was appended, so it is a null pointer when the
   current rule is the first thing in GRAMMAR.
   grammar_midrule_action uses it to insert a rule right before the
   current one.  */
static symbol_list *current_rule = NULL;
static symbol_list *previous_rule_end = NULL;


/* Start a new rule in GRAMMAR whose left-hand side is LHS, found at
   LOC.  The new rule becomes the current rule.  An LHS whose class
   is still unknown becomes a nonterminal and receives the next
   nonterminal number; an LHS that is a token is reported as an
   error.  */

void
grammar_current_rule_begin (symbol *lhs, location loc)
{
  /* Remember where the previous rule stopped before growing the
     list with the lhs of the new one.  */
  ++nrules;
  previous_rule_end = grammar_end;
  grammar_symbol_append (lhs, loc);
  current_rule = grammar_end;

  switch (lhs->class)
    {
    case unknown_sym:
      /* First sighting as an lhs: this is a nonterminal.  */
      lhs->class = nterm_sym;
      lhs->number = nvars++;
      break;

    case token_sym:
      complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
      break;

    default:
      /* Already a nonterminal: nothing to do.  */
      break;
    }
}


/* Tell whether the value of the symbol held by S should be used.
   That is the case when the symbol has a destructor, or when S is a
   mid-rule symbol (i.e., the generated LHS replacing a mid-rule
   action) whose mid-rule was assigned to, as in
   "exp: { $$ = 1; } { $$ = $1; }".  */

static bool
symbol_should_be_used (symbol_list const *s)
{
  if (symbol_destructor_get (s->content.sym))
    return true;

  return s->midrule != NULL && s->midrule->used;
}

/*----------------------------------------------------------------.
| Check that the rule R is properly defined.  For instance, there |
| should be no type clash on the default action.                  |
`----------------------------------------------------------------*/

static void
grammar_rule_check (const symbol_list *r)
{
  /* Type check.

     If there is an action, then there is nothing we can do: the user
     is allowed to shoot herself in the foot.

     Don't worry about the default action if $$ is untyped, since $$'s
     value can't be used.  */
  if (!r->action && r->content.sym->type_name)
    {
      symbol *first_rhs = r->next->content.sym;
      /* If $$ is being set in default way, report if any type mismatch.  */
      if (first_rhs)
	{
	  char const *lhs_type = r->content.sym->type_name;
	  const char *rhs_type =
	    first_rhs->type_name ? first_rhs->type_name : "";
	  if (!UNIQSTR_EQ (lhs_type, rhs_type))
	    warn_at (r->location,
		     _("type clash on default action: <%s> != <%s>"),
		     lhs_type, rhs_type);
	}
      /* Warn if there is no default for $$ but we need one.  */
      else
	warn_at (r->location,
		 _("empty rule for typed nonterminal, and no action"));
    }

  /* Check that symbol values that should be used are in fact used.  */
  {
    symbol_list const *l = r;
    int n = 0;
    for (; l && l->content.sym; l = l->next, ++n)
      if (! (l->used
	     || !symbol_should_be_used (l)
	     /* The default action, $$ = $1, `uses' both.  */
	     || (!r->action && (n == 0 || n == 1))))
	{
	  if (n)
	    warn_at (r->location, _("unused value: $%d"), n);
	  else
	    warn_at (r->location, _("unset value: $$"));
	}
  }
}


/* End the rule currently being grown: append the null-symbol link
   that terminates it, then set the location of the rule to LOC.  */

void
grammar_current_rule_end (location loc)
{
  /* The terminating link borrows the location of the node that ends
     the rule so far.  Read it before the rule's own location is
     overwritten below: for a rule without rhs both are the same
     node.  */
  location last_location = grammar_end->location;

  grammar_symbol_append (NULL, last_location);
  current_rule->location = loc;
}


/*-------------------------------------------------------------------.
| The previous action turns out to be a mid-rule action.  Attach it  |
| to the current rule, i.e., create a dummy symbol, give it this     |
| mid-rule action, and append this dummy nonterminal to the current  |
| rule.                                                              |
`-------------------------------------------------------------------*/

/* On entry the current rule holds the action to move.  On exit that
   action belongs to a new rule with an empty rhs, whose lhs is a
   fresh dummy symbol, inserted in GRAMMAR right before the current
   rule; the current rule has no action any more and ends with the
   dummy symbol.  */

void
grammar_midrule_action (void)
{
  /* Since the action was written out with this rule's number, we must
     give the new rule this number by inserting the new rule before
     it.  */

  /* Make a DUMMY nonterminal, whose location is that of the midrule
     action.  Create the MIDRULE.  */
  location dummy_location = current_rule->action_location;
  symbol *dummy = dummy_symbol_get (dummy_location);
  symbol_list *midrule = symbol_list_sym_new (dummy, dummy_location);

  /* Make a new rule, whose body is empty, before the current one, so
     that the action just read can belong to it.  The node of its lhs
     is not created through grammar_symbol_append, so the counters
     are updated by hand here.  */
  ++nrules;
  ++nritems;
  /* Attach its location and actions to that of the DUMMY.  */
  midrule->location = dummy_location;
  midrule->action = current_rule->action;
  midrule->action_location = dummy_location;
  /* The current rule gives its action away.  This also keeps the
     call to grammar_current_rule_symbol_append below from invoking
     this function again.  */
  current_rule->action = NULL;
  /* The action has not been translated yet, so $$ use hasn't been
     detected yet.  */
  midrule->used = false;

  /* Hook the new rule after the end of the previous rule, or at the
     head of GRAMMAR when there is no previous rule.  */
  if (previous_rule_end)
    previous_rule_end->next = midrule;
  else
    grammar = midrule;

  /* End the dummy's rule.  */
  midrule->next = symbol_list_sym_new (NULL, dummy_location);
  midrule->next->next = current_rule;

  /* The rule preceding the current one is now the dummy's rule.  */
  previous_rule_end = midrule->next;

  /* Insert the dummy nonterminal replacing the midrule action into
     the current rule.  Bind it to its dedicated rule.  After the
     append, GRAMMAR_END is the node holding the dummy.  */
  grammar_current_rule_symbol_append (dummy, dummy_location);
  grammar_end->midrule = midrule;
  midrule->midrule_parent_rule = current_rule;
  /* NOTE(review): presumably the position of the dummy within the
     rhs of the current rule, as the dummy is its last symbol so far;
     confirm against symbol_list_length.  */
  midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next);
}

/* Make PRECSYM the precedence symbol (%prec) of the current rule.
   If the rule already has one, complain at LOC; PRECSYM replaces the
   earlier symbol in any case.  */

void
grammar_current_rule_prec_set (symbol *precsym, location loc)
{
  symbol_list *rule = current_rule;

  if (rule->ruleprec != NULL)
    {
      complain_at (loc, _("only one %s allowed per rule"), "%prec");
    }
  rule->ruleprec = precsym;
}

/* Attach the dynamic precedence DPREC (%dprec), declared at LOC, to
   the current rule.  Warn when the parser is not a GLR parser.
   Complain when DPREC is not positive or, otherwise, when the rule
   already has a dynamic precedence.  DPREC is stored in the rule
   whatever diagnostics were issued.  */

void
grammar_current_rule_dprec_set (int dprec, location loc)
{
  symbol_list *rule = current_rule;

  if (! glr_parser)
    warn_at (loc, _("%s affects only GLR parsers"), "%dprec");

  if (0 < dprec)
    {
      /* A valid value: it must be the first one for this rule.  */
      if (rule->dprec != 0)
        complain_at (loc, _("only one %s allowed per rule"), "%dprec");
    }
  else
    complain_at (loc, _("%s must be followed by positive number"), "%dprec");

  rule->dprec = dprec;
}

/* Attach the merge function named NAME (%merge), declared at LOC, to
   the current rule.  Warn when the parser is not a GLR parser, and
   complain when the rule already has a merge function.  The index
   returned by get_merge_function and LOC are stored in the rule in
   any case.  */

void
grammar_current_rule_merge_set (uniqstr name, location loc)
{
  symbol_list *rule = current_rule;

  if (! glr_parser)
    warn_at (loc, _("%s affects only GLR parsers"), "%merge");

  if (rule->merger != 0)
    {
      complain_at (loc, _("only one %s allowed per rule"), "%merge");
    }

  rule->merger = get_merge_function (name);
  rule->merger_declaration_location = loc;
}

/* Append SYM, found at LOC, to the right-hand side of the current
   rule.  If the rule already holds an action, that action was in
   fact a mid-rule action: turn it into one first.  */

void
grammar_current_rule_symbol_append (symbol *sym, location loc)
{
  if (current_rule->action != NULL)
    {
      grammar_midrule_action ();
    }

  grammar_symbol_append (sym, loc);
}

/* Make ACTION, found at LOC, the action of the current rule.  If the
   rule already holds an action, that earlier action was in fact a
   mid-rule action: turn it into one first.  */

void
grammar_current_rule_action_append (const char *action, location loc)
{
  if (current_rule->action != NULL)
    {
      grammar_midrule_action ();
    }

  /* The action is stored as is.  After all symbol declarations have
     been parsed, packgram invokes translate_rule_action.  */
  current_rule->action_location = loc;
  current_rule->action = action;
}

\f
/*---------------------------------------------------------------.
| Convert the rules into the representation using RRHS, RLHS and |
| RITEM.                                                         |
`---------------------------------------------------------------*/

/* Walk GRAMMAR once, rule by rule.  For each rule: record the result
   type of its merge function, fill in its entry of RULES (translating
   its action, if any), check it unless it is the first rule, and copy
   the numbers of its rhs symbols into RITEM, followed by the rule's
   own number.  RITEM and RULES are allocated here, using the NRITEMS
   and NRULES counts maintained while GRAMMAR was built.  */

static void
packgram (void)
{
  unsigned int itemno = 0;
  rule_number ruleno = 0;
  symbol_list *p = grammar;

  /* One extra slot for the sentinel stored below.  */
  ritem = xnmalloc (nritems + 1, sizeof *ritem);

  /* This sentinel is used by build_relations in gram.c.  RITEM is
     moved past it, so ITEMNO 0 designates the first real item.  */
  *ritem++ = 0;

  rules = xnmalloc (nrules, sizeof *rules);

  while (p)
    {
      /* Here P is the node holding the lhs of the rule.  */
      int rule_length = 0;
      symbol *ruleprec = p->ruleprec;
      record_merge_function_type (p->merger, p->content.sym->type_name,
				  p->merger_declaration_location);
      rules[ruleno].user_number = ruleno;
      rules[ruleno].number = ruleno;
      rules[ruleno].lhs = p->content.sym;
      rules[ruleno].rhs = ritem + itemno;
      rules[ruleno].prec = NULL;
      rules[ruleno].dprec = p->dprec;
      rules[ruleno].merger = p->merger;
      rules[ruleno].precsym = NULL;
      rules[ruleno].location = p->location;
      rules[ruleno].useful = true;
      rules[ruleno].action = p->action ? translate_rule_action (p) : NULL;
      rules[ruleno].action_location = p->action_location;

      /* If this rule contains midrules, rest assured that
	 grammar_midrule_action inserted the midrules into grammar before this
	 rule.  Thus, the midrule actions have already been scanned in order to
	 set `used' flags for this rule's rhs, so grammar_rule_check will work
	 properly.  */
      /* Don't check the generated rule 0.  It has no action, so some rhs
	 symbols may appear unused, but the parsing algorithm ensures that
	 %destructor's are invoked appropriately.  */
      if (p != grammar)
	grammar_rule_check (p);

      /* Copy the rhs: every node up to the null-symbol link that ends
	 the rule.  */
      for (p = p->next; p && p->content.sym; p = p->next)
	{
	  ++rule_length;

	  /* Don't allow rule_length == INT_MAX, since that might
	     cause confusion with strtol if INT_MAX == LONG_MAX.  */
	  if (rule_length == INT_MAX)
	      fatal_at (rules[ruleno].location, _("rule is too long"));

	  /* item_number = symbol_number.
	     But the former needs to contain more: negative rule numbers. */
	  ritem[itemno++] =
            symbol_number_as_item_number (p->content.sym->number);
	  /* A rule gets by default the precedence and associativity
	     of its last token.  */
	  if (p->content.sym->class == token_sym && default_prec)
	    rules[ruleno].prec = p->content.sym;
	}

      /* If this rule has a %prec,
         the specified symbol's precedence replaces the default.  */
      if (ruleprec)
	{
	  rules[ruleno].precsym = ruleprec;
	  rules[ruleno].prec = ruleprec;
	}
      /* An item ends by the rule number (negated).  */
      ritem[itemno++] = rule_number_as_item_number (ruleno);
      assert (itemno < ITEM_NUMBER_MAX);
      ++ruleno;
      assert (ruleno < RULE_NUMBER_MAX);

      /* Step over the null-symbol link that ends the rule, so that P
	 is the lhs of the next rule, if any.  */
      if (p)
	p = p->next;
    }

  /* Every item counted while reading the grammar has been stored.  */
  assert (itemno == nritems);

  if (trace_flag & trace_sets)
    ritem_print (stderr);
}
\f
/* Read in the grammar specification and record it in the format
   described in gram.h.  The predefined symbols are created first,
   then the grammar file is opened and parsed.  The result is checked
   and converted to its internal form only if no complaint was issued
   along the way.  */

void
reader (void)
{
  /* Set up the obstacks receiving the two prologues.  */
  obstack_init (&pre_prologue_obstack);
  obstack_init (&post_prologue_obstack);

  /* Set up the symbol table.  */
  symbols_new ();

  /* The accept symbol is the first nonterminal.  */
  accept = symbol_get ("$accept", empty_location);
  accept->class = nterm_sym;
  accept->number = nvars;
  ++nvars;

  /* The error token.  */
  errtoken = symbol_get ("error", empty_location);
  errtoken->class = token_sym;
  errtoken->number = ntokens;
  ++ntokens;

  /* The token standing for all the undefined literal tokens; it is
     numbered right after the error token.  */
  undeftoken = symbol_get ("$undefined", empty_location);
  undeftoken->class = token_sym;
  undeftoken->number = ntokens;
  ++ntokens;

  /* Scan and parse the grammar file, tracing as requested.  */
  gram_in = xfopen (grammar_file, "r");

  gram__flex_debug = trace_flag & trace_scan;
  gram_debug = trace_flag & trace_parse;
  gram_scanner_initialize ();
  gram_parse ();

  /* A grammar that drew complaints is not worth converting.  */
  if (! complaint_issued)
    {
      check_and_convert_grammar ();
    }

  xfclose (gram_in);
}


/* Check the grammar that has just been read, and convert it to
   internal form.  In order: fail if there is no rule, check that the
   symbols are defined, provide the end token and the start symbol
   when the user did not, prepend the initial rule, pack the symbols
   and the rules, and finally free the symbol_list form of the
   grammar.  */

static void
check_and_convert_grammar (void)
{
  /* A grammar without rules is useless.  */
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */
  symbols_check_defined ();

  /* Provide the end token unless the user defined her own.  */
  if (endtoken == NULL)
    {
      endtoken = symbol_get ("$end", empty_location);
      endtoken->class = token_sym;
      endtoken->number = 0;
      /* Value specified by POSIX.  */
      endtoken->user_token_number = 0;
    }

  /* Without %start, the start symbol is the lhs of the first rule
     whose lhs is not a dummy symbol.  */
  if (!start_flag)
    {
      symbol_list *node = grammar;

      while (node != NULL && symbol_is_dummy (node->content.sym))
        {
          /* Run through the rhs of this rule, up to the null-symbol
             link that ends it...  */
          node = node->next;
          while (node != NULL && node->content.sym != NULL)
            node = node->next;
          /* ... and move on to the lhs of the next rule.  */
          node = node->next;
        }

      assert (node != NULL);
      grammar_start_symbol_set (node->content.sym,
                                node->content.sym->location);
    }

  /* Insert the initial rule, whose line is that of the first rule
     (not that of the start symbol):

     accept: %start EOF.  */
  {
    symbol_list *lhs = symbol_list_sym_new (accept, empty_location);
    symbol_list *start = symbol_list_sym_new (startsymbol, empty_location);
    symbol_list *end = symbol_list_sym_new (endtoken, empty_location);
    symbol_list *stop = symbol_list_sym_new (NULL, empty_location);

    lhs->location = grammar->location;

    /* Chain the four nodes in front of the existing rules.  */
    lhs->next = start;
    start->next = end;
    end->next = stop;
    stop->next = grammar;
    grammar = lhs;

    /* One more rule, and three more items: its lhs and the two
       symbols of its rhs.  */
    nrules += 1;
    nritems += 3;
  }

  assert (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);

  /* Assign the symbols their symbol numbers.  Write #defines for the
     token symbols into FDEFINES if requested.  */
  symbols_pack ();

  /* Convert the grammar into the format described in gram.h.  */
  packgram ();

  /* The grammar as a symbol_list is no longer needed. */
  LIST_FREE (symbol_list, grammar);
}
Commit	Line	Data
	1	/* Input parser for Bison
	2
	3	Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
	4	2005, 2006 Free Software Foundation, Inc.
	5
	6	This file is part of Bison, the GNU Compiler Compiler.
	7
	8	Bison is free software; you can redistribute it and/or modify
	9	it under the terms of the GNU General Public License as published by
	10	the Free Software Foundation; either version 2, or (at your option)
	11	any later version.
	12
	13	Bison is distributed in the hope that it will be useful,
	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	GNU General Public License for more details.
	17
	18	You should have received a copy of the GNU General Public License
	19	along with Bison; see the file COPYING. If not, write to
	20	the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	21	Boston, MA 02110-1301, USA. */
	22
	23	#include <config.h>
	24	#include "system.h"
	25	#include <assert.h>
	26
	27	#include <quotearg.h>
	28
	29	#include "complain.h"
	30	#include "conflicts.h"
	31	#include "files.h"
	32	#include "getargs.h"
	33	#include "gram.h"
	34	#include "muscle_tab.h"
	35	#include "reader.h"
	36	#include "symlist.h"
	37	#include "symtab.h"
	38	#include "scan-gram.h"
	39	#include "scan-code.h"
	40
	41	static void check_and_convert_grammar (void);
	42
	43	static symbol_list *grammar = NULL;
	44	static bool start_flag = false;
	45	merger_list *merge_functions;
	46
	47	/* Was %union seen? */
	48	bool union_seen = false;
	49
	50	/* Was a tag seen? */
	51	bool tag_seen = false;
	52
	53	/* Should rules have a default precedence? */
	54	bool default_prec = true;
	55	\f
	56	/*-----------------------.
	57	\| Set the start symbol. \|
	58	`-----------------------*/
	59
	60	void
	61	grammar_start_symbol_set (symbol *sym, location loc)
	62	{
	63	if (start_flag)
	64	complain_at (loc, _("multiple %s declarations"), "%start");
	65	else
	66	{
	67	start_flag = true;
	68	startsymbol = sym;
	69	startsymbol_location = loc;
	70	}
	71	}
	72
	73
	74	/*---------------------------------------------------------------------.
	75	\| There are two prologues: one before the first %union and one after. \|
	76	\| Augment the one specified by POST. \|
	77	`---------------------------------------------------------------------*/
	78
	79	void
	80	prologue_augment (const char *prologue, location loc, bool post)
	81	{
	82	struct obstack *oout =
	83	!post ? &pre_prologue_obstack : &post_prologue_obstack;
	84
	85	obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line);
	86	/* FIXME: Protection of M4 characters missing here. See
	87	output.c:escaped_output. */
	88	MUSCLE_OBSTACK_SGROW (oout,
	89	quotearg_style (c_quoting_style, loc.start.file));
	90	obstack_sgrow (oout, "]])[\n");
	91	obstack_sgrow (oout, prologue);
	92	}
	93
	94	\f
	95
	96	/*------------------------------------------------------------------------.
	97	\| Return the merger index for a merging function named NAME. Records the \|
	98	\| function, if new, in MERGER_LIST. \|
	99	`------------------------------------------------------------------------*/
	100
	101	static int
	102	get_merge_function (uniqstr name)
	103	{
	104	merger_list *syms;
	105	merger_list head;
	106	int n;
	107
	108	if (! glr_parser)
	109	return 0;
	110
	111	head.next = merge_functions;
	112	for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
	113	if (UNIQSTR_EQ (name, syms->next->name))
	114	break;
	115	if (syms->next == NULL)
	116	{
	117	syms->next = xmalloc (sizeof syms->next[0]);
	118	syms->next->name = uniqstr_new (name);
	119	/* After all symbol type declarations have been parsed, packgram invokes
	120	record_merge_function_type to set the type. */
	121	syms->next->type = NULL;
	122	syms->next->next = NULL;
	123	merge_functions = head.next;
	124	}
	125	return n;
	126	}
	127
	128	/*-------------------------------------------------------------------------.
	129	\| For the existing merging function with index MERGER, record the result \|
	130	\| type as TYPE as required by the lhs of the rule whose %merge declaration \|
	131	\| is at DECLARATION_LOC. \|
	132	`-------------------------------------------------------------------------*/
	133
	134	static void
	135	record_merge_function_type (int merger, uniqstr type, location declaration_loc)
	136	{
	137	int merger_find;
	138	merger_list *merge_function;
	139
	140	if (merger <= 0)
	141	return;
	142
	143	if (type == NULL)
	144	type = uniqstr_new ("");
	145
	146	merger_find = 1;
	147	for (merge_function = merge_functions;
	148	merge_function != NULL && merger_find != merger;
	149	merge_function = merge_function->next)
	150	merger_find += 1;
	151	assert (merge_function != NULL && merger_find == merger);
	152	if (merge_function->type != NULL && !UNIQSTR_EQ (merge_function->type, type))
	153	{
	154	complain_at (declaration_loc,
	155	_("result type clash on merge function `%s': <%s> != <%s>"),
	156	merge_function->name, type, merge_function->type);
	157	complain_at (merge_function->type_declaration_location,
	158	_("previous declaration"));
	159	}
	160	merge_function->type = uniqstr_new (type);
	161	merge_function->type_declaration_location = declaration_loc;
	162	}
	163
	164	/*--------------------------------------.
	165	\| Free all merge-function definitions. \|
	166	`--------------------------------------*/
	167
	168	void
	169	free_merger_functions (void)
	170	{
	171	merger_list *L0 = merge_functions;
	172	while (L0)
	173	{
	174	merger_list *L1 = L0->next;
	175	free (L0);
	176	L0 = L1;
	177	}
	178	}
	179
	180	\f
	181	/*-------------------------------------------------------------------.
	182	\| Parse the input grammar into a one symbol_list structure. Each \|
	183	\| rule is represented by a sequence of symbols: the left hand side \|
	184	\| followed by the contents of the right hand side, followed by a \|
	185	\| null pointer instead of a symbol to terminate the rule. The next \|
	186	\| symbol is the lhs of the following rule. \|
	187	\| \|
	188	\| All actions are copied out, labelled by the rule number they apply \|
	189	\| to. \|
	190	`-------------------------------------------------------------------*/
	191
	192	/* The (currently) last symbol of GRAMMAR. */
	193	static symbol_list *grammar_end = NULL;
	194
	195	/* Append SYM to the grammar. */
	196	static void
	197	grammar_symbol_append (symbol *sym, location loc)
	198	{
	199	symbol_list *p = symbol_list_sym_new (sym, loc);
	200
	201	if (grammar_end)
	202	grammar_end->next = p;
	203	else
	204	grammar = p;
	205
	206	grammar_end = p;
	207
	208	/* A null SYM stands for an end of rule; it is not an actual
	209	part of it. */
	210	if (sym)
	211	++nritems;
	212	}
	213
	214	/* The rule currently being defined, and the previous rule.
	215	CURRENT_RULE points to the first LHS of the current rule, while
	216	PREVIOUS_RULE_END points to the end of the previous rule (NULL). */
	217	static symbol_list *current_rule = NULL;
	218	static symbol_list *previous_rule_end = NULL;
	219
	220
	221	/*----------------------------------------------.
	222	\| Create a new rule for LHS in to the GRAMMAR. \|
	223	`----------------------------------------------*/
	224
	225	void
	226	grammar_current_rule_begin (symbol *lhs, location loc)
	227	{
	228	/* Start a new rule and record its lhs. */
	229	++nrules;
	230	previous_rule_end = grammar_end;
	231	grammar_symbol_append (lhs, loc);
	232	current_rule = grammar_end;
	233
	234	/* Mark the rule's lhs as a nonterminal if not already so. */
	235	if (lhs->class == unknown_sym)
	236	{
	237	lhs->class = nterm_sym;
	238	lhs->number = nvars;
	239	++nvars;
	240	}
	241	else if (lhs->class == token_sym)
	242	complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
	243	}
	244
	245
	246	/*----------------------------------------------------------------------.
	247	\| A symbol should be used if it has a destructor, or if it is a \|
	248	\| mid-rule symbol (i.e., the generated LHS replacing a mid-rule \|
	249	\| action) that was assigned to, as in "exp: { $$ = 1; } { $$ = $1; }". \|
	250	`----------------------------------------------------------------------*/
	251
	252	static bool
	253	symbol_should_be_used (symbol_list const *s)
	254	{
	255	return (symbol_destructor_get (s->content.sym)
	256	\|\| (s->midrule && s->midrule->used));
	257	}
	258
	259	/*----------------------------------------------------------------.
	260	\| Check that the rule R is properly defined. For instance, there \|
	261	\| should be no type clash on the default action. \|
	262	`----------------------------------------------------------------*/
	263
	264	static void
	265	grammar_rule_check (const symbol_list *r)
	266	{
	267	/* Type check.
	268
	269	If there is an action, then there is nothing we can do: the user
	270	is allowed to shoot herself in the foot.
	271
	272	Don't worry about the default action if $$ is untyped, since $$'s
	273	value can't be used. */
	274	if (!r->action && r->content.sym->type_name)
	275	{
	276	symbol *first_rhs = r->next->content.sym;
	277	/* If $$ is being set in default way, report if any type mismatch. */
	278	if (first_rhs)
	279	{
	280	char const *lhs_type = r->content.sym->type_name;
	281	const char *rhs_type =
	282	first_rhs->type_name ? first_rhs->type_name : "";
	283	if (!UNIQSTR_EQ (lhs_type, rhs_type))
	284	warn_at (r->location,
	285	_("type clash on default action: <%s> != <%s>"),
	286	lhs_type, rhs_type);
	287	}
	288	/* Warn if there is no default for $$ but we need one. */
	289	else
	290	warn_at (r->location,
	291	_("empty rule for typed nonterminal, and no action"));
	292	}
	293
	294	/* Check that symbol values that should be used are in fact used. */
	295	{
	296	symbol_list const *l = r;
	297	int n = 0;
	298	for (; l && l->content.sym; l = l->next, ++n)
	299	if (! (l->used
	300	\|\| !symbol_should_be_used (l)
	301	/* The default action, $$ = $1, `uses' both. */
	302	\|\| (!r->action && (n == 0 \|\| n == 1))))
	303	{
	304	if (n)
	305	warn_at (r->location, _("unused value: $%d"), n);
	306	else
	307	warn_at (r->location, _("unset value: $$"));
	308	}
	309	}
	310	}
	311
	312
	313	/*-------------------------------------.
	314	\| End the currently being grown rule. \|
	315	`-------------------------------------*/
	316
	317	void
	318	grammar_current_rule_end (location loc)
	319	{
	320	/* Put an empty link in the list to mark the end of this rule */
	321	grammar_symbol_append (NULL, grammar_end->location);
	322	current_rule->location = loc;
	323	}
	324
	325
	326	/*-------------------------------------------------------------------.
	327	\| The previous action turns out the be a mid-rule action. Attach it \|
	328	\| to the current rule, i.e., create a dummy symbol, attach it this \|
	329	\| mid-rule action, and append this dummy nonterminal to the current \|
	330	\| rule. \|
	331	`-------------------------------------------------------------------*/
	332
	333	void
	334	grammar_midrule_action (void)
	335	{
	336	/* Since the action was written out with this rule's number, we must
	337	give the new rule this number by inserting the new rule before
	338	it. */
	339
	340	/* Make a DUMMY nonterminal, whose location is that of the midrule
	341	action. Create the MIDRULE. */
	342	location dummy_location = current_rule->action_location;
	343	symbol *dummy = dummy_symbol_get (dummy_location);
	344	symbol_list *midrule = symbol_list_sym_new (dummy, dummy_location);
	345
	346	/* Make a new rule, whose body is empty, before the current one, so
	347	that the action just read can belong to it. */
	348	++nrules;
	349	++nritems;
	350	/* Attach its location and actions to that of the DUMMY. */
	351	midrule->location = dummy_location;
	352	midrule->action = current_rule->action;
	353	midrule->action_location = dummy_location;
	354	current_rule->action = NULL;
	355	/* The action has not been translated yet, so $$ use hasn't been
	356	detected yet. */
	357	midrule->used = false;
	358
	359	if (previous_rule_end)
	360	previous_rule_end->next = midrule;
	361	else
	362	grammar = midrule;
	363
	364	/* End the dummy's rule. */
	365	midrule->next = symbol_list_sym_new (NULL, dummy_location);
	366	midrule->next->next = current_rule;
	367
	368	previous_rule_end = midrule->next;
	369
	370	/* Insert the dummy nonterminal replacing the midrule action into
	371	the current rule. Bind it to its dedicated rule. */
	372	grammar_current_rule_symbol_append (dummy, dummy_location);
	373	grammar_end->midrule = midrule;
	374	midrule->midrule_parent_rule = current_rule;
	375	midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next);
	376	}
	377
	378	/* Set the precedence symbol of the current rule to PRECSYM. */
	379
	380	void
	381	grammar_current_rule_prec_set (symbol *precsym, location loc)
	382	{
	383	if (current_rule->ruleprec)
	384	complain_at (loc, _("only one %s allowed per rule"), "%prec");
	385	current_rule->ruleprec = precsym;
	386	}
	387
	388	/* Attach dynamic precedence DPREC to the current rule. */
	389
	390	void
	391	grammar_current_rule_dprec_set (int dprec, location loc)
	392	{
	393	if (! glr_parser)
	394	warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
	395	if (dprec <= 0)
	396	complain_at (loc, _("%s must be followed by positive number"), "%dprec");
	397	else if (current_rule->dprec != 0)
	398	complain_at (loc, _("only one %s allowed per rule"), "%dprec");
	399	current_rule->dprec = dprec;
	400	}
	401
	402	/* Attach a merge function NAME with argument type TYPE to current
	403	rule. */
	404
	405	void
	406	grammar_current_rule_merge_set (uniqstr name, location loc)
	407	{
	408	if (! glr_parser)
	409	warn_at (loc, _("%s affects only GLR parsers"), "%merge");
	410	if (current_rule->merger != 0)
	411	complain_at (loc, _("only one %s allowed per rule"), "%merge");
	412	current_rule->merger = get_merge_function (name);
	413	current_rule->merger_declaration_location = loc;
	414	}
	415
	416	/* Attach SYM to the current rule. If needed, move the previous
	417	action as a mid-rule action. */
	418
	419	void
	420	grammar_current_rule_symbol_append (symbol *sym, location loc)
	421	{
	422	if (current_rule->action)
	423	grammar_midrule_action ();
	424	grammar_symbol_append (sym, loc);
	425	}
	426
	427	/* Attach an ACTION to the current rule. */
	428
	429	void
	430	grammar_current_rule_action_append (const char *action, location loc)
	431	{
	432	if (current_rule->action)
	433	grammar_midrule_action ();
	434	/* After all symbol declarations have been parsed, packgram invokes
	435	translate_rule_action. */
	436	current_rule->action = action;
	437	current_rule->action_location = loc;
	438	}
	439
	440	\f
	441	/*---------------------------------------------------------------.
	442	\| Convert the rules into the representation using RRHS, RLHS and \|
	443	\| RITEM. \|
	444	`---------------------------------------------------------------*/
	445
	446	static void
	447	packgram (void)
	448	{
	449	unsigned int itemno = 0;
	450	rule_number ruleno = 0;
	451	symbol_list *p = grammar;
	452
	453	ritem = xnmalloc (nritems + 1, sizeof *ritem);
	454
	455	/* This sentinel is used by build_relations in gram.c. */
	456	*ritem++ = 0;
	457
	458	rules = xnmalloc (nrules, sizeof *rules);
	459
	460	while (p)
	461	{
	462	int rule_length = 0;
	463	symbol *ruleprec = p->ruleprec;
	464	record_merge_function_type (p->merger, p->content.sym->type_name,
	465	p->merger_declaration_location);
	466	rules[ruleno].user_number = ruleno;
	467	rules[ruleno].number = ruleno;
	468	rules[ruleno].lhs = p->content.sym;
	469	rules[ruleno].rhs = ritem + itemno;
	470	rules[ruleno].prec = NULL;
	471	rules[ruleno].dprec = p->dprec;
	472	rules[ruleno].merger = p->merger;
	473	rules[ruleno].precsym = NULL;
	474	rules[ruleno].location = p->location;
	475	rules[ruleno].useful = true;
	476	rules[ruleno].action = p->action ? translate_rule_action (p) : NULL;
	477	rules[ruleno].action_location = p->action_location;
	478
	479	/* If this rule contains midrules, rest assured that
	480	grammar_midrule_action inserted the midrules into grammar before this
	481	rule. Thus, the midrule actions have already been scanned in order to
	482	set `used' flags for this rule's rhs, so grammar_rule_check will work
	483	properly. */
	484	/* Don't check the generated rule 0. It has no action, so some rhs
	485	symbols may appear unused, but the parsing algorithm ensures that
	486	%destructor's are invoked appropriately. */
	487	if (p != grammar)
	488	grammar_rule_check (p);
	489
	490	for (p = p->next; p && p->content.sym; p = p->next)
	491	{
	492	++rule_length;
	493
	494	/* Don't allow rule_length == INT_MAX, since that might
	495	cause confusion with strtol if INT_MAX == LONG_MAX. */
	496	if (rule_length == INT_MAX)
	497	fatal_at (rules[ruleno].location, _("rule is too long"));
	498
	499	/* item_number = symbol_number.
	500	But the former needs to contain more: negative rule numbers. */
	501	ritem[itemno++] =
	502	symbol_number_as_item_number (p->content.sym->number);
	503	/* A rule gets by default the precedence and associativity
	504	of its last token. */
	505	if (p->content.sym->class == token_sym && default_prec)
	506	rules[ruleno].prec = p->content.sym;
	507	}
	508
	509	/* If this rule has a %prec,
	510	the specified symbol's precedence replaces the default. */
	511	if (ruleprec)
	512	{
	513	rules[ruleno].precsym = ruleprec;
	514	rules[ruleno].prec = ruleprec;
	515	}
	516	/* An item ends by the rule number (negated). */
	517	ritem[itemno++] = rule_number_as_item_number (ruleno);
	518	assert (itemno < ITEM_NUMBER_MAX);
	519	++ruleno;
	520	assert (ruleno < RULE_NUMBER_MAX);
	521
	522	if (p)
	523	p = p->next;
	524	}
	525
	526	assert (itemno == nritems);
	527
	528	if (trace_flag & trace_sets)
	529	ritem_print (stderr);
	530	}
	531	\f
	532	/*------------------------------------------------------------------.
	533	\| Read in the grammar specification and record it in the format \|
	534	\| described in gram.h. All actions are copied into ACTION_OBSTACK, \|
	535	\| in each case forming the body of a C function (YYACTION) which \|
	536	\| contains a switch statement to decide which action to execute. \|
	537	`------------------------------------------------------------------*/
	538
	539	void
	540	reader (void)
	541	{
	542	/* Initialize the symbol table. */
	543	symbols_new ();
	544
	545	/* Construct the accept symbol. */
	546	accept = symbol_get ("$accept", empty_location);
	547	accept->class = nterm_sym;
	548	accept->number = nvars++;
	549
	550	/* Construct the error token */
	551	errtoken = symbol_get ("error", empty_location);
	552	errtoken->class = token_sym;
	553	errtoken->number = ntokens++;
	554
	555	/* Construct a token that represents all undefined literal tokens.
	556	It is always token number 2. */
	557	undeftoken = symbol_get ("$undefined", empty_location);
	558	undeftoken->class = token_sym;
	559	undeftoken->number = ntokens++;
	560
	561	/* Initialize the obstacks. */
	562	obstack_init (&pre_prologue_obstack);
	563	obstack_init (&post_prologue_obstack);
	564
	565	gram_in = xfopen (grammar_file, "r");
	566
	567	gram__flex_debug = trace_flag & trace_scan;
	568	gram_debug = trace_flag & trace_parse;
	569	gram_scanner_initialize ();
	570	gram_parse ();
	571
	572	if (! complaint_issued)
	573	check_and_convert_grammar ();
	574
	575	xfclose (gram_in);
	576	}
	577
	578
	579	/*-------------------------------------------------------------.
	580	\| Check the grammar that has just been read, and convert it to \|
	581	\| internal form. \|
	582	`-------------------------------------------------------------*/
	583
	584	static void
	585	check_and_convert_grammar (void)
	586	{
	587	/* Grammar has been read. Do some checking. */
	588	if (nrules == 0)
	589	fatal (_("no rules in the input grammar"));
	590
	591	/* Report any undefined symbols and consider them nonterminals. */
	592	symbols_check_defined ();
	593
	594	/* If the user did not define her ENDTOKEN, do it now. */
	595	if (!endtoken)
	596	{
	597	endtoken = symbol_get ("$end", empty_location);
	598	endtoken->class = token_sym;
	599	endtoken->number = 0;
	600	/* Value specified by POSIX. */
	601	endtoken->user_token_number = 0;
	602	}
	603
	604	/* Find the start symbol if no %start. */
	605	if (!start_flag)
	606	{
	607	symbol_list *node;
	608	for (node = grammar;
	609	node != NULL && symbol_is_dummy (node->content.sym);
	610	node = node->next)
	611	{
	612	for (node = node->next;
	613	node != NULL && node->content.sym != NULL;
	614	node = node->next)
	615	;
	616	}
	617	assert (node != NULL);
	618	grammar_start_symbol_set (node->content.sym,
	619	node->content.sym->location);
	620	}
	621
	622	/* Insert the initial rule, whose line is that of the first rule
	623	(not that of the start symbol):
	624
	625	accept: %start EOF. */
	626	{
	627	symbol_list *p = symbol_list_sym_new (accept, empty_location);
	628	p->location = grammar->location;
	629	p->next = symbol_list_sym_new (startsymbol, empty_location);
	630	p->next->next = symbol_list_sym_new (endtoken, empty_location);
	631	p->next->next->next = symbol_list_sym_new (NULL, empty_location);
	632	p->next->next->next->next = grammar;
	633	nrules += 1;
	634	nritems += 3;
	635	grammar = p;
	636	}
	637
	638	assert (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);
	639
	640	/* Assign the symbols their symbol numbers. Write #defines for the
	641	token symbols into FDEFINES if requested. */
	642	symbols_pack ();
	643
	644	/* Convert the grammar into the format described in gram.h. */
	645	packgram ();
	646
	647	/* The grammar as a symbol_list is no longer needed. */
	648	LIST_FREE (symbol_list, grammar);
	649	}