src/reader.c

/* Input parser for Bison

   Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
   2005, 2006 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

#include <config.h>
#include "system.h"
#include <assert.h>

#include <quotearg.h>

#include "complain.h"
#include "conflicts.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "muscle_tab.h"
#include "reader.h"
#include "symlist.h"
#include "symtab.h"
#include "scan-gram.h"
#include "scan-code.h"

/* Defined at the end of this file; called by reader when no complaint
   was issued while parsing.  */
static void check_and_convert_grammar (void);

/* First node of the symbol_list holding the grammar being read.  */
static symbol_list *grammar = NULL;
/* Whether a start symbol has been recorded, either by
   grammar_start_symbol_set or by the first call to
   grammar_current_rule_begin.  */
static bool start_flag = false;
/* List of the merge functions recorded by get_merge_function.  */
merger_list *merge_functions;

/* Was %union seen?  */
bool typed = false;

/* Should rules have a default precedence?  */
bool default_prec = true;
\f
/* Record SYM as the start symbol of the grammar, together with the
   location LOC of its declaration.  If a start symbol has already
   been recorded (START_FLAG is set), leave it untouched and complain
   at LOC instead.  */

void
grammar_start_symbol_set (symbol *sym, location loc)
{
  if (!start_flag)
    {
      startsymbol = sym;
      startsymbol_location = loc;
      start_flag = true;
      return;
    }

  /* A start symbol is already known: keep it and report the clash.  */
  complain_at (loc, _("multiple %s declarations"), "%start");
}


/* There are two prologues: one before the first %union and one after.
   Append PROLOGUE to the second one if POST, to the first one
   otherwise.  The text is preceded by a b4_syncline invocation built
   from the starting line and file name of LOC.  */

void
prologue_augment (const char *prologue, location loc, bool post)
{
  struct obstack *target;

  if (post)
    target = &post_prologue_obstack;
  else
    target = &pre_prologue_obstack;

  /* Emit the synchronization line first...  */
  obstack_fgrow1 (target, "]b4_syncline(%d, [[", loc.start.line);
  /* FIXME: Protection of M4 characters missing here.  See
     output.c:escaped_output.  */
  MUSCLE_OBSTACK_SGROW (target,
			quotearg_style (c_quoting_style, loc.start.file));
  obstack_sgrow (target, "]])[\n");

  /* ...then the prologue text itself.  */
  obstack_sgrow (target, prologue);
}

\f

/* Return the index of the merging function named NAME in
   MERGE_FUNCTIONS, counting from 1.  If no function of that name has
   been recorded yet, append a new entry (with a null type) at the end
   of the list and return its index.  Return 0 without touching the
   list when not generating a GLR parser.  */

static int
get_merge_function (uniqstr name)
{
  merger_list **link;
  merger_list *fresh;
  int index = 1;

  if (! glr_parser)
    return 0;

  /* Walk the links rather than the nodes, so that appending works the
     same way for an empty list and for a non-empty one.  */
  for (link = &merge_functions; *link != NULL; link = &(*link)->next)
    {
      if (UNIQSTR_EQ (name, (*link)->name))
	return index;
      ++index;
    }

  fresh = xmalloc (sizeof *fresh);
  fresh->name = uniqstr_new (name);
  /* After all symbol type declarations have been parsed, packgram invokes
     record_merge_function_type to set the type.  */
  fresh->type = NULL;
  fresh->next = NULL;
  *link = fresh;
  return index;
}

/* For the existing merging function with index MERGER (counting from
   1 in MERGE_FUNCTIONS), record TYPE as its result type, as required
   by the lhs of the rule whose %merge declaration is at
   DECLARATION_LOC.  A null TYPE is recorded as the empty string.
   Complain if a different type was recorded earlier.  Do nothing when
   MERGER is not positive.  */

static void
record_merge_function_type (int merger, uniqstr type, location declaration_loc)
{
  merger_list *entry;
  int position;

  if (merger <= 0)
    return;

  if (type == NULL)
    type = uniqstr_new ("");

  /* Locate the MERGER-th entry of the list.  */
  entry = merge_functions;
  for (position = 1; entry != NULL && position != merger; ++position)
    entry = entry->next;
  assert (entry != NULL && position == merger);

  if (entry->type != NULL)
    {
      if (!UNIQSTR_EQ (entry->type, type))
	{
	  complain_at (declaration_loc,
		       _("result type clash on merge function `%s': <%s> != <%s>"),
		       entry->name, type, entry->type);
	  complain_at (entry->type_declaration_location,
		       _("previous declaration"));
	}
    }

  /* The latest declaration always wins, clash or not.  */
  entry->type = uniqstr_new (type);
  entry->type_declaration_location = declaration_loc;
}

/* Free every node of MERGE_FUNCTIONS and leave the list empty.  Only
   the list nodes are released here; the strings they refer to are not
   freed by this function.

   MERGE_FUNCTIONS is reset to NULL before the nodes are released, so
   that the global never designates freed memory: calling this function
   again, or walking the list afterwards, is then harmless instead of
   being a double free or a use after free.  */

void
free_merger_functions (void)
{
  merger_list *node = merge_functions;

  merge_functions = NULL;
  while (node)
    {
      /* Fetch the successor before NODE is released.  */
      merger_list *next = node->next;
      free (node);
      node = next;
    }
}

\f
/*-------------------------------------------------------------------.
| Parse the input grammar into one symbol_list structure.  Each      |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| All actions are copied out, labelled by the rule number they apply |
| to.                                                                |
`-------------------------------------------------------------------*/

/* The (currently) last node of GRAMMAR, or NULL while GRAMMAR is
   still empty.  */
static symbol_list *grammar_end = NULL;

/* Append a new node for SYM, located at LOC, at the end of GRAMMAR,
   and make GRAMMAR_END designate it.  NRITEMS is incremented unless
   SYM is null.  */
static void
grammar_symbol_append (symbol *sym, location loc)
{
  symbol_list *node = symbol_list_new (sym, loc);

  if (grammar_end == NULL)
    grammar = node;
  else
    grammar_end->next = node;
  grammar_end = node;

  /* A null SYM stands for an end of rule; it is not an actual
     part of it, hence it is not counted.  */
  if (sym != NULL)
    nritems += 1;
}

/* The rule currently being defined, and the previous rule.
   CURRENT_RULE points to the node holding the LHS of the current rule
   (it is set by grammar_current_rule_begin), while PREVIOUS_RULE_END
   points to the node that precedes it in GRAMMAR, i.e., the *end* of
   the previous rule (its null-symbol node).  PREVIOUS_RULE_END is
   NULL when nothing precedes the current rule.  */
static symbol_list *current_rule = NULL;
static symbol_list *previous_rule_end = NULL;


/* Start a new rule for LHS, found at LOC, at the end of GRAMMAR.  If
   no start symbol has been recorded yet, LHS becomes the start
   symbol.  LHS is turned into a nonterminal if its class was still
   unknown; a complaint is issued if it is a token.  */

void
grammar_current_rule_begin (symbol *lhs, location loc)
{
  /* The lhs of the first rule is the default start symbol.  */
  if (!start_flag)
    {
      start_flag = true;
      startsymbol = lhs;
      startsymbol_location = loc;
    }

  /* Remember where the previous rule stops, then open the new rule
     with its lhs.  */
  nrules += 1;
  previous_rule_end = grammar_end;
  grammar_symbol_append (lhs, loc);
  current_rule = grammar_end;

  /* A token cannot be derived; an unknown symbol becomes a
     nonterminal, numbered after the existing ones.  */
  if (lhs->class == token_sym)
    complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
  else if (lhs->class == unknown_sym)
    {
      lhs->class = nterm_sym;
      lhs->number = nvars;
      nvars += 1;
    }
}


/* Return whether the value of the symbol in node S should be used: it
   should if the symbol has a destructor, or if it is a mid-rule
   symbol (i.e., the generated LHS replacing a mid-rule action) that
   was assigned to, as in "exp: { $$ = 1; } { $$ = $1; }".  */

static bool
symbol_should_be_used (symbol_list const *s)
{
  if (s->sym->destructor)
    return true;
  if (!s->midrule)
    return false;
  return s->midrule->used;
}

/*----------------------------------------------------------------.
| Check that the rule R is properly defined.  For instance, there |
| should be no type clash on the default action.                  |
`----------------------------------------------------------------*/

static void
grammar_rule_check (const symbol_list *r)
{
  /* Type check.

     If there is an action, then there is nothing we can do: the user
     is allowed to shoot herself in the foot.

     Don't worry about the default action if $$ is untyped, since $$'s
     value can't be used.  */
  if (!r->action && r->sym->type_name)
    {
      symbol *first_rhs = r->next->sym;
      /* If $$ is being set in default way, report if any type mismatch.  */
      if (first_rhs)
	{
	  char const *lhs_type = r->sym->type_name;
	  const char *rhs_type =
	    first_rhs->type_name ? first_rhs->type_name : "";
	  if (!UNIQSTR_EQ (lhs_type, rhs_type))
	    warn_at (r->location,
		     _("type clash on default action: <%s> != <%s>"),
		     lhs_type, rhs_type);
	}
      /* Warn if there is no default for $$ but we need one.  */
      else
	warn_at (r->location,
		 _("empty rule for typed nonterminal, and no action"));
    }

  /* Check that symbol values that should be used are in fact used.  */
  {
    symbol_list const *l = r;
    int n = 0;
    for (; l && l->sym; l = l->next, ++n)
      if (! (l->used
	     || !symbol_should_be_used (l)
	     /* The default action, $$ = $1, `uses' both.  */
	     || (!r->action && (n == 0 || n == 1))))
	{
	  if (n)
	    warn_at (r->location, _("unused value: $%d"), n);
	  else
	    warn_at (r->location, _("unset value: $$"));
	}
  }
}


/* End the rule currently being grown: append the null-symbol node
   that terminates it, and record LOC as the location of the rule.  */

void
grammar_current_rule_end (location loc)
{
  /* The terminating node takes the location of the node it follows.
     Read it before the rule's own location is updated, since both may
     be the same node.  */
  location last_location = grammar_end->location;

  grammar_symbol_append (NULL, last_location);
  current_rule->location = loc;
}


/*-------------------------------------------------------------------.
| The previous action turns out to be a mid-rule action.  Attach it  |
| to the current rule, i.e., create a dummy symbol, attach this       |
| mid-rule action to it, and append this dummy nonterminal to the     |
| current rule.                                                       |
`-------------------------------------------------------------------*/

void
grammar_midrule_action (void)
{
  /* Since the action was written out with this rule's number, we must
     give the new rule this number by inserting the new rule before
     it.  */

  /* Make a DUMMY nonterminal, whose location is that of the midrule
     action.  Create the MIDRULE.  */
  location dummy_location = current_rule->action_location;
  symbol *dummy = dummy_symbol_get (dummy_location);
  symbol_list *midrule = symbol_list_new (dummy, dummy_location);

  /* Make a new rule, whose body is empty, before the current one, so
     that the action just read can belong to it.  MIDRULE is not
     created through grammar_symbol_append, so its node is counted in
     NRITEMS here.  */
  ++nrules;
  ++nritems;
  /* Attach its location and actions to that of the DUMMY.  The action
     is moved, not copied: the current rule no longer has a pending
     action afterwards.  */
  midrule->location = dummy_location;
  midrule->action = current_rule->action;
  midrule->action_location = dummy_location;
  current_rule->action = NULL;
  /* The action has not been translated yet, so $$ use hasn't been
     detected yet.  */
  midrule->used = false;

  /* Splice MIDRULE in right after whatever precedes the current rule,
     or at the very beginning of GRAMMAR if nothing does.  */
  if (previous_rule_end)
    previous_rule_end->next = midrule;
  else
    grammar = midrule;

  /* End the dummy's rule.  */
  midrule->next = symbol_list_new (NULL, dummy_location);
  midrule->next->next = current_rule;

  /* The dummy's rule is now what immediately precedes the current
     rule, so a later mid-rule action of the same rule is inserted
     after this one.  */
  previous_rule_end = midrule->next;

  /* Insert the dummy nonterminal replacing the midrule action into
     the current rule.  Bind it to its dedicated rule.  The current
     rule's action was cleared above, so this call does not come back
     here.  */
  grammar_current_rule_symbol_append (dummy, dummy_location);
  grammar_end->midrule = midrule;
  midrule->midrule_parent_rule = current_rule;
  /* NOTE(review): presumably the number of rhs nodes appended so far,
     i.e., the position of DUMMY in the parent's rhs -- confirm against
     symbol_list_length.  */
  midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next);
}

/* Make PRECSYM the precedence symbol of the current rule.  Complain
   at LOC if the rule already had one; PRECSYM replaces it anyway.  */

void
grammar_current_rule_prec_set (symbol *precsym, location loc)
{
  bool const already_set = current_rule->ruleprec != NULL;

  if (already_set)
    complain_at (loc, _("only one %s allowed per rule"), "%prec");

  current_rule->ruleprec = precsym;
}

/* Attach the dynamic precedence DPREC, declared at LOC, to the
   current rule.  Warn when not generating a GLR parser; complain if
   DPREC is not positive, or else if the rule already has a dynamic
   precedence.  DPREC is stored in all cases.  */

void
grammar_current_rule_dprec_set (int dprec, location loc)
{
  if (! glr_parser)
    warn_at (loc, _("%s affects only GLR parsers"), "%dprec");

  if (0 < dprec)
    {
      /* A valid value: it must still be the only one for this rule.  */
      if (current_rule->dprec != 0)
	complain_at (loc, _("only one %s allowed per rule"), "%dprec");
    }
  else
    complain_at (loc, _("%s must be followed by positive number"), "%dprec");

  current_rule->dprec = dprec;
}

/* Attach the merge function named NAME, declared at LOC, to the
   current rule.  Warn when not generating a GLR parser, and complain
   if the rule already has a merge function.  The result type of the
   function is not known here; see record_merge_function_type.  */

void
grammar_current_rule_merge_set (uniqstr name, location loc)
{
  if (! glr_parser)
    warn_at (loc, _("%s affects only GLR parsers"), "%merge");

  if (current_rule->merger != 0)
    complain_at (loc, _("only one %s allowed per rule"), "%merge");

  /* Store the index of the function, and where it was requested.  */
  current_rule->merger_declaration_location = loc;
  current_rule->merger = get_merge_function (name);
}

/* Append SYM, found at LOC, to the rhs of the current rule.  If the
   rule has a pending action, that action was actually a mid-rule
   action: convert it first.  */

void
grammar_current_rule_symbol_append (symbol *sym, location loc)
{
  bool const pending_action = current_rule->action != NULL;

  if (pending_action)
    grammar_midrule_action ();

  grammar_symbol_append (sym, loc);
}

/* Make ACTION, found at LOC, the pending action of the current rule.
   If the rule already has a pending action, that one was actually a
   mid-rule action: convert it first.  */

void
grammar_current_rule_action_append (const char *action, location loc)
{
  bool const pending_action = current_rule->action != NULL;

  if (pending_action)
    grammar_midrule_action ();

  /* The action is stored untranslated: after all symbol declarations
     have been parsed, packgram invokes translate_rule_action.  */
  current_rule->action_location = loc;
  current_rule->action = action;
}

\f
/*---------------------------------------------------------------.
| Convert the rules into the representation using RRHS, RLHS and |
| RITEM.                                                         |
|                                                                |
| Walk GRAMMAR rule by rule, filling one entry of RULES per rule |
| and writing into RITEM the rhs symbols of each rule followed   |
| by the (negated) rule number.  Merge function types and rule   |
| actions are also settled here, and each rule is checked.       |
`---------------------------------------------------------------*/

static void
packgram (void)
{
  /* Next free slot in RITEM, and number of the rule being packed.  */
  unsigned int itemno = 0;
  rule_number ruleno = 0;
  /* At the top of each iteration below, P is the lhs node of a rule.  */
  symbol_list *p = grammar;

  /* One extra slot for the sentinel stored just below.  */
  ritem = xnmalloc (nritems + 1, sizeof *ritem);

  /* This sentinel is used by build_relations in gram.c.  RITEM is
     left pointing right after it.  */
  *ritem++ = 0;

  rules = xnmalloc (nrules, sizeof *rules);

  while (p)
    {
      int rule_length = 0;
      /* Saved now, since P is advanced through the rhs below.  */
      symbol *ruleprec = p->ruleprec;
      /* Does nothing for rules without a %merge (merger <= 0).  */
      record_merge_function_type (p->merger, p->sym->type_name,
				  p->merger_declaration_location);
      rules[ruleno].user_number = ruleno;
      rules[ruleno].number = ruleno;
      rules[ruleno].lhs = p->sym;
      rules[ruleno].rhs = ritem + itemno;
      rules[ruleno].prec = NULL;
      rules[ruleno].dprec = p->dprec;
      rules[ruleno].merger = p->merger;
      rules[ruleno].precsym = NULL;
      rules[ruleno].location = p->location;
      rules[ruleno].useful = true;
      rules[ruleno].action = p->action ? translate_rule_action (p) : NULL;
      rules[ruleno].action_location = p->action_location;

      /* If this rule contains midrules, rest assured that
	 grammar_midrule_action inserted the midrules into grammar before this
	 rule.  Thus, the midrule actions have already been scanned in order to
	 set `used' flags for this rule's rhs, so grammar_rule_check will work
	 properly.  */
      grammar_rule_check (p);

      /* Pack the rhs: every node up to the null-symbol node that ends
	 the rule.  */
      for (p = p->next; p && p->sym; p = p->next)
	{
	  ++rule_length;

	  /* Don't allow rule_length == INT_MAX, since that might
	     cause confusion with strtol if INT_MAX == LONG_MAX.  */
	  if (rule_length == INT_MAX)
	      fatal_at (rules[ruleno].location, _("rule is too long"));

	  /* item_number = symbol_number.
	     But the former needs to contain more: negative rule numbers. */
	  ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
	  /* A rule gets by default the precedence and associativity
	     of its last token.  */
	  if (p->sym->class == token_sym && default_prec)
	    rules[ruleno].prec = p->sym;
	}

      /* If this rule has a %prec,
         the specified symbol's precedence replaces the default.  */
      if (ruleprec)
	{
	  rules[ruleno].precsym = ruleprec;
	  rules[ruleno].prec = ruleprec;
	}
      /* An item ends by the rule number (negated).  */
      ritem[itemno++] = rule_number_as_item_number (ruleno);
      assert (itemno < ITEM_NUMBER_MAX);
      ++ruleno;
      assert (ruleno < RULE_NUMBER_MAX);

      /* Step over the null-symbol node, onto the lhs of the next rule
	 (if any).  */
      if (p)
	p = p->next;
    }

  /* Every item counted while reading must have been packed.  */
  assert (itemno == nritems);

  if (trace_flag & trace_sets)
    ritem_print (stderr);
}
\f
/* Read in the grammar specification and record it in the format
   described in gram.h.  Create the predefined symbols, initialize the
   prologue obstacks, then open GRAMMAR_FILE and parse it.  The
   grammar is checked and converted to its internal form only if no
   complaint was issued meanwhile.  */

void
reader (void)
{
  /* Initialize the symbol table.  */
  symbols_new ();

  /* The accept symbol is a nonterminal.  */
  accept = symbol_get ("$accept", empty_location);
  accept->class = nterm_sym;
  accept->number = nvars;
  ++nvars;

  /* The error token.  */
  errtoken = symbol_get ("error", empty_location);
  errtoken->class = token_sym;
  errtoken->number = ntokens;
  ++ntokens;

  /* The token that represents all undefined literal tokens.
     It is always token number 2.  */
  undeftoken = symbol_get ("$undefined", empty_location);
  undeftoken->class = token_sym;
  undeftoken->number = ntokens;
  ++ntokens;

  /* The two prologue obstacks, filled by prologue_augment.  */
  obstack_init (&pre_prologue_obstack);
  obstack_init (&post_prologue_obstack);

  /* Scan and parse the grammar file, tracing as requested.  */
  gram_in = xfopen (grammar_file, "r");

  gram__flex_debug = trace_flag & trace_scan;
  gram_debug = trace_flag & trace_parse;
  gram_scanner_initialize ();
  gram_parse ();

  /* A grammar that raised complaints is not worth converting.  */
  if (! complaint_issued)
    check_and_convert_grammar ();

  xfclose (gram_in);
}


/* Check the grammar that has just been read, and convert it to
   internal form: make sure there is at least one rule, report
   undefined symbols, define the end token if needed, prepend the
   rule for the accept symbol, pack symbols and rules, and finally
   free GRAMMAR.  */

static void
check_and_convert_grammar (void)
{
  /* Grammar has been read.  Do some checking.  */
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */
  symbols_check_defined ();

  /* If the user did not define her ENDTOKEN, do it now. */
  if (endtoken == NULL)
    {
      endtoken = symbol_get ("$end", empty_location);
      endtoken->class = token_sym;
      endtoken->number = 0;
      /* Value specified by POSIX.  */
      endtoken->user_token_number = 0;
    }

  /* Insert the initial rule, whose line is that of the first rule
     (not that of the start symbol):

     accept: %start EOF.  */
  {
    symbol_list *accept_rule = symbol_list_new (accept, empty_location);
    symbol_list *tail = accept_rule;

    accept_rule->location = grammar->location;

    tail->next = symbol_list_new (startsymbol, empty_location);
    tail = tail->next;
    tail->next = symbol_list_new (endtoken, empty_location);
    tail = tail->next;
    /* The null-symbol node ending the rule, followed by the rules
       that were read.  */
    tail->next = symbol_list_new (NULL, empty_location);
    tail = tail->next;
    tail->next = grammar;

    /* One more rule, and three more items: these nodes were not
       created through grammar_symbol_append.  */
    grammar = accept_rule;
    nrules += 1;
    nritems += 3;
  }

  assert (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);

  /* Assign the symbols their symbol numbers.  Write #defines for the
     token symbols into FDEFINES if requested.  */
  symbols_pack ();

  /* Convert the grammar into the format described in gram.h.  */
  packgram ();

  /* The grammar as a symbol_list is no longer needed. */
  LIST_FREE (symbol_list, grammar);
}
Commit	Line	Data
	1	/* Input parser for Bison
	2
	3	Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
	4	2005, 2006 Free Software Foundation, Inc.
	5
	6	This file is part of Bison, the GNU Compiler Compiler.
	7
	8	Bison is free software; you can redistribute it and/or modify
	9	it under the terms of the GNU General Public License as published by
	10	the Free Software Foundation; either version 2, or (at your option)
	11	any later version.
	12
	13	Bison is distributed in the hope that it will be useful,
	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	GNU General Public License for more details.
	17
	18	You should have received a copy of the GNU General Public License
	19	along with Bison; see the file COPYING. If not, write to
	20	the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	21	Boston, MA 02110-1301, USA. */
	22
	23	#include <config.h>
	24	#include "system.h"
	25	#include <assert.h>
	26
	27	#include <quotearg.h>
	28
	29	#include "complain.h"
	30	#include "conflicts.h"
	31	#include "files.h"
	32	#include "getargs.h"
	33	#include "gram.h"
	34	#include "muscle_tab.h"
	35	#include "reader.h"
	36	#include "symlist.h"
	37	#include "symtab.h"
	38	#include "scan-gram.h"
	39	#include "scan-code.h"
	40
	41	static void check_and_convert_grammar (void);
	42
	43	static symbol_list *grammar = NULL;
	44	static bool start_flag = false;
	45	merger_list *merge_functions;
	46
	47	/* Was %union seen? */
	48	bool typed = false;
	49
	50	/* Should rules have a default precedence? */
	51	bool default_prec = true;
	52	\f
	53	/*-----------------------.
	54	\| Set the start symbol. \|
	55	`-----------------------*/
	56
	57	void
	58	grammar_start_symbol_set (symbol *sym, location loc)
	59	{
	60	if (start_flag)
	61	complain_at (loc, _("multiple %s declarations"), "%start");
	62	else
	63	{
	64	start_flag = true;
	65	startsymbol = sym;
	66	startsymbol_location = loc;
	67	}
	68	}
	69
	70
	71	/*---------------------------------------------------------------------.
	72	\| There are two prologues: one before the first %union and one after. \|
	73	\| Augment the one specified by POST. \|
	74	`---------------------------------------------------------------------*/
	75
	76	void
	77	prologue_augment (const char *prologue, location loc, bool post)
	78	{
	79	struct obstack *oout =
	80	!post ? &pre_prologue_obstack : &post_prologue_obstack;
	81
	82	obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line);
	83	/* FIXME: Protection of M4 characters missing here. See
	84	output.c:escaped_output. */
	85	MUSCLE_OBSTACK_SGROW (oout,
	86	quotearg_style (c_quoting_style, loc.start.file));
	87	obstack_sgrow (oout, "]])[\n");
	88	obstack_sgrow (oout, prologue);
	89	}
	90
	91	\f
	92
	93	/*------------------------------------------------------------------------.
	94	\| Return the merger index for a merging function named NAME. Records the \|
	95	\| function, if new, in MERGER_LIST. \|
	96	`------------------------------------------------------------------------*/
	97
	98	static int
	99	get_merge_function (uniqstr name)
	100	{
	101	merger_list *syms;
	102	merger_list head;
	103	int n;
	104
	105	if (! glr_parser)
	106	return 0;
	107
	108	head.next = merge_functions;
	109	for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
	110	if (UNIQSTR_EQ (name, syms->next->name))
	111	break;
	112	if (syms->next == NULL)
	113	{
	114	syms->next = xmalloc (sizeof syms->next[0]);
	115	syms->next->name = uniqstr_new (name);
	116	/* After all symbol type declarations have been parsed, packgram invokes
	117	record_merge_function_type to set the type. */
	118	syms->next->type = NULL;
	119	syms->next->next = NULL;
	120	merge_functions = head.next;
	121	}
	122	return n;
	123	}
	124
	125	/*-------------------------------------------------------------------------.
	126	\| For the existing merging function with index MERGER, record the result \|
	127	\| type as TYPE as required by the lhs of the rule whose %merge declaration \|
	128	\| is at DECLARATION_LOC. \|
	129	`-------------------------------------------------------------------------*/
	130
	131	static void
	132	record_merge_function_type (int merger, uniqstr type, location declaration_loc)
	133	{
	134	int merger_find;
	135	merger_list *merge_function;
	136
	137	if (merger <= 0)
	138	return;
	139
	140	if (type == NULL)
	141	type = uniqstr_new ("");
	142
	143	merger_find = 1;
	144	for (merge_function = merge_functions;
	145	merge_function != NULL && merger_find != merger;
	146	merge_function = merge_function->next)
	147	merger_find += 1;
	148	assert (merge_function != NULL && merger_find == merger);
	149	if (merge_function->type != NULL && !UNIQSTR_EQ (merge_function->type, type))
	150	{
	151	complain_at (declaration_loc,
	152	_("result type clash on merge function `%s': <%s> != <%s>"),
	153	merge_function->name, type, merge_function->type);
	154	complain_at (merge_function->type_declaration_location,
	155	_("previous declaration"));
	156	}
	157	merge_function->type = uniqstr_new (type);
	158	merge_function->type_declaration_location = declaration_loc;
	159	}
	160
	161	/*--------------------------------------.
	162	\| Free all merge-function definitions. \|
	163	`--------------------------------------*/
	164
	165	void
	166	free_merger_functions (void)
	167	{
	168	merger_list *L0 = merge_functions;
	169	while (L0)
	170	{
	171	merger_list *L1 = L0->next;
	172	free (L0);
	173	L0 = L1;
	174	}
	175	}
	176
	177	\f
	178	/*-------------------------------------------------------------------.
	179	\| Parse the input grammar into a one symbol_list structure. Each \|
	180	\| rule is represented by a sequence of symbols: the left hand side \|
	181	\| followed by the contents of the right hand side, followed by a \|
	182	\| null pointer instead of a symbol to terminate the rule. The next \|
	183	\| symbol is the lhs of the following rule. \|
	184	\| \|
	185	\| All actions are copied out, labelled by the rule number they apply \|
	186	\| to. \|
	187	`-------------------------------------------------------------------*/
	188
	189	/* The (currently) last symbol of GRAMMAR. */
	190	static symbol_list *grammar_end = NULL;
	191
	192	/* Append SYM to the grammar. */
	193	static void
	194	grammar_symbol_append (symbol *sym, location loc)
	195	{
	196	symbol_list *p = symbol_list_new (sym, loc);
	197
	198	if (grammar_end)
	199	grammar_end->next = p;
	200	else
	201	grammar = p;
	202
	203	grammar_end = p;
	204
	205	/* A null SYM stands for an end of rule; it is not an actual
	206	part of it. */
	207	if (sym)
	208	++nritems;
	209	}
	210
	211	/* The rule currently being defined, and the previous rule.
	212	CURRENT_RULE points to the first LHS of the current rule, while
	213	PREVIOUS_RULE_END points to the end of the previous rule (NULL). */
	214	static symbol_list *current_rule = NULL;
	215	static symbol_list *previous_rule_end = NULL;
	216
	217
	218	/*----------------------------------------------.
	219	\| Create a new rule for LHS in to the GRAMMAR. \|
	220	`----------------------------------------------*/
	221
	222	void
	223	grammar_current_rule_begin (symbol *lhs, location loc)
	224	{
	225	if (!start_flag)
	226	{
	227	startsymbol = lhs;
	228	startsymbol_location = loc;
	229	start_flag = true;
	230	}
	231
	232	/* Start a new rule and record its lhs. */
	233	++nrules;
	234	previous_rule_end = grammar_end;
	235	grammar_symbol_append (lhs, loc);
	236	current_rule = grammar_end;
	237
	238	/* Mark the rule's lhs as a nonterminal if not already so. */
	239	if (lhs->class == unknown_sym)
	240	{
	241	lhs->class = nterm_sym;
	242	lhs->number = nvars;
	243	++nvars;
	244	}
	245	else if (lhs->class == token_sym)
	246	complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
	247	}
	248
	249
	250	/*----------------------------------------------------------------------.
	251	\| A symbol should be used if it has a destructor, or if it is a \|
	252	\| mid-rule symbol (i.e., the generated LHS replacing a mid-rule \|
	253	\| action) that was assigned to, as in "exp: { $$ = 1; } { $$ = $1; }". \|
	254	`----------------------------------------------------------------------*/
	255
	256	static bool
	257	symbol_should_be_used (symbol_list const *s)
	258	{
	259	return (s->sym->destructor
	260	\|\| (s->midrule && s->midrule->used));
	261	}
	262
	263	/*----------------------------------------------------------------.
	264	\| Check that the rule R is properly defined. For instance, there \|
	265	\| should be no type clash on the default action. \|
	266	`----------------------------------------------------------------*/
	267
	268	static void
	269	grammar_rule_check (const symbol_list *r)
	270	{
	271	/* Type check.
	272
	273	If there is an action, then there is nothing we can do: the user
	274	is allowed to shoot herself in the foot.
	275
	276	Don't worry about the default action if $$ is untyped, since $$'s
	277	value can't be used. */
	278	if (!r->action && r->sym->type_name)
	279	{
	280	symbol *first_rhs = r->next->sym;
	281	/* If $$ is being set in default way, report if any type mismatch. */
	282	if (first_rhs)
	283	{
	284	char const *lhs_type = r->sym->type_name;
	285	const char *rhs_type =
	286	first_rhs->type_name ? first_rhs->type_name : "";
	287	if (!UNIQSTR_EQ (lhs_type, rhs_type))
	288	warn_at (r->location,
	289	_("type clash on default action: <%s> != <%s>"),
	290	lhs_type, rhs_type);
	291	}
	292	/* Warn if there is no default for $$ but we need one. */
	293	else
	294	warn_at (r->location,
	295	_("empty rule for typed nonterminal, and no action"));
	296	}
	297
	298	/* Check that symbol values that should be used are in fact used. */
	299	{
	300	symbol_list const *l = r;
	301	int n = 0;
	302	for (; l && l->sym; l = l->next, ++n)
	303	if (! (l->used
	304	\|\| !symbol_should_be_used (l)
	305	/* The default action, $$ = $1, `uses' both. */
	306	\|\| (!r->action && (n == 0 \|\| n == 1))))
	307	{
	308	if (n)
	309	warn_at (r->location, _("unused value: $%d"), n);
	310	else
	311	warn_at (r->location, _("unset value: $$"));
	312	}
	313	}
	314	}
	315
	316
	317	/*-------------------------------------.
	318	\| End the currently being grown rule. \|
	319	`-------------------------------------*/
	320
	321	void
	322	grammar_current_rule_end (location loc)
	323	{
	324	/* Put an empty link in the list to mark the end of this rule */
	325	grammar_symbol_append (NULL, grammar_end->location);
	326	current_rule->location = loc;
	327	}
	328
	329
	330	/*-------------------------------------------------------------------.
	331	\| The previous action turns out the be a mid-rule action. Attach it \|
	332	\| to the current rule, i.e., create a dummy symbol, attach it this \|
	333	\| mid-rule action, and append this dummy nonterminal to the current \|
	334	\| rule. \|
	335	`-------------------------------------------------------------------*/
	336
	337	void
	338	grammar_midrule_action (void)
	339	{
	340	/* Since the action was written out with this rule's number, we must
	341	give the new rule this number by inserting the new rule before
	342	it. */
	343
	344	/* Make a DUMMY nonterminal, whose location is that of the midrule
	345	action. Create the MIDRULE. */
	346	location dummy_location = current_rule->action_location;
	347	symbol *dummy = dummy_symbol_get (dummy_location);
	348	symbol_list *midrule = symbol_list_new (dummy, dummy_location);
	349
	350	/* Make a new rule, whose body is empty, before the current one, so
	351	that the action just read can belong to it. */
	352	++nrules;
	353	++nritems;
	354	/* Attach its location and actions to that of the DUMMY. */
	355	midrule->location = dummy_location;
	356	midrule->action = current_rule->action;
	357	midrule->action_location = dummy_location;
	358	current_rule->action = NULL;
	359	/* The action has not been translated yet, so $$ use hasn't been
	360	detected yet. */
	361	midrule->used = false;
	362
	363	if (previous_rule_end)
	364	previous_rule_end->next = midrule;
	365	else
	366	grammar = midrule;
	367
	368	/* End the dummy's rule. */
	369	midrule->next = symbol_list_new (NULL, dummy_location);
	370	midrule->next->next = current_rule;
	371
	372	previous_rule_end = midrule->next;
	373
	374	/* Insert the dummy nonterminal replacing the midrule action into
	375	the current rule. Bind it to its dedicated rule. */
	376	grammar_current_rule_symbol_append (dummy, dummy_location);
	377	grammar_end->midrule = midrule;
	378	midrule->midrule_parent_rule = current_rule;
	379	midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next);
	380	}
	381
	382	/* Set the precedence symbol of the current rule to PRECSYM. */
	383
	384	void
	385	grammar_current_rule_prec_set (symbol *precsym, location loc)
	386	{
	387	if (current_rule->ruleprec)
	388	complain_at (loc, _("only one %s allowed per rule"), "%prec");
	389	current_rule->ruleprec = precsym;
	390	}
	391
	392	/* Attach dynamic precedence DPREC to the current rule. */
	393
	394	void
	395	grammar_current_rule_dprec_set (int dprec, location loc)
	396	{
	397	if (! glr_parser)
	398	warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
	399	if (dprec <= 0)
	400	complain_at (loc, _("%s must be followed by positive number"), "%dprec");
	401	else if (current_rule->dprec != 0)
	402	complain_at (loc, _("only one %s allowed per rule"), "%dprec");
	403	current_rule->dprec = dprec;
	404	}
	405
	406	/* Attach a merge function NAME with argument type TYPE to current
	407	rule. */
	408
	409	void
	410	grammar_current_rule_merge_set (uniqstr name, location loc)
	411	{
	412	if (! glr_parser)
	413	warn_at (loc, _("%s affects only GLR parsers"), "%merge");
	414	if (current_rule->merger != 0)
	415	complain_at (loc, _("only one %s allowed per rule"), "%merge");
	416	current_rule->merger = get_merge_function (name);
	417	current_rule->merger_declaration_location = loc;
	418	}
	419
	420	/* Attach SYM to the current rule. If needed, move the previous
	421	action as a mid-rule action. */
	422
	423	void
	424	grammar_current_rule_symbol_append (symbol *sym, location loc)
	425	{
	426	if (current_rule->action)
	427	grammar_midrule_action ();
	428	grammar_symbol_append (sym, loc);
	429	}
	430
	431	/* Attach an ACTION to the current rule. */
	432
	433	void
	434	grammar_current_rule_action_append (const char *action, location loc)
	435	{
	436	if (current_rule->action)
	437	grammar_midrule_action ();
	438	/* After all symbol declarations have been parsed, packgram invokes
	439	translate_rule_action. */
	440	current_rule->action = action;
	441	current_rule->action_location = loc;
	442	}
	443
	444	\f
	445	/*---------------------------------------------------------------.
	446	\| Convert the rules into the representation using RRHS, RLHS and \|
	447	\| RITEM. \|
	448	`---------------------------------------------------------------*/
	449
	450	static void
	451	packgram (void)
	452	{
	453	unsigned int itemno = 0;
	454	rule_number ruleno = 0;
	455	symbol_list *p = grammar;
	456
	457	ritem = xnmalloc (nritems + 1, sizeof *ritem);
	458
	459	/* This sentinel is used by build_relations in gram.c. */
	460	*ritem++ = 0;
	461
	462	rules = xnmalloc (nrules, sizeof *rules);
	463
	464	while (p)
	465	{
	466	int rule_length = 0;
	467	symbol *ruleprec = p->ruleprec;
	468	record_merge_function_type (p->merger, p->sym->type_name,
	469	p->merger_declaration_location);
	470	rules[ruleno].user_number = ruleno;
	471	rules[ruleno].number = ruleno;
	472	rules[ruleno].lhs = p->sym;
	473	rules[ruleno].rhs = ritem + itemno;
	474	rules[ruleno].prec = NULL;
	475	rules[ruleno].dprec = p->dprec;
	476	rules[ruleno].merger = p->merger;
	477	rules[ruleno].precsym = NULL;
	478	rules[ruleno].location = p->location;
	479	rules[ruleno].useful = true;
	480	rules[ruleno].action = p->action ? translate_rule_action (p) : NULL;
	481	rules[ruleno].action_location = p->action_location;
	482
	483	/* If this rule contains midrules, rest assured that
	484	grammar_midrule_action inserted the midrules into grammar before this
	485	rule. Thus, the midrule actions have already been scanned in order to
	486	set `used' flags for this rule's rhs, so grammar_rule_check will work
	487	properly. */
	488	grammar_rule_check (p);
	489
	490	for (p = p->next; p && p->sym; p = p->next)
	491	{
	492	++rule_length;
	493
	494	/* Don't allow rule_length == INT_MAX, since that might
	495	cause confusion with strtol if INT_MAX == LONG_MAX. */
	496	if (rule_length == INT_MAX)
	497	fatal_at (rules[ruleno].location, _("rule is too long"));
	498
	499	/* item_number = symbol_number.
	500	But the former needs to contain more: negative rule numbers. */
	501	ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
	502	/* A rule gets by default the precedence and associativity
	503	of its last token. */
	504	if (p->sym->class == token_sym && default_prec)
	505	rules[ruleno].prec = p->sym;
	506	}
	507
	508	/* If this rule has a %prec,
	509	the specified symbol's precedence replaces the default. */
	510	if (ruleprec)
	511	{
	512	rules[ruleno].precsym = ruleprec;
	513	rules[ruleno].prec = ruleprec;
	514	}
	515	/* An item ends by the rule number (negated). */
	516	ritem[itemno++] = rule_number_as_item_number (ruleno);
	517	assert (itemno < ITEM_NUMBER_MAX);
	518	++ruleno;
	519	assert (ruleno < RULE_NUMBER_MAX);
	520
	521	if (p)
	522	p = p->next;
	523	}
	524
	525	assert (itemno == nritems);
	526
	527	if (trace_flag & trace_sets)
	528	ritem_print (stderr);
	529	}
	530	\f
	531	/*------------------------------------------------------------------.
	532	\| Read in the grammar specification and record it in the format \|
	533	\| described in gram.h. All actions are copied into ACTION_OBSTACK, \|
	534	\| in each case forming the body of a C function (YYACTION) which \|
	535	\| contains a switch statement to decide which action to execute. \|
	536	`------------------------------------------------------------------*/
	537
	538	void
	539	reader (void)
	540	{
	541	/* Initialize the symbol table. */
	542	symbols_new ();
	543
	544	/* Construct the accept symbol. */
	545	accept = symbol_get ("$accept", empty_location);
	546	accept->class = nterm_sym;
	547	accept->number = nvars++;
	548
	549	/* Construct the error token */
	550	errtoken = symbol_get ("error", empty_location);
	551	errtoken->class = token_sym;
	552	errtoken->number = ntokens++;
	553
	554	/* Construct a token that represents all undefined literal tokens.
	555	It is always token number 2. */
	556	undeftoken = symbol_get ("$undefined", empty_location);
	557	undeftoken->class = token_sym;
	558	undeftoken->number = ntokens++;
	559
	560	/* Initialize the obstacks. */
	561	obstack_init (&pre_prologue_obstack);
	562	obstack_init (&post_prologue_obstack);
	563
	564	gram_in = xfopen (grammar_file, "r");
	565
	566	gram__flex_debug = trace_flag & trace_scan;
	567	gram_debug = trace_flag & trace_parse;
	568	gram_scanner_initialize ();
	569	gram_parse ();
	570
	571	if (! complaint_issued)
	572	check_and_convert_grammar ();
	573
	574	xfclose (gram_in);
	575	}
	576
	577
	578	/*-------------------------------------------------------------.
	579	\| Check the grammar that has just been read, and convert it to \|
	580	\| internal form. \|
	581	`-------------------------------------------------------------*/
	582
	583	static void
	584	check_and_convert_grammar (void)
	585	{
	586	/* Grammar has been read. Do some checking. */
	587	if (nrules == 0)
	588	fatal (_("no rules in the input grammar"));
	589
	590	/* Report any undefined symbols and consider them nonterminals. */
	591	symbols_check_defined ();
	592
	593	/* If the user did not define her ENDTOKEN, do it now. */
	594	if (!endtoken)
	595	{
	596	endtoken = symbol_get ("$end", empty_location);
	597	endtoken->class = token_sym;
	598	endtoken->number = 0;
	599	/* Value specified by POSIX. */
	600	endtoken->user_token_number = 0;
	601	}
	602
	603	/* Insert the initial rule, whose line is that of the first rule
	604	(not that of the start symbol):
	605
	606	accept: %start EOF. */
	607	{
	608	symbol_list *p = symbol_list_new (accept, empty_location);
	609	p->location = grammar->location;
	610	p->next = symbol_list_new (startsymbol, empty_location);
	611	p->next->next = symbol_list_new (endtoken, empty_location);
	612	p->next->next->next = symbol_list_new (NULL, empty_location);
	613	p->next->next->next->next = grammar;
	614	nrules += 1;
	615	nritems += 3;
	616	grammar = p;
	617	}
	618
	619	assert (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);
	620
	621	/* Assign the symbols their symbol numbers. Write #defines for the
	622	token symbols into FDEFINES if requested. */
	623	symbols_pack ();
	624
	625	/* Convert the grammar into the format described in gram.h. */
	626	packgram ();
	627
	628	/* The grammar as a symbol_list is no longer needed. */
	629	LIST_FREE (symbol_list, grammar);
	630	}