[bison.git] / src / reader.c

/* Input parser for bison
   Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
   Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */


#include "system.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "symlist.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"

/* The grammar being read, kept as one list of symbols.  New links are
   added at its end by grammar_symbol_append.  */
static symbol_list_t *grammar = NULL;
/* Nonzero once a start symbol has been recorded, either by
   grammar_start_symbol_set or by the first call to
   grammar_rule_begin.  */
static int start_flag = 0;
/* The list of merge functions recorded by get_merge_function.  */
merger_list *merge_functions;

/* Nonzero if %union has been seen.  */
int typed = 0;
\f
/*-----------------------.
| Set the start symbol.  |
`-----------------------*/

void
grammar_start_symbol_set (symbol_t *s, location_t l)
{
  /* A start symbol was already recorded: report the duplicate at L
     and keep the one we have.  */
  if (start_flag)
    {
      complain_at (l, _("multiple %s declarations"), "%start");
      return;
    }

  /* First time: remember the symbol and where it was declared.  */
  startsymbol = s;
  startsymbol_location = l;
  start_flag = 1;
}


/*----------------------------------------------------------------.
| There are two prologues: one before %union, one after.  Augment |
| the current one.                                                |
`----------------------------------------------------------------*/

void
prologue_augment (const char *prologue, location_t location)
{
  /* While TYPED is zero (no %union seen yet) the text goes to the
     pre-prologue, afterwards to the post-prologue.  */
  struct obstack *target = &post_prologue_obstack;
  if (!typed)
    target = &pre_prologue_obstack;

  /* Unless NO_LINES_FLAG is set, first grow the obstack with the
     "linef" muscle formatted with the first line of LOCATION and the
     C-quoted "filename" muscle.  */
  if (!no_lines_flag)
    {
      const char *file = quotearg_style (c_quoting_style,
                                         muscle_find ("filename"));
      obstack_fgrow2 (target, muscle_find ("linef"),
                      location.first_line, file);
    }

  /* Then append the prologue text itself.  */
  obstack_sgrow (target, prologue);
}


/*----------------------.
| Handle the epilogue.  |
`----------------------*/

void
epilogue_set (const char *epilogue, location_t location)
{
  /* Unless NO_LINES_FLAG is set, start with the "linef" muscle
     formatted with the first line of LOCATION and the C-quoted
     "filename" muscle.  */
  if (!no_lines_flag)
    {
      const char *file = quotearg_style (c_quoting_style,
                                         muscle_find ("filename"));
      obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
                      location.first_line, file);
    }

  /* Append the epilogue text, terminate the string, and store the
     finished object as the "epilogue" muscle.  */
  obstack_sgrow (&muscle_obstack, epilogue);
  obstack_1grow (&muscle_obstack, '\0');
  muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
}


\f

 /*-------------------------------------------------------------------.
| Return the merger index for a merging function named NAME, whose   |
| arguments have type TYPE.  Records the function, if new, in        |
| merger_list.							     |
`-------------------------------------------------------------------*/

static int
get_merge_function (const char* name, const char* type,
		    location_t loc)
{
  merger_list *syms;
  merger_list head;
  int n;

  if (! glr_parser)
    return 0;

  if (type == NULL)
    type = "";

  head.next = merge_functions;
  for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
    if (strcmp (name, syms->next->name) == 0)
      break;
  if (syms->next == NULL)
    {
      syms->next = XMALLOC (merger_list, 1);
      syms->next->name = xstrdup (name);
      syms->next->type = xstrdup (type);
      syms->next->next = NULL;
      merge_functions = head.next;
    }
  else if (strcmp (type, syms->next->type) != 0)
    warn_at (loc, _("result type clash on merge function %s: `%s' vs. `%s'"),
	     name, type, syms->next->type);
  return n;
}

/*--------------------------------------.
| Free all merge-function definitions.	|
`--------------------------------------*/

void
free_merger_functions (void)
{
  merger_list *L0;
  if (! glr_parser)
    return;
  L0 = merge_functions;
  while (L0 != NULL)
    {
      merger_list *L1 = L0->next;
      free (L0);
      L0 = L1;
    }
}

\f
/*-------------------------------------------------------------------.
| Parse the input grammar into one symbol_list_t structure.  Each    |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| All actions are copied out, labelled by the rule number they apply |
| to.                                                                |
|                                                                    |
| Bison used to allow some %directives in the rules section, but     |
| this is no longer considered appropriate: (i) the documented       |
| grammar doesn't claim it, (ii) it would promote bad style, (iii)   |
| error recovery for %directives consists in skipping the junk       |
| until a `%' is seen, which helps synchronizing.  This scheme is    |
| definitely wrong in the rules section.                             |
`-------------------------------------------------------------------*/

/* The (currently) last link of GRAMMAR, or NULL while it is empty.  */
symbol_list_t *grammar_end = NULL;

/* Append a new link holding SYMBOL (which may be NULL) and LOCATION
   at the end of GRAMMAR.  */
void
grammar_symbol_append (symbol_t *symbol, location_t location)
{
  symbol_list_t *node = symbol_list_new (symbol, location);

  /* An empty list has no tail yet: the new link becomes its head.  */
  if (!grammar_end)
    grammar = node;
  else
    grammar_end->next = node;

  grammar_end = node;
}

/* The rule currently being defined, and the previous rule.
   CURRENT_RULE points to the link holding the LHS of the current
   rule.  PREVIOUS_RULE_END points to the link that was last in
   GRAMMAR when the current rule was begun, i.e., the *end* of the
   previous rule (its symbol is NULL); it is NULL itself when the
   current rule is the first link of GRAMMAR.  */
symbol_list_t *current_rule = NULL;
symbol_list_t *previous_rule_end = NULL;


/*----------------------------------------------.
| Create a new rule for LHS in to the GRAMMAR.  |
`----------------------------------------------*/

void
grammar_rule_begin (symbol_t *lhs, location_t location)
{
  /* When no start symbol has been recorded yet, the LHS of this rule
     becomes the start symbol.  */
  if (!start_flag)
    {
      start_flag = 1;
      startsymbol = lhs;
      startsymbol_location = location;
    }

  /* Count the new rule, and the one item packgram stores in RITEM to
     close each rule.  */
  nrules += 1;
  nritems += 1;

  /* Remember where the previous rule stopped, then open the new rule
     with its LHS.  */
  previous_rule_end = grammar_end;
  grammar_symbol_append (lhs, location);
  current_rule = grammar_end;

  /* A symbol used as LHS is a nonterminal: classify and number it if
     it was unknown so far, and complain if it is a token.  */
  switch (lhs->class)
    {
    case unknown_sym:
      lhs->class = nterm_sym;
      lhs->number = nvars;
      nvars += 1;
      break;

    case token_sym:
      complain_at (location, _("rule given for %s, which is a token"),
                   lhs->tag);
      break;

    default:
      break;
    }
}

/* Check the default action of the rule just ended (CURRENT_RULE).
   When the rule has no action of its own and its LHS is typed, the
   first RHS symbol must exist and have the same type as the LHS;
   otherwise an error is reported at the rule's location.  */

static void
grammar_current_rule_check (void)
{
  symbol_t *first_rhs = current_rule->next->sym;
  char const *lhs_type = current_rule->sym->type_name;

  /* With an explicit action there is nothing to check: the user is
     allowed to shoot herself in the foot.  An untyped $$ needs no
     check either, since its value can't be used.  */
  if (current_rule->action || !lhs_type)
    return;

  /* An empty rule provides no value for the default $$ we need.  */
  if (!first_rhs)
    {
      complain_at (current_rule->location,
                   _("empty rule for typed nonterminal, and no action"));
      return;
    }

  /* $$ is set in the default way: the types must agree.  An untyped
     first RHS symbol is compared as the empty type.  */
  {
    const char *rhs_type = first_rhs->type_name;
    if (!rhs_type)
      rhs_type = "";
    if (strcmp (lhs_type, rhs_type) != 0)
      complain_at (current_rule->location,
                   _("type clash (`%s' `%s') on default action"),
                   lhs_type, rhs_type);
  }
}


/*-------------------------------------.
| End the currently being grown rule.  |
`-------------------------------------*/

void
grammar_rule_end (location_t location)
{
  /* Close the rule with a link whose symbol is NULL, located where
     the last link of the grammar is.  That location is read before
     CURRENT_RULE's location is overwritten below, since the last link
     may be CURRENT_RULE itself (a rule with an empty RHS).  */
  location_t end_location = grammar_end->location;
  grammar_symbol_append (NULL, end_location);

  /* The rule as a whole is located at LOCATION.  */
  current_rule->location = location;

  grammar_current_rule_check ();
}


/*-------------------------------------------------------------------.
| The previous action turns out the be a mid-rule action.  Attach it |
| to the current rule, i.e., create a dummy symbol, attach it this   |
| mid-rule action, and append this dummy nonterminal to the current  |
| rule.                                                              |
`-------------------------------------------------------------------*/

void
grammar_midrule_action (void)
{
  /* Since the action was written out with this rule's number, we must
     give the new rule this number by inserting the new rule before
     it.  */

  /* Make a DUMMY nonterminal, whose location is that of the midrule
     action.  Create the MIDRULE.  */
  location_t dummy_location = current_rule->action_location;
  symbol_t *dummy = dummy_symbol_get (dummy_location);
  symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);

  /* Make a new rule, whose body is empty, before the current one, so
     that the action just read can belong to it.  The item counted
     here is the one packgram stores to close the (empty) rule.  */
  ++nrules;
  ++nritems;
  /* Attach its location and actions to that of the DUMMY.  The
     current rule gives up its action, which now belongs to MIDRULE.  */
  midrule->location = dummy_location;
  midrule->action = current_rule->action;
  midrule->action_location = dummy_location;
  current_rule->action = NULL;

  /* Link MIDRULE right after the end of the previous rule, or make it
     the head of the grammar when there is no previous rule.  */
  if (previous_rule_end)
    previous_rule_end->next = midrule;
  else
    grammar = midrule;

  /* End the dummy's rule.  The new end link becomes the "previous
     rule end" of the current rule, so a later mid-rule action is
     inserted after this one.  */
  previous_rule_end = symbol_list_new (NULL, dummy_location);
  previous_rule_end->next = current_rule;

  midrule->next = previous_rule_end;

  /* Insert the dummy nonterminal replacing the midrule action into
     the current rule.  The current rule's action was cleared above,
     so this call does not come back here.  */
  grammar_current_rule_symbol_append (dummy, dummy_location);
}

/* Set the precedence symbol of the current rule to PRECSYM.  If the
   rule already has one, report an error at LOCATION; PRECSYM still
   replaces the previous symbol, as it always did.  */

void
grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
{
  /* Same wording as the %dprec and %merge diagnostics; the directive
     is spelled %prec, not @prec.  */
  if (current_rule->ruleprec)
    complain_at (location, _("only one %%prec allowed per rule"));
  current_rule->ruleprec = precsym;
}

/* Attach dynamic precedence DPREC to the current rule.  DPREC must be
   positive and a rule may have only one %dprec; violations are
   reported at LOCATION and leave the rule's dprec untouched.  A
   warning is issued at LOCATION when GLR_PARSER is zero.  */

void
grammar_current_rule_dprec_set (int dprec, location_t location)
{
  if (! glr_parser)
    warn_at (location, _("%%dprec affects only GLR parsers"));

  if (dprec <= 0)
    complain_at (location, _("%%dprec must be followed by positive number"));
  else if (current_rule->dprec != 0)
    complain_at (location, _("only one %%dprec allowed per rule"));
  else
    /* Store only a valid, first value: a rejected one must neither
       overwrite a good dprec nor make a later %dprec look like a
       duplicate.  */
    current_rule->dprec = dprec;
}

/* Attach the merge function NAME to the current rule.  The type
   passed to get_merge_function is the type name of the rule's LHS.
   Warn at LOCATION when GLR_PARSER is zero, and complain at LOCATION
   when the rule already has a merge function.  */

void
grammar_current_rule_merge_set (const char* name, location_t location)
{
  int merger;

  if (! glr_parser)
    warn_at (location, _("%%merge affects only GLR parsers"));

  if (current_rule->merger != 0)
    complain_at (location, _("only one %%merge allowed per rule"));

  merger = get_merge_function (name, current_rule->sym->type_name, location);
  current_rule->merger = merger;
}

/* Append SYMBOL, located at LOCATION, to the RHS of the current rule.
   An action still pending on the rule is first turned into a mid-rule
   action.  */

void
grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
{
  /* An action followed by a symbol cannot be the rule's final
     action.  */
  if (current_rule->action != NULL)
    {
      grammar_midrule_action ();
    }

  grammar_symbol_append (symbol, location);
  /* One more item on the RHS.  */
  nritems += 1;
}

/* Make ACTION, located at LOCATION, the action of the current rule.
   An action already pending on the rule is first turned into a
   mid-rule action.  */

void
grammar_current_rule_action_append (const char *action, location_t location)
{
  /* Two actions in a row: the earlier one was a mid-rule action.  */
  if (current_rule->action != NULL)
    {
      grammar_midrule_action ();
    }

  current_rule->action_location = location;
  current_rule->action = action;
}

\f
/*---------------------------------------------------------------.
| Convert the rules into the representation using RRHS, RLHS and |
| RITEM.                                                         |
`---------------------------------------------------------------*/

static void
packgram (void)
{
  unsigned int itemno = 0;
  rule_number_t ruleno = 0;
  symbol_list_t *p = grammar;

  ritem = XCALLOC (item_number_t, nritems);
  rules = XCALLOC (rule_t, nrules);

  while (p)
    {
      symbol_t *ruleprec = p->ruleprec;
      rules[ruleno].user_number = ruleno;
      rules[ruleno].number = ruleno;
      rules[ruleno].lhs = p->sym;
      rules[ruleno].rhs = ritem + itemno;
      rules[ruleno].location = p->location;
      rules[ruleno].useful = TRUE;
      rules[ruleno].action = p->action;
      rules[ruleno].action_location = p->action_location;
      rules[ruleno].dprec = p->dprec;
      rules[ruleno].merger = p->merger;

      p = p->next;
      while (p && p->sym)
	{
	  /* item_number_t = symbol_number_t.
	     But the former needs to contain more: negative rule numbers. */
	  ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
	  /* A rule gets by default the precedence and associativity
	     of the last token in it.  */
	  if (p->sym->class == token_sym)
	    rules[ruleno].prec = p->sym;
	  if (p)
	    p = p->next;
	}

      /* If this rule has a %prec,
         the specified symbol's precedence replaces the default.  */
      if (ruleprec)
	{
	  rules[ruleno].precsym = ruleprec;
	  rules[ruleno].prec = ruleprec;
	}
      ritem[itemno++] = rule_number_as_item_number (ruleno);
      ++ruleno;

      if (p)
	p = p->next;
    }

  assert (itemno == nritems);

  if (trace_flag & trace_sets)
    ritem_print (stderr);
}
\f
/*------------------------------------------------------------------.
| Read in the grammar specification and record it in the format     |
| described in gram.h.  All actions are copied into ACTION_OBSTACK, |
| in each case forming the body of a C function (YYACTION) which    |
| contains a switch statement to decide which action to execute.    |
`------------------------------------------------------------------*/

void
reader (void)
{
  gram_control_t gram_control;

  /* Initialize the symbol table.  */
  symbols_new ();

  /* Construct the accept symbol. */
  accept = symbol_get ("$accept", empty_location);
  accept->class = nterm_sym;
  accept->number = nvars++;

  /* Construct the error token */
  errtoken = symbol_get ("error", empty_location);
  errtoken->class = token_sym;
  errtoken->number = ntokens++;

  /* Construct a token that represents all undefined literal tokens.
     It is always token number 2.  */
  undeftoken = symbol_get ("$undefined", empty_location);
  undeftoken->class = token_sym;
  undeftoken->number = ntokens++;

  /* Initialize the obstacks. */
  obstack_init (&pre_prologue_obstack);
  obstack_init (&post_prologue_obstack);

  /* Open the grammar file and hand it to the scanner.  */
  finput = xfopen (infile, "r");
  gram_in = finput;

  /* GRAM_DEBUG and GRAM__FLEX_DEBUG are set to 1 when the environment
     variables "parse" and "scan", respectively, exist, and to 0
     otherwise.  */
  gram_debug = !!getenv ("parse");
  gram__flex_debug = !!getenv ("scan");
  scanner_initialize ();
  gram_parse (&gram_control);

  /* If something went wrong during the parsing, don't try to
     continue.  */
  if (complain_message_count)
    exit (1);

  /* Grammar has been read.  Do some checking */
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */
  symbols_check_defined ();

  /* If the user did not define her ENDTOKEN, do it now. */
  if (!endtoken)
    {
      endtoken = symbol_get ("$end", empty_location);
      endtoken->class = token_sym;
      endtoken->number = 0;
      /* Value specified by POSIX.  */
      endtoken->user_token_number = 0;
    }

  /* Insert the initial rule, which line is that of the first rule
     (not that of the start symbol):

     accept: %start EOF.  */
  {
    symbol_list_t *p = symbol_list_new (accept, empty_location);
    p->location = grammar->location;
    p->next = symbol_list_new (startsymbol, empty_location);
    p->next->next = symbol_list_new (endtoken, empty_location);
    p->next->next->next = symbol_list_new (NULL, empty_location);
    p->next->next->next->next = grammar;
    /* One more rule, and three more items: the start symbol, the end
       token, and the item with which packgram closes the rule.  */
    nrules += 1;
    nritems += 3;
    grammar = p;
  }

  if (SYMBOL_NUMBER_MAX < nsyms)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
	   SYMBOL_NUMBER_MAX);

  assert (nsyms == ntokens + nvars);

  xfclose (finput);

  /* Assign the symbols their symbol numbers.  Write #defines for the
     token symbols into FDEFINES if requested.  */
  symbols_pack ();

  /* Convert the grammar into the format described in gram.h.  */
  packgram ();

  /* The grammar as a symbol_list_t is no longer needed. */
  LIST_FREE (symbol_list_t, grammar);
}
Commit	Line	Data
1ff442ca	1	/* Input parser for bison
76514394	2	Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3	3	Free Software Foundation, Inc.
1ff442ca	4
41aca2e0	5	This file is part of Bison, the GNU Compiler Compiler.
1ff442ca	6
41aca2e0 AD	7	Bison is free software; you can redistribute it and/or modify
	8	it under the terms of the GNU General Public License as published by
	9	the Free Software Foundation; either version 2, or (at your option)
	10	any later version.
1ff442ca	11
41aca2e0 AD	12	Bison is distributed in the hope that it will be useful,
	13	but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	GNU General Public License for more details.
1ff442ca	16
41aca2e0 AD	17	You should have received a copy of the GNU General Public License
	18	along with Bison; see the file COPYING. If not, write to
	19	the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	20	Boston, MA 02111-1307, USA. */
1ff442ca NF	21
1ff442ca NF	22
1ff442ca	23	#include "system.h"
2a91a95e AD	24	#include "quotearg.h"
2a91a95e AD	25	#include "quote.h"
ceed8467	26	#include "getargs.h"
1ff442ca	27	#include "files.h"
1ff442ca	28	#include "symtab.h"
56c47203	29	#include "symlist.h"
1ff442ca	30	#include "gram.h"
a0f6b076	31	#include "complain.h"
6c89f1c1	32	#include "output.h"
b2ca4022	33	#include "reader.h"
340ef489	34	#include "conflicts.h"
11d82f03	35	#include "muscle_tab.h"
1ff442ca	36
56c47203	37	static symbol_list_t *grammar = NULL;
280a38c3	38	static int start_flag = 0;
676385e2	39	merger_list *merge_functions;
1ff442ca	40
d7020c20	41	/* Nonzero if %union has been seen. */
e9955c83	42	int typed = 0;
0d533154	43	\f
e9955c83 AD	44	/*-----------------------.
	45	\| Set the start symbol. \|
	46	`-----------------------*/
1ff442ca	47
e9955c83	48	void
8efe435c	49	grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca NF	50	{
1ff442ca NF	51	if (start_flag)
e776192e	52	complain_at (l, _("multiple %s declarations"), "%start");
943819bf RS	53	else
	54	{
	55	start_flag = 1;
e9955c83	56	startsymbol = s;
8efe435c	57	startsymbol_location = l;
943819bf	58	}
1ff442ca NF	59	}
1ff442ca NF	60
1ff442ca	61
d7020c20	62	/*----------------------------------------------------------------.
e9955c83 AD	63	\| There are two prologues: one before %union, one after. Augment \|
e9955c83 AD	64	\| the current one. \|
d7020c20	65	`----------------------------------------------------------------*/
1ff442ca	66
e9955c83	67	void
0c15323d	68	prologue_augment (const char *prologue, location_t location)
b6610515	69	{
e9955c83 AD	70	struct obstack *oout =
e9955c83 AD	71	!typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515	72
e9955c83	73	if (!no_lines_flag)
b6610515	74	{
e9955c83	75	obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d AD	76	location.first_line,
	77	quotearg_style (c_quoting_style,
	78	muscle_find ("filename")));
b6610515	79	}
e9955c83	80	obstack_sgrow (oout, prologue);
b6610515 RA	81	}
b6610515 RA	82
2ba3b73c	83
426cf563	84
a870c567	85
e9955c83 AD	86	/*----------------------.
	87	\| Handle the epilogue. \|
	88	`----------------------*/
426cf563	89
e9955c83	90	void
0c15323d	91	epilogue_set (const char *epilogue, location_t location)
2ba3b73c	92	{
e9955c83	93	if (!no_lines_flag)
1ff442ca	94	{
592e8d4d	95	obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d AD	96	location.first_line,
	97	quotearg_style (c_quoting_style,
	98	muscle_find ("filename")));
1ff442ca	99	}
592e8d4d AD	100	obstack_sgrow (&muscle_obstack, epilogue);
	101	obstack_1grow (&muscle_obstack, 0);
	102	muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca	103	}
1ff442ca	104
a70083a3	105
a70083a3 AD	106	\f
a70083a3 AD	107
676385e2 PH	108	/*-------------------------------------------------------------------.
	109	\| Return the merger index for a merging function named NAME, whose \|
	110	\| arguments have type TYPE. Records the function, if new, in \|
	111	\| merger_list. \|
	112	`-------------------------------------------------------------------*/
	113
	114	static int
a5d50994 AD	115	get_merge_function (const char* name, const char* type,
a5d50994 AD	116	location_t loc)
676385e2 PH	117	{
	118	merger_list *syms;
	119	merger_list head;
	120	int n;
	121
	122	if (! glr_parser)
	123	return 0;
	124
	125	if (type == NULL)
	126	type = "";
	127
	128	head.next = merge_functions;
39f41916	129	for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
676385e2 PH	130	if (strcmp (name, syms->next->name) == 0)
676385e2 PH	131	break;
a5d50994 AD	132	if (syms->next == NULL)
	133	{
	134	syms->next = XMALLOC (merger_list, 1);
b906441c AD	135	syms->next->name = xstrdup (name);
b906441c AD	136	syms->next->type = xstrdup (type);
a5d50994 AD	137	syms->next->next = NULL;
	138	merge_functions = head.next;
	139	}
	140	else if (strcmp (type, syms->next->type) != 0)
	141	warn_at (loc, _("result type clash on merge function %s: `%s' vs. `%s'"),
	142	name, type, syms->next->type);
676385e2 PH	143	return n;
	144	}
	145
	146	/*--------------------------------------.
	147	\| Free all merge-function definitions. \|
	148	`--------------------------------------*/
	149
	150	void
	151	free_merger_functions (void)
	152	{
	153	merger_list *L0;
	154	if (! glr_parser)
	155	return;
	156	L0 = merge_functions;
	157	while (L0 != NULL)
	158	{
	159	merger_list *L1 = L0->next;
	160	free (L0);
	161	L0 = L1;
	162	}
	163	}
	164
a70083a3	165	\f
107f7dfb	166	/*-------------------------------------------------------------------.
32e1e0a4	167	\| Parse the input grammar into a one symbol_list_t structure. Each \|
107f7dfb AD	168	\| rule is represented by a sequence of symbols: the left hand side \|
	169	\| followed by the contents of the right hand side, followed by a \|
	170	\| null pointer instead of a symbol to terminate the rule. The next \|
	171	\| symbol is the lhs of the following rule. \|
	172	\| \|
fdbcd8e2 AD	173	\| All actions are copied out, labelled by the rule number they apply \|
fdbcd8e2 AD	174	\| to. \|
107f7dfb AD	175	\| \|
	176	\| Bison used to allow some %directives in the rules sections, but \|
	177	\| this is no longer consider appropriate: (i) the documented grammar \|
	178	\| doesn't claim it, (ii), it would promote bad style, (iii), error \|
	179	\| recovery for %directives consists in skipping the junk until a `%' \|
	180	\| is seen and helrp synchronizing. This scheme is definitely wrong \|
	181	\| in the rules section. \|
	182	`-------------------------------------------------------------------*/
1ff442ca	183
f6d0f937	184	/* The (currently) last symbol of GRAMMAR. */
56c47203	185	symbol_list_t *grammar_end = NULL;
f6d0f937 AD	186
f6d0f937 AD	187	/* Append S to the GRAMMAR. */
e9955c83	188	void
8efe435c	189	grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937	190	{
56c47203	191	symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937 AD	192
	193	if (grammar_end)
	194	grammar_end->next = p;
	195	else
	196	grammar = p;
	197
	198	grammar_end = p;
	199	}
	200
8efe435c AD	201	/* The rule currently being defined, and the previous rule.
	202	CURRENT_RULE points to the first LHS of the current rule, while
	203	PREVIOUS_RULE_END points to the end of the previous rule (NULL). */
56c47203 AD	204	symbol_list_t *current_rule = NULL;
56c47203 AD	205	symbol_list_t *previous_rule_end = NULL;
da4160c3 AD	206
da4160c3 AD	207
8efe435c AD	208	/*----------------------------------------------.
	209	\| Create a new rule for LHS in to the GRAMMAR. \|
	210	`----------------------------------------------*/
da4160c3	211
e9955c83	212	void
8efe435c	213	grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3 AD	214	{
	215	if (!start_flag)
	216	{
	217	startsymbol = lhs;
8efe435c	218	startsymbol_location = location;
da4160c3 AD	219	start_flag = 1;
	220	}
	221
	222	/* Start a new rule and record its lhs. */
	223	++nrules;
	224	++nritems;
	225
8efe435c AD	226	previous_rule_end = grammar_end;
8efe435c AD	227	grammar_symbol_append (lhs, location);
da4160c3 AD	228	current_rule = grammar_end;
	229
	230	/* Mark the rule's lhs as a nonterminal if not already so. */
	231
	232	if (lhs->class == unknown_sym)
	233	{
	234	lhs->class = nterm_sym;
	235	lhs->number = nvars;
	236	++nvars;
	237	}
	238	else if (lhs->class == token_sym)
e776192e	239	complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3 AD	240	}
da4160c3 AD	241
e9955c83 AD	242	/* Check that the last rule (CURRENT_RULE) is properly defined. For
	243	instance, there should be no type clash on the default action. */
	244
	245	static void
	246	grammar_current_rule_check (void)
	247	{
	248	symbol_t *lhs = current_rule->sym;
3f4c0f80	249	char const *lhs_type = lhs->type_name;
e9955c83 AD	250	symbol_t *first_rhs = current_rule->next->sym;
	251
	252	/* If there is an action, then there is nothing we can do: the user
3f4c0f80	253	is allowed to shoot herself in the foot. */
e9955c83 AD	254	if (current_rule->action)
	255	return;
	256
3f4c0f80 PE	257	/* Don't worry about the default action if $$ is untyped, since $$'s
	258	value can't be used. */
	259	if (! lhs_type)
	260	return;
	261
	262	/* If $$ is being set in default way, report if any type mismatch. */
e9955c83 AD	263	if (first_rhs)
e9955c83 AD	264	{
e9955c83 AD	265	const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
e9955c83 AD	266	if (strcmp (lhs_type, rhs_type))
e776192e AD	267	complain_at (current_rule->location,
	268	_("type clash (`%s' `%s') on default action"),
	269	lhs_type, rhs_type);
e9955c83 AD	270	}
	271	/* Warn if there is no default for $$ but we need one. */
	272	else
3f4c0f80 PE	273	complain_at (current_rule->location,
3f4c0f80 PE	274	_("empty rule for typed nonterminal, and no action"));
e9955c83 AD	275	}
	276
	277
8efe435c AD	278	/*-------------------------------------.
	279	\| End the currently being grown rule. \|
	280	`-------------------------------------*/
e9955c83 AD	281
e9955c83 AD	282	void
8efe435c	283	grammar_rule_end (location_t location)
e9955c83 AD	284	{
e9955c83 AD	285	/* Put an empty link in the list to mark the end of this rule */
8efe435c AD	286	grammar_symbol_append (NULL, grammar_end->location);
8efe435c AD	287	current_rule->location = location;
e9955c83 AD	288	grammar_current_rule_check ();
	289	}
	290
	291
8efe435c AD	292	/*-------------------------------------------------------------------.
	293	\| The previous action turns out the be a mid-rule action. Attach it \|
	294	\| to the current rule, i.e., create a dummy symbol, attach it this \|
	295	\| mid-rule action, and append this dummy nonterminal to the current \|
	296	\| rule. \|
	297	`-------------------------------------------------------------------*/
1485e106	298
e9955c83	299	void
1485e106 AD	300	grammar_midrule_action (void)
	301	{
	302	/* Since the action was written out with this rule's number, we must
	303	give the new rule this number by inserting the new rule before
	304	it. */
	305
8efe435c AD	306	/* Make a DUMMY nonterminal, whose location is that of the midrule
8efe435c AD	307	action. Create the MIDRULE. */
8efe435c	308	location_t dummy_location = current_rule->action_location;
39f41916	309	symbol_t *dummy = dummy_symbol_get (dummy_location);
56c47203	310	symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106 AD	311
	312	/* Make a new rule, whose body is empty, before the current one, so
	313	that the action just read can belong to it. */
	314	++nrules;
	315	++nritems;
8efe435c AD	316	/* Attach its location and actions to that of the DUMMY. */
	317	midrule->location = dummy_location;
	318	midrule->action = current_rule->action;
	319	midrule->action_location = dummy_location;
1485e106 AD	320	current_rule->action = NULL;
1485e106 AD	321
8efe435c AD	322	if (previous_rule_end)
8efe435c AD	323	previous_rule_end->next = midrule;
1485e106	324	else
8efe435c	325	grammar = midrule;
1485e106	326
8efe435c AD	327	/* End the dummy's rule. */
	328	previous_rule_end = symbol_list_new (NULL, dummy_location);
	329	previous_rule_end->next = current_rule;
1485e106	330
8efe435c	331	midrule->next = previous_rule_end;
1485e106	332
8efe435c AD	333	/* Insert the dummy nonterminal replacing the midrule action into
	334	the current rule. */
	335	grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106 AD	336	}
1485e106 AD	337
9af3fbce AD	338	/* Set the precedence symbol of the current rule to PRECSYM. */
9af3fbce AD	339
e9955c83	340	void
e776192e	341	grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce AD	342	{
9af3fbce AD	343	if (current_rule->ruleprec)
e776192e	344	complain_at (location, _("two @prec's in a row"));
9af3fbce AD	345	current_rule->ruleprec = precsym;
	346	}
	347
676385e2 PH	348	/* Attach dynamic precedence DPREC to the current rule. */
	349
	350	void
	351	grammar_current_rule_dprec_set (int dprec, location_t location)
	352	{
	353	if (! glr_parser)
	354	warn_at (location, _("%%dprec affects only GLR parsers"));
	355	if (dprec <= 0)
	356	complain_at (location, _("%%dprec must be followed by positive number"));
39f41916	357	else if (current_rule->dprec != 0)
676385e2 PH	358	complain_at (location, _("only one %%dprec allowed per rule"));
	359	current_rule->dprec = dprec;
	360	}
	361
	362	/* Attach a merge function NAME with argument type TYPE to current
	363	rule. */
	364
	365	void
	366	grammar_current_rule_merge_set (const char* name, location_t location)
	367	{
	368	if (! glr_parser)
	369	warn_at (location, _("%%merge affects only GLR parsers"));
39f41916	370	if (current_rule->merger != 0)
676385e2	371	complain_at (location, _("only one %%merge allowed per rule"));
39f41916	372	current_rule->merger =
a5d50994	373	get_merge_function (name, current_rule->sym->type_name, location);
676385e2 PH	374	}
676385e2 PH	375
2e047461 AD	376	/* Attach a SYMBOL to the current rule. If needed, move the previous
	377	action as a mid-rule action. */
	378
e9955c83	379	void
8efe435c	380	grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461 AD	381	{
	382	if (current_rule->action)
	383	grammar_midrule_action ();
	384	++nritems;
8efe435c	385	grammar_symbol_append (symbol, location);
2e047461 AD	386	}
2e047461 AD	387
2e047461 AD	388	/* Attach an ACTION to the current rule. If needed, move the previous
	389	action as a mid-rule action. */
	390
e9955c83	391	void
8efe435c	392	grammar_current_rule_action_append (const char *action, location_t location)
2e047461 AD	393	{
	394	if (current_rule->action)
	395	grammar_midrule_action ();
	396	current_rule->action = action;
8efe435c	397	current_rule->action_location = location;
2e047461 AD	398	}
2e047461 AD	399
a70083a3	400	\f
a70083a3 AD	401	/*---------------------------------------------------------------.
a70083a3 AD	402	\| Convert the rules into the representation using RRHS, RLHS and \|
d9b739c3	403	\| RITEM. \|
a70083a3	404	`---------------------------------------------------------------*/
1ff442ca	405
4a120d45	406	static void
118fb205	407	packgram (void)
1ff442ca	408	{
9222837b	409	unsigned int itemno = 0;
4b3d3a8e	410	rule_number_t ruleno = 0;
9222837b	411	symbol_list_t *p = grammar;
1ff442ca	412
a900a624	413	ritem = XCALLOC (item_number_t, nritems);
4b3d3a8e	414	rules = XCALLOC (rule_t, nrules);
1ff442ca	415
1ff442ca NF	416	while (p)
1ff442ca NF	417	{
db8837cb	418	symbol_t *ruleprec = p->ruleprec;
d7e1f00c	419	rules[ruleno].user_number = ruleno;
c3b407f4	420	rules[ruleno].number = ruleno;
bba97eb2	421	rules[ruleno].lhs = p->sym;
99013900	422	rules[ruleno].rhs = ritem + itemno;
8efe435c	423	rules[ruleno].location = p->location;
1a2b5d37 AD	424	rules[ruleno].useful = TRUE;
1a2b5d37 AD	425	rules[ruleno].action = p->action;
8efe435c	426	rules[ruleno].action_location = p->action_location;
676385e2 PH	427	rules[ruleno].dprec = p->dprec;
676385e2 PH	428	rules[ruleno].merger = p->merger;
1ff442ca NF	429
	430	p = p->next;
	431	while (p && p->sym)
	432	{
a49aecd5	433	/* item_number_t = symbol_number_t.
5fbb0954	434	But the former needs to contain more: negative rule numbers. */
a49aecd5	435	ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca NF	436	/* A rule gets by default the precedence and associativity
1ff442ca NF	437	of the last token in it. */
d7020c20	438	if (p->sym->class == token_sym)
03b31c0c	439	rules[ruleno].prec = p->sym;
a70083a3 AD	440	if (p)
a70083a3 AD	441	p = p->next;
1ff442ca NF	442	}
	443
	444	/* If this rule has a %prec,
a70083a3	445	the specified symbol's precedence replaces the default. */
1ff442ca NF	446	if (ruleprec)
1ff442ca NF	447	{
03b31c0c AD	448	rules[ruleno].precsym = ruleprec;
03b31c0c AD	449	rules[ruleno].prec = ruleprec;
1ff442ca	450	}
4b3d3a8e	451	ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179	452	++ruleno;
1ff442ca	453
a70083a3 AD	454	if (p)
a70083a3 AD	455	p = p->next;
1ff442ca NF	456	}
1ff442ca NF	457
5123689b	458	assert (itemno == nritems);
3067fbef	459
273a74fa	460	if (trace_flag & trace_sets)
3067fbef	461	ritem_print (stderr);
1ff442ca	462	}
a70083a3	463	\f
fdbcd8e2 AD	464	/*------------------------------------------------------------------.
	465	\| Read in the grammar specification and record it in the format \|
	466	\| described in gram.h. All actions are copied into ACTION_OBSTACK, \|
	467	\| in each case forming the body of a C function (YYACTION) which \|
	468	\| contains a switch statement to decide which action to execute. \|
	469	`------------------------------------------------------------------*/
a70083a3 AD	470
	471	void
	472	reader (void)
	473	{
e9955c83	474	gram_control_t gram_control;
a70083a3 AD	475
a70083a3 AD	476	/* Initialize the symbol table. */
db8837cb	477	symbols_new ();
b6610515	478
88bce5a2 AD	479	/* Construct the accept symbol. */
	480	accept = symbol_get ("$accept", empty_location);
	481	accept->class = nterm_sym;
	482	accept->number = nvars++;
30171f79	483
a70083a3	484	/* Construct the error token */
39f41916	485	errtoken = symbol_get ("error", empty_location);
d7020c20	486	errtoken->class = token_sym;
72a23c97	487	errtoken->number = ntokens++;
b6610515	488
a70083a3 AD	489	/* Construct a token that represents all undefined literal tokens.
a70083a3 AD	490	It is always token number 2. */
88bce5a2	491	undeftoken = symbol_get ("$undefined", empty_location);
d7020c20	492	undeftoken->class = token_sym;
72a23c97	493	undeftoken->number = ntokens++;
a70083a3	494
331dbc1b	495	/* Initialize the obstacks. */
0dd1580a RA	496	obstack_init (&pre_prologue_obstack);
0dd1580a RA	497	obstack_init (&post_prologue_obstack);
331dbc1b AD	498
331dbc1b AD	499	finput = xfopen (infile, "r");
e9955c83 AD	500	gram_in = finput;
	501
	502	gram_debug = !!getenv ("parse");
	503	gram__flex_debug = !!getenv ("scan");
1d6412ad	504	scanner_initialize ();
e9955c83	505	gram_parse (&gram_control);
331dbc1b	506
b275314e AD	507	/* If something went wrong during the parsing, don't try to
	508	continue. */
	509	if (complain_message_count)
	510	exit (1);
	511
e9955c83 AD	512	/* Grammar has been read. Do some checking */
	513	if (nrules == 0)
	514	fatal (_("no rules in the input grammar"));
	515
	516	/* Report any undefined symbols and consider them nonterminals. */
	517	symbols_check_defined ();
b7c49edf	518
88bce5a2 AD	519	/* If the user did not define her ENDTOKEN, do it now. */
88bce5a2 AD	520	if (!endtoken)
b7c49edf	521	{
88bce5a2 AD	522	endtoken = symbol_get ("$end", empty_location);
	523	endtoken->class = token_sym;
	524	endtoken->number = 0;
b7c49edf	525	/* Value specified by POSIX. */
88bce5a2	526	endtoken->user_token_number = 0;
b7c49edf AD	527	}
b7c49edf AD	528
e9955c83 AD	529	/* Insert the initial rule, which line is that of the first rule
	530	(not that of the start symbol):
	531
88bce5a2	532	accept: %start EOF. */
e9955c83	533	{
88bce5a2	534	symbol_list_t *p = symbol_list_new (accept, empty_location);
8efe435c AD	535	p->location = grammar->location;
8efe435c AD	536	p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2	537	p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c	538	p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83 AD	539	p->next->next->next->next = grammar;
	540	nrules += 1;
	541	nritems += 3;
	542	grammar = p;
	543	}
	544
242a6e48	545	if (SYMBOL_NUMBER_MAX < nsyms)
e9955c83	546	fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
242a6e48	547	SYMBOL_NUMBER_MAX);
e9955c83 AD	548
e9955c83 AD	549	assert (nsyms == ntokens + nvars);
b0c4483e	550
331dbc1b AD	551	xfclose (finput);
331dbc1b AD	552
a70083a3 AD	553	/* Assign the symbols their symbol numbers. Write #defines for the
a70083a3 AD	554	token symbols into FDEFINES if requested. */
2f1afb73	555	symbols_pack ();
93ede233	556
a70083a3 AD	557	/* Convert the grammar into the format described in gram.h. */
a70083a3 AD	558	packgram ();
8419d367	559
56c47203 AD	560	/* The grammar as a symbol_list_t is no longer needed. */
56c47203 AD	561	LIST_FREE (symbol_list_t, grammar);
a70083a3	562	}