[bison.git] / src / reader.c

/* Input parser for Bison

   Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003
   Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

#include "system.h"

#include <quotearg.h>

#include "complain.h"
#include "conflicts.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "muscle_tab.h"
#include "output.h"
#include "reader.h"
#include "symlist.h"
#include "symtab.h"

static symbol_list *grammar = NULL;
static int start_flag = 0;
merger_list *merge_functions;

/* Nonzero if %union has been seen.  */
int typed = 0;
\f
/*-----------------------.
| Set the start symbol.  |
`-----------------------*/

void
grammar_start_symbol_set (symbol *sym, location loc)
{
  if (start_flag)
    complain_at (loc, _("multiple %s declarations"), "%start");
  else
    {
      start_flag = 1;
      startsymbol = sym;
      startsymbol_location = loc;
    }
}


/*----------------------------------------------------------------.
| There are two prologues: one before %union, one after.  Augment |
| the current one.                                                |
`----------------------------------------------------------------*/

void
prologue_augment (const char *prologue, location loc)
{
  struct obstack *oout =
    !typed ? &pre_prologue_obstack : &post_prologue_obstack;

  obstack_fgrow1 (oout, "]b4_syncline([[%d]], [[", loc.start.line);
  MUSCLE_OBSTACK_SGROW (oout,
			quotearg_style (c_quoting_style, loc.start.file));
  obstack_sgrow (oout, "]])[\n");
  obstack_sgrow (oout, prologue);
}


/*----------------------.
| Handle the epilogue.  |
`----------------------*/

void
epilogue_augment (const char *epilogue, location loc)
{
  char *extension = NULL;
  obstack_fgrow1 (&muscle_obstack, "]b4_syncline([[%d]], [[", loc.start.line);
  MUSCLE_OBSTACK_SGROW (&muscle_obstack,
			quotearg_style (c_quoting_style, loc.start.file));
  obstack_sgrow (&muscle_obstack, "]])[\n");
  obstack_sgrow (&muscle_obstack, epilogue);
  obstack_1grow (&muscle_obstack, 0);
  extension = obstack_finish (&muscle_obstack);
  muscle_grow ("epilogue", extension, "");
  obstack_free (&muscle_obstack, extension);
}


\f

/*-------------------------------------------------------------------.
| Return the merger index for a merging function named NAME, whose   |
| arguments have type TYPE.  Records the function, if new, in        |
| MERGER_LIST.							     |
`-------------------------------------------------------------------*/

static int
get_merge_function (uniqstr name, uniqstr type, location loc)
{
  merger_list *syms;
  merger_list head;
  int n;

  if (! glr_parser)
    return 0;

  if (type == NULL)
    type = uniqstr_new ("");

  head.next = merge_functions;
  for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
    if (UNIQSTR_EQ (name, syms->next->name))
      break;
  if (syms->next == NULL)
    {
      MALLOC (syms->next, 1);
      syms->next->name = uniqstr_new (name);
      syms->next->type = uniqstr_new (type);
      syms->next->next = NULL;
      merge_functions = head.next;
    }
  else if (!UNIQSTR_EQ (type, syms->next->type))
    warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
	     name, type, syms->next->type);
  return n;
}

/*--------------------------------------.
| Free all merge-function definitions.	|
`--------------------------------------*/

void
free_merger_functions (void)
{
  merger_list *L0;
  if (! glr_parser)
    return;
  L0 = merge_functions;
  while (L0 != NULL)
    {
      merger_list *L1 = L0->next;
      free (L0);
      L0 = L1;
    }
}

\f
/*-------------------------------------------------------------------.
| Parse the input grammar into a one symbol_list structure.  Each    |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| All actions are copied out, labelled by the rule number they apply |
| to.                                                                |
|                                                                    |
| Bison used to allow some %directives in the rules sections, but    |
| this is no longer consider appropriate: (i) the documented grammar |
| doesn't claim it, (ii), it would promote bad style, (iii), error   |
| recovery for %directives consists in skipping the junk until a `%' |
| is seen and helrp synchronizing.  This scheme is definitely wrong  |
| in the rules section.                                              |
`-------------------------------------------------------------------*/

/* The (currently) last symbol of GRAMMAR. */
symbol_list *grammar_end = NULL;

/* Append SYM to the grammar.  */
void
grammar_symbol_append (symbol *sym, location loc)
{
  symbol_list *p = symbol_list_new (sym, loc);

  if (grammar_end)
    grammar_end->next = p;
  else
    grammar = p;

  grammar_end = p;
}

/* The rule currently being defined, and the previous rule.
   CURRENT_RULE points to the first LHS of the current rule, while
   PREVIOUS_RULE_END points to the *end* of the previous rule (NULL).  */
symbol_list *current_rule = NULL;
symbol_list *previous_rule_end = NULL;


/*----------------------------------------------.
| Create a new rule for LHS in to the GRAMMAR.  |
`----------------------------------------------*/

void
grammar_rule_begin (symbol *lhs, location loc)
{
  if (!start_flag)
    {
      startsymbol = lhs;
      startsymbol_location = loc;
      start_flag = 1;
    }

  /* Start a new rule and record its lhs.  */
  ++nrules;
  ++nritems;

  previous_rule_end = grammar_end;
  grammar_symbol_append (lhs, loc);
  current_rule = grammar_end;

  /* Mark the rule's lhs as a nonterminal if not already so.  */

  if (lhs->class == unknown_sym)
    {
      lhs->class = nterm_sym;
      lhs->number = nvars;
      ++nvars;
    }
  else if (lhs->class == token_sym)
    complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
}

/* Check that the last rule (CURRENT_RULE) is properly defined.  For
   instance, there should be no type clash on the default action.  */

static void
grammar_current_rule_check (void)
{
  symbol *lhs = current_rule->sym;
  char const *lhs_type = lhs->type_name;
  symbol *first_rhs = current_rule->next->sym;

  /* If there is an action, then there is nothing we can do: the user
     is allowed to shoot herself in the foot.  */
  if (current_rule->action)
    return;

  /* Don't worry about the default action if $$ is untyped, since $$'s
     value can't be used.  */
  if (! lhs_type)
    return;

  /* If $$ is being set in default way, report if any type mismatch.  */
  if (first_rhs)
    {
      const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
      if (!UNIQSTR_EQ (lhs_type, rhs_type))
	warn_at (current_rule->location,
		 _("type clash on default action: <%s> != <%s>"),
		 lhs_type, rhs_type);
    }
  /* Warn if there is no default for $$ but we need one.  */
  else
    warn_at (current_rule->location,
	     _("empty rule for typed nonterminal, and no action"));
}


/*-------------------------------------.
| End the currently being grown rule.  |
`-------------------------------------*/

void
grammar_rule_end (location loc)
{
  /* Put an empty link in the list to mark the end of this rule  */
  grammar_symbol_append (NULL, grammar_end->location);
  current_rule->location = loc;
  grammar_current_rule_check ();
}


/*-------------------------------------------------------------------.
| The previous action turns out the be a mid-rule action.  Attach it |
| to the current rule, i.e., create a dummy symbol, attach it this   |
| mid-rule action, and append this dummy nonterminal to the current  |
| rule.                                                              |
`-------------------------------------------------------------------*/

void
grammar_midrule_action (void)
{
  /* Since the action was written out with this rule's number, we must
     give the new rule this number by inserting the new rule before
     it.  */

  /* Make a DUMMY nonterminal, whose location is that of the midrule
     action.  Create the MIDRULE.  */
  location dummy_location = current_rule->action_location;
  symbol *dummy = dummy_symbol_get (dummy_location);
  symbol_list *midrule = symbol_list_new (dummy, dummy_location);

  /* Make a new rule, whose body is empty, before the current one, so
     that the action just read can belong to it.  */
  ++nrules;
  ++nritems;
  /* Attach its location and actions to that of the DUMMY.  */
  midrule->location = dummy_location;
  midrule->action = current_rule->action;
  midrule->action_location = dummy_location;
  current_rule->action = NULL;

  if (previous_rule_end)
    previous_rule_end->next = midrule;
  else
    grammar = midrule;

  /* End the dummy's rule.  */
  previous_rule_end = symbol_list_new (NULL, dummy_location);
  previous_rule_end->next = current_rule;

  midrule->next = previous_rule_end;

  /* Insert the dummy nonterminal replacing the midrule action into
     the current rule.  */
  grammar_current_rule_symbol_append (dummy, dummy_location);
}

/* Set the precedence symbol of the current rule to PRECSYM. */

void
grammar_current_rule_prec_set (symbol *precsym, location loc)
{
  if (current_rule->ruleprec)
    complain_at (loc, _("only one %s allowed per rule"), "%prec");
  current_rule->ruleprec = precsym;
}

/* Attach dynamic precedence DPREC to the current rule. */

void
grammar_current_rule_dprec_set (int dprec, location loc)
{
  if (! glr_parser)
    warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
  if (dprec <= 0)
    complain_at (loc, _("%s must be followed by positive number"), "%dprec");
  else if (current_rule->dprec != 0)
    complain_at (loc, _("only one %s allowed per rule"), "%dprec");
  current_rule->dprec = dprec;
}

/* Attach a merge function NAME with argument type TYPE to current
   rule. */

void
grammar_current_rule_merge_set (uniqstr name, location loc)
{
  if (! glr_parser)
    warn_at (loc, _("%s affects only GLR parsers"), "%merge");
  if (current_rule->merger != 0)
    complain_at (loc, _("only one %s allowed per rule"), "%merge");
  current_rule->merger =
    get_merge_function (name, current_rule->sym->type_name, loc);
}

/* Attach SYM to the current rule.  If needed, move the previous
   action as a mid-rule action.  */

void
grammar_current_rule_symbol_append (symbol *sym, location loc)
{
  if (current_rule->action)
    grammar_midrule_action ();
  ++nritems;
  grammar_symbol_append (sym, loc);
}

/* Attach an ACTION to the current rule.  If needed, move the previous
   action as a mid-rule action.  */

void
grammar_current_rule_action_append (const char *action, location loc)
{
  if (current_rule->action)
    grammar_midrule_action ();
  current_rule->action = action;
  current_rule->action_location = loc;
}

\f
/*---------------------------------------------------------------.
| Convert the rules into the representation using RRHS, RLHS and |
| RITEM.                                                         |
`---------------------------------------------------------------*/

static void
packgram (void)
{
  unsigned int itemno = 0;
  rule_number ruleno = 0;
  symbol_list *p = grammar;

  CALLOC (ritem, nritems);
  CALLOC (rules, nrules);

  while (p)
    {
      symbol *ruleprec = p->ruleprec;
      rules[ruleno].user_number = ruleno;
      rules[ruleno].number = ruleno;
      rules[ruleno].lhs = p->sym;
      rules[ruleno].rhs = ritem + itemno;
      rules[ruleno].location = p->location;
      rules[ruleno].useful = true;
      rules[ruleno].action = p->action;
      rules[ruleno].action_location = p->action_location;
      rules[ruleno].dprec = p->dprec;
      rules[ruleno].merger = p->merger;

      p = p->next;
      while (p && p->sym)
	{
	  /* item_number = symbol_number.
	     But the former needs to contain more: negative rule numbers. */
	  ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
	  /* A rule gets by default the precedence and associativity
	     of the last token in it.  */
	  if (p->sym->class == token_sym)
	    rules[ruleno].prec = p->sym;
	  if (p)
	    p = p->next;
	}

      /* If this rule has a %prec,
         the specified symbol's precedence replaces the default.  */
      if (ruleprec)
	{
	  rules[ruleno].precsym = ruleprec;
	  rules[ruleno].prec = ruleprec;
	}
      ritem[itemno++] = rule_number_as_item_number (ruleno);
      ++ruleno;

      if (p)
	p = p->next;
    }

  if (itemno != nritems)
    abort ();

  if (trace_flag & trace_sets)
    ritem_print (stderr);
}
\f
/*------------------------------------------------------------------.
| Read in the grammar specification and record it in the format     |
| described in gram.h.  All actions are copied into ACTION_OBSTACK, |
| in each case forming the body of a C function (YYACTION) which    |
| contains a switch statement to decide which action to execute.    |
`------------------------------------------------------------------*/

void
reader (void)
{
  /* Initialize the symbol table.  */
  symbols_new ();

  /* Construct the accept symbol. */
  accept = symbol_get ("$accept", empty_location);
  accept->class = nterm_sym;
  accept->number = nvars++;

  /* Construct the error token */
  errtoken = symbol_get ("error", empty_location);
  errtoken->class = token_sym;
  errtoken->number = ntokens++;

  /* Construct a token that represents all undefined literal tokens.
     It is always token number 2.  */
  undeftoken = symbol_get ("$undefined", empty_location);
  undeftoken->class = token_sym;
  undeftoken->number = ntokens++;

  /* Initialize the obstacks. */
  obstack_init (&pre_prologue_obstack);
  obstack_init (&post_prologue_obstack);

  finput = xfopen (grammar_file, "r");
  gram_in = finput;

  gram__flex_debug = trace_flag & trace_scan;
  gram_debug = trace_flag & trace_parse;
  scanner_initialize ();
  gram_parse ();

  /* If something went wrong during the parsing, don't try to
     continue.  */
  if (complaint_issued)
    return;

  /* Grammar has been read.  Do some checking */
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */
  symbols_check_defined ();

  /* If the user did not define her ENDTOKEN, do it now. */
  if (!endtoken)
    {
      endtoken = symbol_get ("$end", empty_location);
      endtoken->class = token_sym;
      endtoken->number = 0;
      /* Value specified by POSIX.  */
      endtoken->user_token_number = 0;
    }

  /* Insert the initial rule, which line is that of the first rule
     (not that of the start symbol):

     accept: %start EOF.  */
  {
    symbol_list *p = symbol_list_new (accept, empty_location);
    p->location = grammar->location;
    p->next = symbol_list_new (startsymbol, empty_location);
    p->next->next = symbol_list_new (endtoken, empty_location);
    p->next->next->next = symbol_list_new (NULL, empty_location);
    p->next->next->next->next = grammar;
    nrules += 1;
    nritems += 3;
    grammar = p;
  }

  if (! (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars))
    abort ();

  xfclose (finput);

  /* Assign the symbols their symbol numbers.  Write #defines for the
     token symbols into FDEFINES if requested.  */
  symbols_pack ();

  /* Convert the grammar into the format described in gram.h.  */
  packgram ();

  /* The grammar as a symbol_list is no longer needed. */
  LIST_FREE (symbol_list, grammar);
}
Commit	Line	Data
35dcf428	1	/* Input parser for Bison
9c4637fa	2
a737b216	3	Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003
a70083a3	4	Free Software Foundation, Inc.
1ff442ca	5
41aca2e0	6	This file is part of Bison, the GNU Compiler Compiler.
1ff442ca	7
41aca2e0 AD	8	Bison is free software; you can redistribute it and/or modify
	9	it under the terms of the GNU General Public License as published by
	10	the Free Software Foundation; either version 2, or (at your option)
	11	any later version.
1ff442ca	12
41aca2e0 AD	13	Bison is distributed in the hope that it will be useful,
	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	GNU General Public License for more details.
1ff442ca	17
41aca2e0 AD	18	You should have received a copy of the GNU General Public License
	19	along with Bison; see the file COPYING. If not, write to
	20	the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	21	Boston, MA 02111-1307, USA. */
1ff442ca	22
1ff442ca	23	#include "system.h"
17ee7397 PE	24
	25	#include <quotearg.h>
	26
	27	#include "complain.h"
	28	#include "conflicts.h"
1ff442ca	29	#include "files.h"
17ee7397	30	#include "getargs.h"
1ff442ca	31	#include "gram.h"
17ee7397	32	#include "muscle_tab.h"
6c89f1c1	33	#include "output.h"
b2ca4022	34	#include "reader.h"
17ee7397 PE	35	#include "symlist.h"
17ee7397 PE	36	#include "symtab.h"
1ff442ca	37
17ee7397	38	static symbol_list *grammar = NULL;
280a38c3	39	static int start_flag = 0;
676385e2	40	merger_list *merge_functions;
1ff442ca	41
d7020c20	42	/* Nonzero if %union has been seen. */
e9955c83	43	int typed = 0;
0d533154	44	\f
e9955c83 AD	45	/*-----------------------.
	46	\| Set the start symbol. \|
	47	`-----------------------*/
1ff442ca	48
e9955c83	49	void
a737b216	50	grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca NF	51	{
1ff442ca NF	52	if (start_flag)
17ee7397	53	complain_at (loc, _("multiple %s declarations"), "%start");
943819bf RS	54	else
	55	{
	56	start_flag = 1;
a737b216	57	startsymbol = sym;
17ee7397	58	startsymbol_location = loc;
943819bf	59	}
1ff442ca NF	60	}
1ff442ca NF	61
1ff442ca	62
d7020c20	63	/*----------------------------------------------------------------.
e9955c83 AD	64	\| There are two prologues: one before %union, one after. Augment \|
e9955c83 AD	65	\| the current one. \|
d7020c20	66	`----------------------------------------------------------------*/
1ff442ca	67
e9955c83	68	void
17ee7397	69	prologue_augment (const char *prologue, location loc)
b6610515	70	{
e9955c83 AD	71	struct obstack *oout =
e9955c83 AD	72	!typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515	73
17ee7397 PE	74	obstack_fgrow1 (oout, "]b4_syncline([[%d]], [[", loc.start.line);
	75	MUSCLE_OBSTACK_SGROW (oout,
	76	quotearg_style (c_quoting_style, loc.start.file));
6c239755	77	obstack_sgrow (oout, "]])[\n");
e9955c83	78	obstack_sgrow (oout, prologue);
b6610515 RA	79	}
b6610515 RA	80
2ba3b73c	81
426cf563	82
a870c567	83
e9955c83 AD	84	/*----------------------.
	85	\| Handle the epilogue. \|
	86	`----------------------*/
426cf563	87
e9955c83	88	void
17ee7397	89	epilogue_augment (const char *epilogue, location loc)
2ba3b73c	90	{
7ec2d4cd	91	char *extension = NULL;
17ee7397	92	obstack_fgrow1 (&muscle_obstack, "]b4_syncline([[%d]], [[", loc.start.line);
6c239755	93	MUSCLE_OBSTACK_SGROW (&muscle_obstack,
17ee7397	94	quotearg_style (c_quoting_style, loc.start.file));
6c239755	95	obstack_sgrow (&muscle_obstack, "]])[\n");
592e8d4d AD	96	obstack_sgrow (&muscle_obstack, epilogue);
592e8d4d AD	97	obstack_1grow (&muscle_obstack, 0);
7ec2d4cd AD	98	extension = obstack_finish (&muscle_obstack);
	99	muscle_grow ("epilogue", extension, "");
	100	obstack_free (&muscle_obstack, extension);
1ff442ca	101	}
1ff442ca	102
a70083a3	103
a70083a3 AD	104	\f
a70083a3 AD	105
3e6656f9	106	/*-------------------------------------------------------------------.
676385e2 PH	107	\| Return the merger index for a merging function named NAME, whose \|
676385e2 PH	108	\| arguments have type TYPE. Records the function, if new, in \|
95612cfa	109	\| MERGER_LIST. \|
676385e2 PH	110	`-------------------------------------------------------------------*/
	111
	112	static int
17ee7397	113	get_merge_function (uniqstr name, uniqstr type, location loc)
676385e2 PH	114	{
	115	merger_list *syms;
	116	merger_list head;
	117	int n;
	118
	119	if (! glr_parser)
	120	return 0;
	121
	122	if (type == NULL)
17ee7397	123	type = uniqstr_new ("");
676385e2 PH	124
676385e2 PH	125	head.next = merge_functions;
39f41916	126	for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
17ee7397	127	if (UNIQSTR_EQ (name, syms->next->name))
676385e2	128	break;
a5d50994 AD	129	if (syms->next == NULL)
a5d50994 AD	130	{
9c4637fa	131	MALLOC (syms->next, 1);
17ee7397 PE	132	syms->next->name = uniqstr_new (name);
17ee7397 PE	133	syms->next->type = uniqstr_new (type);
a5d50994 AD	134	syms->next->next = NULL;
	135	merge_functions = head.next;
	136	}
17ee7397	137	else if (!UNIQSTR_EQ (type, syms->next->type))
45a8a65d PE	138	warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
45a8a65d PE	139	name, type, syms->next->type);
676385e2 PH	140	return n;
	141	}
	142
	143	/*--------------------------------------.
	144	\| Free all merge-function definitions. \|
	145	`--------------------------------------*/
	146
	147	void
	148	free_merger_functions (void)
	149	{
	150	merger_list *L0;
	151	if (! glr_parser)
	152	return;
	153	L0 = merge_functions;
	154	while (L0 != NULL)
	155	{
	156	merger_list *L1 = L0->next;
	157	free (L0);
	158	L0 = L1;
	159	}
	160	}
	161
a70083a3	162	\f
107f7dfb	163	/*-------------------------------------------------------------------.
17ee7397	164	\| Parse the input grammar into a one symbol_list structure. Each \|
107f7dfb AD	165	\| rule is represented by a sequence of symbols: the left hand side \|
	166	\| followed by the contents of the right hand side, followed by a \|
	167	\| null pointer instead of a symbol to terminate the rule. The next \|
	168	\| symbol is the lhs of the following rule. \|
	169	\| \|
fdbcd8e2 AD	170	\| All actions are copied out, labelled by the rule number they apply \|
fdbcd8e2 AD	171	\| to. \|
107f7dfb AD	172	\| \|
	173	\| Bison used to allow some %directives in the rules sections, but \|
	174	\| this is no longer consider appropriate: (i) the documented grammar \|
	175	\| doesn't claim it, (ii), it would promote bad style, (iii), error \|
	176	\| recovery for %directives consists in skipping the junk until a `%' \|
	177	\| is seen and helrp synchronizing. This scheme is definitely wrong \|
	178	\| in the rules section. \|
	179	`-------------------------------------------------------------------*/
1ff442ca	180
f6d0f937	181	/* The (currently) last symbol of GRAMMAR. */
17ee7397	182	symbol_list *grammar_end = NULL;
f6d0f937	183
52328c6e	184	/* Append SYM to the grammar. */
e9955c83	185	void
17ee7397	186	grammar_symbol_append (symbol *sym, location loc)
f6d0f937	187	{
17ee7397	188	symbol_list *p = symbol_list_new (sym, loc);
f6d0f937 AD	189
	190	if (grammar_end)
	191	grammar_end->next = p;
	192	else
	193	grammar = p;
	194
	195	grammar_end = p;
	196	}
	197
8efe435c AD	198	/* The rule currently being defined, and the previous rule.
	199	CURRENT_RULE points to the first LHS of the current rule, while
	200	PREVIOUS_RULE_END points to the end of the previous rule (NULL). */
17ee7397 PE	201	symbol_list *current_rule = NULL;
17ee7397 PE	202	symbol_list *previous_rule_end = NULL;
da4160c3 AD	203
da4160c3 AD	204
8efe435c AD	205	/*----------------------------------------------.
	206	\| Create a new rule for LHS in to the GRAMMAR. \|
	207	`----------------------------------------------*/
da4160c3	208
e9955c83	209	void
17ee7397	210	grammar_rule_begin (symbol *lhs, location loc)
da4160c3 AD	211	{
	212	if (!start_flag)
	213	{
	214	startsymbol = lhs;
17ee7397	215	startsymbol_location = loc;
da4160c3 AD	216	start_flag = 1;
	217	}
	218
	219	/* Start a new rule and record its lhs. */
	220	++nrules;
	221	++nritems;
	222
8efe435c	223	previous_rule_end = grammar_end;
17ee7397	224	grammar_symbol_append (lhs, loc);
da4160c3 AD	225	current_rule = grammar_end;
	226
	227	/* Mark the rule's lhs as a nonterminal if not already so. */
	228
	229	if (lhs->class == unknown_sym)
	230	{
	231	lhs->class = nterm_sym;
	232	lhs->number = nvars;
	233	++nvars;
	234	}
	235	else if (lhs->class == token_sym)
17ee7397	236	complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3 AD	237	}
da4160c3 AD	238
e9955c83 AD	239	/* Check that the last rule (CURRENT_RULE) is properly defined. For
	240	instance, there should be no type clash on the default action. */
	241
	242	static void
	243	grammar_current_rule_check (void)
	244	{
17ee7397	245	symbol *lhs = current_rule->sym;
3f4c0f80	246	char const *lhs_type = lhs->type_name;
17ee7397	247	symbol *first_rhs = current_rule->next->sym;
e9955c83 AD	248
e9955c83 AD	249	/* If there is an action, then there is nothing we can do: the user
3f4c0f80	250	is allowed to shoot herself in the foot. */
e9955c83 AD	251	if (current_rule->action)
	252	return;
	253
3f4c0f80 PE	254	/* Don't worry about the default action if $$ is untyped, since $$'s
	255	value can't be used. */
	256	if (! lhs_type)
	257	return;
	258
	259	/* If $$ is being set in default way, report if any type mismatch. */
e9955c83 AD	260	if (first_rhs)
e9955c83 AD	261	{
e9955c83	262	const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
17ee7397	263	if (!UNIQSTR_EQ (lhs_type, rhs_type))
e9273511 PE	264	warn_at (current_rule->location,
	265	_("type clash on default action: <%s> != <%s>"),
	266	lhs_type, rhs_type);
e9955c83 AD	267	}
	268	/* Warn if there is no default for $$ but we need one. */
	269	else
e9273511 PE	270	warn_at (current_rule->location,
e9273511 PE	271	_("empty rule for typed nonterminal, and no action"));
e9955c83 AD	272	}
	273
	274
8efe435c AD	275	/*-------------------------------------.
	276	\| End the currently being grown rule. \|
	277	`-------------------------------------*/
e9955c83 AD	278
e9955c83 AD	279	void
17ee7397	280	grammar_rule_end (location loc)
e9955c83 AD	281	{
e9955c83 AD	282	/* Put an empty link in the list to mark the end of this rule */
8efe435c	283	grammar_symbol_append (NULL, grammar_end->location);
17ee7397	284	current_rule->location = loc;
e9955c83 AD	285	grammar_current_rule_check ();
	286	}
	287
	288
8efe435c AD	289	/*-------------------------------------------------------------------.
	290	\| The previous action turns out the be a mid-rule action. Attach it \|
	291	\| to the current rule, i.e., create a dummy symbol, attach it this \|
	292	\| mid-rule action, and append this dummy nonterminal to the current \|
	293	\| rule. \|
	294	`-------------------------------------------------------------------*/
1485e106	295
e9955c83	296	void
1485e106 AD	297	grammar_midrule_action (void)
	298	{
	299	/* Since the action was written out with this rule's number, we must
	300	give the new rule this number by inserting the new rule before
	301	it. */
	302
8efe435c AD	303	/* Make a DUMMY nonterminal, whose location is that of the midrule
8efe435c AD	304	action. Create the MIDRULE. */
17ee7397 PE	305	location dummy_location = current_rule->action_location;
	306	symbol *dummy = dummy_symbol_get (dummy_location);
	307	symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106 AD	308
	309	/* Make a new rule, whose body is empty, before the current one, so
	310	that the action just read can belong to it. */
	311	++nrules;
	312	++nritems;
8efe435c AD	313	/* Attach its location and actions to that of the DUMMY. */
	314	midrule->location = dummy_location;
	315	midrule->action = current_rule->action;
	316	midrule->action_location = dummy_location;
1485e106 AD	317	current_rule->action = NULL;
1485e106 AD	318
8efe435c AD	319	if (previous_rule_end)
8efe435c AD	320	previous_rule_end->next = midrule;
1485e106	321	else
8efe435c	322	grammar = midrule;
1485e106	323
8efe435c AD	324	/* End the dummy's rule. */
	325	previous_rule_end = symbol_list_new (NULL, dummy_location);
	326	previous_rule_end->next = current_rule;
1485e106	327
8efe435c	328	midrule->next = previous_rule_end;
1485e106	329
8efe435c AD	330	/* Insert the dummy nonterminal replacing the midrule action into
	331	the current rule. */
	332	grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106 AD	333	}
1485e106 AD	334
9af3fbce AD	335	/* Set the precedence symbol of the current rule to PRECSYM. */
9af3fbce AD	336
e9955c83	337	void
17ee7397	338	grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce AD	339	{
9af3fbce AD	340	if (current_rule->ruleprec)
17ee7397	341	complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce AD	342	current_rule->ruleprec = precsym;
	343	}
	344
676385e2 PH	345	/* Attach dynamic precedence DPREC to the current rule. */
	346
	347	void
17ee7397	348	grammar_current_rule_dprec_set (int dprec, location loc)
676385e2 PH	349	{
676385e2 PH	350	if (! glr_parser)
17ee7397	351	warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2	352	if (dprec <= 0)
17ee7397	353	complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916	354	else if (current_rule->dprec != 0)
17ee7397	355	complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2 PH	356	current_rule->dprec = dprec;
	357	}
	358
	359	/* Attach a merge function NAME with argument type TYPE to current
	360	rule. */
	361
	362	void
17ee7397	363	grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2 PH	364	{
676385e2 PH	365	if (! glr_parser)
17ee7397	366	warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916	367	if (current_rule->merger != 0)
17ee7397	368	complain_at (loc, _("only one %s allowed per rule"), "%merge");
39f41916	369	current_rule->merger =
17ee7397	370	get_merge_function (name, current_rule->sym->type_name, loc);
676385e2 PH	371	}
676385e2 PH	372
17ee7397	373	/* Attach SYM to the current rule. If needed, move the previous
2e047461 AD	374	action as a mid-rule action. */
2e047461 AD	375
e9955c83	376	void
17ee7397	377	grammar_current_rule_symbol_append (symbol *sym, location loc)
2e047461 AD	378	{
	379	if (current_rule->action)
	380	grammar_midrule_action ();
	381	++nritems;
17ee7397	382	grammar_symbol_append (sym, loc);
2e047461 AD	383	}
2e047461 AD	384
2e047461 AD	385	/* Attach an ACTION to the current rule. If needed, move the previous
	386	action as a mid-rule action. */
	387
e9955c83	388	void
17ee7397	389	grammar_current_rule_action_append (const char *action, location loc)
2e047461 AD	390	{
	391	if (current_rule->action)
	392	grammar_midrule_action ();
	393	current_rule->action = action;
17ee7397	394	current_rule->action_location = loc;
2e047461 AD	395	}
2e047461 AD	396
a70083a3	397	\f
a70083a3 AD	398	/*---------------------------------------------------------------.
a70083a3 AD	399	\| Convert the rules into the representation using RRHS, RLHS and \|
d9b739c3	400	\| RITEM. \|
a70083a3	401	`---------------------------------------------------------------*/
1ff442ca	402
4a120d45	403	static void
118fb205	404	packgram (void)
1ff442ca	405	{
9222837b	406	unsigned int itemno = 0;
17ee7397 PE	407	rule_number ruleno = 0;
17ee7397 PE	408	symbol_list *p = grammar;
1ff442ca	409
9c4637fa PE	410	CALLOC (ritem, nritems);
9c4637fa PE	411	CALLOC (rules, nrules);
1ff442ca	412
1ff442ca NF	413	while (p)
1ff442ca NF	414	{
17ee7397	415	symbol *ruleprec = p->ruleprec;
d7e1f00c	416	rules[ruleno].user_number = ruleno;
c3b407f4	417	rules[ruleno].number = ruleno;
bba97eb2	418	rules[ruleno].lhs = p->sym;
99013900	419	rules[ruleno].rhs = ritem + itemno;
8efe435c	420	rules[ruleno].location = p->location;
b4afb6bb	421	rules[ruleno].useful = true;
1a2b5d37	422	rules[ruleno].action = p->action;
8efe435c	423	rules[ruleno].action_location = p->action_location;
676385e2 PH	424	rules[ruleno].dprec = p->dprec;
676385e2 PH	425	rules[ruleno].merger = p->merger;
1ff442ca NF	426
	427	p = p->next;
	428	while (p && p->sym)
	429	{
17ee7397	430	/* item_number = symbol_number.
5fbb0954	431	But the former needs to contain more: negative rule numbers. */
a49aecd5	432	ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca NF	433	/* A rule gets by default the precedence and associativity
1ff442ca NF	434	of the last token in it. */
d7020c20	435	if (p->sym->class == token_sym)
03b31c0c	436	rules[ruleno].prec = p->sym;
a70083a3 AD	437	if (p)
a70083a3 AD	438	p = p->next;
1ff442ca NF	439	}
	440
	441	/* If this rule has a %prec,
a70083a3	442	the specified symbol's precedence replaces the default. */
1ff442ca NF	443	if (ruleprec)
1ff442ca NF	444	{
03b31c0c AD	445	rules[ruleno].precsym = ruleprec;
03b31c0c AD	446	rules[ruleno].prec = ruleprec;
1ff442ca	447	}
4b3d3a8e	448	ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179	449	++ruleno;
1ff442ca	450
a70083a3 AD	451	if (p)
a70083a3 AD	452	p = p->next;
1ff442ca NF	453	}
1ff442ca NF	454
35dcf428 PE	455	if (itemno != nritems)
35dcf428 PE	456	abort ();
3067fbef	457
273a74fa	458	if (trace_flag & trace_sets)
3067fbef	459	ritem_print (stderr);
1ff442ca	460	}
a70083a3	461	\f
fdbcd8e2 AD	462	/*------------------------------------------------------------------.
	463	\| Read in the grammar specification and record it in the format \|
	464	\| described in gram.h. All actions are copied into ACTION_OBSTACK, \|
	465	\| in each case forming the body of a C function (YYACTION) which \|
	466	\| contains a switch statement to decide which action to execute. \|
	467	`------------------------------------------------------------------*/
a70083a3 AD	468
	469	void
	470	reader (void)
	471	{
a70083a3	472	/* Initialize the symbol table. */
db8837cb	473	symbols_new ();
b6610515	474
88bce5a2 AD	475	/* Construct the accept symbol. */
	476	accept = symbol_get ("$accept", empty_location);
	477	accept->class = nterm_sym;
	478	accept->number = nvars++;
30171f79	479
a70083a3	480	/* Construct the error token */
39f41916	481	errtoken = symbol_get ("error", empty_location);
d7020c20	482	errtoken->class = token_sym;
72a23c97	483	errtoken->number = ntokens++;
b6610515	484
a70083a3 AD	485	/* Construct a token that represents all undefined literal tokens.
a70083a3 AD	486	It is always token number 2. */
88bce5a2	487	undeftoken = symbol_get ("$undefined", empty_location);
d7020c20	488	undeftoken->class = token_sym;
72a23c97	489	undeftoken->number = ntokens++;
a70083a3	490
331dbc1b	491	/* Initialize the obstacks. */
0dd1580a RA	492	obstack_init (&pre_prologue_obstack);
0dd1580a RA	493	obstack_init (&post_prologue_obstack);
331dbc1b	494
95612cfa	495	finput = xfopen (grammar_file, "r");
e9955c83 AD	496	gram_in = finput;
e9955c83 AD	497
473d0a75 AD	498	gram__flex_debug = trace_flag & trace_scan;
473d0a75 AD	499	gram_debug = trace_flag & trace_parse;
1d6412ad	500	scanner_initialize ();
78c3da9e	501	gram_parse ();
331dbc1b	502
b275314e AD	503	/* If something went wrong during the parsing, don't try to
b275314e AD	504	continue. */
b4afb6bb	505	if (complaint_issued)
f956c304	506	return;
b275314e	507
e9955c83 AD	508	/* Grammar has been read. Do some checking */
	509	if (nrules == 0)
	510	fatal (_("no rules in the input grammar"));
	511
	512	/* Report any undefined symbols and consider them nonterminals. */
	513	symbols_check_defined ();
b7c49edf	514
88bce5a2 AD	515	/* If the user did not define her ENDTOKEN, do it now. */
88bce5a2 AD	516	if (!endtoken)
b7c49edf	517	{
88bce5a2 AD	518	endtoken = symbol_get ("$end", empty_location);
	519	endtoken->class = token_sym;
	520	endtoken->number = 0;
b7c49edf	521	/* Value specified by POSIX. */
88bce5a2	522	endtoken->user_token_number = 0;
b7c49edf AD	523	}
b7c49edf AD	524
e9955c83 AD	525	/* Insert the initial rule, which line is that of the first rule
	526	(not that of the start symbol):
	527
88bce5a2	528	accept: %start EOF. */
e9955c83	529	{
17ee7397	530	symbol_list *p = symbol_list_new (accept, empty_location);
8efe435c AD	531	p->location = grammar->location;
8efe435c AD	532	p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2	533	p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c	534	p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83 AD	535	p->next->next->next->next = grammar;
	536	nrules += 1;
	537	nritems += 3;
	538	grammar = p;
	539	}
	540
17ee7397	541	if (! (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars))
35dcf428	542	abort ();
b0c4483e	543
331dbc1b AD	544	xfclose (finput);
331dbc1b AD	545
a70083a3 AD	546	/* Assign the symbols their symbol numbers. Write #defines for the
a70083a3 AD	547	token symbols into FDEFINES if requested. */
2f1afb73	548	symbols_pack ();
93ede233	549
a70083a3 AD	550	/* Convert the grammar into the format described in gram.h. */
a70083a3 AD	551	packgram ();
8419d367	552
17ee7397 PE	553	/* The grammar as a symbol_list is no longer needed. */
17ee7397 PE	554	LIST_FREE (symbol_list, grammar);
a70083a3	555	}