[bison.git] / src / reader.c

/* Input parser for bison
   Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
   Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */


#include "system.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "symlist.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"

/* First node of the grammar being read, as a list of symbols (nodes
   are added by grammar_symbol_append).  */
static symbol_list_t *grammar = NULL;
/* Nonzero once a start symbol has been recorded, either by
   grammar_start_symbol_set or by the first grammar_rule_begin.  */
static int start_flag = 0;
/* List of the merge functions recorded by get_merge_function.  */
merger_list *merge_functions;

/* Nonzero if %union has been seen.  */
int typed = 0;
\f
/* Make S, declared at L, the start symbol.  If a start symbol has
   already been recorded, leave it alone and report an error at L
   instead.  */

void
grammar_start_symbol_set (symbol_t *s, location_t l)
{
  if (!start_flag)
    {
      startsymbol = s;
      startsymbol_location = l;
      start_flag = 1;
      return;
    }

  complain_at (l, _("multiple %s declarations"), "%start");
}


/* Append PROLOGUE to the prologue currently being built: the one
   kept in PRE_PROLOGUE_OBSTACK as long as TYPED is zero, the one kept
   in POST_PROLOGUE_OBSTACK afterwards.

   Unless NO_LINES_FLAG is set, the text is preceded by the "linef"
   muscle, formatted with the first line of LOCATION and the C-quoted
   "filename" muscle.  */

void
prologue_augment (const char *prologue, location_t location)
{
  struct obstack *target;

  if (typed)
    target = &post_prologue_obstack;
  else
    target = &pre_prologue_obstack;

  if (!no_lines_flag)
    {
      const char *quoted_file =
        quotearg_style (c_quoting_style, muscle_find ("filename"));
      obstack_fgrow2 (target, muscle_find ("linef"),
                      location.first_line, quoted_file);
    }

  obstack_sgrow (target, prologue);
}


/* Record EPILOGUE as the value of the "epilogue" muscle.

   The value is built in MUSCLE_OBSTACK.  Unless NO_LINES_FLAG is
   set, it starts with the "linef" muscle, formatted with the first
   line of LOCATION and the C-quoted "filename" muscle.  */

void
epilogue_set (const char *epilogue, location_t location)
{
  struct obstack *out = &muscle_obstack;

  if (!no_lines_flag)
    {
      const char *quoted_file =
        quotearg_style (c_quoting_style, muscle_find ("filename"));
      obstack_fgrow2 (out, muscle_find ("linef"),
                      location.first_line, quoted_file);
    }

  obstack_sgrow (out, epilogue);
  /* Terminate the string before handing it over.  */
  obstack_1grow (out, 0);
  muscle_insert ("epilogue", obstack_finish (out));
}


\f

 /*-------------------------------------------------------------------.
| Return the merger index for a merging function named NAME, whose   |
| arguments have type TYPE.  Records the function, if new, in        |
| merger_list.							     |
`-------------------------------------------------------------------*/

static int
get_merge_function (const char* name, const char* type, location_t loc)
{
  /* SLOT addresses the pointer leading to the entry under
     examination, so that a new entry can be hooked in without
     treating the empty list specially.  INDEX is the 1-based
     position of that entry.  */
  merger_list **slot = &merge_functions;
  int index = 1;

  /* Nothing is recorded unless a GLR parser is being generated.  */
  if (! glr_parser)
    return 0;

  /* An untyped result is recorded as the empty type.  */
  if (type == NULL)
    type = "";

  while (*slot != NULL && strcmp (name, (*slot)->name) != 0)
    {
      slot = &(*slot)->next;
      index += 1;
    }

  if (*slot != NULL)
    {
      /* Known function: only check that the types agree.  */
      if (strcmp (type, (*slot)->type) != 0)
        warn_at (loc, _("result type clash on merge function %s: `%s' vs. `%s'"),
                 name, type, (*slot)->type);
    }
  else
    {
      /* New function: append it, owning copies of NAME and TYPE.  */
      merger_list *entry = XMALLOC (merger_list, 1);
      entry->name = xstrdup (name);
      entry->type = xstrdup (type);
      entry->next = NULL;
      *slot = entry;
    }

  return index;
}

/*--------------------------------------.
| Free all merge-function definitions.	|
`--------------------------------------*/

void
free_merger_functions (void)
{
  merger_list *L0;
  if (! glr_parser)
    return;
  L0 = merge_functions;
  while (L0 != NULL)
    {
      merger_list *L1 = L0->next;
      free (L0);
      L0 = L1;
    }
}

\f
/*-------------------------------------------------------------------.
| Parse the input grammar into one symbol_list_t structure.  Each    |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| All actions are copied out, labelled by the rule number they apply |
| to.                                                                |
|                                                                    |
| Bison used to allow some %directives in the rules sections, but    |
| this is no longer considered appropriate: (i) the documented       |
| grammar doesn't claim it, (ii) it would promote bad style, (iii)   |
| error recovery for %directives consists in skipping the junk until |
| a `%' is seen, which helps resynchronization.  This scheme is      |
| definitely wrong in the rules section.                             |
`-------------------------------------------------------------------*/

/* The (currently) last node of GRAMMAR; NULL as long as nothing has
   been appended.  */
symbol_list_t *grammar_end = NULL;

/* Append a new node holding SYMBOL, located at LOCATION, at the end
   of GRAMMAR.  */
void
grammar_symbol_append (symbol_t *symbol, location_t location)
{
  symbol_list_t *node = symbol_list_new (symbol, location);

  /* The very first node also becomes the head of the list.  */
  if (grammar_end == NULL)
    grammar = node;
  else
    grammar_end->next = node;

  grammar_end = node;
}

/* The rule currently being defined, and the previous rule.
   CURRENT_RULE points to the node holding the LHS of the current rule
   (it is set by grammar_rule_begin), while PREVIOUS_RULE_END points to
   the node that was the *end* of the list when the current rule was
   begun, i.e. the end of the previous rule; it is NULL when there is
   no previous rule.  grammar_midrule_action moves it to the end of
   the rule it inserts before the current one.  */
symbol_list_t *current_rule = NULL;
symbol_list_t *previous_rule_end = NULL;


/* Begin a new rule for LHS, located at LOCATION, at the end of the
   GRAMMAR.  If no start symbol has been recorded yet, LHS becomes
   it.  */

void
grammar_rule_begin (symbol_t *lhs, location_t location)
{
  if (!start_flag)
    {
      start_flag = 1;
      startsymbol = lhs;
      startsymbol_location = location;
    }

  /* Account for the new rule, and for the item that packgram stores
     at the end of each rule.  */
  nrules += 1;
  nritems += 1;

  /* Remember where the previous rule stopped, then record the lhs as
     the first node of the current rule.  */
  previous_rule_end = grammar_end;
  grammar_symbol_append (lhs, location);
  current_rule = grammar_end;

  /* Mark the rule's lhs as a nonterminal if not already so.  */
  switch (lhs->class)
    {
    case unknown_sym:
      lhs->class = nterm_sym;
      lhs->number = nvars;
      nvars += 1;
      break;

    case token_sym:
      complain_at (location, _("rule given for %s, which is a token"),
                   lhs->tag);
      break;

    default:
      break;
    }
}

/* Check that the rule being completed (CURRENT_RULE) can rely on the
   default action: when it has no action of its own and its lhs is
   typed, its first rhs symbol must exist and have the same type.  */

static void
grammar_current_rule_check (void)
{
  char const *lhs_type = current_rule->sym->type_name;
  symbol_t *first_rhs = current_rule->next->sym;
  char const *rhs_type;

  /* With an explicit action the user is on her own; and if $$ is
     untyped its value cannot be used, so there is nothing to
     check.  */
  if (current_rule->action || !lhs_type)
    return;

  /* No rhs at all: there is no $1 to copy into the typed $$.  */
  if (!first_rhs)
    {
      complain_at (current_rule->location,
                   _("empty rule for typed nonterminal, and no action"));
      return;
    }

  /* The default action copies $1 into $$: their types must agree.
     An untyped $1 is compared as the empty type.  */
  rhs_type = first_rhs->type_name;
  if (!rhs_type)
    rhs_type = "";

  if (strcmp (lhs_type, rhs_type) != 0)
    complain_at (current_rule->location,
                 _("type clash (`%s' `%s') on default action"),
                 lhs_type, rhs_type);
}


/* End the rule currently being grown: terminate it, set its location
   to LOCATION, and check it.  */

void
grammar_rule_end (location_t location)
{
  /* The terminator is a node without symbol; it takes the location
     of the node it follows.  */
  location_t last_location = grammar_end->location;

  grammar_symbol_append (NULL, last_location);
  current_rule->location = location;
  grammar_current_rule_check ();
}


/*-------------------------------------------------------------------.
| The previous action turns out to be a mid-rule action.  Attach it  |
| to the current rule, i.e., create a dummy symbol, give it this     |
| mid-rule action, and append this dummy nonterminal to the current  |
| rule.                                                              |
`-------------------------------------------------------------------*/

void
grammar_midrule_action (void)
{
  /* Since the action was written out with this rule's number, we must
     give the new rule this number by inserting the new rule before
     it.  */

  /* Make a DUMMY nonterminal, whose location is that of the midrule
     action.  Create the MIDRULE, i.e. the node holding DUMMY as the
     lhs of the new rule.  */
  location_t dummy_location = current_rule->action_location;
  symbol_t *dummy = dummy_symbol_get (dummy_location);
  symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);

  /* Make a new rule, whose body is empty, before the current one, so
     that the action just read can belong to it.  It counts for one
     more rule and one more item.  */
  ++nrules;
  ++nritems;
  /* Move the pending action of the current rule to the MIDRULE, and
     locate both the MIDRULE and its action where the action was.  */
  midrule->location = dummy_location;
  midrule->action = current_rule->action;
  midrule->action_location = dummy_location;
  current_rule->action = NULL;

  /* Hook the MIDRULE right after the previous rule, or make it the
     head of the GRAMMAR if there is no previous rule.  */
  if (previous_rule_end)
    previous_rule_end->next = midrule;
  else
    grammar = midrule;

  /* End the dummy's rule: its terminator (a node without symbol)
     becomes the end of the previous rule, and is followed by the
     current rule.  */
  previous_rule_end = symbol_list_new (NULL, dummy_location);
  previous_rule_end->next = current_rule;

  midrule->next = previous_rule_end;

  /* Insert the dummy nonterminal replacing the midrule action into
     the current rule.  The current rule's action was reset above, so
     this call does not come back here.  */
  grammar_current_rule_symbol_append (dummy, dummy_location);
}

/* Make PRECSYM the precedence symbol of the current rule.  Report an
   error at LOCATION if the rule already had one; PRECSYM is recorded
   in either case.  */

void
grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
{
  symbol_t *previous = current_rule->ruleprec;

  current_rule->ruleprec = precsym;
  if (previous != NULL)
    complain_at (location, _("only one %s allowed per rule"), "%prec");
}

/* Attach the dynamic precedence DPREC to the current rule.  Warn at
   LOCATION when no GLR parser is being generated, and report an error
   there if DPREC is not positive or, otherwise, if the rule already
   has a dynamic precedence.  DPREC is recorded in every case.  */

void
grammar_current_rule_dprec_set (int dprec, location_t location)
{
  if (! glr_parser)
    warn_at (location, _("%s affects only GLR parsers"), "%dprec");

  if (0 < dprec)
    {
      if (current_rule->dprec != 0)
        complain_at (location, _("only one %s allowed per rule"), "%dprec");
    }
  else
    {
      complain_at (location,
                   _("%s must be followed by positive number"), "%dprec");
    }

  current_rule->dprec = dprec;
}

/* Attach the merge function NAME to the current rule; the type
   passed along with it is the type of the rule's lhs.  Warn at
   LOCATION when no GLR parser is being generated, and report an error
   there if the rule already has a merge function.  */

void
grammar_current_rule_merge_set (const char* name, location_t location)
{
  char const *result_type = current_rule->sym->type_name;

  if (! glr_parser)
    warn_at (location, _("%s affects only GLR parsers"), "%merge");

  if (current_rule->merger != 0)
    complain_at (location, _("only one %s allowed per rule"), "%merge");

  current_rule->merger = get_merge_function (name, result_type, location);
}

/* Append SYMBOL, located at LOCATION, to the rhs of the current
   rule.  An action still pending on the rule is first turned into a
   mid-rule action.  */

void
grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
{
  if (current_rule->action != NULL)
    grammar_midrule_action ();

  /* One more node in the grammar, hence one more item.  */
  grammar_symbol_append (symbol, location);
  nritems += 1;
}

/* Make ACTION, located at LOCATION, the pending action of the current
   rule.  An action already pending on the rule is first turned into a
   mid-rule action.  */

void
grammar_current_rule_action_append (const char *action, location_t location)
{
  if (current_rule->action != NULL)
    grammar_midrule_action ();

  current_rule->action_location = location;
  current_rule->action = action;
}

\f
/*---------------------------------------------------------------.
| Convert the rules into the representation using RRHS, RLHS and |
| RITEM.                                                         |
`---------------------------------------------------------------*/

static void
packgram (void)
{
  unsigned int itemno = 0;
  rule_number_t ruleno = 0;
  symbol_list_t *p = grammar;

  ritem = XCALLOC (item_number_t, nritems);
  rules = XCALLOC (rule_t, nrules);

  while (p)
    {
      symbol_t *ruleprec = p->ruleprec;
      rules[ruleno].user_number = ruleno;
      rules[ruleno].number = ruleno;
      rules[ruleno].lhs = p->sym;
      rules[ruleno].rhs = ritem + itemno;
      rules[ruleno].location = p->location;
      rules[ruleno].useful = true;
      rules[ruleno].action = p->action;
      rules[ruleno].action_location = p->action_location;
      rules[ruleno].dprec = p->dprec;
      rules[ruleno].merger = p->merger;

      p = p->next;
      while (p && p->sym)
	{
	  /* item_number_t = symbol_number_t.
	     But the former needs to contain more: negative rule numbers. */
	  ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
	  /* A rule gets by default the precedence and associativity
	     of the last token in it.  */
	  if (p->sym->class == token_sym)
	    rules[ruleno].prec = p->sym;
	  if (p)
	    p = p->next;
	}

      /* If this rule has a %prec,
         the specified symbol's precedence replaces the default.  */
      if (ruleprec)
	{
	  rules[ruleno].precsym = ruleprec;
	  rules[ruleno].prec = ruleprec;
	}
      ritem[itemno++] = rule_number_as_item_number (ruleno);
      ++ruleno;

      if (p)
	p = p->next;
    }

  assert (itemno == nritems);

  if (trace_flag & trace_sets)
    ritem_print (stderr);
}
\f
/* Read in the grammar specification from INFILE and record it in the
   format described in gram.h.

   The symbol table and the prologue obstacks are initialized, the
   predefined symbols ($accept, error, $undefined) are created, and
   the input is parsed.  The input file is closed as soon as parsing
   is over, so that it is released even when complaints make us
   return early.  If no complaint was issued, undefined symbols are
   checked, $end is defined if the user did not do it, the initial
   rule is prepended to the grammar, and the symbols and the grammar
   are packed.  It is a fatal error for the grammar to have no rules
   or too many symbols.  */

void
reader (void)
{
  gram_control_t gram_control;

  /* Initialize the symbol table.  */
  symbols_new ();

  /* Construct the accept symbol. */
  accept = symbol_get ("$accept", empty_location);
  accept->class = nterm_sym;
  accept->number = nvars++;

  /* Construct the error token */
  errtoken = symbol_get ("error", empty_location);
  errtoken->class = token_sym;
  errtoken->number = ntokens++;

  /* Construct a token that represents all undefined literal tokens.
     It is always token number 2.  */
  undeftoken = symbol_get ("$undefined", empty_location);
  undeftoken->class = token_sym;
  undeftoken->number = ntokens++;

  /* Initialize the obstacks. */
  obstack_init (&pre_prologue_obstack);
  obstack_init (&post_prologue_obstack);

  finput = xfopen (infile, "r");
  gram_in = finput;

  gram__flex_debug = trace_flag & trace_scan;
  gram_debug = trace_flag & trace_parse;
  scanner_initialize ();
  gram_parse (&gram_control);

  /* Nothing below reads the input: close it right away rather than
     at the end, so that the stream is not leaked by the early return
     that follows.  */
  xfclose (finput);

  /* If something went wrong during the parsing, don't try to
     continue.  */
  if (complaint_issued)
    return;

  /* Grammar has been read.  Do some checking */
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */
  symbols_check_defined ();

  /* If the user did not define her ENDTOKEN, do it now. */
  if (!endtoken)
    {
      endtoken = symbol_get ("$end", empty_location);
      endtoken->class = token_sym;
      endtoken->number = 0;
      /* Value specified by POSIX.  */
      endtoken->user_token_number = 0;
    }

  /* Insert the initial rule, which line is that of the first rule
     (not that of the start symbol):

     accept: %start EOF.  */
  {
    symbol_list_t *p = symbol_list_new (accept, empty_location);
    p->location = grammar->location;
    p->next = symbol_list_new (startsymbol, empty_location);
    p->next->next = symbol_list_new (endtoken, empty_location);
    p->next->next->next = symbol_list_new (NULL, empty_location);
    p->next->next->next->next = grammar;
    /* One more rule; its three items are the two rhs symbols and the
       item that ends the rule.  */
    nrules += 1;
    nritems += 3;
    grammar = p;
  }

  if (SYMBOL_NUMBER_MAX < nsyms)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
	   SYMBOL_NUMBER_MAX);

  assert (nsyms == ntokens + nvars);

  /* Assign the symbols their symbol numbers.  Write #defines for the
     token symbols into FDEFINES if requested.  */
  symbols_pack ();

  /* Convert the grammar into the format described in gram.h.  */
  packgram ();

  /* The grammar as a symbol_list_t is no longer needed. */
  LIST_FREE (symbol_list_t, grammar);
}
Commit	Line	Data
1ff442ca	1	/* Input parser for bison
76514394	2	Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3	3	Free Software Foundation, Inc.
1ff442ca	4
41aca2e0	5	This file is part of Bison, the GNU Compiler Compiler.
1ff442ca	6
41aca2e0 AD	7	Bison is free software; you can redistribute it and/or modify
	8	it under the terms of the GNU General Public License as published by
	9	the Free Software Foundation; either version 2, or (at your option)
	10	any later version.
1ff442ca	11
41aca2e0 AD	12	Bison is distributed in the hope that it will be useful,
	13	but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	GNU General Public License for more details.
1ff442ca	16
41aca2e0 AD	17	You should have received a copy of the GNU General Public License
	18	along with Bison; see the file COPYING. If not, write to
	19	the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	20	Boston, MA 02111-1307, USA. */
1ff442ca NF	21
1ff442ca NF	22
1ff442ca	23	#include "system.h"
2a91a95e AD	24	#include "quotearg.h"
2a91a95e AD	25	#include "quote.h"
ceed8467	26	#include "getargs.h"
1ff442ca	27	#include "files.h"
1ff442ca	28	#include "symtab.h"
56c47203	29	#include "symlist.h"
1ff442ca	30	#include "gram.h"
a0f6b076	31	#include "complain.h"
6c89f1c1	32	#include "output.h"
b2ca4022	33	#include "reader.h"
340ef489	34	#include "conflicts.h"
11d82f03	35	#include "muscle_tab.h"
1ff442ca	36
56c47203	37	static symbol_list_t *grammar = NULL;
280a38c3	38	static int start_flag = 0;
676385e2	39	merger_list *merge_functions;
1ff442ca	40
d7020c20	41	/* Nonzero if %union has been seen. */
e9955c83	42	int typed = 0;
0d533154	43	\f
e9955c83 AD	44	/*-----------------------.
	45	\| Set the start symbol. \|
	46	`-----------------------*/
1ff442ca	47
e9955c83	48	void
8efe435c	49	grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca NF	50	{
1ff442ca NF	51	if (start_flag)
e776192e	52	complain_at (l, _("multiple %s declarations"), "%start");
943819bf RS	53	else
	54	{
	55	start_flag = 1;
e9955c83	56	startsymbol = s;
8efe435c	57	startsymbol_location = l;
943819bf	58	}
1ff442ca NF	59	}
1ff442ca NF	60
1ff442ca	61
d7020c20	62	/*----------------------------------------------------------------.
e9955c83 AD	63	\| There are two prologues: one before %union, one after. Augment \|
e9955c83 AD	64	\| the current one. \|
d7020c20	65	`----------------------------------------------------------------*/
1ff442ca	66
e9955c83	67	void
0c15323d	68	prologue_augment (const char *prologue, location_t location)
b6610515	69	{
e9955c83 AD	70	struct obstack *oout =
e9955c83 AD	71	!typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515	72
e9955c83	73	if (!no_lines_flag)
b6610515	74	{
e9955c83	75	obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d AD	76	location.first_line,
	77	quotearg_style (c_quoting_style,
	78	muscle_find ("filename")));
b6610515	79	}
e9955c83	80	obstack_sgrow (oout, prologue);
b6610515 RA	81	}
b6610515 RA	82
2ba3b73c	83
426cf563	84
a870c567	85
e9955c83 AD	86	/*----------------------.
	87	\| Handle the epilogue. \|
	88	`----------------------*/
426cf563	89
e9955c83	90	void
0c15323d	91	epilogue_set (const char *epilogue, location_t location)
2ba3b73c	92	{
e9955c83	93	if (!no_lines_flag)
1ff442ca	94	{
592e8d4d	95	obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d AD	96	location.first_line,
	97	quotearg_style (c_quoting_style,
	98	muscle_find ("filename")));
1ff442ca	99	}
592e8d4d AD	100	obstack_sgrow (&muscle_obstack, epilogue);
	101	obstack_1grow (&muscle_obstack, 0);
	102	muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca	103	}
1ff442ca	104
a70083a3	105
a70083a3 AD	106	\f
a70083a3 AD	107
676385e2 PH	108	/*-------------------------------------------------------------------.
	109	\| Return the merger index for a merging function named NAME, whose \|
	110	\| arguments have type TYPE. Records the function, if new, in \|
	111	\| merger_list. \|
	112	`-------------------------------------------------------------------*/
	113
	114	static int
a5d50994 AD	115	get_merge_function (const char* name, const char* type,
a5d50994 AD	116	location_t loc)
676385e2 PH	117	{
	118	merger_list *syms;
	119	merger_list head;
	120	int n;
	121
	122	if (! glr_parser)
	123	return 0;
	124
	125	if (type == NULL)
	126	type = "";
	127
	128	head.next = merge_functions;
39f41916	129	for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
676385e2 PH	130	if (strcmp (name, syms->next->name) == 0)
676385e2 PH	131	break;
a5d50994 AD	132	if (syms->next == NULL)
	133	{
	134	syms->next = XMALLOC (merger_list, 1);
b906441c AD	135	syms->next->name = xstrdup (name);
b906441c AD	136	syms->next->type = xstrdup (type);
a5d50994 AD	137	syms->next->next = NULL;
	138	merge_functions = head.next;
	139	}
	140	else if (strcmp (type, syms->next->type) != 0)
	141	warn_at (loc, _("result type clash on merge function %s: `%s' vs. `%s'"),
	142	name, type, syms->next->type);
676385e2 PH	143	return n;
	144	}
	145
	146	/*--------------------------------------.
	147	\| Free all merge-function definitions. \|
	148	`--------------------------------------*/
	149
	150	void
	151	free_merger_functions (void)
	152	{
	153	merger_list *L0;
	154	if (! glr_parser)
	155	return;
	156	L0 = merge_functions;
	157	while (L0 != NULL)
	158	{
	159	merger_list *L1 = L0->next;
	160	free (L0);
	161	L0 = L1;
	162	}
	163	}
	164
a70083a3	165	\f
107f7dfb	166	/*-------------------------------------------------------------------.
32e1e0a4	167	\| Parse the input grammar into a one symbol_list_t structure. Each \|
107f7dfb AD	168	\| rule is represented by a sequence of symbols: the left hand side \|
	169	\| followed by the contents of the right hand side, followed by a \|
	170	\| null pointer instead of a symbol to terminate the rule. The next \|
	171	\| symbol is the lhs of the following rule. \|
	172	\| \|
fdbcd8e2 AD	173	\| All actions are copied out, labelled by the rule number they apply \|
fdbcd8e2 AD	174	\| to. \|
107f7dfb AD	175	\| \|
	176	\| Bison used to allow some %directives in the rules sections, but \|
	177	\| this is no longer consider appropriate: (i) the documented grammar \|
	178	\| doesn't claim it, (ii), it would promote bad style, (iii), error \|
	179	\| recovery for %directives consists in skipping the junk until a `%' \|
	180	\| is seen and helrp synchronizing. This scheme is definitely wrong \|
	181	\| in the rules section. \|
	182	`-------------------------------------------------------------------*/
1ff442ca	183
f6d0f937	184	/* The (currently) last symbol of GRAMMAR. */
56c47203	185	symbol_list_t *grammar_end = NULL;
f6d0f937 AD	186
f6d0f937 AD	187	/* Append S to the GRAMMAR. */
e9955c83	188	void
8efe435c	189	grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937	190	{
56c47203	191	symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937 AD	192
	193	if (grammar_end)
	194	grammar_end->next = p;
	195	else
	196	grammar = p;
	197
	198	grammar_end = p;
	199	}
	200
8efe435c AD	201	/* The rule currently being defined, and the previous rule.
	202	CURRENT_RULE points to the first LHS of the current rule, while
	203	PREVIOUS_RULE_END points to the end of the previous rule (NULL). */
56c47203 AD	204	symbol_list_t *current_rule = NULL;
56c47203 AD	205	symbol_list_t *previous_rule_end = NULL;
da4160c3 AD	206
da4160c3 AD	207
8efe435c AD	208	/*----------------------------------------------.
	209	\| Create a new rule for LHS in to the GRAMMAR. \|
	210	`----------------------------------------------*/
da4160c3	211
e9955c83	212	void
8efe435c	213	grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3 AD	214	{
	215	if (!start_flag)
	216	{
	217	startsymbol = lhs;
8efe435c	218	startsymbol_location = location;
da4160c3 AD	219	start_flag = 1;
	220	}
	221
	222	/* Start a new rule and record its lhs. */
	223	++nrules;
	224	++nritems;
	225
8efe435c AD	226	previous_rule_end = grammar_end;
8efe435c AD	227	grammar_symbol_append (lhs, location);
da4160c3 AD	228	current_rule = grammar_end;
	229
	230	/* Mark the rule's lhs as a nonterminal if not already so. */
	231
	232	if (lhs->class == unknown_sym)
	233	{
	234	lhs->class = nterm_sym;
	235	lhs->number = nvars;
	236	++nvars;
	237	}
	238	else if (lhs->class == token_sym)
e776192e	239	complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3 AD	240	}
da4160c3 AD	241
e9955c83 AD	242	/* Check that the last rule (CURRENT_RULE) is properly defined. For
	243	instance, there should be no type clash on the default action. */
	244
	245	static void
	246	grammar_current_rule_check (void)
	247	{
	248	symbol_t *lhs = current_rule->sym;
3f4c0f80	249	char const *lhs_type = lhs->type_name;
e9955c83 AD	250	symbol_t *first_rhs = current_rule->next->sym;
	251
	252	/* If there is an action, then there is nothing we can do: the user
3f4c0f80	253	is allowed to shoot herself in the foot. */
e9955c83 AD	254	if (current_rule->action)
	255	return;
	256
3f4c0f80 PE	257	/* Don't worry about the default action if $$ is untyped, since $$'s
	258	value can't be used. */
	259	if (! lhs_type)
	260	return;
	261
	262	/* If $$ is being set in default way, report if any type mismatch. */
e9955c83 AD	263	if (first_rhs)
e9955c83 AD	264	{
e9955c83 AD	265	const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
e9955c83 AD	266	if (strcmp (lhs_type, rhs_type))
e776192e AD	267	complain_at (current_rule->location,
	268	_("type clash (`%s' `%s') on default action"),
	269	lhs_type, rhs_type);
e9955c83 AD	270	}
	271	/* Warn if there is no default for $$ but we need one. */
	272	else
3f4c0f80 PE	273	complain_at (current_rule->location,
3f4c0f80 PE	274	_("empty rule for typed nonterminal, and no action"));
e9955c83 AD	275	}
	276
	277
8efe435c AD	278	/*-------------------------------------.
	279	\| End the currently being grown rule. \|
	280	`-------------------------------------*/
e9955c83 AD	281
e9955c83 AD	282	void
8efe435c	283	grammar_rule_end (location_t location)
e9955c83 AD	284	{
e9955c83 AD	285	/* Put an empty link in the list to mark the end of this rule */
8efe435c AD	286	grammar_symbol_append (NULL, grammar_end->location);
8efe435c AD	287	current_rule->location = location;
e9955c83 AD	288	grammar_current_rule_check ();
	289	}
	290
	291
8efe435c AD	292	/*-------------------------------------------------------------------.
	293	\| The previous action turns out the be a mid-rule action. Attach it \|
	294	\| to the current rule, i.e., create a dummy symbol, attach it this \|
	295	\| mid-rule action, and append this dummy nonterminal to the current \|
	296	\| rule. \|
	297	`-------------------------------------------------------------------*/
1485e106	298
e9955c83	299	void
1485e106 AD	300	grammar_midrule_action (void)
	301	{
	302	/* Since the action was written out with this rule's number, we must
	303	give the new rule this number by inserting the new rule before
	304	it. */
	305
8efe435c AD	306	/* Make a DUMMY nonterminal, whose location is that of the midrule
8efe435c AD	307	action. Create the MIDRULE. */
8efe435c	308	location_t dummy_location = current_rule->action_location;
39f41916	309	symbol_t *dummy = dummy_symbol_get (dummy_location);
56c47203	310	symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106 AD	311
	312	/* Make a new rule, whose body is empty, before the current one, so
	313	that the action just read can belong to it. */
	314	++nrules;
	315	++nritems;
8efe435c AD	316	/* Attach its location and actions to that of the DUMMY. */
	317	midrule->location = dummy_location;
	318	midrule->action = current_rule->action;
	319	midrule->action_location = dummy_location;
1485e106 AD	320	current_rule->action = NULL;
1485e106 AD	321
8efe435c AD	322	if (previous_rule_end)
8efe435c AD	323	previous_rule_end->next = midrule;
1485e106	324	else
8efe435c	325	grammar = midrule;
1485e106	326
8efe435c AD	327	/* End the dummy's rule. */
	328	previous_rule_end = symbol_list_new (NULL, dummy_location);
	329	previous_rule_end->next = current_rule;
1485e106	330
8efe435c	331	midrule->next = previous_rule_end;
1485e106	332
8efe435c AD	333	/* Insert the dummy nonterminal replacing the midrule action into
	334	the current rule. */
	335	grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106 AD	336	}
1485e106 AD	337
9af3fbce AD	338	/* Set the precedence symbol of the current rule to PRECSYM. */
9af3fbce AD	339
e9955c83	340	void
e776192e	341	grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce AD	342	{
9af3fbce AD	343	if (current_rule->ruleprec)
473d0a75	344	complain_at (location, _("only one %s allowed per rule"), "%prec");
9af3fbce AD	345	current_rule->ruleprec = precsym;
	346	}
	347
676385e2 PH	348	/* Attach dynamic precedence DPREC to the current rule. */
	349
	350	void
	351	grammar_current_rule_dprec_set (int dprec, location_t location)
	352	{
	353	if (! glr_parser)
473d0a75	354	warn_at (location, _("%s affects only GLR parsers"), "%dprec");
676385e2	355	if (dprec <= 0)
473d0a75 AD	356	complain_at (location,
473d0a75 AD	357	_("%s must be followed by positive number"), "%dprec");
39f41916	358	else if (current_rule->dprec != 0)
473d0a75	359	complain_at (location, _("only one %s allowed per rule"), "%dprec");
676385e2 PH	360	current_rule->dprec = dprec;
	361	}
	362
	363	/* Attach a merge function NAME with argument type TYPE to current
	364	rule. */
	365
	366	void
	367	grammar_current_rule_merge_set (const char* name, location_t location)
	368	{
	369	if (! glr_parser)
473d0a75	370	warn_at (location, _("%s affects only GLR parsers"), "%merge");
39f41916	371	if (current_rule->merger != 0)
473d0a75	372	complain_at (location, _("only one %s allowed per rule"), "%merge");
39f41916	373	current_rule->merger =
a5d50994	374	get_merge_function (name, current_rule->sym->type_name, location);
676385e2 PH	375	}
676385e2 PH	376
2e047461 AD	377	/* Attach a SYMBOL to the current rule. If needed, move the previous
	378	action as a mid-rule action. */
	379
e9955c83	380	void
8efe435c	381	grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461 AD	382	{
	383	if (current_rule->action)
	384	grammar_midrule_action ();
	385	++nritems;
8efe435c	386	grammar_symbol_append (symbol, location);
2e047461 AD	387	}
2e047461 AD	388
2e047461 AD	389	/* Attach an ACTION to the current rule. If needed, move the previous
	390	action as a mid-rule action. */
	391
e9955c83	392	void
8efe435c	393	grammar_current_rule_action_append (const char *action, location_t location)
2e047461 AD	394	{
	395	if (current_rule->action)
	396	grammar_midrule_action ();
	397	current_rule->action = action;
8efe435c	398	current_rule->action_location = location;
2e047461 AD	399	}
2e047461 AD	400
a70083a3	401	\f
a70083a3 AD	402	/*---------------------------------------------------------------.
a70083a3 AD	403	\| Convert the rules into the representation using RRHS, RLHS and \|
d9b739c3	404	\| RITEM. \|
a70083a3	405	`---------------------------------------------------------------*/
1ff442ca	406
4a120d45	407	static void
118fb205	408	packgram (void)
1ff442ca	409	{
9222837b	410	unsigned int itemno = 0;
4b3d3a8e	411	rule_number_t ruleno = 0;
9222837b	412	symbol_list_t *p = grammar;
1ff442ca	413
a900a624	414	ritem = XCALLOC (item_number_t, nritems);
4b3d3a8e	415	rules = XCALLOC (rule_t, nrules);
1ff442ca	416
1ff442ca NF	417	while (p)
1ff442ca NF	418	{
db8837cb	419	symbol_t *ruleprec = p->ruleprec;
d7e1f00c	420	rules[ruleno].user_number = ruleno;
c3b407f4	421	rules[ruleno].number = ruleno;
bba97eb2	422	rules[ruleno].lhs = p->sym;
99013900	423	rules[ruleno].rhs = ritem + itemno;
8efe435c	424	rules[ruleno].location = p->location;
b4afb6bb	425	rules[ruleno].useful = true;
1a2b5d37	426	rules[ruleno].action = p->action;
8efe435c	427	rules[ruleno].action_location = p->action_location;
676385e2 PH	428	rules[ruleno].dprec = p->dprec;
676385e2 PH	429	rules[ruleno].merger = p->merger;
1ff442ca NF	430
	431	p = p->next;
	432	while (p && p->sym)
	433	{
a49aecd5	434	/* item_number_t = symbol_number_t.
5fbb0954	435	But the former needs to contain more: negative rule numbers. */
a49aecd5	436	ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca NF	437	/* A rule gets by default the precedence and associativity
1ff442ca NF	438	of the last token in it. */
d7020c20	439	if (p->sym->class == token_sym)
03b31c0c	440	rules[ruleno].prec = p->sym;
a70083a3 AD	441	if (p)
a70083a3 AD	442	p = p->next;
1ff442ca NF	443	}
	444
	445	/* If this rule has a %prec,
a70083a3	446	the specified symbol's precedence replaces the default. */
1ff442ca NF	447	if (ruleprec)
1ff442ca NF	448	{
03b31c0c AD	449	rules[ruleno].precsym = ruleprec;
03b31c0c AD	450	rules[ruleno].prec = ruleprec;
1ff442ca	451	}
4b3d3a8e	452	ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179	453	++ruleno;
1ff442ca	454
a70083a3 AD	455	if (p)
a70083a3 AD	456	p = p->next;
1ff442ca NF	457	}
1ff442ca NF	458
5123689b	459	assert (itemno == nritems);
3067fbef	460
273a74fa	461	if (trace_flag & trace_sets)
3067fbef	462	ritem_print (stderr);
1ff442ca	463	}
a70083a3	464	\f
fdbcd8e2 AD	465	/*------------------------------------------------------------------.
	466	\| Read in the grammar specification and record it in the format \|
	467	\| described in gram.h. All actions are copied into ACTION_OBSTACK, \|
	468	\| in each case forming the body of a C function (YYACTION) which \|
	469	\| contains a switch statement to decide which action to execute. \|
	470	`------------------------------------------------------------------*/
a70083a3 AD	471
	472	void
	473	reader (void)
	474	{
e9955c83	475	gram_control_t gram_control;
a70083a3 AD	476
a70083a3 AD	477	/* Initialize the symbol table. */
db8837cb	478	symbols_new ();
b6610515	479
88bce5a2 AD	480	/* Construct the accept symbol. */
	481	accept = symbol_get ("$accept", empty_location);
	482	accept->class = nterm_sym;
	483	accept->number = nvars++;
30171f79	484
a70083a3	485	/* Construct the error token */
39f41916	486	errtoken = symbol_get ("error", empty_location);
d7020c20	487	errtoken->class = token_sym;
72a23c97	488	errtoken->number = ntokens++;
b6610515	489
a70083a3 AD	490	/* Construct a token that represents all undefined literal tokens.
a70083a3 AD	491	It is always token number 2. */
88bce5a2	492	undeftoken = symbol_get ("$undefined", empty_location);
d7020c20	493	undeftoken->class = token_sym;
72a23c97	494	undeftoken->number = ntokens++;
a70083a3	495
331dbc1b	496	/* Initialize the obstacks. */
0dd1580a RA	497	obstack_init (&pre_prologue_obstack);
0dd1580a RA	498	obstack_init (&post_prologue_obstack);
331dbc1b AD	499
331dbc1b AD	500	finput = xfopen (infile, "r");
e9955c83 AD	501	gram_in = finput;
e9955c83 AD	502
473d0a75 AD	503	gram__flex_debug = trace_flag & trace_scan;
473d0a75 AD	504	gram_debug = trace_flag & trace_parse;
1d6412ad	505	scanner_initialize ();
e9955c83	506	gram_parse (&gram_control);
331dbc1b	507
b275314e AD	508	/* If something went wrong during the parsing, don't try to
b275314e AD	509	continue. */
b4afb6bb	510	if (complaint_issued)
f956c304	511	return;
b275314e	512
e9955c83 AD	513	/* Grammar has been read. Do some checking */
	514	if (nrules == 0)
	515	fatal (_("no rules in the input grammar"));
	516
	517	/* Report any undefined symbols and consider them nonterminals. */
	518	symbols_check_defined ();
b7c49edf	519
88bce5a2 AD	520	/* If the user did not define her ENDTOKEN, do it now. */
88bce5a2 AD	521	if (!endtoken)
b7c49edf	522	{
88bce5a2 AD	523	endtoken = symbol_get ("$end", empty_location);
	524	endtoken->class = token_sym;
	525	endtoken->number = 0;
b7c49edf	526	/* Value specified by POSIX. */
88bce5a2	527	endtoken->user_token_number = 0;
b7c49edf AD	528	}
b7c49edf AD	529
e9955c83 AD	530	/* Insert the initial rule, which line is that of the first rule
	531	(not that of the start symbol):
	532
88bce5a2	533	accept: %start EOF. */
e9955c83	534	{
88bce5a2	535	symbol_list_t *p = symbol_list_new (accept, empty_location);
8efe435c AD	536	p->location = grammar->location;
8efe435c AD	537	p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2	538	p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c	539	p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83 AD	540	p->next->next->next->next = grammar;
	541	nrules += 1;
	542	nritems += 3;
	543	grammar = p;
	544	}
	545
242a6e48	546	if (SYMBOL_NUMBER_MAX < nsyms)
e9955c83	547	fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
242a6e48	548	SYMBOL_NUMBER_MAX);
e9955c83 AD	549
e9955c83 AD	550	assert (nsyms == ntokens + nvars);
b0c4483e	551
331dbc1b AD	552	xfclose (finput);
331dbc1b AD	553
a70083a3 AD	554	/* Assign the symbols their symbol numbers. Write #defines for the
a70083a3 AD	555	token symbols into FDEFINES if requested. */
2f1afb73	556	symbols_pack ();
93ede233	557
a70083a3 AD	558	/* Convert the grammar into the format described in gram.h. */
a70083a3 AD	559	packgram ();
8419d367	560
56c47203 AD	561	/* The grammar as a symbol_list_t is no longer needed. */
56c47203 AD	562	LIST_FREE (symbol_list_t, grammar);
a70083a3	563	}