/* Bison Action Scanner -*- C -*-
- Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
+ the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA
-*/
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
-%option debug nodefault nounput noyywrap never-interactive
+%option debug nodefault noinput nounput noyywrap never-interactive
%option prefix="code_" outfile="lex.yy.c"
%{
#define code_wrap() 1
#define FLEX_PREFIX(Id) code_ ## Id
-#include "flex-scanner.h"
+#include <src/flex-scanner.h>
-#include "complain.h"
-#include "reader.h"
-#include "getargs.h"
+#include <src/complain.h>
+#include <src/reader.h>
+#include <src/getargs.h>
+#include <src/muscle-tab.h>
+#include <src/scan-code.h>
+#include <src/symlist.h>
+
+#include <c-ctype.h>
#include <get-errno.h>
#include <quote.h>
-#include "scan-code.h"
-#include "symlist.h"
-
/* The current calling start condition: SC_RULE_ACTION or
SC_SYMBOL_ACTION. */
# define YY_DECL static char *code_lex (code_props *self, int sc_context)
static void handle_action_dollar (symbol_list *rule, char *cp,
location dollar_loc);
static void handle_action_at (symbol_list *rule, char *cp, location at_loc);
+
+/* A string to be pushed to obstack after dollar/at has been handled. */
+static char *ref_tail_fields;
+
static location the_location;
static location *loc = &the_location;
/* True if an untyped $$ or $n was seen. */
static bool untyped_var_seen;
+
%}
/* C and C++ comments in code. */
%x SC_COMMENT SC_LINE_COMMENT
white space between the backslash and the newline. */
splice (\\[ \f\t\v]*\n)*
+/* C style identifier. Must start with letter. Will be used for
+ named symbol references. Shall be kept synchronized with
+ scan-gram.l "letter" and "id". */
+letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
+id {letter}({letter}|[0-9])*
+ref -?[0-9]+|{id}|"["{id}"]"|"$"
+
%%
%{
/* Nesting level of the current code in braces. */
int braces_level = 0;
+ /* Whether a semicolon is probably needed.
+ The heuristic is that a semicolon is not needed after `{', `}', `;',
+ or a C preprocessor directive, and that whitespaces and comments
+ do not affect this flag.
+ Note that `{' does not need a semicolon because of `{}'.
+ A semicolon may be needed before a cpp direcive, but don't bother. */
+ bool need_semicolon = false;
+
+ /* Whether in a C preprocessor directive. Don't use a start condition
+ for this because, at the end of strings and comments, we still need
+ to know whether we're in a directive. */
+ bool in_cpp = false;
+
/* This scanner is special: it is invoked only once, henceforth
is expected to return only once. This initialization is
therefore done once per action to translate. */
"'" {
STRING_GROW;
BEGIN SC_CHARACTER;
+ need_semicolon = true;
}
"\"" {
STRING_GROW;
BEGIN SC_STRING;
+ need_semicolon = true;
}
"/"{splice}"*" {
STRING_GROW;
<SC_RULE_ACTION>
{
- "$"("<"{tag}">")?(-?[0-9]+|"$") {
+ "$"("<"{tag}">")?{ref} {
+ ref_tail_fields = 0;
handle_action_dollar (self->rule, yytext, *loc);
+ if (ref_tail_fields) {
+ obstack_sgrow (&obstack_for_string, ref_tail_fields);
+ }
+ need_semicolon = true;
}
- "@"(-?[0-9]+|"$") {
+ "@"{ref} {
+ ref_tail_fields = 0;
handle_action_at (self->rule, yytext, *loc);
+ if (ref_tail_fields) {
+ obstack_sgrow (&obstack_for_string, ref_tail_fields);
+ }
+ need_semicolon = true;
}
-
"$" {
warn_at (*loc, _("stray `$'"));
obstack_sgrow (&obstack_for_string, "$][");
+ need_semicolon = true;
}
"@" {
warn_at (*loc, _("stray `@'"));
obstack_sgrow (&obstack_for_string, "@@");
+ need_semicolon = true;
+ }
+ "[" {
+ obstack_sgrow (&obstack_for_string, "@{");
+ need_semicolon = true;
+ }
+ "]" {
+ obstack_sgrow (&obstack_for_string, "@}");
+ need_semicolon = true;
}
- "{" STRING_GROW; ++braces_level;
+ ";" STRING_GROW; need_semicolon = false;
+ "{" STRING_GROW; ++braces_level; need_semicolon = false;
"}" {
- bool outer_brace = --braces_level < 0;
+ bool outer_brace = --braces_level == 0;
/* As an undocumented Bison extension, append `;' before the last
brace in braced code, so that the user code can omit trailing
`;'. But do not append `;' if emulating Yacc, since Yacc does
- not append one.
-
- FIXME: Bison should warn if a semicolon seems to be necessary
- here, and should omit the semicolon if it seems unnecessary
- (e.g., after ';', '{', or '}', each followed by comments or
- white space). Such a warning shouldn't depend on --yacc; it
- should depend on a new --pedantic option, which would cause
- Bison to warn if it detects an extension to POSIX. --pedantic
- should also diagnose other Bison extensions like %yacc.
- Perhaps there should also be a GCC-style --pedantic-errors
- option, so that such warnings are diagnosed as errors. */
- if (outer_brace && ! yacc_flag)
- obstack_1grow (&obstack_for_string, ';');
+ not append one. */
+ if (outer_brace && !yacc_flag && language_prio == default_prio
+ && skeleton_prio == default_prio && need_semicolon && ! in_cpp)
+ {
+ warn_at (*loc, _("a `;' might be needed at the end of action code"));
+ warn_at (*loc, _("future versions of Bison will not add the `;'"));
+ obstack_1grow (&obstack_for_string, ';');
+ }
STRING_GROW;
+ need_semicolon = false;
}
+
+ /* Preprocessing directives should only be recognized at the beginning
+ of lines, allowing whitespace including comments, but in C/C++,
+ `#' can only be the start of preprocessor directives or within
+ `#define' directives anyway, so don't bother with begin of line. */
+ "#" STRING_GROW; in_cpp = true;
+
+ {splice} STRING_GROW;
+ [\n\r] STRING_GROW; if (in_cpp) in_cpp = need_semicolon = false;
+ [ \t\f] STRING_GROW;
+ . STRING_GROW; need_semicolon = true;
}
<SC_SYMBOL_ACTION>
obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
self->is_value_used = true;
}
- "@$" obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
+ "@$" {
+ obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
+ muscle_percent_define_ensure("locations", the_location, true);
+ }
}
%%
+static inline bool
+is_dot_or_dash (char ch)
+{
+ return ch == '.' || ch == '-';
+}
+
+static inline bool
+contains_dot_or_dash (const char* p)
+{
+ for (; *p; ++p)
+ if (is_dot_or_dash (*p))
+ return true;
+ return false;
+}
+
+/* Defines a variant of a symbolic name resolution. */
+typedef struct
+{
+ /* Index in symbol list. */
+ unsigned symbol_index;
+
+ /* Matched symbol id and loc. */
+ uniqstr id;
+ location loc;
+
+ /* Hiding named reference. */
+ named_ref* hidden_by;
+
+ /* Error flags. May contain zero (no errors) or
+ a combination of VARIANT_* values. */
+ unsigned err;
+} variant;
+
+/* Set when the variant refers to a symbol hidden
+ by an explicit symbol reference. */
+#define VARIANT_HIDDEN (1 << 0)
+
+/* Set when the variant refers to a symbol containing
+ dots or dashes. Will require explicit bracketing. */
+#define VARIANT_BAD_BRACKETING (1 << 1)
+
+/* Set when the variant refers to a symbol which is
+ not visible from current midrule. */
+#define VARIANT_NOT_VISIBLE_FROM_MIDRULE (1 << 2)
+
+static variant *variant_table = 0;
+static unsigned variant_table_size = 0;
+static unsigned variant_count = 0;
+
+static variant *
+variant_table_grow (void)
+{
+ ++variant_count;
+ if (variant_count > variant_table_size)
+ {
+ while (variant_count > variant_table_size)
+ variant_table_size = 2 * variant_table_size + 3;
+ variant_table = xnrealloc (variant_table, variant_table_size,
+ sizeof *variant_table);
+ }
+ return &variant_table[variant_count - 1];
+}
+
+static void
+variant_table_free (void)
+{
+ free (variant_table);
+ variant_table = 0;
+ variant_table_size = variant_count = 0;
+}
+
+static char *
+find_prefix_end (const char *prefix, char *begin, char *end)
+{
+ char *ptr = begin;
+
+ for (; *prefix && ptr != end; ++prefix, ++ptr)
+ if (*prefix != *ptr)
+ return 0;
+
+ if (*prefix)
+ return 0;
+
+ return ptr;
+}
+
+static variant *
+variant_add (uniqstr id, location id_loc, unsigned symbol_index,
+ char *cp, char *cp_end, bool exact_mode)
+{
+ char *prefix_end;
+
+ prefix_end = find_prefix_end (id, cp, cp_end);
+ if (prefix_end &&
+ (prefix_end == cp_end ||
+ (!exact_mode && is_dot_or_dash (*prefix_end))))
+ {
+ variant *r = variant_table_grow ();
+ r->symbol_index = symbol_index;
+ r->id = id;
+ r->loc = id_loc;
+ r->hidden_by = NULL;
+ r->err = 0;
+ return r;
+ }
+ else
+ return NULL;
+}
+
+static const char *
+get_at_spec(unsigned symbol_index)
+{
+ static char at_buf[20];
+ if (symbol_index == 0)
+ strcpy (at_buf, "$$");
+ else
+ snprintf (at_buf, sizeof at_buf, "$%u", symbol_index);
+ return at_buf;
+}
+
+static void
+show_sub_messages (const char* cp, bool exact_mode, int midrule_rhs_index,
+ char dollar_or_at, bool is_warning)
+{
+ unsigned i;
+
+ for (i = 0; i < variant_count; ++i)
+ {
+ const variant *var = &variant_table[i];
+ const char *at_spec = get_at_spec (var->symbol_index);
+
+ if (var->err == 0)
+ {
+ if (is_warning)
+ warn_at (var->loc, _(" refers to: %c%s at %s"),
+ dollar_or_at, var->id, at_spec);
+ else
+ complain_at (var->loc, _(" refers to: %c%s at %s"),
+ dollar_or_at, var->id, at_spec);
+ }
+ else
+ {
+ static struct obstack msg_buf;
+ const char *tail = exact_mode ? "" :
+ cp + strlen (var->id);
+ const char *id = var->hidden_by ? var->hidden_by->id :
+ var->id;
+ location id_loc = var->hidden_by ? var->hidden_by->loc :
+ var->loc;
+
+ /* Create the explanation message. */
+ obstack_init (&msg_buf);
+
+ obstack_fgrow1 (&msg_buf, " possibly meant: %c", dollar_or_at);
+ if (contains_dot_or_dash (id))
+ obstack_fgrow1 (&msg_buf, "[%s]", id);
+ else
+ obstack_sgrow (&msg_buf, id);
+ obstack_sgrow (&msg_buf, tail);
+
+ if (var->err & VARIANT_HIDDEN)
+ {
+ obstack_fgrow1 (&msg_buf, ", hiding %c", dollar_or_at);
+ if (contains_dot_or_dash (var->id))
+ obstack_fgrow1 (&msg_buf, "[%s]", var->id);
+ else
+ obstack_sgrow (&msg_buf, var->id);
+ obstack_sgrow (&msg_buf, tail);
+ }
+
+ obstack_fgrow1 (&msg_buf, " at %s", at_spec);
+
+ if (var->err & VARIANT_NOT_VISIBLE_FROM_MIDRULE)
+ obstack_fgrow1 (&msg_buf, ", cannot be accessed from "
+ "mid-rule action at $%d", midrule_rhs_index);
+
+ obstack_1grow (&msg_buf, '\0');
+ if (is_warning)
+ warn_at (id_loc, _("%s"), (char *) obstack_finish (&msg_buf));
+ else
+ complain_at (id_loc, _("%s"), (char *) obstack_finish (&msg_buf));
+ obstack_free (&msg_buf, 0);
+ }
+ }
+}
+
+/* Returned from "parse_ref" when the reference
+ is inappropriate. */
+#define INVALID_REF (INT_MIN)
+
+/* Returned from "parse_ref" when the reference
+ points to LHS ($$) of the current rule or midrule. */
+#define LHS_REF (INT_MIN + 1)
+
+/* Parse named or positional reference. In case of positional
+ references, can return negative values for $-n "deep" stack
+ accesses. */
+static long int
+parse_ref (char *cp, symbol_list *rule, int rule_length,
+ int midrule_rhs_index, char *text, location text_loc,
+ char dollar_or_at)
+{
+ symbol_list *l;
+ char *cp_end;
+ bool exact_mode;
+ unsigned i;
+ unsigned valid_variants = 0;
+ unsigned valid_variant_index = 0;
+
+ if ('$' == *cp)
+ return LHS_REF;
+
+ if (c_isdigit (*cp) || (*cp == '-' && c_isdigit (* (cp + 1))))
+ {
+ long int num = strtol (cp, &cp, 10);
+ if (1 - INT_MAX + rule_length <= num && num <= rule_length)
+ return num;
+ else
+ {
+ complain_at (text_loc, _("integer out of range: %s"),
+ quote (text));
+ return INVALID_REF;
+ }
+ }
+
+ if ('[' == *cp)
+ {
+ /* Ignore the brackets. */
+ char *p;
+ for (p = ++cp; *p != ']'; ++p)
+ continue;
+ cp_end = p;
+
+ exact_mode = true;
+ }
+ else
+ {
+ /* Take all characters of the name. */
+ char* p;
+ for (p = cp; *p; ++p)
+ if (is_dot_or_dash (*p))
+ {
+ ref_tail_fields = p;
+ break;
+ }
+ for (p = cp; *p; ++p)
+ continue;
+ cp_end = p;
+
+ exact_mode = false;
+ }
+
+ /* Add all relevant variants. */
+ {
+ unsigned symbol_index;
+ variant_count = 0;
+ for (symbol_index = 0, l = rule; !symbol_list_null (l);
+ ++symbol_index, l = l->next)
+ {
+ variant *var;
+ if (l->content_type != SYMLIST_SYMBOL)
+ continue;
+
+ var = variant_add (l->content.sym->tag, l->sym_loc,
+ symbol_index, cp, cp_end, exact_mode);
+ if (var && l->named_ref)
+ var->hidden_by = l->named_ref;
+
+ if (l->named_ref)
+ variant_add (l->named_ref->id, l->named_ref->loc,
+ symbol_index, cp, cp_end, exact_mode);
+ }
+ }
+
+ /* Check errors. */
+ for (i = 0; i < variant_count; ++i)
+ {
+ variant *var = &variant_table[i];
+ unsigned symbol_index = var->symbol_index;
+
+ /* Check visibility from mid-rule actions. */
+ if (midrule_rhs_index != 0
+ && (symbol_index == 0 || midrule_rhs_index < symbol_index))
+ var->err |= VARIANT_NOT_VISIBLE_FROM_MIDRULE;
+
+ /* Check correct bracketing. */
+ if (!exact_mode && contains_dot_or_dash (var->id))
+ var->err |= VARIANT_BAD_BRACKETING;
+
+ /* Check using of hidden symbols. */
+ if (var->hidden_by)
+ var->err |= VARIANT_HIDDEN;
+
+ if (!var->err)
+ {
+ valid_variant_index = i;
+ ++valid_variants;
+ }
+ }
+
+ switch (valid_variants)
+ {
+ case 0:
+ if (variant_count == 0)
+ complain_at (text_loc, _("invalid reference: %s, symbol not found"),
+ quote (text));
+ else
+ {
+ complain_at (text_loc, _("invalid reference: %s"),
+ quote (text));
+ show_sub_messages (cp, exact_mode, midrule_rhs_index,
+ dollar_or_at, false);
+ }
+ return INVALID_REF;
+ case 1:
+ {
+ if (variant_count > 1)
+ {
+ warn_at (text_loc, _("misleading reference: %s"),
+ quote (text));
+ show_sub_messages (cp, exact_mode, midrule_rhs_index,
+ dollar_or_at, true);
+ }
+ {
+ unsigned symbol_index =
+ variant_table[valid_variant_index].symbol_index;
+ return (symbol_index == midrule_rhs_index) ? LHS_REF : symbol_index;
+ }
+ }
+ case 2:
+ default:
+ complain_at (text_loc, _("ambiguous reference: %s"),
+ quote (text));
+ show_sub_messages (cp, exact_mode, midrule_rhs_index,
+ dollar_or_at, false);
+ return INVALID_REF;
+ }
+
+ /* Not reachable. */
+ return INVALID_REF;
+}
+
/* Keeps track of the maximum number of semantic values to the left of
a handle (those referenced by $0, $-1, etc.) are required by the
semantic actions of this grammar. */
{
char const *type_name = NULL;
char *cp = text + 1;
+ char *gt_ptr = 0;
symbol_list *effective_rule;
int effective_rule_length;
+ int n;
if (rule->midrule_parent_rule)
{
type_name = ++cp;
while (*cp != '>')
++cp;
- *cp = '\0';
+
+ /* The '>' symbol will be later replaced by '\0'. Original
+ 'text' is needed for error messages. */
+ gt_ptr = cp;
++cp;
if (untyped_var_seen)
complain_at (dollar_loc, _("explicit type given in untyped grammar"));
tag_seen = true;
}
- if (*cp == '$')
+ n = parse_ref (cp, effective_rule, effective_rule_length,
+ rule->midrule_parent_rhs_index, text, dollar_loc, '$');
+
+ if (gt_ptr)
+ *gt_ptr = '\0';
+
+ switch (n)
{
+ case INVALID_REF:
+ break;
+
+ case LHS_REF:
if (!type_name)
type_name = symbol_list_n_type_name_get (rule, dollar_loc, 0);
obstack_fgrow1 (&obstack_for_string,
"]b4_lhs_value([%s])[", type_name);
rule->action_props.is_value_used = true;
- }
- else
- {
- long int num = strtol (cp, NULL, 10);
-
- if (1 - INT_MAX + effective_rule_length <= num
- && num <= effective_rule_length)
+ break;
+
+ default:
+ if (max_left_semantic_context < 1 - n)
+ max_left_semantic_context = 1 - n;
+ if (!type_name && 0 < n)
+ type_name =
+ symbol_list_n_type_name_get (effective_rule, dollar_loc, n);
+ if (!type_name)
{
- int n = num;
- if (max_left_semantic_context < 1 - n)
- max_left_semantic_context = 1 - n;
- if (!type_name && 0 < n)
- type_name =
- symbol_list_n_type_name_get (effective_rule, dollar_loc, n);
- if (!type_name)
- {
- if (union_seen | tag_seen)
- complain_at (dollar_loc, _("$%d of `%s' has no declared type"),
- n, effective_rule->content.sym->tag);
- else
- untyped_var_seen = true;
- type_name = "";
- }
-
- obstack_fgrow3 (&obstack_for_string,
- "]b4_rhs_value(%d, %d, [%s])[",
- effective_rule_length, n, type_name);
- if (n > 0)
- symbol_list_n_get (effective_rule, n)->action_props.is_value_used =
- true;
+ if (union_seen | tag_seen)
+ complain_at (dollar_loc, _("$%s of `%s' has no declared type"),
+ cp, effective_rule->content.sym->tag);
+ else
+ untyped_var_seen = true;
+ type_name = "";
}
- else
- complain_at (dollar_loc, _("integer out of range: %s"), quote (text));
+
+ obstack_fgrow3 (&obstack_for_string,
+ "]b4_rhs_value(%d, %d, [%s])[",
+ effective_rule_length, n, type_name);
+ if (n > 0)
+ symbol_list_n_get (effective_rule, n)->action_props.is_value_used =
+ true;
+ break;
}
}
handle_action_at (symbol_list *rule, char *text, location at_loc)
{
char *cp = text + 1;
- int effective_rule_length =
- (rule->midrule_parent_rule
- ? rule->midrule_parent_rhs_index - 1
- : symbol_list_length (rule->next));
-
- locations_flag = true;
+ symbol_list *effective_rule;
+ int effective_rule_length;
+ int n;
- if (*cp == '$')
- obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
+ if (rule->midrule_parent_rule)
+ {
+ effective_rule = rule->midrule_parent_rule;
+ effective_rule_length = rule->midrule_parent_rhs_index - 1;
+ }
else
{
- long int num = strtol (cp, NULL, 10);
+ effective_rule = rule;
+ effective_rule_length = symbol_list_length (rule->next);
+ }
- if (1 - INT_MAX + effective_rule_length <= num
- && num <= effective_rule_length)
- {
- int n = num;
- obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
- effective_rule_length, n);
- }
- else
- complain_at (at_loc, _("integer out of range: %s"), quote (text));
+ muscle_percent_define_ensure("locations", at_loc, true);
+
+ n = parse_ref (cp, effective_rule, effective_rule_length,
+ rule->midrule_parent_rhs_index, text, at_loc, '@');
+ switch (n)
+ {
+ case INVALID_REF:
+ break;
+
+ case LHS_REF:
+ obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
+ break;
+
+ default:
+ obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
+ effective_rule_length, n);
+ break;
}
}
code_props const code_props_none = CODE_PROPS_NONE_INIT;
void
-code_props_plain_init (code_props *self, char const *code, location code_loc)
+code_props_plain_init (code_props *self, char const *code,
+ location code_loc)
{
self->kind = CODE_PROPS_PLAIN;
self->code = code;
self->location = code_loc;
self->is_value_used = false;
self->rule = NULL;
+ self->named_ref = NULL;
}
void
self->location = code_loc;
self->is_value_used = false;
self->rule = NULL;
+ self->named_ref = NULL;
}
void
code_props_rule_action_init (code_props *self, char const *code,
- location code_loc, symbol_list *rule)
+ location code_loc, symbol_list *rule,
+ named_ref *name)
{
self->kind = CODE_PROPS_RULE_ACTION;
self->code = code;
self->location = code_loc;
self->is_value_used = false;
self->rule = rule;
+ self->named_ref = name;
}
void
code_scanner_free (void)
{
obstack_free (&obstack_for_string, 0);
+ variant_table_free ();
+
/* Reclaim Flex's buffers. */
yylex_destroy ();
}