1 /* Bison Action Scanner -*- C -*-
3 Copyright (C) 2006-2011 Free Software Foundation, Inc.
5 This file is part of Bison, the GNU Compiler Compiler.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 %option debug nodefault noinput nounput noyywrap never-interactive
21 %option prefix="code_" outfile="lex.yy.c"
24 /* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
29 #define FLEX_PREFIX(Id) code_ ## Id
30 #include <src/flex-scanner.h>
32 #include <src/complain.h>
33 #include <src/reader.h>
34 #include <src/getargs.h>
35 #include <src/muscle-tab.h>
36 #include <src/scan-code.h>
37 #include <src/symlist.h>
40 #include <get-errno.h>
43 /* The current calling start condition: SC_RULE_ACTION or SC_SYMBOL_ACTION. */
45 # define YY_DECL static char *code_lex (code_props *self, int sc_context)
48 #define YY_USER_ACTION location_compute (loc, &loc->end, yytext, yyleng);
50 static void handle_action_dollar (symbol_list *, char *, unsigned, location);
51 static void handle_action_at (symbol_list const *, char const *, unsigned,
54 static location the_location;
55 static location *loc = &the_location;
57 /* A string representing the most recent translation. */
58 static char *last_string;
60 /* True if an untyped $$ or $n was seen. */
61 static bool untyped_var_seen;
64 /* C and C++ comments in code. */
65 %x SC_COMMENT SC_LINE_COMMENT
66 /* Strings and characters in code. */
67 %x SC_STRING SC_CHARACTER
68 /* Whether in a rule or symbol action. Specifies the translation of $ and @. */
70 %x SC_RULE_ACTION SC_SYMBOL_ACTION
73 /* POSIX says that a tag must be both an id and a C union member, but
74 historically almost any character is allowed in a tag. We disallow
75 NUL and newline, as this simplifies our implementation. */
78 /* Zero or more instances of backslash-newline. Following GCC, allow
79 white space between the backslash and the newline. */
80 splice (\\[ \f\t\v]*\n)*
82 /* A Bison identifier. Keep this synchronized with scan-gram.l "id". */
83 letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
84 id {letter}({letter}|[0-9])*
86 /* An identifier that can appear unbracketed in a reference.
87 This happens to be the same as a C-language identifier. */
88 c_letter [abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
89 c_id {c_letter}({c_letter}|[0-9])*
91 ref -?[0-9]+|{c_id}|"["{id}"]"|"$"
96 /* Nesting level of the current code in braces. */
99 /* Whether a semicolon is probably needed.
100 The heuristic is that a semicolon is not needed after `{', `}', `;',
101 or a C preprocessor directive, and that whitespaces and comments
102 do not affect this flag.
103 Note that `{' does not need a semicolon because of `{}'.
104 A semicolon may be needed before a cpp directive, but don't bother. */
105 bool need_semicolon = false;
107 /* Whether in a C preprocessor directive. Don't use a start condition
108 for this because, at the end of strings and comments, we still need
109 to know whether we're in a directive. */
112 /* This scanner is special: it is invoked only once, hence
113 is expected to return only once. This initialization is
114 therefore done once per action to translate. */
115 aver (sc_context == SC_SYMBOL_ACTION
116 || sc_context == SC_RULE_ACTION
117 || sc_context == INITIAL);
121 /*------------------------------------------------------------.
122 | Scanning a C comment. The initial `/ *' is already eaten. |
123 `------------------------------------------------------------*/
127 "*"{splice}"/" STRING_GROW; BEGIN sc_context;
131 /*--------------------------------------------------------------.
132 | Scanning a line comment. The initial `//' is already eaten. |
133 `--------------------------------------------------------------*/
137 "\n" STRING_GROW; BEGIN sc_context;
138 {splice} STRING_GROW;
142 /*--------------------------------------------.
143 | Scanning user-code characters and strings. |
144 `--------------------------------------------*/
146 <SC_CHARACTER,SC_STRING>
148 {splice}|\\{splice}. STRING_GROW;
153 "'" STRING_GROW; BEGIN sc_context;
158 "\"" STRING_GROW; BEGIN sc_context;
162 <SC_RULE_ACTION,SC_SYMBOL_ACTION>{
166 need_semicolon = true;
171 need_semicolon = true;
179 BEGIN SC_LINE_COMMENT;
185 "$"("<"{tag}">")?{ref} {
186 handle_action_dollar (self->rule, yytext, yyleng, *loc);
187 need_semicolon = true;
190 handle_action_at (self->rule, yytext, yyleng, *loc);
191 need_semicolon = true;
194 warn_at (*loc, _("stray `$'"));
195 obstack_sgrow (&obstack_for_string, "$][");
196 need_semicolon = true;
199 warn_at (*loc, _("stray `@'"));
200 obstack_sgrow (&obstack_for_string, "@@");
201 need_semicolon = true;
204 obstack_sgrow (&obstack_for_string, "@{");
205 need_semicolon = true;
208 obstack_sgrow (&obstack_for_string, "@}");
209 need_semicolon = true;
212 ";" STRING_GROW; need_semicolon = false;
213 "{" STRING_GROW; ++braces_level; need_semicolon = false;
215 bool outer_brace = --braces_level == 0;
217 /* As an undocumented Bison extension, append `;' before the last
218 brace in braced code, so that the user code can omit trailing
219 `;'. But do not append `;' if emulating Yacc, since Yacc does not append one. */
221 if (outer_brace && !yacc_flag && language_prio == default_prio
222 && skeleton_prio == default_prio && need_semicolon && ! in_cpp)
224 warn_at (*loc, _("a `;' might be needed at the end of action code"));
225 warn_at (*loc, _("future versions of Bison will not add the `;'"));
226 obstack_1grow (&obstack_for_string, ';');
230 need_semicolon = false;
233 /* Preprocessing directives should only be recognized at the beginning
234 of lines, allowing whitespace including comments, but in C/C++,
235 `#' can only be the start of preprocessor directives or within
236 `#define' directives anyway, so don't bother with begin of line. */
237 "#" STRING_GROW; in_cpp = true;
239 {splice} STRING_GROW;
240 [\n\r] STRING_GROW; if (in_cpp) in_cpp = need_semicolon = false;
243 /* YYFAIL is undocumented and was formally deprecated in Bison 2.4.2. */
246 STRING_GROW; need_semicolon = true;
247 warn_at (*loc, _("use of YYFAIL, which is deprecated and will be"
251 /* The sole purpose of this is to make sure identifiers that merely
252 contain YYFAIL don't produce the above warning. */
253 [A-Za-z_][0-9A-Za-z_]* STRING_GROW; need_semicolon = true;
255 . STRING_GROW; need_semicolon = true;
261 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
262 self->is_value_used = true;
265 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
266 muscle_percent_define_ensure("locations", the_location, true);
271 /*-----------------------------------------.
272 | Escape M4 quoting characters in C code. |
273 `-----------------------------------------*/
277 \$ obstack_sgrow (&obstack_for_string, "$][");
278 \@ obstack_sgrow (&obstack_for_string, "@@");
279 \[ obstack_sgrow (&obstack_for_string, "@{");
280 \] obstack_sgrow (&obstack_for_string, "@}");
283 /*-----------------------------------------------------.
284 | By default, grow the string obstack with the input. |
285 `-----------------------------------------------------*/
289 /* End of processing. */
/* Return true if CH is '.' or '-', the two characters that a Bison
   identifier may contain but a C identifier may not.  */
static inline bool
is_dot_or_dash (char ch)
{
  return ch == '.' || ch == '-';
}

/* Return true if the NUL-terminated string P contains at least one
   '.' or '-'.  Such identifiers must be written in brackets when used
   in a reference.  */
static inline bool
contains_dot_or_dash (const char* p)
{
  for (; *p; ++p)
    if (is_dot_or_dash (*p))
      return true;
  return false;
}
312 /* Defines a variant of a symbolic name resolution. */
315 /* Index in symbol list. */
316 unsigned symbol_index;
318 /* Matched symbol id and loc. */
322 /* Hiding named reference. */
323 named_ref* hidden_by;
325 /* Error flags. May contain zero (no errors) or
326 a combination of VARIANT_* values. */
330 /* Set when the variant refers to a symbol hidden
331 by an explicit symbol reference. */
332 #define VARIANT_HIDDEN (1 << 0)
334 /* Set when the variant refers to a symbol which is
335 not visible from current midrule. */
336 #define VARIANT_NOT_VISIBLE_FROM_MIDRULE (1 << 1)
338 static variant *variant_table = 0;
339 static unsigned variant_table_size = 0;
340 static unsigned variant_count = 0;
343 variant_table_grow (void)
346 if (variant_count > variant_table_size)
348 while (variant_count > variant_table_size)
349 variant_table_size = 2 * variant_table_size + 3;
350 variant_table = xnrealloc (variant_table, variant_table_size,
351 sizeof *variant_table);
353 return &variant_table[variant_count - 1];
357 variant_table_free (void)
359 free (variant_table);
361 variant_table_size = variant_count = 0;
364 /* Return TRUE if ID matches the string from CP up to CP_END.
365 The string does not contain null bytes. */
static bool
identifier_matches (char const *id, char const *cp, char const *cp_end)
{
  /* Compare byte by byte.  Because [CP, CP_END) holds no null bytes,
     reaching the end of ID early shows up as a mismatch, so ID is
     never read past its terminator.  */
  while (cp != cp_end)
    if (*id++ != *cp++)
      return false;
  /* The whole range matched; ID must end here too.  */
  return !*id;
}
375 /* If scanning ID, return a new variant with that ID, at location
376 ID_LOC with index SYMBOL_INDEX. Otherwise, return NULL. The
377 currently scanned identifier starts at CP and ends at CP_END. */
379 variant_add (uniqstr id, location id_loc, unsigned symbol_index,
380 char const *cp, char const *cp_end)
382 if (identifier_matches (id, cp, cp_end))
384 variant *r = variant_table_grow ();
385 r->symbol_index = symbol_index;
/* Return the positional spelling of the symbol at SYMBOL_INDEX: "$$"
   for index 0, "$N" otherwise.  The result lives in a static buffer
   that is overwritten by the next call.  */
static const char *
get_at_spec (unsigned symbol_index)
{
  static char at_buf[20];
  if (symbol_index == 0)
    strcpy (at_buf, "$$");
  else
    snprintf (at_buf, sizeof at_buf, "$%u", symbol_index);
  return at_buf;
}
407 /* Show a subsidiary message for a problem with a grammar rule. TEXT
408 points to the problematic reference. MIDRULE_RHS_INDEX is the rhs
409 index (1-origin) in the rule. If IS_WARNING, it is a warning,
410 otherwise a complaint. Indent the message INDENT spaces. */
412 show_sub_messages (char const *text, int midrule_rhs_index,
413 bool is_warning, unsigned indent)
415 char dollar_or_at = *text;
418 for (i = 0; i < variant_count; ++i)
420 const variant *var = &variant_table[i];
421 const char *at_spec = get_at_spec (var->symbol_index);
426 warn_at_indent (var->loc, &indent, _("refers to: %c%s at %s"),
427 dollar_or_at, var->id, at_spec);
429 complain_at_indent (var->loc, &indent, _("refers to: %c%s at %s"),
430 dollar_or_at, var->id, at_spec);
434 static struct obstack msg_buf;
435 const char *id = var->hidden_by ? var->hidden_by->id :
437 location id_loc = var->hidden_by ? var->hidden_by->loc :
440 /* Create the explanation message. */
441 obstack_init (&msg_buf);
443 obstack_fgrow1 (&msg_buf, _("possibly meant: %c"), dollar_or_at);
444 if (contains_dot_or_dash (id))
445 obstack_fgrow1 (&msg_buf, "[%s]", id);
447 obstack_sgrow (&msg_buf, id);
449 if (var->err & VARIANT_HIDDEN)
451 obstack_fgrow1 (&msg_buf, _(", hiding %c"), dollar_or_at);
452 if (contains_dot_or_dash (var->id))
453 obstack_fgrow1 (&msg_buf, "[%s]", var->id);
455 obstack_sgrow (&msg_buf, var->id);
458 obstack_fgrow1 (&msg_buf, _(" at %s"), at_spec);
460 if (var->err & VARIANT_NOT_VISIBLE_FROM_MIDRULE)
463 _(", cannot be accessed from mid-rule action at $%d");
464 obstack_fgrow1 (&msg_buf, format, midrule_rhs_index);
467 obstack_1grow (&msg_buf, '\0');
469 warn_at_indent (id_loc, &indent, "%s",
470 (char *) obstack_finish (&msg_buf));
472 complain_at_indent (id_loc, &indent, "%s",
473 (char *) obstack_finish (&msg_buf));
474 obstack_free (&msg_buf, 0);
479 /* Returned from "parse_ref" when the reference is invalid. */
481 #define INVALID_REF (INT_MIN)
483 /* Returned from "parse_ref" when the reference
484 points to LHS ($$) of the current rule or midrule. */
485 #define LHS_REF (INT_MIN + 1)
487 /* Sub-messages indent. */
488 #define SUB_INDENT (4)
490 /* Return the index of a named or positional reference starting at CP
491 for a rule RULE of length RULE_LENGTH. If MIDRULE_RHS_INDEX is
492 nonzero, this is a generated midrule whose rhs index (1-origin) is
493 MIDRULE_RHS_INDEX in the parent rule. The entire semantic value
494 containing the reference is TEXT, of length TEXTLEN. Its location is TEXT_LOC.
497 In case of positional references, this can return negative values
498 for $-n "deep" stack accesses. */
500 parse_ref (char const *cp, symbol_list const *rule, int rule_length,
501 int midrule_rhs_index, char const *text, unsigned textlen,
504 symbol_list const *l;
506 bool explicit_bracketing;
508 unsigned valid_variants = 0;
509 unsigned valid_variant_index = 0;
514 if (c_isdigit (*cp) || *cp == '-')
516 long int num = strtol (cp, NULL, 10);
517 if (1 - INT_MAX + rule_length <= num && num <= rule_length)
521 complain_at (text_loc, _("integer out of range: %s"),
527 explicit_bracketing = (*cp == '[');
528 cp += explicit_bracketing;
529 cp_end = text + textlen - explicit_bracketing;
531 /* Add all relevant variants. */
533 unsigned symbol_index;
535 for (symbol_index = 0, l = rule; !symbol_list_null (l);
536 ++symbol_index, l = l->next)
539 if (l->content_type != SYMLIST_SYMBOL)
542 var = variant_add (l->content.sym->tag, l->sym_loc,
543 symbol_index, cp, cp_end);
544 if (var && l->named_ref)
545 var->hidden_by = l->named_ref;
548 variant_add (l->named_ref->id, l->named_ref->loc,
549 symbol_index, cp, cp_end);
554 for (i = 0; i < variant_count; ++i)
556 variant *var = &variant_table[i];
557 unsigned symbol_index = var->symbol_index;
559 /* Check visibility from mid-rule actions. */
560 if (midrule_rhs_index != 0
561 && (symbol_index == 0 || midrule_rhs_index < symbol_index))
562 var->err |= VARIANT_NOT_VISIBLE_FROM_MIDRULE;
564 /* Check use of hidden symbols. */
566 var->err |= VARIANT_HIDDEN;
570 valid_variant_index = i;
575 switch (valid_variants)
579 unsigned len = cp_end - cp;
582 complain_at_indent (text_loc, &indent, _("invalid reference: %s"),
584 indent += SUB_INDENT;
587 location sym_loc = text_loc;
588 sym_loc.start.column += 1;
589 sym_loc.end = sym_loc.start;
591 _("syntax error after `%c', expecting integer, letter,"
592 " `_', `[', or `$'");
593 complain_at_indent (sym_loc, &indent, format, *text);
595 else if (midrule_rhs_index)
598 _("symbol not found in production before $%d: %.*s");
599 complain_at_indent (rule->location, &indent, format,
600 midrule_rhs_index, len, cp);
605 _("symbol not found in production: %.*s");
606 complain_at_indent (rule->location, &indent, format,
610 if (variant_count > 0)
611 show_sub_messages (text, midrule_rhs_index, false, indent);
617 if (variant_count > 1)
619 warn_at_indent (text_loc, &indent, _("misleading reference: %s"),
621 show_sub_messages (text, midrule_rhs_index, true,
622 indent + SUB_INDENT);
625 unsigned symbol_index =
626 variant_table[valid_variant_index].symbol_index;
627 return (symbol_index == midrule_rhs_index) ? LHS_REF : symbol_index;
634 complain_at_indent (text_loc, &indent, _("ambiguous reference: %s"),
636 show_sub_messages (text, midrule_rhs_index, false,
637 indent + SUB_INDENT);
646 /* Keeps track of the maximum number of semantic values to the left of
647 a handle (those referenced by $0, $-1, etc.) that are required by the
648 semantic actions of this grammar. */
649 int max_left_semantic_context = 0;
652 /*------------------------------------------------------------------.
653 | TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
655 | Possible inputs: $(<TYPENAME>|)($|integer|c_id|[id]) |
657 | Output to OBSTACK_FOR_STRING a reference to this semantic value. |
658 `------------------------------------------------------------------*/
661 handle_action_dollar (symbol_list *rule, char *text, unsigned textlen,
664 char const *type_name = NULL;
667 symbol_list *effective_rule;
668 int effective_rule_length;
671 if (rule->midrule_parent_rule)
673 effective_rule = rule->midrule_parent_rule;
674 effective_rule_length = rule->midrule_parent_rhs_index - 1;
678 effective_rule = rule;
679 effective_rule_length = symbol_list_length (rule->next);
682 /* Get the type name if explicit. */
689 /* The '>' symbol will be later replaced by '\0'. Original
690 'text' is needed for error messages. */
693 if (untyped_var_seen)
694 complain_at (dollar_loc, _("explicit type given in untyped grammar"));
698 n = parse_ref (cp, effective_rule, effective_rule_length,
699 rule->midrule_parent_rhs_index, text, textlen, dollar_loc);
711 type_name = symbol_list_n_type_name_get (rule, dollar_loc, 0);
715 if (union_seen | tag_seen)
717 if (rule->midrule_parent_rule)
718 complain_at (dollar_loc,
719 _("$$ for the midrule at $%d of `%s'"
720 " has no declared type"),
721 rule->midrule_parent_rhs_index,
722 effective_rule->content.sym->tag);
724 complain_at (dollar_loc, _("$$ of `%s' has no declared type"),
725 rule->content.sym->tag);
728 untyped_var_seen = true;
732 obstack_fgrow1 (&obstack_for_string,
733 "]b4_lhs_value([%s])[", type_name);
734 rule->action_props.is_value_used = true;
738 if (max_left_semantic_context < 1 - n)
739 max_left_semantic_context = 1 - n;
740 if (!type_name && 0 < n)
742 symbol_list_n_type_name_get (effective_rule, dollar_loc, n);
745 if (union_seen | tag_seen)
746 complain_at (dollar_loc, _("$%s of `%s' has no declared type"),
747 cp, effective_rule->content.sym->tag);
749 untyped_var_seen = true;
753 obstack_fgrow3 (&obstack_for_string,
754 "]b4_rhs_value(%d, %d, [%s])[",
755 effective_rule_length, n, type_name);
757 symbol_list_n_get (effective_rule, n)->action_props.is_value_used =
764 /*------------------------------------------------------.
765 | TEXT is a location token (i.e., a `@...'). Output to |
766 | OBSTACK_FOR_STRING a reference to this location. |
767 `------------------------------------------------------*/
770 handle_action_at (symbol_list const *rule, char const *text, unsigned textlen,
773 char const *cp = text + 1;
774 symbol_list const *effective_rule;
775 int effective_rule_length;
778 if (rule->midrule_parent_rule)
780 effective_rule = rule->midrule_parent_rule;
781 effective_rule_length = rule->midrule_parent_rhs_index - 1;
785 effective_rule = rule;
786 effective_rule_length = symbol_list_length (rule->next);
789 muscle_percent_define_ensure("locations", at_loc, true);
791 n = parse_ref (cp, effective_rule, effective_rule_length,
792 rule->midrule_parent_rhs_index, text, textlen, at_loc);
799 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
803 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
804 effective_rule_length, n);
810 /*-------------------------.
811 | Initialize the scanner. |
812 `-------------------------*/
814 /* Translate the dollars and ats in \a self, in the context \a sc_context
815 (SC_RULE_ACTION, SC_SYMBOL_ACTION, INITIAL). */
818 translate_action (code_props *self, int sc_context)
821 static bool initialized = false;
824 obstack_init (&obstack_for_string);
829 loc->start = loc->end = self->location.start;
830 yy_switch_to_buffer (yy_scan_string (self->code));
831 res = code_lex (self, sc_context);
832 yy_delete_buffer (YY_CURRENT_BUFFER);
837 /*------------------------------------------------------------------------.
838 | Implementation of the public interface as documented in "scan-code.h". |
839 `------------------------------------------------------------------------*/
842 code_props_none_init (code_props *self)
844 *self = code_props_none;
847 code_props const code_props_none = CODE_PROPS_NONE_INIT;
850 code_props_plain_init (code_props *self, char const *code,
853 self->kind = CODE_PROPS_PLAIN;
855 self->location = code_loc;
856 self->is_value_used = false;
858 self->named_ref = NULL;
862 code_props_symbol_action_init (code_props *self, char const *code,
865 self->kind = CODE_PROPS_SYMBOL_ACTION;
867 self->location = code_loc;
868 self->is_value_used = false;
870 self->named_ref = NULL;
874 code_props_rule_action_init (code_props *self, char const *code,
875 location code_loc, symbol_list *rule,
876 named_ref *name, bool is_predicate)
878 self->kind = CODE_PROPS_RULE_ACTION;
880 self->location = code_loc;
881 self->is_value_used = false;
883 self->named_ref = name;
884 self->is_predicate = is_predicate;
888 code_props_translate_code (code_props *self)
892 case CODE_PROPS_NONE:
894 case CODE_PROPS_PLAIN:
895 self->code = translate_action (self, INITIAL);
897 case CODE_PROPS_SYMBOL_ACTION:
898 self->code = translate_action (self, SC_SYMBOL_ACTION);
900 case CODE_PROPS_RULE_ACTION:
901 self->code = translate_action (self, SC_RULE_ACTION);
907 code_scanner_last_string_free (void)
913 code_scanner_free (void)
915 obstack_free (&obstack_for_string, 0);
916 variant_table_free ();
918 /* Reclaim Flex's buffers. */