/* Bison Grammar Scanner -*- C -*-
- Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free
- Software Foundation, Inc.
+ Copyright (C) 2002-2012 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
-%option debug nodefault nounput noyywrap never-interactive
+%option debug nodefault noinput nounput noyywrap never-interactive
%option prefix="gram_" outfile="lex.yy.c"
%{
#include "reader.h"
#include "uniqstr.h"
+#include <c-ctype.h>
#include <mbswidth.h>
#include <quote.h>
/* A string representing the most recently saved token. */
static char *last_string;
-/* Bracketed identifier */
+/* Bracketed identifier. */
static uniqstr bracketed_id_str = 0;
static location bracketed_id_loc;
static boundary bracketed_id_start;
/* Strings and characters in directives/rules. */
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
/* An identifier was just read in directives/rules. Special state
- to capture the sequence `identifier :'. */
+ to capture the sequence 'identifier :'. */
%x SC_AFTER_IDENTIFIER
/* Three types of user code:
- - prologue (code between `%{' `%}' in the first section, before %%);
+ - prologue (code between '%{' '%}' in the first section, before %%);
- actions, printers, union, etc, (between braces in the middle section);
- epilogue (everything after the second %%). */
%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
%x SC_COMMENT SC_LINE_COMMENT
/* Strings and characters in code. */
%x SC_STRING SC_CHARACTER
- /* Bracketed identifiers support */
+ /* Bracketed identifiers support. */
%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
-letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
-id {letter}({letter}|[0-9])*
-directive %{id}
+letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
+notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
+id {letter}({letter}|[-0-9])*
int [0-9]+
/* POSIX says that a tag must be both an id and a C union member, but
%%
%{
/* Nesting level of the current code in braces. */
- int braces_level IF_LINT (= 0);
+ int braces_level PACIFY_CC (= 0);
/* Parent context state, when applicable. */
- int context_state IF_LINT (= 0);
+ int context_state PACIFY_CC (= 0);
/* Location of most recent identifier, when applicable. */
- location id_loc IF_LINT (= empty_location);
+ location id_loc PACIFY_CC (= empty_location);
/* Where containing code started, when applicable. Its initial
value is relevant only when yylex is invoked in the SC_EPILOGUE
/* Where containing comment or string or character literal started,
when applicable. */
- boundary token_start IF_LINT (= scanner_cursor);
+ boundary token_start PACIFY_CC (= scanner_cursor);
%}
<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
{
/* Comments and white space. */
- "," warn_at (*loc, _("stray `,' treated as white space"));
+ "," warn_at (*loc, _("stray ',' treated as white space"));
[ \f\n\t\v] |
- "//".* ;
+ "//".* continue;
"/*" {
token_start = loc->start;
context_state = YY_START;
/* #line directives are not documented, and may be withdrawn or
modified in future versions of Bison. */
- ^"#line "{int}" \"".*"\"\n" {
+ ^"#line "{int}(" \"".*"\"")?"\n" {
handle_syncline (yytext + sizeof "#line " - 1, *loc);
}
}
"%verbose" return PERCENT_VERBOSE;
"%yacc" return PERCENT_YACC;
- {directive} {
+ "%"{id}|"%"{notletter}([[:graph:]])+ {
complain_at (*loc, _("invalid directive: %s"), quote (yytext));
}
complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
}
- /* Characters. We don't check there is only one. */
- "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
+ /* Characters. */
+ "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
/* Strings. */
"\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
BEGIN SC_BRACKETED_ID;
}
- . {
- complain_at (*loc, _("invalid character: %s"), quote (yytext));
+ [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
+ complain_at (*loc, "%s: %s",
+ ngettext ("invalid character", "invalid characters", yyleng),
+ quote_mem (yytext, yyleng));
}
<<EOF>> {
<SC_AFTER_IDENTIFIER>
{
"[" {
- if (!bracketed_id_str)
- {
- bracketed_id_start = loc->start;
- bracketed_id_context_state = YY_START;
- BEGIN SC_BRACKETED_ID;
- }
- else
+ if (bracketed_id_str)
{
ROLLBACK_CURRENT_TOKEN;
BEGIN SC_RETURN_BRACKETED_ID;
*loc = id_loc;
return ID;
}
+ else
+ {
+ bracketed_id_start = loc->start;
+ bracketed_id_context_state = YY_START;
+ BEGIN SC_BRACKETED_ID;
+ }
}
":" {
BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
<SC_BRACKETED_ID>
{
{id} {
- if (!bracketed_id_str)
+ if (bracketed_id_str)
{
- bracketed_id_str = uniqstr_new (yytext);
- bracketed_id_loc = *loc;
+ complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
+ quote (yytext));
}
else
{
- complain_at (*loc, _("redundant identifier in bracketed name: %s"),
- quote (yytext));
+ bracketed_id_str = uniqstr_new (yytext);
+ bracketed_id_loc = *loc;
}
}
"]" {
}
}
else
- complain_at (*loc, _("a non empty identifier expected"));
+ complain_at (*loc, _("an identifier expected"));
}
- . {
- complain_at (*loc, _("invalid character in bracketed name: %s"),
- quote (yytext));
+
+ [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
+ complain_at (*loc, "%s: %s",
+ ngettext ("invalid character in bracketed name",
+ "invalid characters in bracketed name", yyleng),
+ quote_mem (yytext, yyleng));
}
+
<<EOF>> {
BEGIN bracketed_id_context_state;
unexpected_eof (bracketed_id_start, "]");
/*---------------------------------------------------------------.
- | Scanning a Yacc comment. The initial `/ *' is already eaten. |
+ | Scanning a Yacc comment. The initial '/ *' is already eaten. |
`---------------------------------------------------------------*/
<SC_YACC_COMMENT>
{
"*/" BEGIN context_state;
- .|\n ;
+ .|\n continue;
<<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
}
/*------------------------------------------------------------.
- | Scanning a C comment. The initial `/ *' is already eaten. |
+ | Scanning a C comment. The initial '/ *' is already eaten. |
`------------------------------------------------------------*/
<SC_COMMENT>
/*--------------------------------------------------------------.
- | Scanning a line comment. The initial `//' is already eaten. |
+ | Scanning a line comment. The initial '//' is already eaten. |
`--------------------------------------------------------------*/
<SC_LINE_COMMENT>
<SC_ESCAPED_CHARACTER>
{
"'"|"\n" {
- if (yytext[0] == '\n')
- unexpected_newline (token_start, "'");
- STRING_GROW;
STRING_FINISH;
loc->start = token_start;
- val->character = last_string[1];
+ val->character = last_string[0];
+ {
+ /* FIXME: Eventually, make these errors. */
+ if (last_string[0] == '\0')
+ {
+ warn_at (*loc, _("empty character literal"));
+ /* '\0' seems dangerous even if we are about to complain. */
+ val->character = '\'';
+ }
+ else if (last_string[1] != '\0')
+ warn_at (*loc, _("extra characters in character literal"));
+ }
+ if (yytext[0] == '\n')
+ unexpected_newline (token_start, "'");
STRING_FREE;
BEGIN INITIAL;
return CHAR;
}
<<EOF>> {
- unexpected_eof (token_start, "'");
STRING_FINISH;
loc->start = token_start;
- if (strlen (last_string) > 1)
- val->character = last_string[1];
- else
- val->character = last_string[0];
+ val->character = last_string[0];
+ {
+ /* FIXME: Eventually, make these errors. */
+ if (last_string[0] == '\0')
+ {
+ warn_at (*loc, _("empty character literal"));
+ /* '\0' seems dangerous even if we are about to complain. */
+ val->character = '\'';
+ }
+ else if (last_string[1] != '\0')
+ warn_at (*loc, _("extra characters in character literal"));
+ }
+ unexpected_eof (token_start, "'");
STRING_FREE;
BEGIN INITIAL;
return CHAR;
{
\\[0-7]{1,3} {
unsigned long int c = strtoul (yytext + 1, NULL, 8);
- if (UCHAR_MAX < c)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (!c || UCHAR_MAX < c)
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
obstack_1grow (&obstack_for_string, c);
}
\\x[0-9abcdefABCDEF]+ {
verify (UCHAR_MAX < ULONG_MAX);
unsigned long int c = strtoul (yytext + 2, NULL, 16);
- if (UCHAR_MAX < c)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (!c || UCHAR_MAX < c)
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
obstack_1grow (&obstack_for_string, c);
}
\\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
int c = convert_ucn_to_byte (yytext);
- if (c < 0)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (c <= 0)
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
obstack_1grow (&obstack_for_string, c);
}
\\(.|\n) {
- complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
- STRING_GROW;
+ char const *p = yytext + 1;
+ /* Quote only if escaping won't make the character visible. */
+ if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
+ p = quote (p);
+ else
+ p = quotearg_style_mem (escape_quoting_style, p, 1);
+ complain_at (*loc, _("invalid character after \\-escape: %s"), p);
}
}
}
}
- /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly
- (as `<' `<%'). */
+ /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
+ (as '<' '<%'). */
"<"{splice}"<" STRING_GROW;
<<EOF>> {
}
-/*----------------------------------------------------------------.
-| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
-`----------------------------------------------------------------*/
+/*---------------------------------------------------------------------.
+| Handle '#line INT( "FILE")?\n'. ARGS has already skipped '#line '. |
+`---------------------------------------------------------------------*/
static void
handle_syncline (char *args, location loc)
{
/* ARGS is the matched text just past the #line keyword and its
   trailing space (the scanner rule passes yytext offset by that
   prefix).  LOC is used here only to locate the overflow warning.  */
- char *after_num;
- unsigned long int lineno = strtoul (args, &after_num, 10);
- char *file = strchr (after_num, '"') + 1;
- *strchr (file, '"') = '\0';
+ char *file;
+ unsigned long int lineno = strtoul (args, &file, 10);
/* Line numbers at or above INT_MAX are reported and clamped to
   INT_MAX before being handed to boundary_set.  */
if (INT_MAX <= lineno)
{
warn_at (loc, _("line number overflow"));
lineno = INT_MAX;
}
- current_file = uniqstr_new (file);
+
+ file = mbschr (file, '"');
+ if (file)
+ {
+ *mbschr (file + 1, '"') = '\0';
+ current_file = uniqstr_new (file + 1);
+ }
/* Reposition scanner_cursor at LINENO in current_file.  The final
   argument 1 is presumably the starting column -- confirm against
   boundary_set.  NOTE(review): in the new version the second mbschr
   result is dereferenced unchecked; this relies on the scanner
   pattern guaranteeing a closing double quote whenever an opening
   one is present.  */
boundary_set (&scanner_cursor, current_file, lineno, 1);
}
location loc;
loc.start = start;
loc.end = scanner_cursor;
+ token_end = quote (token_end);
+ // Instead of '\'', display "'".
+ if (!strcmp (token_end, "'\\''"))
+ token_end = "\"'\"";
complain_at (loc, _(msgid), token_end);
}
/* Complain, through unexpected_end, that TOKEN_END is missing at end
   of file.  START is the boundary where the unterminated construct
   began (callers pass the saved start of the comment, literal or
   bracketed name).  */
static void
unexpected_eof (boundary start, char const *token_end)
{
- unexpected_end (start, N_("missing `%s' at end of file"), token_end);
+ unexpected_end (start, N_("missing %s at end of file"), token_end);
}
/* Complain, through unexpected_end, that TOKEN_END is missing at end
   of line.  START is the boundary where the unterminated construct
   began (the character-literal rule passes its saved token start).  */
static void
unexpected_newline (boundary start, char const *token_end)
{
- unexpected_end (start, N_("missing `%s' at end of line"), token_end);
+ unexpected_end (start, N_("missing %s at end of line"), token_end);
}