/* Bison Grammar Scanner -*- C -*-
- Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free
- Software Foundation, Inc.
+ Copyright (C) 2002-2012 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
-%option debug nodefault nounput noyywrap never-interactive
+%option debug nodefault noinput nounput noyywrap never-interactive
%option prefix="gram_" outfile="lex.yy.c"
%{
#include "reader.h"
#include "uniqstr.h"
+#include <ctype.h>
#include <mbswidth.h>
#include <quote.h>
static size_t no_cr_read (FILE *, char *, size_t);
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
+#define ROLLBACK_CURRENT_TOKEN \
+ do { \
+ scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
+ yyless (0); \
+ } while (0)
+
/* A string representing the most recently saved token. */
static char *last_string;
+/* Bracketed identifier. */
+static uniqstr bracketed_id_str = 0;
+static location bracketed_id_loc;
+static boundary bracketed_id_start;
+static int bracketed_id_context_state = 0;
+
void
gram_scanner_last_string_free (void)
{
/* Strings and characters in directives/rules. */
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
/* An identifier was just read in directives/rules. Special state
- to capture the sequence `identifier :'. */
+ to capture the sequence 'identifier :'. */
%x SC_AFTER_IDENTIFIER
/* Three types of user code:
- - prologue (code between `%{' `%}' in the first section, before %%);
+ - prologue (code between '%{' '%}' in the first section, before %%);
- actions, printers, union, etc. (between braces in the middle section);
- epilogue (everything after the second %%). */
%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
%x SC_COMMENT SC_LINE_COMMENT
/* Strings and characters in code. */
%x SC_STRING SC_CHARACTER
+ /* Bracketed identifiers support. */
+%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
-letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
-id {letter}({letter}|[0-9])*
+letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
+id {letter}({letter}|[-0-9])*
directive %{id}
int [0-9]+
%%
%{
/* Nesting level of the current code in braces. */
- int braces_level IF_LINT (= 0);
+ int braces_level PACIFY_CC (= 0);
/* Parent context state, when applicable. */
- int context_state IF_LINT (= 0);
+ int context_state PACIFY_CC (= 0);
/* Location of most recent identifier, when applicable. */
- location id_loc IF_LINT (= empty_location);
+ location id_loc PACIFY_CC (= empty_location);
/* Where containing code started, when applicable. Its initial
value is relevant only when yylex is invoked in the SC_EPILOGUE
/* Where containing comment or string or character literal started,
when applicable. */
- boundary token_start IF_LINT (= scanner_cursor);
+ boundary token_start PACIFY_CC (= scanner_cursor);
%}
| Scanning white space. |
`-----------------------*/
-<INITIAL,SC_AFTER_IDENTIFIER>
+<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
{
/* Comments and white space. */
- "," warn_at (*loc, _("stray `,' treated as white space"));
+ "," warn_at (*loc, _("stray ',' treated as white space"));
[ \f\n\t\v] |
"//".* ;
"/*" {
complain_at (*loc, _("invalid directive: %s"), quote (yytext));
}
- /* Identifiers may not start with a digit. Yet, don't silently
- accept "1FOO" as "1 FOO". */
- {int}{id} {
- complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
- }
-
"=" return EQUAL;
"|" return PIPE;
";" return SEMICOLON;
{id} {
val->uniqstr = uniqstr_new (yytext);
id_loc = *loc;
+ bracketed_id_str = NULL;
BEGIN SC_AFTER_IDENTIFIER;
}
return INT;
}
- /* Characters. We don't check there is only one. */
- "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
+ /* Identifiers may not start with a digit. Yet, don't silently
+ accept "1FOO" as "1 FOO". */
+ {int}{id} {
+ complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
+ }
+
+ /* Characters. */
+ "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
/* Strings. */
"\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
return PERCENT_PERCENT;
}
+ "[" {
+ bracketed_id_str = NULL;
+ bracketed_id_start = loc->start;
+ bracketed_id_context_state = YY_START;
+ BEGIN SC_BRACKETED_ID;
+ }
+
. {
complain_at (*loc, _("invalid character: %s"), quote (yytext));
}
<SC_AFTER_IDENTIFIER>
{
+ "[" {
+ if (bracketed_id_str)
+ {
+ ROLLBACK_CURRENT_TOKEN;
+ BEGIN SC_RETURN_BRACKETED_ID;
+ *loc = id_loc;
+ return ID;
+ }
+ else
+ {
+ bracketed_id_start = loc->start;
+ bracketed_id_context_state = YY_START;
+ BEGIN SC_BRACKETED_ID;
+ }
+ }
":" {
+ BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
*loc = id_loc;
- BEGIN INITIAL;
return ID_COLON;
}
. {
- scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
- yyless (0);
+ ROLLBACK_CURRENT_TOKEN;
+ BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
*loc = id_loc;
- BEGIN INITIAL;
return ID;
}
<<EOF>> {
+ BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
*loc = id_loc;
- BEGIN INITIAL;
return ID;
}
}
+ /*--------------------------------.
+ | Scanning bracketed identifiers. |
+ `--------------------------------*/
+
+<SC_BRACKETED_ID>
+{
+ {id} {
+ if (bracketed_id_str)
+ {
+ complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
+ quote (yytext));
+ }
+ else
+ {
+ bracketed_id_str = uniqstr_new (yytext);
+ bracketed_id_loc = *loc;
+ }
+ }
+ "]" {
+ BEGIN bracketed_id_context_state;
+ if (bracketed_id_str)
+ {
+ if (INITIAL == bracketed_id_context_state)
+ {
+ val->uniqstr = bracketed_id_str;
+ bracketed_id_str = 0;
+ *loc = bracketed_id_loc;
+ return BRACKETED_ID;
+ }
+ }
+ else
+ complain_at (*loc, _("an identifier expected"));
+ }
+ . {
+ complain_at (*loc, _("invalid character in bracketed name: %s"),
+ quote (yytext));
+ }
+ <<EOF>> {
+ BEGIN bracketed_id_context_state;
+ unexpected_eof (bracketed_id_start, "]");
+ }
+}
+
+<SC_RETURN_BRACKETED_ID>
+{
+ . {
+ ROLLBACK_CURRENT_TOKEN;
+ val->uniqstr = bracketed_id_str;
+ bracketed_id_str = 0;
+ *loc = bracketed_id_loc;
+ BEGIN INITIAL;
+ return BRACKETED_ID;
+ }
+}
+
/*---------------------------------------------------------------.
- | Scanning a Yacc comment. The initial `/ *' is already eaten. |
+ | Scanning a Yacc comment. The initial '/ *' is already eaten. |
`---------------------------------------------------------------*/
<SC_YACC_COMMENT>
/*------------------------------------------------------------.
- | Scanning a C comment. The initial `/ *' is already eaten. |
+ | Scanning a C comment. The initial '/ *' is already eaten. |
`------------------------------------------------------------*/
<SC_COMMENT>
/*--------------------------------------------------------------.
- | Scanning a line comment. The initial `//' is already eaten. |
+ | Scanning a line comment. The initial '//' is already eaten. |
`--------------------------------------------------------------*/
<SC_LINE_COMMENT>
<SC_ESCAPED_CHARACTER>
{
"'"|"\n" {
- if (yytext[0] == '\n')
- unexpected_newline (token_start, "'");
- STRING_GROW;
STRING_FINISH;
loc->start = token_start;
- val->character = last_string[1];
+ val->character = last_string[0];
+ {
+ /* FIXME: Eventually, make these errors. */
+ if (last_string[0] == '\0')
+ {
+ warn_at (*loc, _("empty character literal"));
+ /* '\0' seems dangerous even if we are about to complain. */
+ val->character = '\'';
+ }
+ else if (last_string[1] != '\0')
+ warn_at (*loc, _("extra characters in character literal"));
+ }
+ if (yytext[0] == '\n')
+ unexpected_newline (token_start, "'");
STRING_FREE;
BEGIN INITIAL;
return CHAR;
}
<<EOF>> {
- unexpected_eof (token_start, "'");
STRING_FINISH;
loc->start = token_start;
- if (strlen (last_string) > 1)
- val->character = last_string[1];
- else
- val->character = last_string[0];
+ val->character = last_string[0];
+ {
+ /* FIXME: Eventually, make these errors. */
+ if (last_string[0] == '\0')
+ {
+ warn_at (*loc, _("empty character literal"));
+ /* '\0' seems dangerous even if we are about to complain. */
+ val->character = '\'';
+ }
+ else if (last_string[1] != '\0')
+ warn_at (*loc, _("extra characters in character literal"));
+ }
+ unexpected_eof (token_start, "'");
STRING_FREE;
BEGIN INITIAL;
return CHAR;
{
\\[0-7]{1,3} {
unsigned long int c = strtoul (yytext + 1, NULL, 8);
- if (UCHAR_MAX < c)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (!c || UCHAR_MAX < c)
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
obstack_1grow (&obstack_for_string, c);
}
\\x[0-9abcdefABCDEF]+ {
verify (UCHAR_MAX < ULONG_MAX);
unsigned long int c = strtoul (yytext + 2, NULL, 16);
- if (UCHAR_MAX < c)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (!c || UCHAR_MAX < c)
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
obstack_1grow (&obstack_for_string, c);
}
\\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
int c = convert_ucn_to_byte (yytext);
- if (c < 0)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (c <= 0)
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
obstack_1grow (&obstack_for_string, c);
}
\\(.|\n) {
- complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
- STRING_GROW;
+ char const *p = yytext + 1;
+ /* Quote only if escaping won't make the character visible. */
+ if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
+ p = quote (p);
+ else
+ p = quotearg_style_mem (escape_quoting_style, p, 1);
+ complain_at (*loc, _("invalid character after \\-escape: %s"), p);
}
}
}
}
- /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly
- (as `<' `<%'). */
+ /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
+ (as '<' '<%'). */
"<"{splice}"<" STRING_GROW;
<<EOF>> {
/*----------------------------------------------------------------.
-| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
+| Handle '#line INT "FILE"'. ARGS has already skipped '#line '. |
`----------------------------------------------------------------*/
static void
{
char *after_num;
unsigned long int lineno = strtoul (args, &after_num, 10);
- char *file = strchr (after_num, '"') + 1;
- *strchr (file, '"') = '\0';
+ char *file = mbschr (after_num, '"') + 1;
+ *mbschr (file, '"') = '\0';
if (INT_MAX <= lineno)
{
warn_at (loc, _("line number overflow"));
location loc;
loc.start = start;
loc.end = scanner_cursor;
+ token_end = quote (token_end);
+ // Instead of '\'', display "'".
+ if (!strcmp (token_end, "'\\''"))
+ token_end = "\"'\"";
complain_at (loc, _(msgid), token_end);
}
/* Report that end of file was reached while TOKEN_END was still
   expected.  START is passed through unchanged to unexpected_end,
   together with the translatable "missing %s at end of file" format
   and TOKEN_END as its argument.  */
static void
unexpected_eof (boundary start, char const *token_end)
{
- unexpected_end (start, N_("missing `%s' at end of file"), token_end);
+ unexpected_end (start, N_("missing %s at end of file"), token_end);
}
/* Report that end of line was reached while TOKEN_END was still
   expected.  START is passed through unchanged to unexpected_end,
   together with the translatable "missing %s at end of line" format
   and TOKEN_END as its argument.  */
static void
unexpected_newline (boundary start, char const *token_end)
{
- unexpected_end (start, N_("missing `%s' at end of line"), token_end);
+ unexpected_end (start, N_("missing %s at end of line"), token_end);
}