/* Bison Grammar Scanner -*- C -*-
- Copyright (C) 2002 Free Software Foundation, Inc.
+
+ Copyright (C) 2002-2012 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
+ the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
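This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.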
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA
-*/
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
-%option debug nodefault noyywrap nounput never-interactive stack
+%option debug nodefault noinput nounput noyywrap never-interactive
%option prefix="gram_" outfile="lex.yy.c"
%{
-#include "system.h"
-#include "complain.h"
-#include "quote.h"
-#include "getargs.h"
-#include "gram.h"
-#include "reader.h"
+/* Work around a bug in flex 2.5.31. See Debian bug 333231
+ <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
+#undef gram_wrap
+#define gram_wrap() 1
+
+#define FLEX_PREFIX(Id) gram_ ## Id
+#include <src/flex-scanner.h>
+
+#include <src/complain.h>
+#include <src/files.h>
+#include <src/gram.h>
+#include <quotearg.h>
+#include <src/reader.h>
+#include <src/uniqstr.h>
+
+#include <c-ctype.h>
+#include <mbswidth.h>
+#include <quote.h>
+
+#include <src/scan-gram.h>
+
+#define YY_DECL GRAM_LEX_DECL
+
+#define YY_USER_INIT \
+ code_start = scanner_cursor = loc->start; \
+
+/* Location of scanner cursor. */
+static boundary scanner_cursor;
-/* Each time we match a string, move the end cursor to its end. */
-#define YY_USER_ACTION LOCATION_COLUMNS (*yylloc, yyleng)
-#define YY_LINES LOCATION_LINES (*yylloc, yyleng); lineno += yyleng;
-#define YY_STEP LOCATION_STEP (*yylloc)
+#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
-/* Appending to the STRING_OBSTACK. */
-#define YY_INIT obstack_init (&string_obstack)
-#define YY_GROW obstack_grow (&string_obstack, yytext, yyleng)
-#define YY_FINISH obstack_1grow (&string_obstack, '\0'); yylval->string = obstack_finish (&string_obstack);
+static size_t no_cr_read (FILE *, char *, size_t);
+#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
-/* This is only to avoid GCC warnings. */
-#define YY_USER_INIT if (yycontrol) {;};
+#define RETURN_PERCENT_PARAM(Value) \
+ RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
-static struct obstack string_obstack;
-static int braces_level = 0;
-static int percent_percent_count = 0;
+#define RETURN_PERCENT_FLAG(Value) \
+ RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
-static void handle_dollar PARAMS ((char *cp));
-static void handle_at PARAMS ((char *cp));
+#define RETURN_VALUE(Token, Field, Value) \
+ do { \
+ val->Field = Value; \
+ return Token; \
+ } while (0)
+
+#define ROLLBACK_CURRENT_TOKEN \
+ do { \
+ scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
+ yyless (0); \
+ } while (0)
+
+/* A string representing the most recently saved token. */
+static char *last_string;
+
+/* Bracketed identifier. */
+static uniqstr bracketed_id_str = 0;
+static location bracketed_id_loc;
+static boundary bracketed_id_start;
+static int bracketed_id_context_state = 0;
+
+void
+gram_scanner_last_string_free (void)
+{
+ STRING_FREE; /* Macro defined outside this view (presumably in <src/flex-scanner.h>); it looks like it releases the storage behind last_string -- confirm. */
+}
+
+static void handle_syncline (char *, location);
+static unsigned long int scan_integer (char const *p, int base, location loc);
+static int convert_ucn_to_byte (char const *hex_text);
+static void unexpected_eof (boundary, char const *);
+static void unexpected_newline (boundary, char const *);
%}
-%x SC_COMMENT
-%x SC_STRING SC_CHARACTER
+ /* A C-like comment in directives/rules. */
+%x SC_YACC_COMMENT
+ /* Strings and characters in directives/rules. */
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
-%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
+ /* An identifier was just read in directives/rules. Special state
+ to capture the sequence 'identifier :'. */
+%x SC_AFTER_IDENTIFIER
+ /* A complex tag, with nested angle brackets. */
+%x SC_TAG
+
+ /* Four types of user code:
+ - prologue (code between '%{' and '%}' in the first section, before %%);
+ - actions, printers, union, etc, (between braces in the middle section);
+ - epilogue (everything after the second %%);
+ - predicate (code between '%?{' and '}' in middle section). */
+%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
+ /* C and C++ comments in code. */
+%x SC_COMMENT SC_LINE_COMMENT
+ /* Strings and characters in code. */
+%x SC_STRING SC_CHARACTER
+ /* Bracketed identifiers support. */
+%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
+
+letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
+id {letter}({letter}|[-0-9])*
+directive %{id}
+int [0-9]+
-id [.a-zA-Z][.a-zA-Z_0-9]*
-int [0-9]+
-eols (\n|\r|\n\r|\r\n)+
-blanks [ \t\f]+
+/* POSIX says that a tag must be both an id and a C union member, but
+ historically almost any character is allowed in a tag. We disallow
+ NUL, as this simplifies our implementation. We disallow angle
+ bracket to match them in nested pairs: several languages use them
+ for generics/template types. */
+tag [^\0<>]+
+
+/* Zero or more instances of backslash-newline. Following GCC, allow
+ white space between the backslash and the newline. */
+splice (\\[ \f\t\v]*\n)*
%%
%{
- /* At each yylex invocation, mark the current position as the
- start of the next token. */
-#define TR_POS 0
-#if TR_POS
- fprintf (stderr, "FOO1: ");
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, "\n");
-#endif
- YY_STEP;
-#if TR_POS
- fprintf (stderr, "BAR1: ");
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, "\n");
-#endif
+ /* Nesting level. Either for nested braces, or nested angle brackets
+ (but not mixed). */
+ int nesting PACIFY_CC (= 0);
+
+ /* Parent context state, when applicable. */
+ int context_state PACIFY_CC (= 0);
+
+ /* Location of most recent identifier, when applicable. */
+ location id_loc PACIFY_CC (= empty_location);
+
+ /* Where containing code started, when applicable. Its initial
+ value is relevant only when yylex is invoked in the SC_EPILOGUE
+ start condition. */
+ boundary code_start = scanner_cursor;
+
+ /* Where containing comment or string or character literal started,
+ when applicable. */
+ boundary token_start PACIFY_CC (= scanner_cursor);
%}
+ /*-----------------------.
+ | Scanning white space. |
+ `-----------------------*/
+
+<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
+{
+ /* Comments and white space. */
+ "," {
+ complain_at (*loc, Wother, _("stray ',' treated as white space"));
+ }
+ [ \f\n\t\v] |
+ "//".* ;
+ "/*" {
+ token_start = loc->start;
+ context_state = YY_START;
+ BEGIN SC_YACC_COMMENT;
+ }
+
+ /* #line directives are not documented, and may be withdrawn or
+ modified in future versions of Bison. */
+ ^"#line "{int}" \"".*"\"\n" {
+ handle_syncline (yytext + sizeof "#line " - 1, *loc);
+ }
+}
+
+
/*----------------------------.
| Scanning Bison directives. |
`----------------------------*/
+
+ /* For directives that are also command line options, the regex must be
+ "%..."
+ after "[-_]"s are removed, and the directive must match the --long
+ option name, with a single string argument. Otherwise, add exceptions
+ to ../build-aux/cross-options.pl. */
+
<INITIAL>
{
- "%binary" return PERCENT_NONASSOC;
- "%debug" return PERCENT_DEBUG;
- "%define" return PERCENT_DEFINE;
- "%defines" return PERCENT_DEFINES;
- "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
- "%expect" return PERCENT_EXPECT;
- "%file-prefix" return PERCENT_FILE_PREFIX;
+ "%binary" return PERCENT_NONASSOC;
+ "%code" return PERCENT_CODE;
+ "%debug" RETURN_PERCENT_FLAG("parse.trace");
+ "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
+ "%define" return PERCENT_DEFINE;
+ "%defines" return PERCENT_DEFINES;
+ "%destructor" return PERCENT_DESTRUCTOR;
+ "%dprec" return PERCENT_DPREC;
+ "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
+ "%expect" return PERCENT_EXPECT;
+ "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
+ "%file-prefix" return PERCENT_FILE_PREFIX;
"%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
- "%left" return PERCENT_LEFT;
- "%locations" return PERCENT_LOCATIONS;
- "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
- "%no"[-_]"lines" return PERCENT_NO_LINES;
- "%nonassoc" return PERCENT_NONASSOC;
- "%nterm" return PERCENT_NTERM;
- "%output" return PERCENT_OUTPUT;
- "%prec" return PERCENT_PREC;
- "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
- "%right" return PERCENT_RIGHT;
- "%skeleton" return PERCENT_SKELETON;
- "%start" return PERCENT_START;
- "%term" return PERCENT_TOKEN;
- "%token" return PERCENT_TOKEN;
- "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
- "%type" return PERCENT_TYPE;
- "%union" return PERCENT_UNION;
- "%verbose" return PERCENT_VERBOSE;
- "%yacc" return PERCENT_YACC;
+ "%initial-action" return PERCENT_INITIAL_ACTION;
+ "%glr-parser" return PERCENT_GLR_PARSER;
+ "%language" return PERCENT_LANGUAGE;
+ "%left" return PERCENT_LEFT;
+ "%lex-param" RETURN_PERCENT_PARAM(lex);
+ "%locations" RETURN_PERCENT_FLAG("locations");
+ "%merge" return PERCENT_MERGE;
+ "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
+ "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
+ "%no"[-_]"lines" return PERCENT_NO_LINES;
+ "%nonassoc" return PERCENT_NONASSOC;
+ "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
+ "%nterm" return PERCENT_NTERM;
+ "%output" return PERCENT_OUTPUT;
+ "%param" RETURN_PERCENT_PARAM(both);
+ "%parse-param" RETURN_PERCENT_PARAM(parse);
+ "%prec" return PERCENT_PREC;
+ "%precedence" return PERCENT_PRECEDENCE;
+ "%printer" return PERCENT_PRINTER;
+ "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
+ "%require" return PERCENT_REQUIRE;
+ "%right" return PERCENT_RIGHT;
+ "%skeleton" return PERCENT_SKELETON;
+ "%start" return PERCENT_START;
+ "%term" return PERCENT_TOKEN;
+ "%token" return PERCENT_TOKEN;
+ "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
+ "%type" return PERCENT_TYPE;
+ "%union" return PERCENT_UNION;
+ "%verbose" return PERCENT_VERBOSE;
+ "%yacc" return PERCENT_YACC;
+
+ {directive} {
+ complain_at (*loc, complaint, _("invalid directive: %s"), quote (yytext));
+ }
"=" return EQUAL;
- ":" return COLON;
"|" return PIPE;
";" return SEMICOLON;
- {eols} YY_LINES; YY_STEP;
- {blanks} YY_STEP;
- {id} {
- YY_INIT; YY_GROW; YY_FINISH;
- yylval->symbol = getsym (yylval->string);
- return ID;
+ {id} {
+ val->uniqstr = uniqstr_new (yytext);
+ id_loc = *loc;
+ bracketed_id_str = NULL;
+ BEGIN SC_AFTER_IDENTIFIER;
+ }
+
+ {int} {
+ val->integer = scan_integer (yytext, 10, *loc);
+ return INT;
+ }
+ 0[xX][0-9abcdefABCDEF]+ {
+ val->integer = scan_integer (yytext, 16, *loc);
+ return INT;
}
- {int} yylval->integer = strtol (yytext, 0, 10); return INT;
+ /* Identifiers may not start with a digit. Yet, don't silently
+ accept "1FOO" as "1 FOO". */
+ {int}{id} {
+ complain_at (*loc, complaint, _("invalid identifier: %s"), quote (yytext));
+ }
- /* Characters. We don't check there is only one. */
- \' YY_INIT; YY_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
+ /* Characters. */
+ "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
/* Strings. */
- \" YY_INIT; YY_GROW; yy_push_state (SC_ESCAPED_STRING);
-
- /* Comments. */
- "/*" yy_push_state (SC_COMMENT);
- "//".* YY_STEP;
+ "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
/* Prologue. */
- "%{" YY_INIT; yy_push_state (SC_PROLOGUE);
+ "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
/* Code in between braces. */
- "{" YY_INIT; YY_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE);
+ "{" {
+ STRING_GROW;
+ nesting = 0;
+ code_start = loc->start;
+ BEGIN SC_BRACED_CODE;
+ }
+
+ /* Semantic predicate. */
+ "%?"[ \f\n\t\v]*"{" {
+ nesting = 0;
+ code_start = loc->start;
+ BEGIN SC_PREDICATE;
+ }
/* A type. */
- "<"[^>]+">" YY_INIT; obstack_grow (&string_obstack, yytext + 1, yyleng - 2); YY_FINISH; return TYPE;
+ "<*>" return TAG_ANY;
+ "<>" return TAG_NONE;
+ "<"{tag}">" {
+ obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
+ STRING_FINISH;
+ val->uniqstr = uniqstr_new (last_string);
+ STRING_FREE;
+ return TAG;
+ }
+ "<" {
+ nesting = 0;
+ token_start = loc->start;
+ BEGIN SC_TAG;
+ }
- "%%" {
+ "%%" {
+ static int percent_percent_count;
if (++percent_percent_count == 2)
- yy_push_state (SC_EPILOGUE);
+ BEGIN SC_EPILOGUE;
return PERCENT_PERCENT;
}
- . {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": invalid character: `%c'\n", *yytext);
- YY_STEP;
+ "[" {
+ bracketed_id_str = NULL;
+ bracketed_id_start = loc->start;
+ bracketed_id_context_state = YY_START;
+ BEGIN SC_BRACKETED_ID;
+ }
+
+ . {
+ complain_at (*loc, complaint, _("invalid character: %s"), quote (yytext));
+ }
+
+ <<EOF>> {
+ loc->start = loc->end = scanner_cursor;
+ yyterminate ();
}
}
- /*------------------------------------------------------------.
- | Whatever the start condition (but those which correspond to |
- | entity `swallowed' by Bison: SC_ESCAPED_STRING and |
- | SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
- `------------------------------------------------------------*/
+ /*--------------------------------------------------------------.
+ | Supporting \0 complexifies our implementation for no expected |
+ | added value. |
+ `--------------------------------------------------------------*/
-<SC_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
{
- \[ obstack_sgrow (&string_obstack, "@<:@");
- \] obstack_sgrow (&string_obstack, "@:>@");
+ \0 complain_at (*loc, complaint, _("invalid null character"));
}
+ /*-----------------------------------------------------------------.
+ | Scanning after an identifier, checking whether a colon is next. |
+ `-----------------------------------------------------------------*/
- /*-----------------------------------------------------------.
- | Scanning a C comment. The initial `/ *' is already eaten. |
- `-----------------------------------------------------------*/
-
-<SC_COMMENT>
+<SC_AFTER_IDENTIFIER>
{
- "*/" { /* End of the comment. */
- if (yy_top_state () == INITIAL)
+ "[" {
+ if (bracketed_id_str)
{
- YY_STEP;
+ ROLLBACK_CURRENT_TOKEN;
+ BEGIN SC_RETURN_BRACKETED_ID;
+ *loc = id_loc;
+ return ID;
}
else
{
- YY_GROW;
+ bracketed_id_start = loc->start;
+ bracketed_id_context_state = YY_START;
+ BEGIN SC_BRACKETED_ID;
}
- yy_pop_state ();
}
+ ":" {
+ BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
+ *loc = id_loc;
+ return ID_COLON;
+ }
+ . {
+ ROLLBACK_CURRENT_TOKEN;
+ BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
+ *loc = id_loc;
+ return ID;
+ }
+ <<EOF>> {
+ BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
+ *loc = id_loc;
+ return ID;
+ }
+}
- [^\[\]*\n\r]+ if (yy_top_state () != INITIAL) YY_GROW;
- {eols} if (yy_top_state () != INITIAL) YY_GROW; YY_LINES;
- . /* Stray `*'. */if (yy_top_state () != INITIAL) YY_GROW;
+ /*--------------------------------.
+ | Scanning bracketed identifiers. |
+ `--------------------------------*/
+<SC_BRACKETED_ID>
+{
+ {id} {
+ if (bracketed_id_str)
+ {
+ complain_at (*loc, complaint,
+ _("unexpected identifier in bracketed name: %s"),
+ quote (yytext));
+ }
+ else
+ {
+ bracketed_id_str = uniqstr_new (yytext);
+ bracketed_id_loc = *loc;
+ }
+ }
+ "]" {
+ BEGIN bracketed_id_context_state;
+ if (bracketed_id_str)
+ {
+ if (INITIAL == bracketed_id_context_state)
+ {
+ val->uniqstr = bracketed_id_str;
+ bracketed_id_str = 0;
+ *loc = bracketed_id_loc;
+ return BRACKETED_ID;
+ }
+ }
+ else
+ complain_at (*loc, complaint, _("an identifier expected"));
+ }
+ . {
+ complain_at (*loc, complaint, _("invalid character in bracketed name: %s"),
+ quote (yytext));
+ }
<<EOF>> {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": unexpected end of file in a comment\n");
- yy_pop_state ();
+ BEGIN bracketed_id_context_state;
+ unexpected_eof (bracketed_id_start, "]");
}
}
+<SC_RETURN_BRACKETED_ID>
+{
+ . {
+ ROLLBACK_CURRENT_TOKEN;
+ val->uniqstr = bracketed_id_str;
+ bracketed_id_str = 0;
+ *loc = bracketed_id_loc;
+ BEGIN INITIAL;
+ return BRACKETED_ID;
+ }
+}
- /*----------------------------------------------------------------.
- | Scanning a C string, including its escapes. The initial `"' is |
- | already eaten. |
- `----------------------------------------------------------------*/
-<SC_ESCAPED_STRING>
+ /*---------------------------------------------------------------.
+ | Scanning a Yacc comment. The initial '/ *' is already eaten. |
+ `---------------------------------------------------------------*/
+
+<SC_YACC_COMMENT>
{
- \" {
- assert (yy_top_state () == INITIAL);
- YY_GROW;
- YY_FINISH;
- yy_pop_state ();
- return STRING;
- }
+ "*/" BEGIN context_state;
+ .|\n ;
+ <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
+}
- [^\"\n\r\\]+ YY_GROW;
- {eols} obstack_1grow (&string_obstack, '\n'); YY_LINES;
+ /*------------------------------------------------------------.
+ | Scanning a C comment. The initial '/ *' is already eaten. |
+ `------------------------------------------------------------*/
+<SC_COMMENT>
+{
+ "*"{splice}"/" STRING_GROW; BEGIN context_state;
+ <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
+}
+
+
+ /*--------------------------------------------------------------.
+ | Scanning a line comment. The initial '//' is already eaten. |
+ `--------------------------------------------------------------*/
+
+<SC_LINE_COMMENT>
+{
+ "\n" STRING_GROW; BEGIN context_state;
+ {splice} STRING_GROW;
+ <<EOF>> BEGIN context_state;
+}
+
+
+ /*------------------------------------------------.
+ | Scanning a Bison string, including its escapes. |
+ | The initial quote is already eaten. |
+ `------------------------------------------------*/
+
+<SC_ESCAPED_STRING>
+{
+ "\""|"\n" {
+ if (yytext[0] == '\n')
+ unexpected_newline (token_start, "\"");
+ STRING_FINISH;
+ loc->start = token_start;
+ val->chars = last_string;
+ BEGIN INITIAL;
+ return STRING;
+ }
<<EOF>> {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": unexpected end of file in a string\n");
- assert (yy_top_state () == INITIAL);
- YY_FINISH;
- yy_pop_state ();
+ unexpected_eof (token_start, "\"");
+ STRING_FINISH;
+ loc->start = token_start;
+ val->chars = last_string;
+ BEGIN INITIAL;
return STRING;
}
}
- /*---------------------------------------------------------------.
- | Scanning a C character, decoding its escapes. The initial "'" |
- | is already eaten. |
- `---------------------------------------------------------------*/
+ /*----------------------------------------------------------.
+ | Scanning a Bison character literal, decoding its escapes. |
+ | The initial quote is already eaten. |
+ `----------------------------------------------------------*/
<SC_ESCAPED_CHARACTER>
{
- \' {
- YY_GROW;
- assert (yy_top_state () == INITIAL);
+ "'"|"\n" {
+ STRING_FINISH;
+ loc->start = token_start;
+ val->character = last_string[0];
{
- char c;
- YY_FINISH;
- c = yylval->string[1];
- yylval->symbol = getsym (yylval->string);
- symbol_class_set (yylval->symbol, token_sym);
- symbol_user_token_number_set (yylval->symbol, (unsigned int) c);
- yy_pop_state ();
- return ID;
+ /* FIXME: Eventually, make these errors. */
+ if (last_string[0] == '\0')
+ {
+ complain_at (*loc, Wother, _("empty character literal"));
+ /* '\0' seems dangerous even if we are about to complain. */
+ val->character = '\'';
+ }
+ else if (last_string[1] != '\0')
+ complain_at (*loc, Wother,
+ _("extra characters in character literal"));
}
+ if (yytext[0] == '\n')
+ unexpected_newline (token_start, "'");
+ STRING_FREE;
+ BEGIN INITIAL;
+ return CHAR;
}
+ <<EOF>> {
+ STRING_FINISH;
+ loc->start = token_start;
+ val->character = last_string[0];
+ {
+ /* FIXME: Eventually, make these errors. */
+ if (last_string[0] == '\0')
+ {
+ complain_at (*loc, Wother, _("empty character literal"));
+ /* '\0' seems dangerous even if we are about to complain. */
+ val->character = '\'';
+ }
+ else if (last_string[1] != '\0')
+ complain_at (*loc, Wother,
+ _("extra characters in character literal"));
+ }
+ unexpected_eof (token_start, "'");
+ STRING_FREE;
+ BEGIN INITIAL;
+ return CHAR;
+ }
+}
- [^\'\n\r\\] YY_GROW;
+ /*-----------------------------------------------------------.
+ | Scanning a Bison nested tag. The initial angle bracket is |
+ | already eaten. |
+ `-----------------------------------------------------------*/
- {eols} obstack_1grow (&string_obstack, '\n'); YY_LINES;
+<SC_TAG>
+{
+ ">" {
+ --nesting;
+ if (nesting < 0)
+ {
+ STRING_FINISH;
+ loc->start = token_start;
+ val->uniqstr = uniqstr_new (last_string);
+ STRING_FREE;
+ BEGIN INITIAL;
+ return TAG;
+ }
+ STRING_GROW;
+ }
+
+ [^<>]+ STRING_GROW;
+ "<"+ STRING_GROW; nesting += yyleng;
<<EOF>> {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": unexpected end of file in a character\n");
- assert (yy_top_state () == INITIAL);
- YY_FINISH;
- yy_pop_state ();
- return CHARACTER;
+ unexpected_eof (token_start, ">");
+ STRING_FINISH;
+ loc->start = token_start;
+ val->uniqstr = uniqstr_new (last_string);
+ STRING_FREE;
+ BEGIN INITIAL;
+ return TAG;
}
}
-
/*----------------------------.
| Decode escaped characters. |
`----------------------------*/
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
- \\[0-7]{3} {
- long c = strtol (yytext + 1, 0, 8);
- if (c > 255)
- {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": invalid escape: %s\n", yytext);
- YY_STEP;
- }
+ \\[0-7]{1,3} {
+ unsigned long int c = strtoul (yytext + 1, NULL, 8);
+ if (!c || UCHAR_MAX < c)
+ complain_at (*loc, complaint, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
- obstack_1grow (&string_obstack, c);
+ obstack_1grow (&obstack_for_string, c);
}
- \\x[0-9a-fA-F]{2} {
- obstack_1grow (&string_obstack, strtol (yytext + 2, 0, 16));
+ \\x[0-9abcdefABCDEF]+ {
+ verify (UCHAR_MAX < ULONG_MAX);
+ unsigned long int c = strtoul (yytext + 2, NULL, 16);
+ if (!c || UCHAR_MAX < c)
+ complain_at (*loc, complaint, _("invalid number after \\-escape: %s"),
+ yytext+1);
+ else
+ obstack_1grow (&obstack_for_string, c);
}
- \\a obstack_1grow (&string_obstack, '\a');
- \\b obstack_1grow (&string_obstack, '\b');
- \\f obstack_1grow (&string_obstack, '\f');
- \\n obstack_1grow (&string_obstack, '\n');
- \\r obstack_1grow (&string_obstack, '\r');
- \\t obstack_1grow (&string_obstack, '\t');
- \\v obstack_1grow (&string_obstack, '\v');
- \\[\\""] obstack_1grow (&string_obstack, yytext[1]);
- \\. {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": unrecognized escape: %s\n", yytext);
- YY_GROW;
+ \\a obstack_1grow (&obstack_for_string, '\a');
+ \\b obstack_1grow (&obstack_for_string, '\b');
+ \\f obstack_1grow (&obstack_for_string, '\f');
+ \\n obstack_1grow (&obstack_for_string, '\n');
+ \\r obstack_1grow (&obstack_for_string, '\r');
+ \\t obstack_1grow (&obstack_for_string, '\t');
+ \\v obstack_1grow (&obstack_for_string, '\v');
+
+ /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
+ \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
+
+ \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
+ int c = convert_ucn_to_byte (yytext);
+ if (c <= 0)
+ complain_at (*loc, complaint, _("invalid number after \\-escape: %s"),
+ yytext+1);
+ else
+ obstack_1grow (&obstack_for_string, c);
+ }
+ \\(.|\n) {
+ char const *p = yytext + 1;
+ /* Quote only if escaping won't make the character visible. */
+ if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
+ p = quote (p);
+ else
+ p = quotearg_style_mem (escape_quoting_style, p, 1);
+ complain_at (*loc, complaint, _("invalid character after \\-escape: %s"),
+ p);
}
}
+ /*--------------------------------------------.
+ | Scanning user-code characters and strings. |
+ `--------------------------------------------*/
- /*----------------------------------------------------------.
- | Scanning a C character without decoding its escapes. The |
- | initial "'" is already eaten. |
- `----------------------------------------------------------*/
+<SC_CHARACTER,SC_STRING>
+{
+ {splice}|\\{splice}[^\n\[\]] STRING_GROW;
+}
<SC_CHARACTER>
{
- \' {
- YY_GROW;
- assert (yy_top_state () != INITIAL);
- yy_pop_state ();
- }
-
- [^\[\]\'\n\r\\] YY_GROW;
- \\. YY_GROW;
-
- {eols} YY_GROW; YY_LINES;
-
- <<EOF>> {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": unexpected end of file in a character\n");
- assert (yy_top_state () != INITIAL);
- yy_pop_state ();
- }
+ "'" STRING_GROW; BEGIN context_state;
+ \n unexpected_newline (token_start, "'"); BEGIN context_state;
+ <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
}
-
- /*----------------------------------------------------------------.
- | Scanning a C string, without decoding its escapes. The initial |
- | `"' is already eaten. |
- `----------------------------------------------------------------*/
-
<SC_STRING>
{
- \" {
- assert (yy_top_state () != INITIAL);
- YY_GROW;
- yy_pop_state ();
- }
-
- [^\[\]\"\n\r\\]+ YY_GROW;
- \\. YY_GROW;
-
- {eols} YY_GROW; YY_LINES;
-
- <<EOF>> {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": unexpected end of file in a string\n");
- assert (yy_top_state () != INITIAL);
- yy_pop_state ();
- }
+ "\"" STRING_GROW; BEGIN context_state;
+ \n unexpected_newline (token_start, "\""); BEGIN context_state;
+ <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
}
/*---------------------------------------------------.
| Strings, comments etc. can be found in user code. |
`---------------------------------------------------*/
-<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
+<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
{
- /* Characters. We don't check there is only one. */
- \' YY_GROW; yy_push_state (SC_CHARACTER);
+ "'" {
+ STRING_GROW;
+ context_state = YY_START;
+ token_start = loc->start;
+ BEGIN SC_CHARACTER;
+ }
+ "\"" {
+ STRING_GROW;
+ context_state = YY_START;
+ token_start = loc->start;
+ BEGIN SC_STRING;
+ }
+ "/"{splice}"*" {
+ STRING_GROW;
+ context_state = YY_START;
+ token_start = loc->start;
+ BEGIN SC_COMMENT;
+ }
+ "/"{splice}"/" {
+ STRING_GROW;
+ context_state = YY_START;
+ BEGIN SC_LINE_COMMENT;
+ }
+}
- /* Strings. */
- \" YY_GROW; yy_push_state (SC_STRING);
- /* Comments. */
- "/*" YY_GROW; yy_push_state (SC_COMMENT);
- "//".* YY_GROW;
-}
+ /*-----------------------------------------------------------.
+ | Scanning some code in braces (actions, predicates). The |
+ | initial "{" is already eaten. |
+ `-----------------------------------------------------------*/
- /*---------------------------------------------------------------.
- | Scanning some code in braces (%union and actions). The initial |
- | "{" is already eaten. |
- `---------------------------------------------------------------*/
+<SC_BRACED_CODE,SC_PREDICATE>
+{
+ "{"|"<"{splice}"%" STRING_GROW; nesting++;
+ "%"{splice}">" STRING_GROW; nesting--;
+
+ /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
+ (as '<' '<%'). */
+ "<"{splice}"<" STRING_GROW;
+
+ <<EOF>> {
+ int token = (YY_START == SC_BRACED_CODE) ? BRACED_CODE : BRACED_PREDICATE;
+ unexpected_eof (code_start, "}");
+ STRING_FINISH;
+ loc->start = code_start;
+ val->code = last_string;
+ BEGIN INITIAL;
+ return token;
+ }
+}
<SC_BRACED_CODE>
{
"}" {
- YY_GROW;
- if (--braces_level == 0)
+ obstack_1grow (&obstack_for_string, '}');
+
+ --nesting;
+ if (nesting < 0)
{
- yy_pop_state ();
- YY_FINISH;
- return BRACED_CODE;
+ STRING_FINISH;
+ loc->start = code_start;
+ val->code = last_string;
+ BEGIN INITIAL;
+ return BRACED_CODE;
}
}
+}
- "{" YY_GROW; braces_level++;
-
- "$"("<".*">")?(-?[0-9]+|"$") { handle_dollar (yytext); }
- "@"(-?[0-9]+|"$") { handle_at (yytext); }
-
- [^\[\]$/\'\"@\{\}\n\r]+ YY_GROW;
- {eols} YY_GROW; YY_LINES;
-
- /* A lose $, or /, or etc. */
- . YY_GROW;
-
- <<EOF>> {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": unexpected end of file in a braced code\n");
- yy_pop_state ();
- YY_FINISH;
- return PROLOGUE;
+<SC_PREDICATE>
+{
+ "}" {
+ --nesting;
+ if (nesting < 0)
+ {
+ STRING_FINISH;
+ loc->start = code_start;
+ val->code = last_string;
+ BEGIN INITIAL;
+ return BRACED_PREDICATE;
+ }
+ else
+ obstack_1grow (&obstack_for_string, '}');
}
-
}
-
/*--------------------------------------------------------------.
| Scanning some prologue: from "%{" (already scanned) to "%}". |
`--------------------------------------------------------------*/
<SC_PROLOGUE>
{
"%}" {
- yy_pop_state ();
- YY_FINISH;
+ STRING_FINISH;
+ loc->start = code_start;
+ val->chars = last_string;
+ BEGIN INITIAL;
return PROLOGUE;
}
- [^\[\]%\n\r]+ YY_GROW;
- "%"+[^%\}\n\r]+ YY_GROW;
- {eols} YY_GROW; YY_LINES;
-
<<EOF>> {
- LOCATION_PRINT (stderr, *yylloc);
- fprintf (stderr, ": unexpected end of file in a prologue\n");
- yy_pop_state ();
- YY_FINISH;
+ unexpected_eof (code_start, "%}");
+ STRING_FINISH;
+ loc->start = code_start;
+ val->chars = last_string;
+ BEGIN INITIAL;
return PROLOGUE;
}
-
}
/*---------------------------------------------------------------.
| Scanning the epilogue (everything after the second "%%", which |
- | has already been eaten. |
+ | has already been eaten). |
`---------------------------------------------------------------*/
<SC_EPILOGUE>
{
- ([^\[\]]|{eols})+ YY_GROW;
-
<<EOF>> {
- yy_pop_state ();
- YY_FINISH;
+ STRING_FINISH;
+ loc->start = code_start;
+ val->chars = last_string;
+ BEGIN INITIAL;
return EPILOGUE;
}
}
+ /*-----------------------------------------------------.
+ | By default, grow the string obstack with the input. |
+ `-----------------------------------------------------*/
+
+<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
+ <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
+
%%
-/*------------------------------------------------------------------.
-| CP is pointing to a wannabee semantic value (i.e., a `$'). |
-| |
-| Possible inputs: $[<TYPENAME>]($|integer) |
-| |
-| Output to the STRING_OBSTACK a reference to this semantic value. |
-`------------------------------------------------------------------*/
+/* Read bytes from FP into buffer BUF of size SIZE. Return the
+ number of bytes left in BUF after conversion; every \r\n pair
+ found inside the buffer shrinks that count by one. Remove '\r'
+ from input, treating \r\n and isolated \r as \n. When BUF holds
+ no '\r' at all, the fread count is returned unchanged. The
+ conversion is done in place: W is the write cursor, R the read
+ cursor, and LIM marks the end of the bytes returned by fread. */
-static void
-handle_dollar (char *cp)
+static size_t
+no_cr_read (FILE *fp, char *buf, size_t size)
{
- const char *type_name = NULL;
+ size_t bytes_read = fread (buf, 1, size, fp);
+ if (bytes_read)
+ {
+ char *w = memchr (buf, '\r', bytes_read);
+ if (w)
+ {
+ char const *r = ++w;
+ char const *lim = buf + bytes_read;
+
+ for (;;)
+ {
+ /* Found a '\r'. Treat it like '\n', but ignore any
+ '\n' that immediately follows. When the '\r' is the
+ last byte of the buffer, the following byte has to be
+ fetched from FP: a '\n' is dropped, anything else is
+ pushed back, and conversion stops if the push-back
+ fails. */
+ w[-1] = '\n';
+ if (r == lim)
+ {
+ int ch = getc (fp);
+ if (ch != '\n' && ungetc (ch, fp) != ch)
+ break;
+ }
+ else if (*r == '\n')
+ r++;
+
+ /* Copy until the next '\r'; return as soon as the bytes
+ obtained from fread run out. */
+ do
+ {
+ if (r == lim)
+ return w - buf;
+ }
+ while ((*w++ = *r++) != '\r');
+ }
+
+ return w - buf; /* Reached only through the break above. */
+ }
+ }
- /* RULE_LENGTH is the number of values in the current rule so far,
- which says where to find `$0' with respect to the top of the
- stack. It is not the same as the rule->length in the case of mid
- rule actions. */
- int rule_length = 0;
- symbol_list *rhs;
- for (rhs = current_rule->next; rhs; rhs = rhs->next)
- ++rule_length;
+ return bytes_read;
+}
- ++cp;
- /* Get the type name if explicit. */
- if (*cp == '<')
- {
- type_name = ++cp;
- while (*cp != '>')
- ++cp;
- *cp = '\0';
- ++cp;
- }
- if (*cp == '$')
- {
- if (!type_name)
- type_name = get_type_name (0, current_rule);
- if (!type_name && typed)
- complain (_("$$ of `%s' has no declared type"),
- current_rule->sym->tag);
- if (!type_name)
- type_name = "";
- obstack_fgrow1 (&string_obstack,
- "]b4_lhs_value([%s])[", type_name);
- }
- else if (isdigit (*cp) || *cp == '-')
+/*------------------------------------------------------.
+| Scan NUMBER for a base-BASE integer at location LOC. |
+| Return the value. If it exceeds INT_MAX, complain at |
+| LOC ("integer out of range") and return INT_MAX. |
+`------------------------------------------------------*/
+
+static unsigned long int
+scan_integer (char const *number, int base, location loc)
+{
+ verify (INT_MAX < ULONG_MAX); /* Presumably a compile-time assertion (gnulib verify) -- confirm.  */
+ unsigned long int num = strtoul (number, NULL, base); /* No end pointer: trailing text in NUMBER is not checked here.  */
+
+ if (INT_MAX < num)
{
- int n = strtol (cp, &cp, 10);
-
- if (n > rule_length)
- complain (_("invalid value: %s%d"), "$", n);
- else
- {
- if (!type_name && n > 0)
- type_name = get_type_name (n, current_rule);
- if (!type_name && typed)
- complain (_("$%d of `%s' has no declared type"),
- n, current_rule->sym->tag);
- if (!type_name)
- type_name = "";
- obstack_fgrow3 (&string_obstack,
- "]b4_rhs_value([%d], [%d], [%s])[",
- rule_length, n, type_name);
- }
+ complain_at (loc, complaint, _("integer out of range: %s"),
+ quote (number));
+ num = INT_MAX; /* Clamp after reporting, so the result never exceeds INT_MAX.  */
}
- else
+
+ return num;
+}
+
+
+/*------------------------------------------------------------------.
+| Convert universal character name UCN to a single-byte character,  |
+| and return that character.  Return -1 if UCN does not correspond  |
+| to a single-byte character.                                       |
+|                                                                   |
+| The first two bytes of UCN (presumably the backslash and the 'u'  |
+| or 'U' that introduce the escape) are skipped; what follows is    |
+| parsed as a hexadecimal code point.                               |
+`------------------------------------------------------------------*/
+
+static int
+convert_ucn_to_byte (char const *ucn)
+{
+  verify (UCHAR_MAX <= INT_MAX);
+  unsigned long int code = strtoul (ucn + 2, NULL, 16);
+
+  /* FIXME: Currently we assume Unicode-compatible unibyte characters
+     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
+     non-ASCII hosts we support only the portable C character set.
+     These limitations should be removed once we add support for
+     multibyte characters.  */
+
+  if (UCHAR_MAX < code)
+    return -1;
+
+#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
+  {
+    /* A non-ASCII host.  Use CODE to index into a table of the C
+       basic execution character set, which is guaranteed to exist on
+       all Standard C platforms.  This table also includes '$', '@',
+       and '`', which are not in the basic execution character set but
+       which are unibyte characters on all the platforms that we know
+       about.  */
+    static signed char const table[] =
+      {
+        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
+        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
+          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
+         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
+         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
+         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
+         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
+         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
+         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
+         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
+         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
+         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
+         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
+         'x',  'y',  'z',  '{',  '|',  '}',  '~'
+      };
+
+    /* Return the looked-up value as an int.  Storing the -1 marker in
+       the unsigned CODE and converting it back to int on return would
+       rely on an implementation-defined conversion.  */
+    return code < sizeof table ? table[code] : -1;
+  }
+#else
+  /* An ASCII host: CODE is at most UCHAR_MAX here, so it fits in int.  */
+  return code;
+#endif
+}
+
+
+/*----------------------------------------------------------------.
+| Handle '#line INT "FILE"'. ARGS has already skipped '#line '. |
+| |
+| Make FILE the current file and move the scanner cursor to line |
+| INT of it. The closing quote in ARGS is overwritten with a NUL, |
+| so ARGS is modified. A line number of INT_MAX or more is |
+| reported at LOC and clamped to INT_MAX. |
+`----------------------------------------------------------------*/
+
+static void
+handle_syncline (char *args, location loc)
+{
+ char *after_num;
+ unsigned long int lineno = strtoul (args, &after_num, 10); /* AFTER_NUM: first byte past the digits.  */
+ char *file = strchr (after_num, '"') + 1; /* Assumes an opening quote exists (presumably ensured by the scanner rule -- confirm).  */
+ *strchr (file, '"') = '\0'; /* Likewise assumes a closing quote; terminates FILE in place.  */
+ if (INT_MAX <= lineno) /* NOTE(review): '<=' also reports a value exactly equal to INT_MAX.  */
{
- char buf[] = "$c";
- buf[1] = *cp;
- complain (_("%s is invalid"), quote (buf));
+ complain_at (loc, Wother, _("line number overflow"));
+ lineno = INT_MAX;
}
+ current_file = uniqstr_new (file);
+ boundary_set (&scanner_cursor, current_file, lineno, 1); /* Last argument is presumably the column -- confirm against boundary_set.  */
}
-/*-------------------------------------------------------.
-| CP is pointing to a location (i.e., a `@'). Output to |
-| STRING_OBSTACK a reference to this location. |
-`-------------------------------------------------------*/
+
+/*----------------------------------------------------------------.
+| For a token or comment starting at START, report message MSGID, |
+| which should say that an end marker was found before |
+| the expected TOKEN_END. |
+| |
+| MSGID is an untranslated format; it is translated here and is |
+| given the quoted TOKEN_END as its only argument. The complaint |
+| covers START up to the current scanner cursor. |
+`----------------------------------------------------------------*/
static void
-handle_at (char *cp)
+unexpected_end (boundary start, char const *msgid, char const *token_end)
{
- /* RULE_LENGTH is the number of values in the current rule so far,
- which says where to find `$0' with respect to the top of the
- stack. It is not the same as the rule->length in the case of mid
- rule actions. */
- int rule_length = 0;
- symbol_list *rhs;
- for (rhs = current_rule->next; rhs; rhs = rhs->next)
- ++rule_length;
+ location loc;
+ loc.start = start;
+ loc.end = scanner_cursor;
+ token_end = quote (token_end); /* From here on TOKEN_END is the quoted, displayable form.  */
+ // When quoting yields '\'' (an escaped apostrophe), display "'" instead.
+ if (STREQ (token_end, "'\\''"))
+ token_end = "\"'\"";
+ complain_at (loc, complaint, _(msgid), token_end); /* Callers in this file pass MSGID wrapped in N_().  */
+}
- locations_flag = 1;
- ++cp;
- if (*cp == '$')
- {
- obstack_sgrow (&string_obstack, "]b4_lhs_location[");
- }
- else if (isdigit (*cp) || *cp == '-')
- {
- int n = strtol (cp, &cp, 10);
- if (n > rule_length)
- complain (_("invalid value: %s%d"), "@", n);
- else
- obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
- rule_length, n);
- }
- else
- {
- char buf[] = "@c";
- buf[1] = *cp;
- complain (_("%s is invalid"), quote (buf));
- }
+/*------------------------------------------------------------------------.
+| The input ended inside the token or comment that began at START,       |
+| before the TOKEN_END that should have closed it.  Complain about it.   |
+`------------------------------------------------------------------------*/
+
+static void
+unexpected_eof (boundary start, char const *token_end)
+{
+  /* Left untranslated here: unexpected_end applies _() to it.  */
+  char const *msgid = N_("missing %s at end of file");
+
+  unexpected_end (start, msgid, token_end);
+}
+
+
+/*------------------------------------------------------------------------.
+| The line ended inside the token or comment that began at START,        |
+| before the TOKEN_END that should have closed it.  Complain about it.   |
+`------------------------------------------------------------------------*/
+
+static void
+unexpected_newline (boundary start, char const *token_end)
+{
+  /* Left untranslated here: unexpected_end applies _() to it.  */
+  char const *msgid = N_("missing %s at end of line");
+
+  unexpected_end (start, msgid, token_end);
+}
+
+
+/*------------------------------------------------------------.
+| Initialize the scanner: set up obstack_for_string for use. |
+| gram_scanner_free releases it again. |
+`------------------------------------------------------------*/
+
+void
+gram_scanner_initialize (void)
+{
+ obstack_init (&obstack_for_string);
+}
+
+
+/*---------------------------------------------------------------------.
+| Release the scanner's memory: the string obstack and Flex's buffers. |
+`---------------------------------------------------------------------*/
+
+void
+gram_scanner_free (void)
+{
+  /* A null object pointer asks obstack_free to release everything.  */
+  obstack_free (&obstack_for_string, NULL);
+
+  /* Reclaim Flex's buffers.  */
+  yylex_destroy ();
}