/* Bison Grammar Scanner -*- C -*-
- Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
+ the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA
-*/
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
%option debug nodefault nounput noyywrap never-interactive
%option prefix="gram_" outfile="lex.yy.c"
#define gram_wrap() 1
#define FLEX_PREFIX(Id) gram_ ## Id
-#include "flex-scanner.h"
+#include <src/flex-scanner.h>
-#include "complain.h"
-#include "files.h"
-#include "getargs.h" /* yacc_flag */
-#include "gram.h"
-#include "quotearg.h"
-#include "reader.h"
-#include "uniqstr.h"
+#include <src/complain.h>
+#include <src/files.h>
+#include <src/gram.h>
+#include <quotearg.h>
+#include <src/reader.h>
+#include <src/uniqstr.h>
#include <mbswidth.h>
#include <quote.h>
-#include "scan-gram.h"
+#include <src/scan-gram.h>
#define YY_DECL GRAM_LEX_DECL
-
+
#define YY_USER_INIT \
code_start = scanner_cursor = loc->start; \
/* Location of scanner cursor. */
-boundary scanner_cursor;
+static boundary scanner_cursor;
#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
static size_t no_cr_read (FILE *, char *, size_t);
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
+#define RETURN_PERCENT_FLAG(Value) /* Scanner action: return a PERCENT_FLAG token whose value names Value. */ \
+ do { /* do/while (0) makes the expansion usable as a single statement. */ \
+ val->uniqstr = uniqstr_new (Value); /* Semantic value: the uniqstr made from Value. */ \
+ return PERCENT_FLAG; \
+ } while (0)
+
+
/* A string representing the most recently saved token. */
-char *last_string;
+static char *last_string;
void
gram_scanner_last_string_free (void)
STRING_FREE;
}
-/* The location of the most recently saved token, if it was a
- BRACED_CODE token; otherwise, this has an unspecified value. */
-location gram_last_braced_code_loc;
-
static void handle_syncline (char *, location);
static unsigned long int scan_integer (char const *p, int base, location loc);
static int convert_ucn_to_byte (char const *hex_text);
/* An identifier was just read in directives/rules. Special state
to capture the sequence `identifier :'. */
%x SC_AFTER_IDENTIFIER
- /* A keyword that should be followed by some code was read (e.g.
- %printer). */
-%x SC_PRE_CODE
+ /* A complex tag, with nested angle brackets. */
+%x SC_TAG
/* Three types of user code:
- prologue (code between `%{' `%}' in the first section, before %%);
/* POSIX says that a tag must be both an id and a C union member, but
historically almost any character is allowed in a tag. We disallow
- NUL and newline, as this simplifies our implementation. */
-tag [^\0\n>]+
+ NUL, as this simplifies our implementation. We disallow angle
+ brackets so that they can be matched in nested pairs: several
+ languages use them for generics/template types. */
+tag [^\0<>]+
/* Zero or more instances of backslash-newline. Following GCC, allow
white space between the backslash and the newline. */
%%
%{
- /* Nesting level of the current code in braces. */
- int braces_level IF_LINT (= 0);
+ /* Nesting level. Either for nested braces, or nested angle brackets
+ (but not mixed). */
+ int nesting IF_LINT (= 0);
/* Parent context state, when applicable. */
int context_state IF_LINT (= 0);
- /* Token type to return, when applicable. */
- int token_type IF_LINT (= 0);
-
/* Location of most recent identifier, when applicable. */
location id_loc IF_LINT (= empty_location);
| Scanning white space. |
`-----------------------*/
-<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
+<INITIAL,SC_AFTER_IDENTIFIER>
{
/* Comments and white space. */
"," warn_at (*loc, _("stray `,' treated as white space"));
/*----------------------------.
| Scanning Bison directives. |
`----------------------------*/
+
+ /* For directives that are also command line options, the regex must be
+ "%..."
+ after "[-_]"s are removed, and the directive must match the --long
+ option name, with a single string argument. Otherwise, add exceptions
+ to ../build-aux/cross-options.pl. */
+
<INITIAL>
{
- "%binary" return PERCENT_NONASSOC;
- "%debug" return PERCENT_DEBUG;
- "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
- "%define" return PERCENT_DEFINE;
- "%defines" return PERCENT_DEFINES;
- "%destructor" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_DESTRUCTOR;
- "%dprec" return PERCENT_DPREC;
- "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
- "%expect" return PERCENT_EXPECT;
- "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
- "%file-prefix" return PERCENT_FILE_PREFIX;
+ "%binary" return PERCENT_NONASSOC;
+ "%code" return PERCENT_CODE;
+ "%debug" RETURN_PERCENT_FLAG("debug");
+ "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
+ "%define" return PERCENT_DEFINE;
+ "%defines" return PERCENT_DEFINES;
+ "%destructor" return PERCENT_DESTRUCTOR;
+ "%dprec" return PERCENT_DPREC;
+ "%error"[-_]"verbose" RETURN_PERCENT_FLAG("error_verbose");
+ "%expect" return PERCENT_EXPECT;
+ "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
+ "%file-prefix" return PERCENT_FILE_PREFIX;
"%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
- "%initial-action" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_INITIAL_ACTION;
- "%glr-parser" return PERCENT_GLR_PARSER;
- "%left" return PERCENT_LEFT;
- "%lex-param" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_LEX_PARAM;
- "%locations" return PERCENT_LOCATIONS;
- "%merge" return PERCENT_MERGE;
- "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
- "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
- "%no"[-_]"lines" return PERCENT_NO_LINES;
- "%nonassoc" return PERCENT_NONASSOC;
- "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
- "%nterm" return PERCENT_NTERM;
- "%output" return PERCENT_OUTPUT;
- "%parse-param" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_PARSE_PARAM;
- "%prec" return PERCENT_PREC;
- "%printer" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_PRINTER;
- "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
- "%require" return PERCENT_REQUIRE;
- "%right" return PERCENT_RIGHT;
- "%skeleton" return PERCENT_SKELETON;
- "%start" return PERCENT_START;
- "%term" return PERCENT_TOKEN;
- "%token" return PERCENT_TOKEN;
- "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
- "%type" return PERCENT_TYPE;
- "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
- "%verbose" return PERCENT_VERBOSE;
- "%yacc" return PERCENT_YACC;
+ "%initial-action" return PERCENT_INITIAL_ACTION;
+ "%glr-parser" return PERCENT_GLR_PARSER;
+ "%language" return PERCENT_LANGUAGE;
+ "%left" return PERCENT_LEFT;
+ "%lex-param" return PERCENT_LEX_PARAM;
+ "%locations" return PERCENT_LOCATIONS;
+ "%merge" return PERCENT_MERGE;
+ "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
+ "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
+ "%no"[-_]"lines" return PERCENT_NO_LINES;
+ "%nonassoc" return PERCENT_NONASSOC;
+ "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
+ "%nterm" return PERCENT_NTERM;
+ "%output" return PERCENT_OUTPUT;
+ "%parse-param" return PERCENT_PARSE_PARAM;
+ "%prec" return PERCENT_PREC;
+ "%precedence" return PERCENT_PRECEDENCE;
+ "%printer" return PERCENT_PRINTER;
+ "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
+ "%require" return PERCENT_REQUIRE;
+ "%right" return PERCENT_RIGHT;
+ "%skeleton" return PERCENT_SKELETON;
+ "%start" return PERCENT_START;
+ "%term" return PERCENT_TOKEN;
+ "%token" return PERCENT_TOKEN;
+ "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
+ "%type" return PERCENT_TYPE;
+ "%union" return PERCENT_UNION;
+ "%verbose" return PERCENT_VERBOSE;
+ "%yacc" return PERCENT_YACC;
{directive} {
complain_at (*loc, _("invalid directive: %s"), quote (yytext));
";" return SEMICOLON;
{id} {
- val->symbol = symbol_get (yytext, *loc);
+ val->uniqstr = uniqstr_new (yytext);
id_loc = *loc;
BEGIN SC_AFTER_IDENTIFIER;
}
/* Code in between braces. */
"{" {
- if (current_rule && current_rule->action)
- grammar_midrule_action ();
STRING_GROW;
- token_type = BRACED_CODE;
- braces_level = 0;
+ nesting = 0;
code_start = loc->start;
BEGIN SC_BRACED_CODE;
}
/* A type tag: `<*>', `<>', a simple `<tag>', or the start of a nested one. */
+ "<*>" return TAG_ANY; /* Listed before the {tag} rule: `<*>' matches both with equal length, and the earlier rule wins. */
+ "<>" return TAG_NONE; /* Cannot match the {tag} rule, which needs at least one character. */
"<"{tag}">" { /* Simple tag with no nested brackets. */
obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2); /* Keep only the text between `<' and `>'. */
STRING_FINISH;
val->uniqstr = uniqstr_new (last_string);
STRING_FREE;
- return TYPE;
+ return TAG;
+ }
+ "<" { /* A `<' that none of the longer rules above matched: scan the tag in SC_TAG. */
+ nesting = 0; /* No nested bracket is open yet. */
+ token_start = loc->start; /* Remembered so the final TAG location starts here. */
+ BEGIN SC_TAG;
}
"%%" {
}
+ /*--------------------------------------------------------------.
+ | Supporting \0 complexifies our implementation for no expected |
+ | added value. |
+ `--------------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
+{
+ \0 complain_at (*loc, _("invalid null character"));
+}
+
+
/*-----------------------------------------------------------------.
| Scanning after an identifier, checking whether a colon is next. |
`-----------------------------------------------------------------*/
<SC_ESCAPED_STRING>
{
- "\"" {
+ "\""|"\n" { /* End of the string: a closing quote, or a newline (reported via unexpected_newline). */
+ if (yytext[0] == '\n')
+ unexpected_newline (token_start, "\"");
+ STRING_FINISH;
+ loc->start = token_start; /* The token's location begins at token_start. */
+ val->chars = last_string;
+ BEGIN INITIAL;
+ return STRING;
+ }
+ <<EOF>> { /* End of file inside the string: report it, then still return a STRING token. */
+ unexpected_eof (token_start, "\"");
STRING_FINISH;
loc->start = token_start;
val->chars = last_string;
BEGIN INITIAL;
return STRING;
}
- \n unexpected_newline (token_start, "\""); BEGIN INITIAL;
- <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
}
/*----------------------------------------------------------.
<SC_ESCAPED_CHARACTER>
{
- "'" {
- unsigned char last_string_1;
+ "'"|"\n" { /* End of the literal: a closing quote, or a newline (reported via unexpected_newline). */
+ if (yytext[0] == '\n')
+ unexpected_newline (token_start, "'");
STRING_GROW;
STRING_FINISH;
loc->start = token_start;
- val->symbol = symbol_get (quotearg_style (escape_quoting_style,
- last_string),
- *loc);
- symbol_class_set (val->symbol, token_sym, *loc, false);
- last_string_1 = last_string[1];
- symbol_user_token_number_set (val->symbol, last_string_1, *loc);
+ val->character = last_string[1]; /* Second byte of last_string; presumably the first is the opening quote -- TODO confirm where this state is entered. */
STRING_FREE;
BEGIN INITIAL;
- return ID;
+ return CHAR;
+ }
+ <<EOF>> { /* End of file inside the literal: report it, then still return a CHAR token. */
+ unexpected_eof (token_start, "'");
+ STRING_FINISH;
+ loc->start = token_start;
+ if (strlen(last_string) > 1) /* Guard: last_string may be too short to have a second byte. */
+ val->character = last_string[1];
+ else
+ val->character = last_string[0];
+ STRING_FREE;
+ BEGIN INITIAL;
+ return CHAR;
}
- \n unexpected_newline (token_start, "'"); BEGIN INITIAL;
- <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
}
-<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
+ /*-----------------------------------------------------------.
+ | Scanning a Bison nested tag. The initial angle bracket is |
+ | already eaten. |
+ `-----------------------------------------------------------*/
+
+<SC_TAG>
{
- \0 complain_at (*loc, _("invalid null character"));
-}
+ ">" { /* Closes one nesting level; the outermost `>' ends the tag. */
+ --nesting;
+ if (nesting < 0)
+ {
+ STRING_FINISH;
+ loc->start = token_start; /* The token spans from the opening `<'. */
+ val->uniqstr = uniqstr_new (last_string);
+ STRING_FREE;
+ BEGIN INITIAL;
+ return TAG;
+ }
+ STRING_GROW; /* An inner `>' belongs to the tag text. */
+ }
+
+ /* Text between brackets. NUL is excluded: flex prefers the longest
+ match, so a class that accepted NUL would swallow it together with
+ its neighbours and the "invalid null character" rule for this start
+ condition would only fire for an isolated NUL. */
+ [^\0<>]+ STRING_GROW;
+ "<"+ STRING_GROW; nesting += yyleng; /* Each `<' opens one more nesting level. */
+ <<EOF>> { /* End of file inside the tag: report it, then still return a TAG token. */
+ unexpected_eof (token_start, ">");
+ STRING_FINISH;
+ loc->start = token_start;
+ val->uniqstr = uniqstr_new (last_string);
+ STRING_FREE;
+ BEGIN INITIAL;
+ return TAG;
+ }
+}
/*----------------------------.
| Decode escaped characters. |
}
- /*---------------------------------------------------------------.
- | Scanning after %union etc., possibly followed by white space. |
- | For %union only, allow arbitrary C code to appear before the |
- | following brace, as an extension to POSIX. |
- `---------------------------------------------------------------*/
-
-<SC_PRE_CODE>
-{
- . {
- bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
- scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
- yyless (0);
-
- if (valid)
- {
- braces_level = -1;
- code_start = loc->start;
- BEGIN SC_BRACED_CODE;
- }
- else
- {
- complain_at (*loc, _("missing `{' in %s"),
- token_name (token_type));
- obstack_sgrow (&obstack_for_string, "{}");
- STRING_FINISH;
- val->chars = last_string;
- BEGIN INITIAL;
- return token_type;
- }
- }
-
- <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
-}
-
- /*---------------------------------------------------------------.
- | Scanning some code in braces (%union and actions). The initial |
- | "{" is already eaten. |
- `---------------------------------------------------------------*/
+ /*-----------------------------------------------------------.
+ | Scanning some code in braces (actions). The initial "{" is |
+ | already eaten. |
+ `-----------------------------------------------------------*/
<SC_BRACED_CODE>
{
- "{"|"<"{splice}"%" STRING_GROW; braces_level++;
- "%"{splice}">" STRING_GROW; braces_level--;
+ "{"|"<"{splice}"%" STRING_GROW; nesting++;
+ "%"{splice}">" STRING_GROW; nesting--;
"}" {
- bool outer_brace = --braces_level < 0;
-
- /* As an undocumented Bison extension, append `;' before the last
- brace in braced code, so that the user code can omit trailing
- `;'. But do not append `;' if emulating Yacc, since Yacc does
- not append one.
-
- FIXME: Bison should warn if a semicolon seems to be necessary
- here, and should omit the semicolon if it seems unnecessary
- (e.g., after ';', '{', or '}', each followed by comments or
- white space). Such a warning shouldn't depend on --yacc; it
- should depend on a new --pedantic option, which would cause
- Bison to warn if it detects an extension to POSIX. --pedantic
- should also diagnose other Bison extensions like %yacc.
- Perhaps there should also be a GCC-style --pedantic-errors
- option, so that such warnings are diagnosed as errors. */
- if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
- obstack_1grow (&obstack_for_string, ';');
-
obstack_1grow (&obstack_for_string, '}');
- if (outer_brace)
+ --nesting;
+ if (nesting < 0)
{
STRING_FINISH;
loc->start = code_start;
- val->chars = last_string;
- gram_last_braced_code_loc = *loc;
+ val->code = last_string;
BEGIN INITIAL;
- return token_type;
+ return BRACED_CODE;
}
}
(as `<' `<%'). */
"<"{splice}"<" STRING_GROW;
- <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
+ <<EOF>> {
+ unexpected_eof (code_start, "}");
+ STRING_FINISH;
+ loc->start = code_start;
+ val->code = last_string;
+ BEGIN INITIAL;
+ return BRACED_CODE;
+ }
}
return PROLOGUE;
}
- <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
+ <<EOF>> {
+ unexpected_eof (code_start, "%}");
+ STRING_FINISH;
+ loc->start = code_start;
+ val->chars = last_string;
+ BEGIN INITIAL;
+ return PROLOGUE;
+ }
}
lineno = INT_MAX;
}
current_file = uniqstr_new (file);
- boundary_set, (&scanner_cursor, current_file, lineno, 1);
+ boundary_set (&scanner_cursor, current_file, lineno, 1);
}
{
obstack_free (&obstack_for_string, 0);
/* Reclaim Flex's buffers. */
- yy_delete_buffer (YY_CURRENT_BUFFER);
+ yylex_destroy ();
}