X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/d8d3f94a993ce890baae68bf9da7ded29f9f8d76..ccdb39e0a6f95cb4a3f19c08885a7c421cdebedb:/src/scan-gram.l?ds=sidebyside diff --git a/src/scan-gram.l b/src/scan-gram.l index 200e56eb..8f18c355 100644 --- a/src/scan-gram.l +++ b/src/scan-gram.l @@ -19,7 +19,7 @@ 02111-1307 USA */ -%option debug nodefault noyywrap nounput never-interactive stack +%option debug nodefault noyywrap never-interactive %option prefix="gram_" outfile="lex.yy.c" %{ @@ -140,7 +140,6 @@ extend_location (location_t *loc, char const *token, int size) used, and which is used by YY_OBS_FREE to free the last string. */ static struct obstack string_obstack; -char *last_string; #define YY_OBS_GROW \ obstack_grow (&string_obstack, yytext, yyleng) @@ -151,20 +150,9 @@ char *last_string; last_string = obstack_finish (&string_obstack); \ } while (0) -#define YY_OBS_FREE \ - do { \ - obstack_free (&string_obstack, last_string); \ - } while (0) +#define YY_OBS_FREE \ + obstack_free (&string_obstack, last_string) -void -scanner_last_string_free (void) -{ - YY_OBS_FREE; -} - - -static int braces_level = 0; -static int percent_percent_count = 0; /* Within well-formed rules, RULE_LENGTH is the number of values in the current rule so far, which says where to find `$0' with respect @@ -178,7 +166,9 @@ static void handle_dollar (braced_code_t code_kind, char *cp, location_t location); static void handle_at (braced_code_t code_kind, char *cp, location_t location); +static void handle_syncline (char *args, location_t *location); static int convert_ucn_to_byte (char const *hex_text); +static void unexpected_end_of_file (location_t *, char const *); %} %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT @@ -186,9 +176,10 @@ static int convert_ucn_to_byte (char const *hex_text); %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER %x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE -letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_] -id {letter}({letter}|[0-9])* -int [0-9]+ 
+letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_] +id {letter}({letter}|[0-9])* +directive %{letter}({letter}|[0-9]|-)* +int [0-9]+ /* POSIX says that a tag must be both an id and a C union member, but historically almost any character is allowed in a tag. We disallow @@ -201,6 +192,15 @@ splice (\\[ \f\t\v]*\n)* %% %{ + /* Nesting level of the current code in braces. */ + int braces_level IF_LINT (= 0); + + /* Scanner context when scanning C code. */ + int c_context IF_LINT (= 0); + + /* A string representing the most recently saved token. */ + char *last_string; + /* At each yylex invocation, mark the current position as the start of the next token. */ YY_STEP; @@ -247,13 +247,20 @@ splice (\\[ \f\t\v]*\n)* "%verbose" return PERCENT_VERBOSE; "%yacc" return PERCENT_YACC; + {directive} { + complain_at (*yylloc, _("invalid directive: %s"), quote (yytext)); + YY_STEP; + } + + ^"#line "{int}" \""[^\"]*"\"\n" handle_syncline (yytext + strlen ("#line "), yylloc); YY_STEP; + "=" return EQUAL; ":" rule_length = 0; return COLON; "|" rule_length = 0; return PIPE; "," return COMMA; ";" return SEMICOLON; - [ \f\n\t\v]+ YY_STEP; + [ \f\n\t\v] YY_STEP; {id} { yylval->symbol = symbol_get (yytext, *yylloc); @@ -267,7 +274,7 @@ splice (\\[ \f\t\v]*\n)* num = strtoul (yytext, 0, 10); if (INT_MAX < num || errno) { - complain_at (*yylloc, _("%s is invalid"), yytext); + complain_at (*yylloc, _("integer out of range: %s"), quote (yytext)); num = INT_MAX; } yylval->integer = num; @@ -275,20 +282,20 @@ splice (\\[ \f\t\v]*\n)* } /* Characters. We don't check there is only one. */ - "'" YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER); + "'" YY_OBS_GROW; BEGIN SC_ESCAPED_CHARACTER; /* Strings. */ - "\"" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING); + "\"" YY_OBS_GROW; BEGIN SC_ESCAPED_STRING; /* Comments. */ "/*" BEGIN SC_YACC_COMMENT; "//".* YY_STEP; /* Prologue. */ - "%{" yy_push_state (SC_PROLOGUE); + "%{" BEGIN SC_PROLOGUE; /* Code in between braces. 
*/ - "{" YY_OBS_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE); + "{" YY_OBS_GROW; braces_level = 0; BEGIN SC_BRACED_CODE; /* A type. */ "<"{tag}">" { @@ -298,34 +305,20 @@ splice (\\[ \f\t\v]*\n)* return TYPE; } - - "%%" { + "%%" { + static int percent_percent_count; if (++percent_percent_count == 2) - yy_push_state (SC_EPILOGUE); + BEGIN SC_EPILOGUE; return PERCENT_PERCENT; } - . { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": invalid character: `%c'\n"), *yytext); + . { + complain_at (*yylloc, _("invalid character: %s"), quote (yytext)); YY_STEP; } } - /*-------------------------------------------------------------------. - | Whatever the start condition (but those which correspond to | - | entities `swallowed' by Bison: SC_YACC_COMMENT, SC_ESCAPED_STRING, | - | and SC_ESCAPED_CHARACTER), no M4 character must escape as is. | - `-------------------------------------------------------------------*/ - - -{ - \[ obstack_sgrow (&string_obstack, "@<:@"); - \] obstack_sgrow (&string_obstack, "@:>@"); -} - - /*---------------------------------------------------------------. | Scanning a Yacc comment. The initial `/ *' is already eaten. 
| `---------------------------------------------------------------*/ @@ -337,13 +330,8 @@ splice (\\[ \f\t\v]*\n)* BEGIN INITIAL; } - [^*]+|"*" ; - - <<EOF>> { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": unexpected end of file in a comment\n")); - BEGIN INITIAL; - } + .|\n ; + <<EOF>> unexpected_end_of_file (yylloc, "*/"); } @@ -353,14 +341,8 @@ splice (\\[ \f\t\v]*\n)* { - "*"{splice}"/" YY_OBS_GROW; yy_pop_state (); - [^*\[\]]+|"*" YY_OBS_GROW; - - <<EOF>> { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": unexpected end of file in a comment\n")); - yy_pop_state (); - } + "*"{splice}"/" YY_OBS_GROW; BEGIN c_context; + <<EOF>> unexpected_end_of_file (yylloc, "*/"); } @@ -370,9 +352,9 @@ splice (\\[ \f\t\v]*\n)* { - "\n" YY_OBS_GROW; yy_pop_state (); - ([^\n\[\]]|{splice})+ YY_OBS_GROW; - <<EOF>> yy_pop_state (); + "\n" YY_OBS_GROW; BEGIN c_context; + {splice} YY_OBS_GROW; + <<EOF>> BEGIN c_context; } @@ -384,26 +366,16 @@ splice (\\[ \f\t\v]*\n)* { "\"" { - assert (yy_top_state () == INITIAL); YY_OBS_GROW; YY_OBS_FINISH; yylval->string = last_string; - yy_pop_state (); rule_length++; + BEGIN INITIAL; return STRING; } - [^\"\\]+ YY_OBS_GROW; - - <<EOF>> { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": unexpected end of file in a string\n")); - assert (yy_top_state () == INITIAL); - YY_OBS_FINISH; - yylval->string = last_string; - yy_pop_state (); - return STRING; - } + .|\n YY_OBS_GROW; + <<EOF>> unexpected_end_of_file (yylloc, "\""); } /*---------------------------------------------------------------.
@@ -415,31 +387,19 @@ splice (\\[ \f\t\v]*\n)* { "'" { YY_OBS_GROW; - assert (yy_top_state () == INITIAL); - { - YY_OBS_FINISH; - yylval->symbol = symbol_get (last_string, *yylloc); - symbol_class_set (yylval->symbol, token_sym, *yylloc); - symbol_user_token_number_set (yylval->symbol, - (unsigned char) last_string[1], *yylloc); - YY_OBS_FREE; - yy_pop_state (); - rule_length++; - return ID; - } - } - - [^'\\]+ YY_OBS_GROW; - - <<EOF>> { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": unexpected end of file in a character\n")); - assert (yy_top_state () == INITIAL); YY_OBS_FINISH; - yylval->string = last_string; - yy_pop_state (); - return CHARACTER; + yylval->symbol = symbol_get (last_string, *yylloc); + symbol_class_set (yylval->symbol, token_sym, *yylloc); + symbol_user_token_number_set (yylval->symbol, + (unsigned char) last_string[1], *yylloc); + YY_OBS_FREE; + rule_length++; + BEGIN INITIAL; + return ID; } + + .|\n YY_OBS_GROW; + <<EOF>> unexpected_end_of_file (yylloc, "'"); } @@ -453,8 +413,8 @@ splice (\\[ \f\t\v]*\n)* unsigned long c = strtoul (yytext + 1, 0, 8); if (UCHAR_MAX < c) { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext)); + complain_at (*yylloc, _("invalid escape sequence: %s"), + quote (yytext)); YY_STEP; } else @@ -467,8 +427,8 @@ splice (\\[ \f\t\v]*\n)* c = strtoul (yytext + 2, 0, 16); if (UCHAR_MAX < c || errno) { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext)); + complain_at (*yylloc, _("invalid escape sequence: %s"), + quote (yytext)); YY_STEP; } else @@ -482,25 +442,23 @@ splice (\\[ \f\t\v]*\n)* \\r obstack_1grow (&string_obstack, '\r'); \\t obstack_1grow (&string_obstack, '\t'); \\v obstack_1grow (&string_obstack, '\v'); - \\[\"'?\\] obstack_1grow (&string_obstack, yytext[1]); + \\[\"\'?\\] obstack_1grow (&string_obstack, yytext[1]); \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} { int c = convert_ucn_to_byte (yytext); if (c < 0) { -
LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext)); + complain_at (*yylloc, _("invalid escape sequence: %s"), + quote (yytext)); YY_STEP; } else obstack_1grow (&string_obstack, c); } \\(.|\n) { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": unrecognized escape: %s\n"), quote (yytext)); + complain_at (*yylloc, _("unrecognized escape sequence: %s"), + quote (yytext)); YY_OBS_GROW; } - /* FLex wants this rule, in case of a `\<>'. */ - \\ YY_OBS_GROW; } @@ -511,24 +469,9 @@ splice (\\[ \f\t\v]*\n)* { - "'" { - YY_OBS_GROW; - assert (yy_top_state () != INITIAL); - yy_pop_state (); - } - - [^'\[\]\\]+ YY_OBS_GROW; - \\{splice}[^\[\]] YY_OBS_GROW; - {splice} YY_OBS_GROW; - /* Needed for `\<>', `\\<>[', and `\\<>]'. */ - \\ YY_OBS_GROW; - - <> { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": unexpected end of file in a character\n")); - assert (yy_top_state () != INITIAL); - yy_pop_state (); - } + "'" YY_OBS_GROW; BEGIN c_context; + \\{splice}[\'\\] YY_OBS_GROW; + <> unexpected_end_of_file (yylloc, "'"); } @@ -539,24 +482,9 @@ splice (\\[ \f\t\v]*\n)* { - "\"" { - assert (yy_top_state () != INITIAL); - YY_OBS_GROW; - yy_pop_state (); - } - - [^\"\[\]\\]+ YY_OBS_GROW; - \\{splice}[^\[\]] YY_OBS_GROW; - {splice} YY_OBS_GROW; - /* Needed for `\<>', `\\<>[', and `\\<>]'. */ - \\ YY_OBS_GROW; - - <> { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": unexpected end of file in a string\n")); - assert (yy_top_state () != INITIAL); - yy_pop_state (); - } + "\"" YY_OBS_GROW; BEGIN c_context; + \\{splice}[\"\\] YY_OBS_GROW; + <> unexpected_end_of_file (yylloc, "\""); } @@ -566,18 +494,10 @@ splice (\\[ \f\t\v]*\n)* { - /* Characters. We don't check there is only one. */ - "'" YY_OBS_GROW; yy_push_state (SC_CHARACTER); - - /* Strings. */ - "\"" YY_OBS_GROW; yy_push_state (SC_STRING); - - /* Comments. 
*/ - "/"{splice}"*" YY_OBS_GROW; yy_push_state (SC_COMMENT); - "/"{splice}"/" YY_OBS_GROW; yy_push_state (SC_LINE_COMMENT); - - /* Not comments. */ - "/" YY_OBS_GROW; + "'" YY_OBS_GROW; c_context = YY_START; BEGIN SC_CHARACTER; + "\"" YY_OBS_GROW; c_context = YY_START; BEGIN SC_STRING; + "/"{splice}"*" YY_OBS_GROW; c_context = YY_START; BEGIN SC_COMMENT; + "/"{splice}"/" YY_OBS_GROW; c_context = YY_START; BEGIN SC_LINE_COMMENT; } @@ -588,39 +508,31 @@ splice (\\[ \f\t\v]*\n)* { + "{"|"<"{splice}"%" YY_OBS_GROW; braces_level++; + "%"{splice}">" YY_OBS_GROW; braces_level--; "}" { YY_OBS_GROW; - if (--braces_level == 0) + braces_level--; + if (braces_level < 0) { - yy_pop_state (); YY_OBS_FINISH; yylval->string = last_string; rule_length++; + BEGIN INITIAL; return BRACED_CODE; } } - "{" YY_OBS_GROW; braces_level++; + /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly + (as `<' `<%'). */ + "<"{splice}"<" YY_OBS_GROW; "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code, yytext, *yylloc); } "@"(-?[0-9]+|"$") { handle_at (current_braced_code, yytext, *yylloc); } - [^$@\[\]/'\"\{\}]+ YY_OBS_GROW; - - /* A stray $, or /, or etc. */ - .
YY_OBS_GROW; - - <<EOF>> { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": unexpected end of file in a braced code\n")); - yy_pop_state (); - YY_OBS_FINISH; - yylval->string = last_string; - return BRACED_CODE; - } - + <<EOF>> unexpected_end_of_file (yylloc, "}"); } @@ -631,23 +543,13 @@ splice (\\[ \f\t\v]*\n)* { "%}" { - yy_pop_state (); YY_OBS_FINISH; yylval->string = last_string; + BEGIN INITIAL; return PROLOGUE; } - [^%\[\]/'\"]+ YY_OBS_GROW; - "%" YY_OBS_GROW; - - <<EOF>> { - LOCATION_PRINT (stderr, *yylloc); - fprintf (stderr, _(": unexpected end of file in a prologue\n")); - yy_pop_state (); - YY_OBS_FINISH; - yylval->string = last_string; - return PROLOGUE; - } + <<EOF>> unexpected_end_of_file (yylloc, "%}"); } @@ -658,17 +560,30 @@ splice (\\[ \f\t\v]*\n)* { - [^\[\]]+ YY_OBS_GROW; - <<EOF>> { - yy_pop_state (); YY_OBS_FINISH; yylval->string = last_string; + BEGIN INITIAL; return EPILOGUE; } } + /*----------------------------------------------------------------. + | By default, grow the string obstack with the input, escaping M4 | + | quoting characters. | + `----------------------------------------------------------------*/ + + +{ + \$ obstack_sgrow (&string_obstack, "$]["); + \@ obstack_sgrow (&string_obstack, "@@"); + \[ obstack_sgrow (&string_obstack, "@{"); + \] obstack_sgrow (&string_obstack, "@}"); + .|\n YY_OBS_GROW; +} + + %% /*------------------------------------------------------------------.
@@ -729,7 +644,7 @@ handle_action_dollar (char *text, location_t location) rule_length, n, type_name); } else - complain_at (location, _("invalid value: %s"), text); + complain_at (location, _("integer out of range: %s"), quote (text)); } } @@ -746,7 +661,7 @@ handle_symbol_code_dollar (char *text, location_t location) if (*cp == '$') obstack_sgrow (&string_obstack, "]b4_dollar_dollar["); else - complain_at (location, _("%s is invalid"), quote_n (1, text)); + complain_at (location, _("invalid value: %s"), quote (text)); } @@ -801,7 +716,7 @@ handle_action_at (char *text, location_t location) rule_length, n); } else - complain_at (location, _("invalid value: %s"), text); + complain_at (location, _("integer out of range: %s"), quote (text)); } } @@ -818,7 +733,7 @@ handle_symbol_code_at (char *text, location_t location) if (*cp == '$') obstack_sgrow (&string_obstack, "]b4_at_dollar["); else - complain_at (location, _("%s is invalid"), quote_n (1, text)); + complain_at (location, _("invalid value: %s"), quote (text)); } @@ -870,7 +785,7 @@ convert_ucn_to_byte (char const *ucn) /* A non-ASCII host. Use CODE to index into a table of the C basic execution character set, which is guaranteed to exist on all Standard C platforms. This table also includes '$', '@', - and '`', which not in the basic execution character set but + and '`', which are not in the basic execution character set but which are unibyte characters on all the platforms that we know about. */ static signed char const table[] = @@ -896,11 +811,52 @@ convert_ucn_to_byte (char const *ucn) code = code < sizeof table ? table[code] : -1; } #endif - + return code; } +/*----------------------------------------------------------------. +| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. 
| +`----------------------------------------------------------------*/ + +static void +handle_syncline (char *args, location_t *location) +{ + int lineno = strtol (args, &args, 10); + const char *file = NULL; + file = strchr (args, '"') + 1; + *strchr (file, '"') = 0; + /* FIXME: Leaking... Can't free, as some locations are still + pointing to the old file name. */ + infile = xstrdup (file); + location->file = infile; + location->last_line = lineno; +} + + +/*-------------------------------------------------------------. +| Report an unexpected end of file at LOC. An end of file was | +| encountered and the expected TOKEN_END was missing. After | +| reporting the problem, pretend that TOKEN_END was found. | +`-------------------------------------------------------------*/ + +static void +unexpected_end_of_file (location_t *loc, char const *token_end) +{ + size_t i = strlen (token_end); + + complain_at (*loc, _("missing `%s' at end of file"), token_end); + + /* Adjust location's last column so that any later message does not + mention the characters just inserted. */ + loc->last_column -= i; + + while (i != 0) + unput (token_end[--i]); +} + + /*-------------------------. | Initialize the scanner. | `-------------------------*/