X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/1a9e39f116a7db196e75459aca8eb671fbabb4c7..ccdb39e0a6f95cb4a3f19c08885a7c421cdebedb:/src/scan-gram.l diff --git a/src/scan-gram.l b/src/scan-gram.l index 3e8ae487..8f18c355 100644 --- a/src/scan-gram.l +++ b/src/scan-gram.l @@ -19,7 +19,7 @@ 02111-1307 USA */ -%option debug nodefault noyywrap nounput never-interactive stack +%option debug nodefault noyywrap never-interactive %option prefix="gram_" outfile="lex.yy.c" %{ @@ -140,7 +140,6 @@ extend_location (location_t *loc, char const *token, int size) used, and which is used by YY_OBS_FREE to free the last string. */ static struct obstack string_obstack; -char *last_string; #define YY_OBS_GROW \ obstack_grow (&string_obstack, yytext, yyleng) @@ -151,19 +150,9 @@ char *last_string; last_string = obstack_finish (&string_obstack); \ } while (0) -#define YY_OBS_FREE \ - do { \ - obstack_free (&string_obstack, last_string); \ - } while (0) +#define YY_OBS_FREE \ + obstack_free (&string_obstack, last_string) -void -scanner_last_string_free (void) -{ - YY_OBS_FREE; -} - - -static int percent_percent_count = 0; /* Within well-formed rules, RULE_LENGTH is the number of values in the current rule so far, which says where to find `$0' with respect @@ -177,7 +166,9 @@ static void handle_dollar (braced_code_t code_kind, char *cp, location_t location); static void handle_at (braced_code_t code_kind, char *cp, location_t location); +static void handle_syncline (char *args, location_t *location); static int convert_ucn_to_byte (char const *hex_text); +static void unexpected_end_of_file (location_t *, char const *); %} %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT @@ -201,8 +192,15 @@ splice (\\[ \f\t\v]*\n)* %% %{ + /* Nesting level of the current code in braces. */ int braces_level IF_LINT (= 0); + /* Scanner context when scanning C code. */ + int c_context IF_LINT (= 0); + + /* A string representing the most recently saved token. */ + char *last_string; + /* At each yylex invocation, mark the current position as the start of the next token. */ YY_STEP; @@ -254,13 +252,15 @@ splice (\\[ \f\t\v]*\n)* YY_STEP; } + ^"#line "{int}" \""[^\"]*"\"\n" handle_syncline (yytext + strlen ("#line "), yylloc); YY_STEP; + "=" return EQUAL; ":" rule_length = 0; return COLON; "|" rule_length = 0; return PIPE; "," return COMMA; ";" return SEMICOLON; - [ \f\n\t\v]+ YY_STEP; + [ \f\n\t\v] YY_STEP; {id} { yylval->symbol = symbol_get (yytext, *yylloc); @@ -274,7 +274,7 @@ splice (\\[ \f\t\v]*\n)* num = strtoul (yytext, 0, 10); if (INT_MAX < num || errno) { - complain_at (*yylloc, _("invalid value: %s"), quote (yytext)); + complain_at (*yylloc, _("integer out of range: %s"), quote (yytext)); num = INT_MAX; } yylval->integer = num; @@ -282,20 +282,20 @@ splice (\\[ \f\t\v]*\n)* } /* Characters. We don't check there is only one. */ - "'" YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER); + "'" YY_OBS_GROW; BEGIN SC_ESCAPED_CHARACTER; /* Strings. */ - "\"" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING); + "\"" YY_OBS_GROW; BEGIN SC_ESCAPED_STRING; /* Comments. */ "/*" BEGIN SC_YACC_COMMENT; "//".* YY_STEP; /* Prologue. */ - "%{" yy_push_state (SC_PROLOGUE); + "%{" BEGIN SC_PROLOGUE; /* Code in between braces. */ - "{" YY_OBS_GROW; braces_level = 0; yy_push_state (SC_BRACED_CODE); + "{" YY_OBS_GROW; braces_level = 0; BEGIN SC_BRACED_CODE; /* A type. */ "<"{tag}">" { @@ -305,33 +305,20 @@ splice (\\[ \f\t\v]*\n)* return TYPE; } - - "%%" { + "%%" { + static int percent_percent_count; if (++percent_percent_count == 2) - yy_push_state (SC_EPILOGUE); + BEGIN SC_EPILOGUE; return PERCENT_PERCENT; } - . { + . { complain_at (*yylloc, _("invalid character: %s"), quote (yytext)); YY_STEP; } } - /*-------------------------------------------------------------------. - | Whatever the start condition (but those which correspond to | - | entities `swallowed' by Bison: SC_YACC_COMMENT, SC_ESCAPED_STRING, | - | and SC_ESCAPED_CHARACTER), no M4 character must escape as is. | - `-------------------------------------------------------------------*/ - - -{ - \[ obstack_sgrow (&string_obstack, "@<:@"); - \] obstack_sgrow (&string_obstack, "@:>@"); -} - - /*---------------------------------------------------------------. | Scanning a Yacc comment. The initial `/ *' is already eaten. | `---------------------------------------------------------------*/ @@ -343,12 +330,8 @@ splice (\\[ \f\t\v]*\n)* BEGIN INITIAL; } - [^*]+|"*" ; - - <> { - complain_at (*yylloc, _("unexpected end of file in a comment")); - BEGIN INITIAL; - } + .|\n ; + <> unexpected_end_of_file (yylloc, "*/"); } @@ -358,13 +341,8 @@ splice (\\[ \f\t\v]*\n)* { - "*"{splice}"/" YY_OBS_GROW; yy_pop_state (); - [^*\[\]]+|"*" YY_OBS_GROW; - - <> { - complain_at (*yylloc, _("unexpected end of file in a comment")); - yy_pop_state (); - } + "*"{splice}"/" YY_OBS_GROW; BEGIN c_context; + <> unexpected_end_of_file (yylloc, "*/"); } @@ -374,9 +352,9 @@ splice (\\[ \f\t\v]*\n)* { - "\n" YY_OBS_GROW; yy_pop_state (); - ([^\n\[\]]|{splice})+ YY_OBS_GROW; - <> yy_pop_state (); + "\n" YY_OBS_GROW; BEGIN c_context; + {splice} YY_OBS_GROW; + <> BEGIN c_context; } @@ -388,25 +366,16 @@ splice (\\[ \f\t\v]*\n)* { "\"" { - assert (yy_top_state () == INITIAL); YY_OBS_GROW; YY_OBS_FINISH; yylval->string = last_string; - yy_pop_state (); rule_length++; + BEGIN INITIAL; return STRING; } - [^\"\\]+ YY_OBS_GROW; - - <> { - complain_at (*yylloc, _("unexpected end of file in a string")); - assert (yy_top_state () == INITIAL); - YY_OBS_FINISH; - yylval->string = last_string; - yy_pop_state (); - return STRING; - } + .|\n YY_OBS_GROW; + <> unexpected_end_of_file (yylloc, "\""); } /*---------------------------------------------------------------. @@ -418,30 +387,19 @@ splice (\\[ \f\t\v]*\n)* { "'" { YY_OBS_GROW; - assert (yy_top_state () == INITIAL); - { - YY_OBS_FINISH; - yylval->symbol = symbol_get (last_string, *yylloc); - symbol_class_set (yylval->symbol, token_sym, *yylloc); - symbol_user_token_number_set (yylval->symbol, - (unsigned char) last_string[1], *yylloc); - YY_OBS_FREE; - yy_pop_state (); - rule_length++; - return ID; - } - } - - [^\'\\]+ YY_OBS_GROW; - - <> { - complain_at (*yylloc, _("unexpected end of file in a character")); - assert (yy_top_state () == INITIAL); YY_OBS_FINISH; - yylval->string = last_string; - yy_pop_state (); - return CHARACTER; + yylval->symbol = symbol_get (last_string, *yylloc); + symbol_class_set (yylval->symbol, token_sym, *yylloc); + symbol_user_token_number_set (yylval->symbol, + (unsigned char) last_string[1], *yylloc); + YY_OBS_FREE; + rule_length++; + BEGIN INITIAL; + return ID; } + + .|\n YY_OBS_GROW; + <> unexpected_end_of_file (yylloc, "'"); } @@ -455,7 +413,8 @@ splice (\\[ \f\t\v]*\n)* unsigned long c = strtoul (yytext + 1, 0, 8); if (UCHAR_MAX < c) { - complain_at (*yylloc, _("invalid escape: %s"), quote (yytext)); + complain_at (*yylloc, _("invalid escape sequence: %s"), + quote (yytext)); YY_STEP; } else @@ -468,7 +427,8 @@ splice (\\[ \f\t\v]*\n)* c = strtoul (yytext + 2, 0, 16); if (UCHAR_MAX < c || errno) { - complain_at (*yylloc, _("invalid escape: %s"), quote (yytext)); + complain_at (*yylloc, _("invalid escape sequence: %s"), + quote (yytext)); YY_STEP; } else @@ -487,18 +447,18 @@ splice (\\[ \f\t\v]*\n)* int c = convert_ucn_to_byte (yytext); if (c < 0) { - complain_at (*yylloc, _("invalid escape: %s"), quote (yytext)); + complain_at (*yylloc, _("invalid escape sequence: %s"), + quote (yytext)); YY_STEP; } else obstack_1grow (&string_obstack, c); } \\(.|\n) { - complain_at (*yylloc, _("unrecognized escape: %s"), quote (yytext)); + complain_at (*yylloc, _("unrecognized escape sequence: %s"), + quote (yytext)); YY_OBS_GROW; } - /* FLex wants this rule, in case of a `\<>'. */ - \\ YY_OBS_GROW; } @@ -509,23 +469,9 @@ splice (\\[ \f\t\v]*\n)* { - "'" { - YY_OBS_GROW; - assert (yy_top_state () != INITIAL); - yy_pop_state (); - } - - [^'\[\]\\]+ YY_OBS_GROW; - \\{splice}[^\[\]] YY_OBS_GROW; - {splice} YY_OBS_GROW; - /* Needed for `\<>', `\\<>[', and `\\<>]'. */ - \\ YY_OBS_GROW; - - <> { - complain_at (*yylloc, _("unexpected end of file in a character")); - assert (yy_top_state () != INITIAL); - yy_pop_state (); - } + "'" YY_OBS_GROW; BEGIN c_context; + \\{splice}[\'\\] YY_OBS_GROW; + <> unexpected_end_of_file (yylloc, "'"); } @@ -536,23 +482,9 @@ splice (\\[ \f\t\v]*\n)* { - "\"" { - assert (yy_top_state () != INITIAL); - YY_OBS_GROW; - yy_pop_state (); - } - - [^\"\[\]\\]+ YY_OBS_GROW; - \\{splice}[^\[\]] YY_OBS_GROW; - {splice} YY_OBS_GROW; - /* Needed for `\<>', `\\<>[', and `\\<>]'. */ - \\ YY_OBS_GROW; - - <> { - complain_at (*yylloc, _("unexpected end of file in a string")); - assert (yy_top_state () != INITIAL); - yy_pop_state (); - } + "\"" YY_OBS_GROW; BEGIN c_context; + \\{splice}[\"\\] YY_OBS_GROW; + <> unexpected_end_of_file (yylloc, "\""); } @@ -562,18 +494,10 @@ splice (\\[ \f\t\v]*\n)* { - /* Characters. We don't check there is only one. */ - "'" YY_OBS_GROW; yy_push_state (SC_CHARACTER); - - /* Strings. */ - "\"" YY_OBS_GROW; yy_push_state (SC_STRING); - - /* Comments. */ - "/"{splice}"*" YY_OBS_GROW; yy_push_state (SC_COMMENT); - "/"{splice}"/" YY_OBS_GROW; yy_push_state (SC_LINE_COMMENT); - - /* Not comments. */ - "/" YY_OBS_GROW; + "'" YY_OBS_GROW; c_context = YY_START; BEGIN SC_CHARACTER; + "\"" YY_OBS_GROW; c_context = YY_START; BEGIN SC_STRING; + "/"{splice}"*" YY_OBS_GROW; c_context = YY_START; BEGIN SC_COMMENT; + "/"{splice}"/" YY_OBS_GROW; c_context = YY_START; BEGIN SC_LINE_COMMENT; } @@ -591,31 +515,24 @@ splice (\\[ \f\t\v]*\n)* braces_level--; if (braces_level < 0) { - yy_pop_state (); YY_OBS_FINISH; yylval->string = last_string; rule_length++; + BEGIN INITIAL; return BRACED_CODE; } } + /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly + (as `<' `<%'). */ + "<"{splice}"<" YY_OBS_GROW; + "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code, yytext, *yylloc); } "@"(-?[0-9]+|"$") { handle_at (current_braced_code, yytext, *yylloc); } - /* `"<"{splice}"<"' tokenizes `<<%' correctly (as `<<' `%') rather - than incorrrectly (as `<' `<%'). */ - [^\"$%\'/<@\[\]\{\}]+|[$%/<@]|"<"{splice}"<" YY_OBS_GROW; - - <> { - complain_at (*yylloc, _("unexpected end of file in a braced code")); - yy_pop_state (); - YY_OBS_FINISH; - yylval->string = last_string; - return BRACED_CODE; - } - + <> unexpected_end_of_file (yylloc, "}"); } @@ -626,22 +543,13 @@ splice (\\[ \f\t\v]*\n)* { "%}" { - yy_pop_state (); YY_OBS_FINISH; yylval->string = last_string; + BEGIN INITIAL; return PROLOGUE; } - [^%\[\]/\'\"]+ YY_OBS_GROW; - "%" YY_OBS_GROW; - - <> { - complain_at (*yylloc, _("unexpected end of file in a prologue")); - yy_pop_state (); - YY_OBS_FINISH; - yylval->string = last_string; - return PROLOGUE; - } + <> unexpected_end_of_file (yylloc, "%}"); } @@ -652,17 +560,30 @@ splice (\\[ \f\t\v]*\n)* { - [^\[\]]+ YY_OBS_GROW; - <> { - yy_pop_state (); YY_OBS_FINISH; yylval->string = last_string; + BEGIN INITIAL; return EPILOGUE; } } + /*----------------------------------------------------------------. + | By default, grow the string obstack with the input, escaping M4 | + | quoting characters. | + `----------------------------------------------------------------*/ + + +{ + \$ obstack_sgrow (&string_obstack, "$]["); + \@ obstack_sgrow (&string_obstack, "@@"); + \[ obstack_sgrow (&string_obstack, "@{"); + \] obstack_sgrow (&string_obstack, "@}"); + .|\n YY_OBS_GROW; +} + + %% /*------------------------------------------------------------------. @@ -723,7 +644,7 @@ handle_action_dollar (char *text, location_t location) rule_length, n, type_name); } else - complain_at (location, _("invalid value: %s"), quote (text)); + complain_at (location, _("integer out of range: %s"), quote (text)); } } @@ -795,7 +716,7 @@ handle_action_at (char *text, location_t location) rule_length, n); } else - complain_at (location, _("invalid value: %s"), quote (text)); + complain_at (location, _("integer out of range: %s"), quote (text)); } } @@ -864,7 +785,7 @@ convert_ucn_to_byte (char const *ucn) /* A non-ASCII host. Use CODE to index into a table of the C basic execution character set, which is guaranteed to exist on all Standard C platforms. This table also includes '$', '@', - and '`', which not in the basic execution character set but + and '`', which are not in the basic execution character set but which are unibyte characters on all the platforms that we know about. */ static signed char const table[] = @@ -895,6 +816,47 @@ convert_ucn_to_byte (char const *ucn) } +/*----------------------------------------------------------------. +| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. | +`----------------------------------------------------------------*/ + +static void +handle_syncline (char *args, location_t *location) +{ + int lineno = strtol (args, &args, 10); + const char *file = NULL; + file = strchr (args, '"') + 1; + *strchr (file, '"') = 0; + /* FIXME: Leaking... Can't free, as some locations are still + pointing to the old file name. */ + infile = xstrdup (file); + location->file = infile; + location->last_line = lineno; +} + + +/*-------------------------------------------------------------. +| Report an unexpected end of file at LOC. An end of file was | +| encountered and the expected TOKEN_END was missing. After | +| reporting the problem, pretend that TOKEN_END was found. | +`-------------------------------------------------------------*/ + +static void +unexpected_end_of_file (location_t *loc, char const *token_end) +{ + size_t i = strlen (token_end); + + complain_at (*loc, _("missing `%s' at end of file"), token_end); + + /* Adjust location's last column so that any later message does not + mention the characters just inserted. */ + loc->last_column -= i; + + while (i != 0) + unput (token_end[--i]); +} + + /*-------------------------. | Initialize the scanner. | `-------------------------*/