From: Paul Eggert Date: Thu, 21 Jul 2005 20:53:03 +0000 (+0000) Subject: * data/glr.c (yytnamerr): New function. X-Git-Tag: BISON-2_1~64 X-Git-Url: https://git.saurik.com/bison.git/commitdiff_plain/9e0876fb0cd09964f07046749a320855e78560ef?ds=sidebyside;hp=49b1cf794d71aa23b7fab120e9817062ec29b177 * data/glr.c (yytnamerr): New function. (yyreportSyntaxError): Use it to dequote most string literals. * data/lalr1.cc (yytname_): Renamed from yyname_, for compatibility with other skeletons. All uses changed. (yytnamerr_): New function. (yyreport_syntax_error_): Use it to dequote most string literals. * data/yacc.c (yytnamerr): New function. (yyerrlab): Use it to decode most string literals. * doc/bison.texinfo (Decl Summary, Calling Convention): Clarify quoting convention of yytname. * src/output.c (prepare_symbols): Quote all names. This undoes the 2005-04-17 change, which is now accomplished (mostly) via changes in the parsers as described above. * tests/regression.at (Token definitions, Web2c Actions): Undo most of the 2005-04-17 change here, too. --- diff --git a/data/glr.c b/data/glr.c index 219dd3f5..d004cec9 100644 --- a/data/glr.c +++ b/data/glr.c @@ -332,7 +332,7 @@ static const ]b4_int_type_for([b4_rline])[ yyrline[] = #endif #if (YYDEBUG) || YYERROR_VERBOSE -/* YYTNME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. First, the terminals, then, starting at YYNTOKENS, nonterminals. */ static const char *const yytname[] = { @@ -600,6 +600,54 @@ yystpcpy (char *yydest, const char *yysrc) # endif # endif +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. 
If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static size_t +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + size_t yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (! yyres) + return strlen (yystr); + + return yystpcpy (yyres, yystr) - yyres; +} +# endif + #endif /* !YYERROR_VERBOSE */ /** State numbers, as in LALR(1) machine */ @@ -1736,7 +1784,7 @@ yyreportSyntaxError (yyGLRStack* yystack, yyn = yypact[yystack->yytops.yystates[0]->yylrState]; if (YYPACT_NINF < yyn && yyn < YYLAST) { - size_t yysize0 = strlen (yytokenName (*yytokenp)); + size_t yysize0 = yytnamerr (NULL, yytokenName (*yytokenp)); size_t yysize = yysize0; size_t yysize1; yybool yysize_overflow = yyfalse; @@ -1778,7 +1826,7 @@ yyreportSyntaxError (yyGLRStack* yystack, break; } yyarg[yycount++] = yytokenName (yyx); - yysize1 = yysize + strlen (yytokenName (yyx)); + yysize1 = yysize + yytnamerr (NULL, yytokenName (yyx)); yysize_overflow |= yysize1 < yysize; yysize = yysize1; yyfmt = yystpcpy (yyfmt, yyprefix); @@ -1801,7 +1849,7 @@ yyreportSyntaxError (yyGLRStack* yystack, { if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) { - yyp = yystpcpy (yyp, yyarg[yyi++]); + yyp += yytnamerr (yyp, yyarg[yyi++]); yyf += 2; } else diff --git a/data/lalr1.cc b/data/lalr1.cc index 26fb920c..e492173a 100644 --- a/data/lalr1.cc +++ b/data/lalr1.cc @@ -323,7 +323,12 @@ namespace yy #if YYDEBUG || YYERROR_VERBOSE /// For a symbol, its name in clear. 
- static const char* const yyname_[]; + static const char* const yytname_[]; +#endif + +#if YYERROR_VERBOSE + /// Convert the symbol name \a n to a form suitable for a diagnostic. + virtual std::string yytnamerr_ (const char *n); #endif #if YYDEBUG @@ -471,6 +476,47 @@ do { \ #define YYABORT goto yyabortlab #define YYERROR goto yyerrorlab +#if YYERROR_VERBOSE + +/* Return YYSTR after stripping away unnecessary quotes and + backslashes, so that it's suitable for yyerror. The heuristic is + that double-quoting is unnecessary unless the string contains an + apostrophe, a comma, or backslash (other than backslash-backslash). + YYSTR is taken from yytname. */ +std::string +yy::]b4_parser_class_name[::yytnamerr_ (const char *yystr) +{ + if (*yystr == '"') + { + std::string yyr = ""; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + yyr += *yyp; + break; + + case '"': + return yyr; + } + do_not_strip_quotes: ; + } + + return yystr; +} + +#endif + #if YYDEBUG /*--------------------------------. | Print this symbol on YYOUTPUT. | @@ -488,7 +534,7 @@ yy::]b4_parser_class_name[::yysymprint_ (int yytype, (void) cdebug_; *yycdebug_ << (yytype < yyntokens_ ? "token" : "nterm") - << ' ' << yyname_[yytype] << " (" + << ' ' << yytname_[yytype] << " (" << *yylocationp << ": "; switch (yytype) { @@ -880,10 +926,10 @@ yy::]b4_parser_class_name[::yyreport_syntax_error_ () // "syntax error, unexpected %s or %s or %s" // Then, invoke YY_ on this string. // Finally, use the string as a format to output - // yyname_[yyilooka_], etc. + // yytname_[yyilooka_], etc. // Until this gets fixed, this message appears in English only. 
message = "syntax error, unexpected "; - message += yyname_[yyilooka_]; + message += yytnamerr_ (yytname_[yyilooka_]); if (count < 5) { count = 0; @@ -891,7 +937,7 @@ yy::]b4_parser_class_name[::yyreport_syntax_error_ () if (yycheck_[x + yyn_] == x && x != yyterror_) { message += (!count++) ? ", expecting " : " or "; - message += yyname_[x]; + message += yytnamerr_ (yytname_[x]); } } } @@ -988,7 +1034,7 @@ yy::]b4_parser_class_name[::yyr2_[] = /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. First, the terminals, then, starting at \a yyntokens_, nonterminals. */ const char* -const yy::]b4_parser_class_name[::yyname_[] = +const yy::]b4_parser_class_name[::yytname_[] = { ]b4_tname[ }; @@ -1038,8 +1084,8 @@ yy::]b4_parser_class_name[::yyreduce_print_ (int yyrule) << " (line " << yylno << "), "; for (]b4_int_type_for([b4_prhs])[ i = yyprhs_[yyn_]; 0 <= yyrhs_[i]; ++i) - *yycdebug_ << yyname_[yyrhs_[i]] << ' '; - *yycdebug_ << "-> " << yyname_[yyr1_[yyn_]] << std::endl; + *yycdebug_ << yytname_[yyrhs_[i]] << ' '; + *yycdebug_ << "-> " << yytname_[yyr1_[yyn_]] << std::endl; } #endif // YYDEBUG diff --git a/data/yacc.c b/data/yacc.c index c046e5e2..3dc30898 100644 --- a/data/yacc.c +++ b/data/yacc.c @@ -416,7 +416,7 @@ static const ]b4_int_type_for([b4_rline])[ yyrline[] = #endif #if YYDEBUG || YYERROR_VERBOSE -/* YYTNME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. First, the terminals, then, starting at YYNTOKENS, nonterminals. */ static const char *const yytname[] = { @@ -645,8 +645,8 @@ do { \ yyrule - 1, yylno); /* Print the symbols being reduced, and their result. 
*/ for (yyi = yyprhs[yyrule]; 0 <= yyrhs[yyi]; yyi++) - YYFPRINTF (stderr, "%s ", yytname [yyrhs[yyi]]); - YYFPRINTF (stderr, "-> %s\n", yytname [yyr1[yyrule]]); + YYFPRINTF (stderr, "%s ", yytname[yyrhs[yyi]]); + YYFPRINTF (stderr, "-> %s\n", yytname[yyr1[yyrule]]); } # define YY_REDUCE_PRINT(Rule) \ @@ -735,7 +735,55 @@ yystpcpy (yydest, yysrc) # endif # endif -#endif /* !YYERROR_VERBOSE */ +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static YYSIZE_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + size_t yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (! 
yyres) + return yystrlen (yystr); + + return yystpcpy (yyres, yystr) - yyres; +} +# endif + +#endif /* YYERROR_VERBOSE */ @@ -1111,7 +1159,7 @@ yyerrlab: if (YYPACT_NINF < yyn && yyn < YYLAST) { int yytype = YYTRANSLATE (yychar); - YYSIZE_T yysize0 = yystrlen (yytname[yytype]); + YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); YYSIZE_T yysize = yysize0; YYSIZE_T yysize1; int yysize_overflow = 0; @@ -1163,7 +1211,7 @@ yyerrlab: break; } yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yystrlen (yytname[yyx]); + yysize1 = yysize + yytnamerr (0, yytname[yyx]); yysize_overflow |= yysize1 < yysize; yysize = yysize1; yyfmt = yystpcpy (yyfmt, yyprefix); @@ -1188,7 +1236,7 @@ yyerrlab: { if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) { - yyp = yystpcpy (yyp, yyarg[yyi++]); + yyp += yytnamerr (yyp, yyarg[yyi++]); yyf += 2; } else diff --git a/doc/bison.texinfo b/doc/bison.texinfo index 6198c280..c25813fb 100644 --- a/doc/bison.texinfo +++ b/doc/bison.texinfo @@ -4164,15 +4164,14 @@ three elements of @code{yytname} correspond to the predefined tokens @code{"error"}, and @code{"$undefined"}; after these come the symbols defined in the grammar file. -For single-character literal tokens and literal string tokens, the name -in the table includes the single-quote or double-quote characters: for -example, @code{"'+'"} is a single-character literal and @code{"\"<=\""} -is a literal string token. All the characters of the literal string -token appear verbatim in the string found in the table; even -double-quote characters are not escaped. For example, if the token -consists of three characters @samp{*"*}, its string in @code{yytname} -contains @samp{"*"*"}. (In C, that would be written as -@code{"\"*\"*\""}). +The name in the table includes all the characters needed to represent +the token in Bison. For single-character literals and literal +strings, this includes the surrounding quoting characters and any +escape sequences. 
For example, the Bison single-character literal +@code{'+'} corresponds to a three-character name, represented in C as +@code{"'+'"}; and the Bison two-character literal string @code{"\\/"} +corresponds to a five-character name, represented in C as +@code{"\"\\\\/\""}. When you specify @code{%token-table}, Bison also generates macro definitions for macros @code{YYNTOKENS}, @code{YYNNTS}, and @@ -4413,11 +4412,13 @@ the grammar file has no effect on @code{yylex}. table. The index of the token in the table is the token type's code. The name of a multicharacter token is recorded in @code{yytname} with a double-quote, the token's characters, and another double-quote. The -token's characters are not escaped in any way; they appear verbatim in -the contents of the string in the table. +token's characters are escaped as necessary to be suitable as input +to Bison. -Here's code for looking up a token in @code{yytname}, assuming that the -characters of the token are stored in @code{token_buffer}. +Here's code for looking up a multicharacter token in @code{yytname}, +assuming that the characters of the token are stored in +@code{token_buffer}, and assuming that the token does not contain any +characters like @samp{"} that require escaping. @smallexample for (i = 0; i < YYNTOKENS; i++) diff --git a/src/output.c b/src/output.c index 2a45c39a..51541822 100644 --- a/src/output.c +++ b/src/output.c @@ -60,7 +60,7 @@ bool error_verbose = false; #define GENERATE_MUSCLE_INSERT_TABLE(Name, Type) \ \ static void \ -Name (const char *name, \ +Name (char const *name, \ Type *table_data, \ Type first, \ int begin, \ @@ -162,10 +162,7 @@ prepare_symbols (void) int j = 2; for (i = 0; i < nsyms; i++) { - char const *tag = symbols[i]->tag; - char const *cp = (*tag == '"' - ? tag - : quotearg_style (c_quoting_style, tag)); + char const *cp = quotearg_style (c_quoting_style, symbols[i]->tag); /* Width of the next token, including the two quotes, the comma and the space. 
*/ int width = strlen (cp) + 2; diff --git a/tests/regression.at b/tests/regression.at index fe7c31b4..5ee3da93 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -324,9 +324,9 @@ int yylex (void); %token B_TOKEN "b" %token C_TOKEN 'c' %token 'd' D_TOKEN -%token SPECIAL "\\\'\?\"\n\t??!" +%token SPECIAL "\\\'\?\"\a\b\f\n\r\t\v\001\377\x001\x0000ff??!" %% -exp: "a" "\\\'\?\"\n\t??!"; +exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\377\x001\x0000ff??!"; %% void yyerror (char const *s) @@ -350,8 +350,7 @@ main (void) AT_CHECK([bison -o input.c input.y]) AT_COMPILE([input]) AT_PARSER_CHECK([./input], 1, [], -[syntax error, unexpected \'?" - ??!, expecting a +[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\377\001\377?\?!", expecting a ]) AT_CLEANUP @@ -635,8 +634,8 @@ static const unsigned char yyrline[] = }; static const char *const yytname[] = { - "$end", "error", "$undefined", "if", "const", "then", "else", "$accept", - "statement", "struct_stat", "if", "else", 0 + "$end", "error", "$undefined", "\"if\"", "\"const\"", "\"then\"", + "\"else\"", "$accept", "statement", "struct_stat", "if", "else", 0 }; static const unsigned short int yytoknum[] = {