From: Joel E. Denny Date: Thu, 20 Aug 2009 00:37:28 +0000 (-0400) Subject: Fix complaints about escape sequences. X-Git-Tag: v2.4.1a~29 X-Git-Url: https://git.saurik.com/bison.git/commitdiff_plain/b1a4261e6d2c8b5eecf2f7be8c920f0a83feee42 Fix complaints about escape sequences. Discussed starting at . * src/scan-gram.l (SC_ESCAPED_STRING, SC_ESCAPED_CHARACTER): For a \0 and similar escape sequences meaning the null character, report an invalid escape sequence instead of an invalid null character because the latter does not actually appear in the user's input. In all escape sequence complaints, don't escape the initial backslash, and don't quote when the sequence appears at the end of the complaint line unless there's whitespace that quotearg won't escape. Consistently say "invalid" not "unrecognized". * tests/input.at (Bad escapes in literals): New. (cherry picked from commit c2724603c9d87e816dbdf1a9bfd7d70ffc1bd137) Conflicts: tests/input.at --- diff --git a/ChangeLog b/ChangeLog index 86a3c496..3874c978 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2009-08-19 Joel E. Denny + + Fix complaints about escape sequences. + Discussed starting at + . + * src/scan-gram.l (SC_ESCAPED_STRING, SC_ESCAPED_CHARACTER): + For a \0 and similar escape sequences meaning the null + character, report an invalid escape sequence instead of an + invalid null character because the latter does not actually + appear in the user's input. + In all escape sequence complaints, don't escape the initial + backslash, and don't quote when the sequence appears at the end + of the complaint line unless there's whitespace that quotearg + won't escape. + Consistently say "invalid" not "unrecognized". + * tests/input.at (Bad escapes in literals): New. + 2009-08-19 Akim Demaille doc: %initial-action to initialize yylloc. diff --git a/src/scan-gram.l b/src/scan-gram.l index 110c339d..683d5822 100644 --- a/src/scan-gram.l +++ b/src/scan-gram.l @@ -37,6 +37,7 @@ #include "reader.h" #include "uniqstr.h" +#include #include #include @@ -399,10 +400,9 @@ splice (\\[ \f\t\v]*\n)* { \\[0-7]{1,3} { unsigned long int c = strtoul (yytext + 1, NULL, 8); - if (UCHAR_MAX < c) - complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); - else if (! c) - complain_at (*loc, _("invalid null character: %s"), quote (yytext)); + if (!c || UCHAR_MAX < c) + complain_at (*loc, _("invalid number after \\-escape: %s"), + yytext+1); else obstack_1grow (&obstack_for_string, c); } @@ -410,10 +410,9 @@ splice (\\[ \f\t\v]*\n)* \\x[0-9abcdefABCDEF]+ { verify (UCHAR_MAX < ULONG_MAX); unsigned long int c = strtoul (yytext + 2, NULL, 16); - if (UCHAR_MAX < c) - complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); - else if (! c) - complain_at (*loc, _("invalid null character: %s"), quote (yytext)); + if (!c || UCHAR_MAX < c) + complain_at (*loc, _("invalid number after \\-escape: %s"), + yytext+1); else obstack_1grow (&obstack_for_string, c); } @@ -431,16 +430,23 @@ splice (\\[ \f\t\v]*\n)* \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} { int c = convert_ucn_to_byte (yytext); - if (c < 0) - complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); - else if (! c) - complain_at (*loc, _("invalid null character: %s"), quote (yytext)); + if (c <= 0) + complain_at (*loc, _("invalid number after \\-escape: %s"), + yytext+1); else obstack_1grow (&obstack_for_string, c); } \\(.|\n) { - complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext)); - STRING_GROW; + char const *p = yytext + 1; + char quoted_ws[] = "` '"; + if (isspace (*p) && isprint (*p)) + { + quoted_ws[1] = *p; + p = quoted_ws; + } + else + p = quotearg_style_mem (escape_quoting_style, p, 1); + complain_at (*loc, _("invalid character after \\-escape: %s"), p); } } diff --git a/tests/input.at b/tests/input.at index 00c4b2be..1a63d7e8 100644 --- a/tests/input.at +++ b/tests/input.at @@ -940,3 +940,36 @@ AT_CHECK_NAMESPACE_ERROR([[::]], [[namespace reference has a trailing "::"]]) AT_CLEANUP + +## ------------------------- ## +## Bad escapes in literals. ## +## ------------------------- ## + +AT_SETUP([[Bad escapes in literals]]) + +AT_DATA([input.y], +[[%% +start: '\777' '\0' '\xfff' '\x0' + '\uffff' '\u0000' '\Uffffffff' '\U00000000' + '\ ' '\A'; +]]) +echo 'start: "\T\F\0\1" ;' | tr 'TF01' '\011\014\0\1' >> input.y + +AT_BISON_CHECK([input.y], [1], [], +[[input.y:2.9-12: invalid number after \-escape: 777 +input.y:2.16-17: invalid number after \-escape: 0 +input.y:2.21-25: invalid number after \-escape: xfff +input.y:2.29-31: invalid number after \-escape: x0 +input.y:3.9-14: invalid number after \-escape: uffff +input.y:3.18-23: invalid number after \-escape: u0000 +input.y:3.27-36: invalid number after \-escape: Uffffffff +input.y:3.40-49: invalid number after \-escape: U00000000 +input.y:4.9-10: invalid character after \-escape: ` ' +input.y:4.14-15: invalid character after \-escape: A +input.y:5.9-16: invalid character after \-escape: \t +input.y:5.17: invalid character after \-escape: \f +input.y:5.18: invalid character after \-escape: \0 +input.y:5.19: invalid character after \-escape: \001 +]]) + +AT_CLEANUP