+2009-08-19 Joel E. Denny <jdenny@clemson.edu>
+
+ Fix complaints about escape sequences.
+ Discussed starting at
+ <http://lists.gnu.org/archive/html/bison-patches/2009-08/msg00036.html>.
+ * src/scan-gram.l (SC_ESCAPED_STRING, SC_ESCAPED_CHARACTER):
+ For a \0 and similar escape sequences meaning the null
+ character, report an invalid escape sequence instead of an
+ invalid null character because the latter does not actually
+ appear in the user's input.
+ In all escape sequence complaints, don't escape the initial
+ backslash, and don't quote when the sequence appears at the end
+ of the complaint line unless there's whitespace that quotearg
+ won't escape.
+ Consistently say "invalid" not "unrecognized".
+ Consistently prefer "empty character literal" over "extra
+ characters in character literal" warning for invalid escape
+ sequences; that is, consistently discard those sequences.
+ * tests/input.at (Bad escapes in literals): New.
+
2009-08-19 Akim Demaille <demaille@gostai.com>
doc: fixes.
#include <src/reader.h>
#include <src/uniqstr.h>
+#include <ctype.h>
#include <mbswidth.h>
#include <quote.h>
{
\\[0-7]{1,3} {
unsigned long int c = strtoul (yytext + 1, NULL, 8);
- if (UCHAR_MAX < c)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (!c || UCHAR_MAX < c) /* Octal escape: zero and values above UCHAR_MAX get the same complaint.  */
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1); /* yytext+1 skips the leading backslash, so only the digits are shown.  */
else
obstack_1grow (&obstack_for_string, c);
}
\\x[0-9abcdefABCDEF]+ {
verify (UCHAR_MAX < ULONG_MAX);
unsigned long int c = strtoul (yytext + 2, NULL, 16);
- if (UCHAR_MAX < c)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (!c || UCHAR_MAX < c) /* Hex escape: zero and values above UCHAR_MAX get the same complaint.  */
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1); /* yytext+1 skips the backslash but keeps the 'x' in the message.  */
else
obstack_1grow (&obstack_for_string, c);
}
\\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
int c = convert_ucn_to_byte (yytext);
- if (c < 0)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (c <= 0) /* UCN escape: a negative or zero result from convert_ucn_to_byte gets one complaint.  */
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1); /* yytext+1 skips the backslash but keeps the 'u'/'U' in the message.  */
else
obstack_1grow (&obstack_for_string, c);
}
\\(.|\n) {
- complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
- STRING_GROW;
+ char const *p = yytext + 1; /* The single character that follows the backslash.  */
+ char quoted_ws[] = "` '"; /* Quote template; the middle byte is overwritten below.  */
+ if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p)) /* <ctype.h> requires an unsigned char value (or EOF); plain char may be negative.  */
+ {
+ quoted_ws[1] = *p; /* Printable whitespace is wrapped in quotes so it stays visible in the message.  */
+ p = quoted_ws;
+ }
+ else
+ p = quotearg_style_mem (escape_quoting_style, p, 1); /* Any other byte is rendered by quotearg in escape style.  */
+ complain_at (*loc, _("invalid character after \\-escape: %s"), p);
}
}
]])
AT_CLEANUP
+
+## ------------------------- ##
+## Bad escapes in literals. ##
+## ------------------------- ##
+
+AT_SETUP([[Bad escapes in literals]])
+
+AT_DATA([input.y],
+[[%%
+start: '\777' '\0' '\xfff' '\x0'
+ '\uffff' '\u0000' '\Uffffffff' '\U00000000'
+ '\ ' '\A';
+]])
+# Append a string literal in which each backslash is followed by a raw control byte: tr maps the placeholders T, F, 0, 1 to tab, form feed, NUL and \001.
+echo 'start: "\T\F\0\1" ;' | tr 'TF01' '\011\014\0\1' >> input.y
+
+AT_BISON_CHECK([input.y], [1], [],
+[[input.y:2.9-12: invalid number after \-escape: 777
+input.y:2.8-13: warning: empty character literal
+input.y:2.16-17: invalid number after \-escape: 0
+input.y:2.15-18: warning: empty character literal
+input.y:2.21-25: invalid number after \-escape: xfff
+input.y:2.20-26: warning: empty character literal
+input.y:2.29-31: invalid number after \-escape: x0
+input.y:2.28-32: warning: empty character literal
+input.y:3.9-14: invalid number after \-escape: uffff
+input.y:3.8-15: warning: empty character literal
+input.y:3.18-23: invalid number after \-escape: u0000
+input.y:3.17-24: warning: empty character literal
+input.y:3.27-36: invalid number after \-escape: Uffffffff
+input.y:3.26-37: warning: empty character literal
+input.y:3.40-49: invalid number after \-escape: U00000000
+input.y:3.39-50: warning: empty character literal
+input.y:4.9-10: invalid character after \-escape: ` '
+input.y:4.8-11: warning: empty character literal
+input.y:4.14-15: invalid character after \-escape: A
+input.y:4.13-16: warning: empty character literal
+input.y:5.9-16: invalid character after \-escape: \t
+input.y:5.17: invalid character after \-escape: \f
+input.y:5.18: invalid character after \-escape: \0
+input.y:5.19: invalid character after \-escape: \001
+]])
+
+AT_CLEANUP