From: Claes Jakobsson Date: Thu, 14 May 2009 14:10:06 +0000 (+0200) Subject: Amelioration of the error message "Unrecognized character %s in column %d" X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b1fc363696b1a308e6bce75eb292bacc8f2c9833;p=p5sagit%2Fp5-mst-13.2.git Amelioration of the error message "Unrecognized character %s in column %d" Changes the error message to "Unrecognized character %s; marked by <-- HERE after %s<-- HERE near column %d". This should make it a little simpler to spot and correct the suspicious character. --- diff --git a/pod/perldiag.pod b/pod/perldiag.pod index f2a4a1a..1dbb3e4 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4423,10 +4423,10 @@ reserved word. It's best to put such a word in quotes, or capitalize it somehow, or insert an underbar into it. You might also declare it as a subroutine. -=item Unrecognized character %s in column %d +=item Unrecognized character %s; marked by <-- HERE after %s near column %d (F) The Perl parser has no idea what to do with the specified character -in your Perl script (or eval) at the specified column. Perhaps you tried +in your Perl script (or eval) near the specified column. Perhaps you tried to run a compressed script, a binary program, or a directory as a Perl program. =item Unrecognized escape \\%c in character class passed through in regex; marked by <-- HERE in m/%s/ diff --git a/t/base/lex.t b/t/base/lex.t index 1b8045b..8cadf85 100755 --- a/t/base/lex.t +++ b/t/base/lex.t @@ -265,7 +265,7 @@ sub foo::::::bar { print "ok $test\n"; $test++ } foo::::::bar; eval "\$x =\xE2foo"; -if ($@ =~ /Unrecognized character \\xE2 in column 5/) { print "ok $test\n"; } else { print "not ok $test\n"; } +if ($@ =~ /Unrecognized character \\xE2; marked by <-- HERE after \$x =<-- HERE near column 5/) { print "ok $test\n"; } else { print "not ok $test\n"; } $test++; # Is "[~" scanned correctly? diff --git a/toke.c b/toke.c index c803a80..3fda3a5 100644 --- a/toke.c +++ b/toke.c @@ -124,6 +124,9 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) #endif +/* The maximum number of characters preceding the unrecognized one to display */ +#define UNRECOGNIZED_PRECEDE_COUNT 10 + /* In variables named $^X, these are the legal values for X. * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) @@ -3652,8 +3655,17 @@ Perl_yylex(pTHX) default: if (isIDFIRST_lazy_if(s,UTF)) goto keylookup; - len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); - Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1); + { + unsigned char c = *s; + len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); + if (len > UNRECOGNIZED_PRECEDE_COUNT) { + d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; + } else { + d = PL_linestart; + } + *s = '\0'; + Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1); + } case 4: case 26: goto fake_eof; /* emulate EOF on ^D or ^Z */