From: Jarkko Hietaniemi Date: Tue, 18 Dec 2001 15:24:50 +0000 (+0000) Subject: Make the utf8 malformedness messages more verbose. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=097fb8e2acde8522bd4ee4e5e00d3d2b810e2e56;p=p5sagit%2Fp5-mst-13.2.git Make the utf8 malformedness messages more verbose. p4raw-id: //depot/perl@13757 --- diff --git a/lib/utf8.t b/lib/utf8.t index ee3c258..aaa0685 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -159,7 +159,7 @@ plan tests => 94; use utf8; %a = ("\xE1\xA0"=>"sterling"); print 'start'; printf '%x,', ord \$_ foreach keys %a; print "end\n"; BANG - qr/^Malformed UTF-8 character \(2 bytes, need 3\).*start\d+,end$/s + qr/^Malformed UTF-8 character \(2 bytes, need 3.+\).*start\d+,end$/s ], ); foreach (@tests) { diff --git a/utf8.c b/utf8.c index af36592..4ca7b1c 100644 --- a/utf8.c +++ b/utf8.c @@ -251,9 +251,11 @@ Most code should use utf8_to_uvchr() rather than call this directly. UV Perl_utf8n_to_uvuni(pTHX_ U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) { + U8 *s0 = s; UV uv = *s, ouv = 0; STRLEN len = 1; bool dowarn = ckWARN_d(WARN_UTF8); + U8 startbyte = *s; STRLEN expectlen = 0; U32 warning = 0; @@ -396,23 +398,28 @@ malformed: Perl_sv_catpvf(aTHX_ sv, "(empty string)"); break; case UTF8_WARN_CONTINUATION: - Perl_sv_catpvf(aTHX_ sv, "(unexpected continuation byte 0x%02"UVxf")", uv); + Perl_sv_catpvf(aTHX_ sv, "(unexpected continuation byte 0x%02"UVxf", with no preceding start byte)", uv); break; case UTF8_WARN_NON_CONTINUATION: - Perl_sv_catpvf(aTHX_ sv, "(unexpected non-continuation byte 0x%02"UVxf" after start byte 0x%02"UVxf")", - (UV)s[1], uv); + if (s == s0) + Perl_sv_catpvf(aTHX_ sv, "(unexpected non-continuation byte 0x%02"UVxf", immediately after start byte 0x%02"UVxf")", + (UV)s[1], startbyte); + else + Perl_sv_catpvf(aTHX_ sv, "(unexpected non-continuation byte 0x%02"UVxf", %d byte%s after start byte 0x%02"UVxf", expected %d bytes)", + (UV)s[1], s - s0, s - s0 > 1 ? "s" : "", startbyte, expectlen); + break; case UTF8_WARN_FE_FF: Perl_sv_catpvf(aTHX_ sv, "(byte 0x%02"UVxf")", uv); break; case UTF8_WARN_SHORT: - Perl_sv_catpvf(aTHX_ sv, "(%d byte%s, need %d)", - curlen, curlen == 1 ? "" : "s", expectlen); + Perl_sv_catpvf(aTHX_ sv, "(%d byte%s, need %d, after start byte 0x%02"UVxf")", + curlen, curlen == 1 ? "" : "s", expectlen, startbyte); expectlen = curlen; /* distance for caller to skip */ break; case UTF8_WARN_OVERFLOW: - Perl_sv_catpvf(aTHX_ sv, "(overflow at 0x%"UVxf", byte 0x%02x)", - ouv, *s); + Perl_sv_catpvf(aTHX_ sv, "(overflow at 0x%"UVxf", byte 0x%02x, after start byte 0x%02"UVxf")", + ouv, *s, startbyte); break; case UTF8_WARN_SURROGATE: Perl_sv_catpvf(aTHX_ sv, "(UTF-16 surrogate 0x%04"UVxf")", uv); @@ -421,8 +428,8 @@ malformed: Perl_sv_catpvf(aTHX_ sv, "(byte order mark 0x%04"UVxf")", uv); break; case UTF8_WARN_LONG: - Perl_sv_catpvf(aTHX_ sv, "(%d byte%s, need %d)", - expectlen, expectlen == 1 ? "": "s", UNISKIP(uv)); + Perl_sv_catpvf(aTHX_ sv, "(%d byte%s, need %d, after start byte 0x%02"UVxf")", + expectlen, expectlen == 1 ? "": "s", UNISKIP(uv), startbyte); break; case UTF8_WARN_FFFF: Perl_sv_catpvf(aTHX_ sv, "(character 0x%04"UVxf")", uv);