From: Nicholas Clark Date: Thu, 22 Oct 2009 12:30:03 +0000 (+0100) Subject: Perl_utf16_to_utf8() should treat "\0" like every other odd-length input. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=26cc780bb8911dcc1e2c3e971db95322db3e2d9f;p=p5sagit%2Fp5-mst-13.2.git Perl_utf16_to_utf8() should treat "\0" like every other odd-length input. The "be understanding" bodge to not panic, introduced in 1de9afcdf18cf98b, is no longer needed now that c28d61051c446453 fixes the underlying problem. --- diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t index 592d0b1..40a6288 100644 --- a/ext/XS-APItest/t/utf16_to_utf8.t +++ b/ext/XS-APItest/t/utf16_to_utf8.t @@ -23,15 +23,13 @@ for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0xFFFD, } } -# Currently this is special-cased to work. Should it? - -is(utf16_to_utf8("\0"), "\0", 'Short string to utf16_to_utf8'); - -# But anything else is fatal - -my $got = eval {utf16_to_utf8('N')}; -like($@, qr/^panic: utf16_to_utf8: odd bytelen 1 at/, 'Odd byte length panics'); -is($got, undef, 'hence eval returns undef'); +foreach ("\0", 'N', 'Perl rules!') { + my $length = length $_; + my $got = eval {utf16_to_utf8($_)}; + like($@, qr/^panic: utf16_to_utf8: odd bytelen $length at/, + "Odd byte length panics for '$_'"); + is($got, undef, 'hence eval returns undef'); +} for (["\xD8\0\0\0", 'NULs'], ["\xD8\0\xD8\0", '2 Lows'], @@ -40,7 +38,7 @@ for (["\xD8\0\0\0", 'NULs'], ["\xDC\0\xDC\0", 'High High'], ) { my ($malformed, $name) = @$_; - $got = eval {utf16_to_utf8($malformed)}; + my $got = eval {utf16_to_utf8($malformed)}; like($@, qr/^Malformed UTF-16 surrogate at/, "Malformed surrogate $name croaks for utf16_to_utf8"); is($got, undef, 'hence eval returns undef'); @@ -53,7 +51,7 @@ for (["\xD8\0\0\0", 'NULs'], } my $in = "NA"; -$got = eval {utf16_to_utf8_reversed($in, 1)}; +my $got = eval {utf16_to_utf8_reversed($in, 1)}; like($@, qr/^panic: utf16_to_utf8_reversed: odd 
bytelen 1 at/, 'Odd byte length panics'); is($got, undef, 'hence eval returns undef'); diff --git a/utf8.c b/utf8.c index dc12df8..c504891 100644 --- a/utf8.c +++ b/utf8.c @@ -958,12 +958,6 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen) PERL_ARGS_ASSERT_UTF16_TO_UTF8; - if (bytelen == 1 && p[0] == 0) { /* Be understanding. */ - d[0] = 0; - *newlen = 1; - return d + 1; - } - if (bytelen & 1) Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %"UVuf, (UV)bytelen);