From: Nicholas Clark
Date: Sun, 18 Oct 2009 21:09:14 +0000 (+0100)
Subject: In utf16_to_utf8(), fix off-by-one errors for the range of valid surrogates.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=52b9aa85a8c28ddc591b0a7f2b1f8c729075d9a1;p=p5sagit%2Fp5-mst-13.2.git

In utf16_to_utf8(), fix off-by-one errors for the range of valid surrogates.

Both high ends were one too low.
---

diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t
index 5e6c58a..592d0b1 100644
--- a/ext/XS-APItest/t/utf16_to_utf8.t
+++ b/ext/XS-APItest/t/utf16_to_utf8.t
@@ -6,7 +6,8 @@ use Encode;
 
 use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed);
 
-for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0x10000) {
+for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0xFFFD,
+             0x10000, 0x10FC00, 0x103FF, 0x10FFFD) {
     my $chr = chr $ord;
     for my $prefix ('', "\0", 'Perl rules') {
         for my $suffix ('', "\0", "Moo!") {
diff --git a/utf8.c b/utf8.c
index 3e4451b..3de02ed 100644
--- a/utf8.c
+++ b/utf8.c
@@ -985,13 +985,13 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
             *d++ = (U8)(( uv & 0x3f) | 0x80);
             continue;
         }
-        if (uv >= 0xd800 && uv < 0xdbff) { /* surrogates */
+        if (uv >= 0xd800 && uv <= 0xdbff) { /* surrogates */
             if (p >= pend) {
                 Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
             } else {
                 UV low = (p[0] << 8) + p[1];
                 p += 2;
-                if (low < 0xdc00 || low >= 0xdfff)
+                if (low < 0xdc00 || low > 0xdfff)
                     Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
                 uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
             }