From: Nicholas Clark Date: Sun, 18 Oct 2009 21:01:49 +0000 (+0100) Subject: utf16_to_utf8() should croak on encountering a bare low surrogate. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=dbde19516d139ef4237fc56ac1a14665a9f13c0b;p=p5sagit%2Fp5-mst-13.2.git utf16_to_utf8() should croak on encountering a bare low surrogate. --- diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t index 3f6f798..5e6c58a 100644 --- a/ext/XS-APItest/t/utf16_to_utf8.t +++ b/ext/XS-APItest/t/utf16_to_utf8.t @@ -34,6 +34,9 @@ is($got, undef, 'hence eval returns undef'); for (["\xD8\0\0\0", 'NULs'], ["\xD8\0\xD8\0", '2 Lows'], + ["\xDC\0\0\0", 'High NUL'], + ["\xDC\0\xD8\0", 'High Low'], + ["\xDC\0\xDC\0", 'High High'], ) { my ($malformed, $name) = @$_; $got = eval {utf16_to_utf8($malformed)}; diff --git a/utf8.c b/utf8.c index 4a728aa..3e4451b 100644 --- a/utf8.c +++ b/utf8.c @@ -995,6 +995,8 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen) Perl_croak(aTHX_ "Malformed UTF-16 surrogate"); uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000; } + } else if (uv >= 0xdc00 && uv <= 0xdfff) { + Perl_croak(aTHX_ "Malformed UTF-16 surrogate"); } if (uv < 0x10000) { *d++ = (U8)(( uv >> 12) | 0xe0);