From: Nicholas Clark Date: Sun, 18 Oct 2009 20:55:52 +0000 (+0100) Subject: utf16_to_utf8() should croak if the buffer ends without the second surrogate. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=01ea242be7d23d3bfac7a37c0cdfaec0a8eb7e33;p=p5sagit%2Fp5-mst-13.2.git utf16_to_utf8() should croak if the buffer ends without the second surrogate. --- diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t index 83add20..3f6f798 100644 --- a/ext/XS-APItest/t/utf16_to_utf8.t +++ b/ext/XS-APItest/t/utf16_to_utf8.t @@ -54,3 +54,10 @@ like($@, qr/^panic: utf16_to_utf8_reversed: odd bytelen 1 at/, 'Odd byte length panics'); is($got, undef, 'hence eval returns undef'); is($in, "NA", 'and input unchanged'); + +$in = "\xD8\0\xDC\0"; +$got = eval {utf16_to_utf8($in, 2)}; +like($@, qr/^Malformed UTF-16 surrogate at/, 'Lone surrogate croaks'); +(ok(!defined $got, 'hence eval returns undef')) or + diag(join ', ', map {ord $_} split //, $got); + diff --git a/utf8.c b/utf8.c index 455078d..4a728aa 100644 --- a/utf8.c +++ b/utf8.c @@ -986,11 +986,15 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen) continue; } if (uv >= 0xd800 && uv < 0xdbff) { /* surrogates */ - UV low = (p[0] << 8) + p[1]; - p += 2; - if (low < 0xdc00 || low >= 0xdfff) + if (p >= pend) { Perl_croak(aTHX_ "Malformed UTF-16 surrogate"); - uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000; + } else { + UV low = (p[0] << 8) + p[1]; + p += 2; + if (low < 0xdc00 || low >= 0xdfff) + Perl_croak(aTHX_ "Malformed UTF-16 surrogate"); + uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000; + } } if (uv < 0x10000) { *d++ = (U8)(( uv >> 12) | 0xe0);