utf16_to_utf8() should croak on encountering a bare low surrogate.
Nicholas Clark [Sun, 18 Oct 2009 21:01:49 +0000 (22:01 +0100)]
ext/XS-APItest/t/utf16_to_utf8.t
utf8.c

index 3f6f798..5e6c58a 100644 (file)
@@ -34,6 +34,9 @@ is($got, undef, 'hence eval returns undef');
 
 for (["\xD8\0\0\0", 'NULs'],
      ["\xD8\0\xD8\0", '2 Lows'],
+     ["\xDC\0\0\0", 'High NUL'],
+     ["\xDC\0\xD8\0", 'High Low'],
+     ["\xDC\0\xDC\0", 'High High'],
     ) {
     my ($malformed, $name) = @$_;
     $got = eval {utf16_to_utf8($malformed)};
diff --git a/utf8.c b/utf8.c
index 4a728aa..3e4451b 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -995,6 +995,8 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
                    Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
                uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
            }
+       } else if (uv >= 0xdc00 && uv <= 0xdfff) {
+           Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
        }
        if (uv < 0x10000) {
            *d++ = (U8)(( uv >> 12)         | 0xe0);