In utf16_to_utf8(), fix off-by-one errors for the range of valid surrogates.
Nicholas Clark [Sun, 18 Oct 2009 21:09:14 +0000 (22:09 +0100)]
Both upper bounds were one too low: 0xDBFF was not recognised as a high surrogate, and 0xDFFF was rejected as a low surrogate.

ext/XS-APItest/t/utf16_to_utf8.t
utf8.c

index 5e6c58a..592d0b1 100644 (file)
@@ -6,7 +6,8 @@ use Encode;
 
 use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed);
 
-for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0x10000) {
+for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0xFFFD,
+            0x10000, 0x10FC00, 0x103FF, 0x10FFFD) {
     my $chr = chr $ord;
     for my $prefix ('', "\0", 'Perl rules') {
        for my $suffix ('', "\0", "Moo!") {
diff --git a/utf8.c b/utf8.c
index 3e4451b..3de02ed 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -985,13 +985,13 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
            *d++ = (U8)(( uv        & 0x3f) | 0x80);
            continue;
        }
-       if (uv >= 0xd800 && uv < 0xdbff) {      /* surrogates */
+       if (uv >= 0xd800 && uv <= 0xdbff) {     /* surrogates */
            if (p >= pend) {
                Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
            } else {
                UV low = (p[0] << 8) + p[1];
                p += 2;
-               if (low < 0xdc00 || low >= 0xdfff)
+               if (low < 0xdc00 || low > 0xdfff)
                    Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
                uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
            }