[PATCH] perlcommunity.pod: add information about OSDC.fr

[p5sagit/p5-mst-13.2.git] / utf8.c
diff --git a/utf8.c b/utf8.c

index 455078d..c504891 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -958,12 +958,6 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
 
     PERL_ARGS_ASSERT_UTF16_TO_UTF8;
 
-    if (bytelen == 1 && p[0] == 0) { /* Be understanding. */
-        d[0] = 0;
-        *newlen = 1;
-        return d;
-    }
-
     if (bytelen & 1)
        Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %"UVuf, (UV)bytelen);
 
@@ -985,12 +979,18 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
            *d++ = (U8)(( uv        & 0x3f) | 0x80);
            continue;
        }
-       if (uv >= 0xd800 && uv < 0xdbff) {      /* surrogates */
-           UV low = (p[0] << 8) + p[1];
-           p += 2;
-           if (low < 0xdc00 || low >= 0xdfff)
+       if (uv >= 0xd800 && uv <= 0xdbff) {     /* surrogates */
+           if (p >= pend) {
                Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
-           uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
+           } else {
+               UV low = (p[0] << 8) + p[1];
+               p += 2;
+               if (low < 0xdc00 || low > 0xdfff)
+                   Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
+               uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
+           }
+       } else if (uv >= 0xdc00 && uv <= 0xdfff) {
+           Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
        }
        if (uv < 0x10000) {
            *d++ = (U8)(( uv >> 12)         | 0xe0);
@@ -1369,6 +1369,26 @@ Perl_is_utf8_space(pTHX_ const U8 *p)
 }
 
 bool
+Perl_is_utf8_perl_space(pTHX_ const U8 *p)
+{
+    dVAR;
+
+    PERL_ARGS_ASSERT_IS_UTF8_PERL_SPACE;
+
+    return is_utf8_common(p, &PL_utf8_perl_space, "IsPerlSpace");
+}
+
+bool
+Perl_is_utf8_perl_word(pTHX_ const U8 *p)
+{
+    dVAR;
+
+    PERL_ARGS_ASSERT_IS_UTF8_PERL_WORD;
+
+    return is_utf8_common(p, &PL_utf8_perl_word, "IsPerlWord");
+}
+
+bool
 Perl_is_utf8_digit(pTHX_ const U8 *p)
 {
     dVAR;
@@ -1379,6 +1399,16 @@ Perl_is_utf8_digit(pTHX_ const U8 *p)
 }
 
 bool
+Perl_is_utf8_posix_digit(pTHX_ const U8 *p)
+{
+    dVAR;
+
+    PERL_ARGS_ASSERT_IS_UTF8_POSIX_DIGIT;
+
+    return is_utf8_common(p, &PL_utf8_posix_digit, "IsPosixDigit");
+}
+
+bool
 Perl_is_utf8_upper(pTHX_ const U8 *p)
 {
     dVAR;
@@ -1445,7 +1475,7 @@ Perl_is_utf8_xdigit(pTHX_ const U8 *p)
 
     PERL_ARGS_ASSERT_IS_UTF8_XDIGIT;
 
-    return is_utf8_common(p, &PL_utf8_xdigit, "Isxdigit");
+    return is_utf8_common(p, &PL_utf8_xdigit, "IsXDigit");
 }
 
 bool