PERL_ARGS_ASSERT_UTF16_TO_UTF8;
- if (bytelen == 1 && p[0] == 0) { /* Be understanding. */
- d[0] = 0;
- *newlen = 1;
- return d;
- }
-
if (bytelen & 1)
Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %"UVuf, (UV)bytelen);
*d++ = (U8)(( uv & 0x3f) | 0x80);
continue;
}
- if (uv >= 0xd800 && uv < 0xdbff) { /* surrogates */
- UV low = (p[0] << 8) + p[1];
- p += 2;
- if (low < 0xdc00 || low >= 0xdfff)
+ if (uv >= 0xd800 && uv <= 0xdbff) { /* surrogates */
+ if (p >= pend) {
Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
- uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
+ } else {
+ UV low = (p[0] << 8) + p[1];
+ p += 2;
+ if (low < 0xdc00 || low > 0xdfff)
+ Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
+ uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
+ }
+ } else if (uv >= 0xdc00 && uv <= 0xdfff) {
+ Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
}
if (uv < 0x10000) {
*d++ = (U8)(( uv >> 12) | 0xe0);
}
bool
+Perl_is_utf8_perl_space(pTHX_ const U8 *p)
+{
+ dVAR;
+
+ PERL_ARGS_ASSERT_IS_UTF8_PERL_SPACE;
+
+ return is_utf8_common(p, &PL_utf8_perl_space, "IsPerlSpace");
+}
+
+bool
+Perl_is_utf8_perl_word(pTHX_ const U8 *p)
+{
+ dVAR;
+
+ PERL_ARGS_ASSERT_IS_UTF8_PERL_WORD;
+
+ return is_utf8_common(p, &PL_utf8_perl_word, "IsPerlWord");
+}
+
+bool
Perl_is_utf8_digit(pTHX_ const U8 *p)
{
dVAR;
}
bool
+Perl_is_utf8_posix_digit(pTHX_ const U8 *p)
+{
+ dVAR;
+
+ PERL_ARGS_ASSERT_IS_UTF8_POSIX_DIGIT;
+
+ return is_utf8_common(p, &PL_utf8_posix_digit, "IsPosixDigit");
+}
+
+bool
Perl_is_utf8_upper(pTHX_ const U8 *p)
{
dVAR;
PERL_ARGS_ASSERT_IS_UTF8_XDIGIT;
- return is_utf8_common(p, &PL_utf8_xdigit, "Isxdigit");
+ return is_utf8_common(p, &PL_utf8_xdigit, "IsXDigit");
}
bool