/* Unicode support */
/*
 * Patch excerpt for Perl_uv_to_utf8(): a line starting with '-' is removed by
 * the patch, a line starting with '+' is added, and unmarked lines are
 * unchanged context.
 *
 * The function writes an encoded form of uv through d and returns d.  The
 * patch stores a 0 byte at *d after the last encoded byte in every branch
 * without advancing d, so the returned pointer still addresses the first
 * position past the encoded bytes, which now holds that 0.  The added
 * signature comment requires the caller's buffer to be UTF8_MAXLEN+1 deep.
 *
 * NOTE(review): from the "uv < 0x200000" branch onward only three
 * continuation-style bytes are visible per branch, and the HAS_QUAD tail
 * appears twice with unbalanced braces and a single #endif.  This looks like
 * elided hunk context rather than the complete function body -- confirm
 * against the full file before relying on these lines.
 */
U8 *
-Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
+Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */
{
/* Below 0x80: uv is stored unchanged as a single byte. */
if (uv < 0x80) {
*d++ = uv;
+ *d = 0;
return d;
}
/* Below 0x800: two bytes -- the bits above the low six OR'ed with 0xc0,
 * then the low six bits OR'ed with 0x80. */
if (uv < 0x800) {
*d++ = (( uv >> 6) | 0xc0);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
/* Below 0x10000: three bytes -- a lead byte OR'ed with 0xe0 followed by two
 * six-bit groups, each OR'ed with 0x80. */
if (uv < 0x10000) {
*d++ = (( uv >> 12) | 0xe0);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
/* Below 0x200000: the visible lines emit bits 12-17, 6-11 and 0-5, each
 * OR'ed with 0x80.
 * NOTE(review): no lead byte is visible for this branch; presumably it sits
 * in context the excerpt omits -- confirm. */
if (uv < 0x200000) {
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
/* Below 0x4000000: same three visible trailing bytes as the previous branch.
 * NOTE(review): leading bytes are not visible here either -- confirm. */
if (uv < 0x4000000) {
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
/* Below 0x80000000: same three visible trailing bytes again.
 * NOTE(review): leading bytes are not visible here either -- confirm. */
if (uv < 0x80000000) {
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
/* NOTE(review): the two HAS_QUAD sections below show only the final three
 * bytes, the added 0 store and the return; the conditions and opening braces
 * that would pair with the closing braces are not visible -- confirm. */
#ifdef HAS_QUAD
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
#ifdef HAS_QUAD
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
#endif
/*
 * Perl_is_uni_alnum: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_alnum() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_alnum(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_alnum(tmpbuf);
}
/*
 * Perl_is_uni_alnumc: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_alnumc() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_alnumc(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_alnumc(tmpbuf);
}
/*
 * Perl_is_uni_idfirst: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_idfirst() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_idfirst(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_idfirst(tmpbuf);
}
/*
 * Perl_is_uni_alpha: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_alpha() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_alpha(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_alpha(tmpbuf);
}
/*
 * Perl_is_uni_ascii: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_ascii() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_ascii(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_ascii(tmpbuf);
}
/*
 * Perl_is_uni_space: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_space() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_space(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_space(tmpbuf);
}
/*
 * Perl_is_uni_digit: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_digit() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_digit(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_digit(tmpbuf);
}
/*
 * Perl_is_uni_upper: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_upper() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_upper(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_upper(tmpbuf);
}
/*
 * Perl_is_uni_lower: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_lower() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_lower(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_lower(tmpbuf);
}
/*
 * Perl_is_uni_cntrl: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_cntrl() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_cntrl(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_cntrl(tmpbuf);
}
/*
 * Perl_is_uni_graph: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_graph() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_graph(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_graph(tmpbuf);
}
/*
 * Perl_is_uni_print: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_print() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_print(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_print(tmpbuf);
}
/*
 * Perl_is_uni_punct: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_punct() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_punct(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_punct(tmpbuf);
}
/*
 * Perl_is_uni_xdigit: encodes c into a local buffer with uv_to_utf8() and
 * returns what is_utf8_xdigit() reports for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
bool
Perl_is_uni_xdigit(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_xdigit(tmpbuf);
}
/*
 * Perl_to_uni_upper: encodes c into a local buffer with uv_to_utf8() and
 * returns the U32 produced by to_utf8_upper() for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
U32
Perl_to_uni_upper(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return to_utf8_upper(tmpbuf);
}
/*
 * Perl_to_uni_title: encodes c into a local buffer with uv_to_utf8() and
 * returns the U32 produced by to_utf8_title() for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
U32
Perl_to_uni_title(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return to_utf8_title(tmpbuf);
}
/*
 * Perl_to_uni_lower: encodes c into a local buffer with uv_to_utf8() and
 * returns the U32 produced by to_utf8_lower() for that buffer.
 *
 * Patch lines: '-' is removed, '+' is added.  The change grows tmpbuf from
 * UTF8_MAXLEN to UTF8_MAXLEN+1 bytes.
 * NOTE(review): the extra byte presumably holds the 0 that the same patch
 * makes Perl_uv_to_utf8() store after the encoded bytes -- confirm that
 * uv_to_utf8 resolves to that function.
 */
U32
Perl_to_uni_lower(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return to_utf8_lower(tmpbuf);
}