UV
Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
{
- return Perl_utf8n_to_uvchr(aTHX_ s, UTF8_MAXBYTES, retlen,
- ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+ return utf8n_to_uvchr(s, UTF8_MAXBYTES, retlen,
+ ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
}
/*
return (U32)to_uni_lower(c, tmpbuf, &len);
}
-bool
-S_is_utf8_common(pTHX_ const U8 const *p, SV **swash,
+static bool
+S_is_utf8_common(pTHX_ const U8 *const p, SV **swash,
const char *const swashname)
{
if (!is_utf8_char(p))
bool
Perl_is_utf8_alnum(pTHX_ const U8 *p)
{
- if (!is_utf8_char(p))
- return FALSE;
- if (!PL_utf8_alnum)
- /* NOTE: "IsWord", not "IsAlnum", since Alnum is a true
- * descendant of isalnum(3), in other words, it doesn't
- * contain the '_'. --jhi */
- PL_utf8_alnum = swash_init("utf8", "IsWord", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_alnum, p, TRUE) != 0;
-/* return *p == '_' || is_utf8_alpha(p) || is_utf8_digit(p); */
-#ifdef SURPRISINGLY_SLOWER /* probably because alpha is usually true */
- if (!PL_utf8_alnum)
- PL_utf8_alnum = swash_init("utf8", "",
- sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0);
- return swash_fetch(PL_utf8_alnum, p, TRUE) != 0;
-#endif
+ /* NOTE: "IsWord", not "IsAlnum", since Alnum is a true
+ * descendant of isalnum(3), in other words, it doesn't
+ * contain the '_'. --jhi */
+ return S_is_utf8_common(aTHX_ p, &PL_utf8_alnum, "IsWord");
}
bool
Perl_is_utf8_alnumc(pTHX_ const U8 *p)
{
- if (!is_utf8_char(p))
- return FALSE;
- if (!PL_utf8_alnumc)
- PL_utf8_alnumc = swash_init("utf8", "IsAlnumC", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_alnumc, p, TRUE) != 0;
-/* return is_utf8_alpha(p) || is_utf8_digit(p); */
-#ifdef SURPRISINGLY_SLOWER /* probably because alpha is usually true */
- if (!PL_utf8_alnum)
- PL_utf8_alnum = swash_init("utf8", "",
- sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0);
- return swash_fetch(PL_utf8_alnum, p, TRUE) != 0;
-#endif
+ return S_is_utf8_common(aTHX_ p, &PL_utf8_alnumc, "IsAlnumC");
}
bool
return 0;
}
+/*
+=for apidoc A|U8 *|uvchr_to_utf8|U8 *d|UV uv
+
+Adds the UTF-8 representation of the Native codepoint C<uv> to the end
+of the string C<d>; C<d> should be have at least C<UTF8_MAXBYTES+1> free
+bytes available. The return value is the pointer to the byte after the
+end of the new character. In other words,
+
+ d = uvchr_to_utf8(d, uv);
+
+is the recommended wide native character-aware way of saying
+
+ *(d++) = uv;
+
+=cut
+*/
+
+/* On ASCII machines this is normally a macro but we want a
+ real function in case XS code wants it
+*/
+U8 *
+Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv)
+{
+ return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0);
+}
+
U8 *
Perl_uvchr_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
{
}
/*
+=for apidoc A|UV|utf8n_to_uvchr|U8 *s|STRLEN curlen|STRLEN *retlen|U32
+flags
+
+Returns the native character value of the first character in the string
+C<s>
+which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
+length, in bytes, of that character.
+
+Allows length and flags to be passed to low level routine.
+
+=cut
+*/
+/* On ASCII machines this is normally a macro but we want
+ a real function in case XS code wants it
+*/
+UV
+Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen,
+U32 flags)
+{
+ const UV uv = Perl_utf8n_to_uvuni(aTHX_ s, curlen, retlen, flags);
+ return UNI_TO_NATIVE(uv);
+}
+
+/*
=for apidoc A|char *|pv_uni_display|SV *dsv|U8 *spv|STRLEN len|STRLEN pvlim|UV flags
Build to the scalar dsv a displayable version of the string spv,