/* utf8.c
*
- * Copyright (c) 1998-2002, Larry Wall
+ * Copyright (c) 1998-2003, Larry Wall
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
/*
=for apidoc A|bool|is_utf8_string|U8 *s|STRLEN len
-Returns true if first C<len> bytes of the given string form a valid UTF8
-string, false otherwise. Note that 'a valid UTF8 string' does not mean
-'a string that contains UTF8' because a valid ASCII string is a valid
-UTF8 string.
+Returns true if the first C<len> bytes of the given string form a valid
+UTF8 string, false otherwise. Note that 'a valid UTF8 string' does
+not mean 'a string that contains code points above 0x7F encoded in
+UTF8' because a valid ASCII string is a valid UTF8 string.
=cut
*/
send = s + len;
while (x < send) {
- c = is_utf8_char(x);
- if (!c)
- return FALSE;
+ /* Inline the easy bits of is_utf8_char() here for speed... */
+ if (UTF8_IS_INVARIANT(*x))
+ c = 1;
+ else if (!UTF8_IS_START(*x))
+ return FALSE;
+ else {
+ /* ... and call is_utf8_char() only if really needed. */
+ c = is_utf8_char(x);
+ if (!c)
+ return FALSE;
+ }
x += c;
}
if (x != send)
Returns a pointer to the newly-created string, and sets C<len> to
reflect the new length.
+If you want to convert to UTF8 from encodings other than ASCII,
+see sv_recode_to_utf8().
+
=cut
*/
HE *he;
SV *val;
-#if defined(UNDER_CE) && defined(MIPS)
-/*strange: compiler complaints that I redefine macro UVXf and points where
- it was first defined. I copied line from there without any changes.
- Nothing should change.
- But when I do not do this, there is an error on a line with
- Perl_newSVpvf(aTHX_ "%04"UVXf, uv1)
-*/
-#define UVXf "lX" /**/
-#endif
if ((hv = get_hv(special, FALSE)) &&
(keysv = sv_2mortal(Perl_newSVpvf(aTHX_ "%04"UVXf, uv1))) &&
(he = hv_fetch_ent(hv, keysv, FALSE, 0)) &&
if (PL_curcop == &PL_compiling) {
/* XXX ought to be handled by lex_start */
SAVEI32(PL_in_my);
+ PL_in_my = 0;
sv_setpv(tokenbufsv, PL_tokenbuf);
}
errsv_save = newSVsv(ERRSV);
}
if (!SvROK(retval) || SvTYPE(SvRV(retval)) != SVt_PVHV) {
if (SvPOK(retval))
- Perl_croak(aTHX_ "Can't find Unicode property definition \"%s\"",
- SvPV_nolen(retval));
+ Perl_croak(aTHX_ "Can't find Unicode property definition \"%"SVf"\"",
+ retval);
Perl_croak(aTHX_ "SWASHNEW didn't return an HV ref");
}
return retval;