if (!(u & 0x40))
return 0;
- if (!(u & 0x20)) { len = 2; }
- else if (!(u & 0x10)) { len = 3; }
- else if (!(u & 0x08)) { len = 4; }
- else if (!(u & 0x04)) { len = 5; }
- else if (!(u & 0x02)) { len = 6; }
- else if (!(u & 0x01)) { len = 7; }
- else { len = 13; } /* whoa! */
+ len = UTF8SKIP(s);
slen = len - 1;
s++;
=cut
*/
-bool
+bool
Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
{
U8* x=s;
}
/*
-=for apidoc Am|utf8_to_uv|U8 *s|I32 *retlen|I32 checking
+=for apidoc Am|UV|utf8_to_uv_chk|U8 *s|I32 *retlen|bool checking
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding; C<retlen> will be set to the
*/
UV
-Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking)
+Perl_utf8_to_uv_chk(pTHX_ U8* s, I32* retlen, bool checking)
{
UV uv = *s;
int len;
return 0;
}
- if (ckWARN_d(WARN_UTF8))
+ if (ckWARN_d(WARN_UTF8))
Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character");
if (retlen)
*retlen = 1;
return 0;
}
- if (ckWARN_d(WARN_UTF8))
+ if (ckWARN_d(WARN_UTF8))
Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character");
if (retlen)
*retlen -= len + 1;
return uv;
}
+/*
+=for apidoc Am|UV|utf8_to_uv|U8 *s|I32 *retlen
+
+Returns the character value of the first character in the string C<s>
+which is assumed to be in UTF8 encoding; C<retlen> will be set to the
+length, in bytes, of that character.  The pointer C<s> is passed by
+value and is not advanced; the caller must add the length stored
+through C<retlen> to step past the character.
+
+If C<s> does not point to a well-formed UTF8 character, an optional UTF8
+warning is produced.
+
+=cut
+*/
+
+UV
+Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen)
+{
+    /* Thin wrapper: forward to the checking variant, passing 0 as its
+     * "checking" argument, and hand back whatever it decodes. */
+    UV uv = Perl_utf8_to_uv_chk(aTHX_ s, retlen, 0);
+
+    return uv;
+}
+
/* utf8_distance(a,b) returns the number of UTF8 characters between
the pointers a and b */
if (c >= 0x80 &&
( (s >= send) || ((*s++ & 0xc0) != 0x80) || ((c & 0xfe) != 0xc2))) {
*len = -1;
- return 0;
+ return 0;
}
}
s = save;
*d++ = *s++;
else {
I32 ulen;
- *d++ = (U8)utf8_to_uv(s, &ulen, 0);
+ *d++ = (U8)utf8_to_uv(s, &ulen);
s += ulen;
}
}
if (!PL_utf8_toupper)
PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_toupper, p);
- return uv ? uv : utf8_to_uv(p,0,0);
+ return uv ? uv : utf8_to_uv_chk(p,0,0);
}
UV
if (!PL_utf8_totitle)
PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_totitle, p);
- return uv ? uv : utf8_to_uv(p,0,0);
+ return uv ? uv : utf8_to_uv_chk(p,0,0);
}
UV
if (!PL_utf8_tolower)
PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_tolower, p);
- return uv ? uv : utf8_to_uv(p,0,0);
+ return uv ? uv : utf8_to_uv_chk(p,0,0);
}
/* a "swash" is a swatch hash */
{
SV* retval;
char tmpbuf[256];
- dSP;
+ dSP;
if (!gv_stashpv(pkg, 0)) { /* demand load utf8 */
ENTER;
if (PL_curcop == &PL_compiling) /* XXX ought to be handled by lex_start */
strncpy(tmpbuf, PL_tokenbuf, sizeof tmpbuf);
if (call_method("SWASHNEW", G_SCALAR))
- retval = newSVsv(*PL_stack_sp--);
+ retval = newSVsv(*PL_stack_sp--);
else
retval = &PL_sv_undef;
LEAVE;
PUSHMARK(SP);
EXTEND(SP,3);
PUSHs((SV*)sv);
- PUSHs(sv_2mortal(newSViv(utf8_to_uv(ptr, 0, 0) & ~(needents - 1))));
+ PUSHs(sv_2mortal(newSViv(utf8_to_uv_chk(ptr, 0, 0) & ~(needents - 1))));
PUSHs(sv_2mortal(newSViv(needents)));
PUTBACK;
if (call_method("SWASHGET", G_SCALAR))
- retval = newSVsv(*PL_stack_sp--);
+ retval = newSVsv(*PL_stack_sp--);
else
retval = &PL_sv_undef;
POPSTACK;