return d;
}
#ifdef HAS_QUAD
- if (uv < 0x1000000000LL)
+ if (uv < UTF8_QUAD_MAX)
#endif
{
*d++ = 0xfe; /* Can't match U+FEFF! */
dTHR;
UV uv = *s, ouv;
STRLEN len = 1;
+#ifdef EBCDIC
+ bool dowarn = 0;
+#else
bool dowarn = ckWARN_d(WARN_UTF8);
+#endif
STRLEN expectlen = 0;
+ if (curlen == 0) {
+ if (dowarn)
+ Perl_warner(aTHX_ WARN_UTF8,
+ "Malformed UTF-8 character (an empty string)");
+ goto malformed;
+ }
+
if (uv <= 0x7f) { /* Pure ASCII. */
if (retlen)
*retlen = 1;
goto malformed;
}
- if ((uv >= 0xc0 && uv <= 0xfd && curlen >1 && s[1] < 0x80) &&
+ if ((uv >= 0xc0 && uv <= 0xfd && curlen > 1 && s[1] < 0x80) &&
!(flags & UTF8_ALLOW_NON_CONTINUATION)) {
if (dowarn)
Perl_warner(aTHX_ WARN_UTF8,
if (dowarn)
Perl_warner(aTHX_ WARN_UTF8,
"Malformed UTF-8 character (%d byte%s, need %d)",
- curlen, curlen > 1 ? "s" : "", expectlen);
+ curlen, curlen == 1 ? "" : "s", expectlen);
goto malformed;
}
if (dowarn)
Perl_warner(aTHX_ WARN_UTF8,
"Malformed UTF-8 character (%d byte%s, need %d)",
- expectlen, expectlen > 1 ? "s": "", UNISKIP(uv));
+ expectlen, expectlen == 1 ? "": "s", UNISKIP(uv));
goto malformed;
}
return Perl_utf8_to_uv(aTHX_ s, (STRLEN)-1, retlen, 0);
}
+/*
+=for apidoc|utf8_length|U8 *s|U8 *e
+
+Returns the length, in characters, of the UTF-8 encoded string that starts
+at C<s>.  Counting stops at C<e>, which is exclusive: the byte at C<e>
+itself is never examined.  Croaks if C<e E<lt> s> or if the last character
+scanned would extend past C<e>.
+
+=cut
+*/
+
+STRLEN
+Perl_utf8_length(pTHX_ U8* s, U8* e) /* counts the characters in the bytes from s up to, but not including, e */
+{
+ STRLEN len = 0; /* characters counted so far; this is the value returned */
+
+ if (e < s) /* the end pointer lies before the start pointer */
+ Perl_croak(aTHX_ "panic: utf8_length: unexpected end");
+ while (s < e) {
+ U8 t = UTF8SKIP(s); /* number of bytes to step over for the character at s, as given by UTF8SKIP */
+
+ if (e - s < t) /* fewer than t bytes remain before e, so this character would run past e */
+ Perl_croak(aTHX_ "panic: utf8_length: unaligned end");
+ s += t;
+ len++; /* one whole character has been stepped over */
+ }
+
+ return len;
+}
+
/* utf8_distance(a,b) returns the number of UTF8 characters between
the pointers a and b */
-I32
+IV
Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
{
- I32 off = 0;
+ IV off = 0; /* signed result: decremented once per character when a < b, incremented once per character when b < a */
+
if (a < b) {
while (a < b) {
- a += UTF8SKIP(a);
+ U8 c = UTF8SKIP(a); /* number of bytes to step over for the character at a, as given by UTF8SKIP */
+
+ if (b - a < c) /* fewer than c bytes remain before b, so this step would run past b */
+ Perl_croak(aTHX_ "panic: utf8_distance: unaligned end");
+ a += c;
off--;
}
}
else {
while (b < a) {
- b += UTF8SKIP(b);
+ U8 c = UTF8SKIP(b); /* number of bytes to step over for the character at b, as given by UTF8SKIP */
+
+ if (a - b < c) /* fewer than c bytes remain before a, so this step would run past a */
+ Perl_croak(aTHX_ "panic: utf8_distance: unaligned end");
+ b += c;
off++;
}
}
+
return off;
}
if (!PL_utf8_toupper)
PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_toupper, p);
- return uv ? uv : utf8_to_uv(p,STRLEN_MAX,0,0);
+ return uv ? uv : utf8_to_uv(p,UTF8_MAXLEN,0,0);
}
UV
if (!PL_utf8_totitle)
PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_totitle, p);
- return uv ? uv : utf8_to_uv(p,STRLEN_MAX,0,0);
+ return uv ? uv : utf8_to_uv(p,UTF8_MAXLEN,0,0);
}
UV
if (!PL_utf8_tolower)
PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_tolower, p);
- return uv ? uv : utf8_to_uv(p,STRLEN_MAX,0,0);
+ return uv ? uv : utf8_to_uv(p,UTF8_MAXLEN,0,0);
}
/* a "swash" is a swatch hash */
PUSHMARK(SP);
EXTEND(SP,3);
PUSHs((SV*)sv);
- PUSHs(sv_2mortal(newSViv(utf8_to_uv(ptr, STRLEN_MAX, 0, 0) & ~(needents - 1))));
+ PUSHs(sv_2mortal(newSViv(utf8_to_uv(ptr, UTF8_MAXLEN, 0, 0) & ~(needents - 1))));
PUSHs(sv_2mortal(newSViv(needents)));
PUTBACK;
if (call_method("SWASHGET", G_SCALAR))