ulen = 1;
/* Need to check this, otherwise 128..255 won't match */
- c = utf8_to_uv_chk(s, send - s, &ulen, 0);
+ c = utf8_to_uv(s, send - s, &ulen, 0);
if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
matches++;
if (ch < 0x80)
STRLEN ulen;
ulen = 1;
if (hasutf)
- c = utf8_to_uv_chk(s, send - s, &ulen, 0);
+ c = utf8_to_uv(s, send - s, &ulen, 0);
else
c = *s;
if (c < 0x100 && tbl[c] >= 0)
}
else if (uv == none) { /* "none" is unmapped character */
STRLEN ulen;
- *d++ = (U8)utf8_to_uv_chk(s, send - s, &ulen, 0);
+ *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
s += ulen;
puv = 0xfeedface;
continue;
}
else if (uv == none) { /* "none" is unmapped character */
STRLEN ulen;
- *d++ = (U8)utf8_to_uv_chk(s, send - s, &ulen, 0);
+ *d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
s += ulen;
continue;
}
if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
Perl_croak(aTHX_ "Illegal number of bits in vec");
- if (SvUTF8(sv)) {
+ if (SvUTF8(sv))
(void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
- }
offset *= size; /* turn into bit offset */
len = (offset + size + 7) / 8; /* required number of bytes */
switch (optype) {
case OP_BIT_AND:
while (lulen && rulen) {
- luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
+ luc = utf8_to_uv((U8*)lc, lulen, &ulen, 0);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
+ ruc = utf8_to_uv((U8*)rc, rulen, &ulen, 0);
rc += ulen;
rulen -= ulen;
duc = luc & ruc;
break;
case OP_BIT_XOR:
while (lulen && rulen) {
- luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
+ luc = utf8_to_uv((U8*)lc, lulen, &ulen, 0);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
+ ruc = utf8_to_uv((U8*)rc, rulen, &ulen, 0);
rc += ulen;
rulen -= ulen;
duc = luc ^ ruc;
goto mop_up_utf;
case OP_BIT_OR:
while (lulen && rulen) {
- luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
+ luc = utf8_to_uv((U8*)lc, lulen, &ulen, 0);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
+ ruc = utf8_to_uv((U8*)rc, rulen, &ulen, 0);
rc += ulen;
rulen -= ulen;
duc = luc | ruc;
#define utf8_hop Perl_utf8_hop
#define utf8_to_bytes Perl_utf8_to_bytes
#define bytes_to_utf8 Perl_bytes_to_utf8
+#define utf8_to_uv_simple Perl_utf8_to_uv_simple
#define utf8_to_uv Perl_utf8_to_uv
-#define utf8_to_uv_chk Perl_utf8_to_uv_chk
#define uv_to_utf8 Perl_uv_to_utf8
#define vivify_defelem Perl_vivify_defelem
#define vivify_ref Perl_vivify_ref
#define utf8_hop(a,b) Perl_utf8_hop(aTHX_ a,b)
#define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b)
#define bytes_to_utf8(a,b) Perl_bytes_to_utf8(aTHX_ a,b)
-#define utf8_to_uv(a,b) Perl_utf8_to_uv(aTHX_ a,b)
-#define utf8_to_uv_chk(a,b,c,d) Perl_utf8_to_uv_chk(aTHX_ a,b,c,d)
+#define utf8_to_uv_simple(a,b) Perl_utf8_to_uv_simple(aTHX_ a,b)
+#define utf8_to_uv(a,b,c,d) Perl_utf8_to_uv(aTHX_ a,b,c,d)
#define uv_to_utf8(a,b) Perl_uv_to_utf8(aTHX_ a,b)
#define vivify_defelem(a) Perl_vivify_defelem(aTHX_ a)
#define vivify_ref(a,b) Perl_vivify_ref(aTHX_ a,b)
#define utf8_to_bytes Perl_utf8_to_bytes
#define Perl_bytes_to_utf8 CPerlObj::Perl_bytes_to_utf8
#define bytes_to_utf8 Perl_bytes_to_utf8
+#define Perl_utf8_to_uv_simple CPerlObj::Perl_utf8_to_uv_simple
+#define utf8_to_uv_simple Perl_utf8_to_uv_simple
#define Perl_utf8_to_uv CPerlObj::Perl_utf8_to_uv
#define utf8_to_uv Perl_utf8_to_uv
-#define Perl_utf8_to_uv_chk CPerlObj::Perl_utf8_to_uv_chk
-#define utf8_to_uv_chk Perl_utf8_to_uv_chk
#define Perl_uv_to_utf8 CPerlObj::Perl_uv_to_utf8
#define uv_to_utf8 Perl_uv_to_utf8
#define Perl_vivify_defelem CPerlObj::Perl_vivify_defelem
Ap |U8* |utf8_hop |U8 *s|I32 off
ApM |U8* |utf8_to_bytes |U8 *s|STRLEN *len
ApM |U8* |bytes_to_utf8 |U8 *s|STRLEN *len
-Ap |UV |utf8_to_uv |U8 *s|STRLEN* retlen
-Ap |UV |utf8_to_uv_chk |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
-Ap |U8* |uv_to_utf8 |U8 *d|UV uv
+Ap |UV |utf8_to_uv_simple|U8 *s|STRLEN* retlen
+Ap |UV |utf8_to_uv |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
+Ap |U8* |uv_to_utf8|U8 *d|UV uv
p |void |vivify_defelem |SV* sv
p |void |vivify_ref |SV* sv|U32 to_what
p |I32 |wait4pid |Pid_t pid|int* statusp|int flags
#define Perl_bytes_to_utf8 pPerl->Perl_bytes_to_utf8
#undef bytes_to_utf8
#define bytes_to_utf8 Perl_bytes_to_utf8
+#undef Perl_utf8_to_uv_simple
+#define Perl_utf8_to_uv_simple pPerl->Perl_utf8_to_uv_simple
+#undef utf8_to_uv_simple
+#define utf8_to_uv_simple Perl_utf8_to_uv_simple
#undef Perl_utf8_to_uv
#define Perl_utf8_to_uv pPerl->Perl_utf8_to_uv
#undef utf8_to_uv
#define utf8_to_uv Perl_utf8_to_uv
-#undef Perl_utf8_to_uv_chk
-#define Perl_utf8_to_uv_chk pPerl->Perl_utf8_to_uv_chk
-#undef utf8_to_uv_chk
-#define utf8_to_uv_chk Perl_utf8_to_uv_chk
#undef Perl_uv_to_utf8
#define Perl_uv_to_utf8 pPerl->Perl_uv_to_utf8
#undef uv_to_utf8
for (j = 0; j < i; j++) {
U8 *s = cp[j];
I32 cur = j < i ? cp[j+1] - s : tend - s;
- UV val = utf8_to_uv_chk(s, cur, &ulen, 0);
+ UV val = utf8_to_uv(s, cur, &ulen, 0);
s += ulen;
diff = val - nextmin;
if (diff > 0) {
}
}
if (*s == 0xff)
- val = utf8_to_uv_chk(s+1, cur - 1, &ulen, 0);
+ val = utf8_to_uv(s+1, cur - 1, &ulen, 0);
if (val >= nextmin)
nextmin = val + 1;
}
while (t < tend || tfirst <= tlast) {
/* see if we need more "t" chars */
if (tfirst > tlast) {
- tfirst = (I32)utf8_to_uv_chk(t, tend - t, &ulen, 0);
+ tfirst = (I32)utf8_to_uv(t, tend - t, &ulen, 0);
t += ulen;
if (t < tend && *t == 0xff) { /* illegal utf8 val indicates range */
t++;
- tlast = (I32)utf8_to_uv_chk(t, tend - t, &ulen, 0);
+ tlast = (I32)utf8_to_uv(t, tend - t, &ulen, 0);
t += ulen;
}
else
/* now see if we need more "r" chars */
if (rfirst > rlast) {
if (r < rend) {
- rfirst = (I32)utf8_to_uv_chk(r, rend - r, &ulen, 0);
+ rfirst = (I32)utf8_to_uv(r, rend - r, &ulen, 0);
r += ulen;
if (r < rend && *r == 0xff) { /* illegal utf8 val indicates range */
r++;
- rlast = (I32)utf8_to_uv_chk(r, rend - r, &ulen, 0);
+ rlast = (I32)utf8_to_uv(r, rend - r, &ulen, 0);
r += ulen;
}
else
}
#undef Perl_is_utf8_char
-int
+STRLEN
Perl_is_utf8_char(pTHXo_ U8 *p)
{
return ((CPerlObj*)pPerl)->Perl_is_utf8_char(p);
return ((CPerlObj*)pPerl)->Perl_bytes_to_utf8(s, len);
}
-#undef Perl_utf8_to_uv
+#undef Perl_utf8_to_uv_simple
UV
-Perl_utf8_to_uv(pTHXo_ U8 *s, STRLEN* retlen)
+Perl_utf8_to_uv_simple(pTHXo_ U8 *s, STRLEN* retlen)
{
- return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, retlen);
+ return ((CPerlObj*)pPerl)->Perl_utf8_to_uv_simple(s, retlen);
}
-#undef Perl_utf8_to_uv_chk
+#undef Perl_utf8_to_uv
UV
-Perl_utf8_to_uv_chk(pTHXo_ U8 *s, STRLEN curlen, STRLEN* retlen, bool checking)
+Perl_utf8_to_uv(pTHXo_ U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags)
{
- return ((CPerlObj*)pPerl)->Perl_utf8_to_uv_chk(s, curlen, retlen, checking);
+ return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, curlen, retlen, flags);
}
#undef Perl_uv_to_utf8
=for hackers
Found in file sv.h
-=item svtype
+=item SvTYPE
-An enum of flags for Perl types. These are found in the file B<sv.h>
-in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV. See C<svtype>.
+
+ svtype SvTYPE(SV* sv)
=for hackers
Found in file sv.h
-=item SvTYPE
-
-Returns the type of the SV. See C<svtype>.
+=item svtype
- svtype SvTYPE(SV* sv)
+An enum of flags for Perl types. These are found in the file B<sv.h>
+in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
=for hackers
Found in file sv.h
=item utf8_to_uv
Returns the character value of the first character in the string C<s>
-which is assumed to be in UTF8 encoding; C<retlen> will be set to the
-length, in bytes, of that character, and the pointer C<s> will be
-advanced to the end of the character.
+which is assumed to be in UTF8 encoding and no longer than C<curlen>;
+C<retlen> will be set to the length, in bytes, of that character,
+and the pointer C<s> will be advanced to the end of the character.
-If C<s> does not point to a well-formed UTF8 character, an optional UTF8
+If C<s> does not point to a well-formed UTF8 character, the behaviour
+is dependent on the value of C<checking>: if this is true, it is
+assumed that the caller will raise a warning, and this function will
+set C<retlen> to C<-1> and return. If C<checking> is not true, an optional UTF8
warning is produced.
- U8* s utf8_to_uv(STRLEN *retlen)
+ U8* s utf8_to_uv(STRLEN curlen, I32 *retlen, U32 flags)
=for hackers
Found in file utf8.c
-=item utf8_to_uv_chk
+=item utf8_to_uv_simple
Returns the character value of the first character in the string C<s>
-which is assumed to be in UTF8 encoding and no longer than C<curlen>;
-C<retlen> will be set to the length, in bytes, of that character,
-and the pointer C<s> will be advanced to the end of the character.
+which is assumed to be in UTF8 encoding; C<retlen> will be set to the
+length, in bytes, of that character, and the pointer C<s> will be
+advanced to the end of the character.
-If C<s> does not point to a well-formed UTF8 character, the behaviour
-is dependent on the value of C<checking>: if this is true, it is
-assumed that the caller will raise a warning, and this function will
-set C<retlen> to C<-1> and return. If C<checking> is not true, an optional UTF8
-warning is produced.
+If C<s> does not point to a well-formed UTF8 character, zero is
+returned and retlen is set, if possible, to -1.
- U8* s utf8_to_uv_chk(STRLEN curlen, I32 *retlen, I32 checking)
+ U8* s utf8_to_uv_simple(STRLEN *retlen)
=for hackers
Found in file utf8.c
Regardless of the above, the C<bytes> pragma can always be used to force
byte semantics in a particular lexical scope. See L<bytes>.
-One effect of the C<utf8> pragma is that the internal UTF-8 decoding
-becomes stricter so that the character 0xFFFF (UTF-8 bytes 0xEF 0xBF
-0xBF), and the bytes 0xFE and 0xFF, start to cause warnings if they
-appear in the data.
-
The C<utf8> pragma is primarily a compatibility device that enables
recognition of UTF-8 in literals encountered by the parser. It may also
be used for enabling some of the more experimental Unicode support features.
send = tmps + len;
while (tmps < send) {
- UV c = utf8_to_uv_chk(tmps, 0, &l, UTF8_ALLOW_ANY);
+ UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
tmps += UTF8SKIP(tmps);
targlen += UNISKIP(~c);
}
tmps -= len;
Newz(0, result, targlen + 1, U8);
while (tmps < send) {
- UV c = utf8_to_uv_chk(tmps, 0, &l, UTF8_ALLOW_ANY);
+ UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
tmps += UTF8SKIP(tmps);
result = uv_to_utf8(result,(UV)~c);
}
STRLEN retlen;
if ((*tmps & 0x80) && DO_UTF8(tmpsv))
- value = utf8_to_uv_chk(tmps, len, &retlen, 0);
+ value = utf8_to_uv(tmps, len, &retlen, 0);
else
value = (UV)(*tmps & 255);
XPUSHu(value);
STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN];
U8 *tend;
- UV uv = utf8_to_uv_chk(s, slen, &ulen, 0);
+ UV uv = utf8_to_uv(s, slen, &ulen, 0);
if (PL_op->op_private & OPpLOCALE) {
TAINT;
STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN];
U8 *tend;
- UV uv = utf8_to_uv_chk(s, slen, &ulen, 0);
+ UV uv = utf8_to_uv(s, slen, &ulen, 0);
if (PL_op->op_private & OPpLOCALE) {
TAINT;
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv_chk(s, len, &ulen, 0)));
+ d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
s += ulen;
}
}
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv_chk(s, len, &ulen, 0)));
+ d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
s += ulen;
}
}
if (checksum) {
while (len-- > 0 && s < strend) {
STRLEN alen;
- auint = utf8_to_uv_chk((U8*)s, strend - s, &alen, 0);
+ auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
along = alen;
s += along;
if (checksum > 32)
EXTEND_MORTAL(len);
while (len-- > 0 && s < strend) {
STRLEN alen;
- auint = utf8_to_uv_chk((U8*)s, strend - s, &alen, 0);
+ auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
along = alen;
s += along;
sv = NEWSV(37, 0);
U8 *s = (U8*)SvPVX(sv);
U8 *end = (U8*)SvPVX(sv) + SvCUR(sv);
if (s < end) {
- rev = utf8_to_uv_chk(s, end - s, &len, 0);
+ rev = utf8_to_uv(s, end - s, &len, 0);
s += len;
if (s < end) {
- ver = utf8_to_uv_chk(s, end - s, &len, 0);
+ ver = utf8_to_uv(s, end - s, &len, 0);
s += len;
if (s < end)
- sver = utf8_to_uv_chk(s, end - s, &len, 0);
+ sver = utf8_to_uv(s, end - s, &len, 0);
}
}
if (PERL_REVISION < rev
PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off);
PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len);
-PERL_CALLCONV UV Perl_utf8_to_uv(pTHX_ U8 *s, STRLEN* retlen);
-PERL_CALLCONV UV Perl_utf8_to_uv_chk(pTHX_ U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags);
+PERL_CALLCONV UV Perl_utf8_to_uv_simple(pTHX_ U8 *s, STRLEN* retlen);
+PERL_CALLCONV UV Perl_utf8_to_uv(pTHX_ U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags);
PERL_CALLCONV U8* Perl_uv_to_utf8(pTHX_ U8 *d, UV uv);
PERL_CALLCONV void Perl_vivify_defelem(pTHX_ SV* sv);
PERL_CALLCONV void Perl_vivify_ref(pTHX_ SV* sv, U32 to_what);
default:
normal_default:
if ((*p & 0xc0) == 0xc0 && UTF) {
- ender = utf8_to_uv_chk((U8*)p, PL_regxend - p,
+ ender = utf8_to_uv((U8*)p, PL_regxend - p,
&numlen, 0);
p += numlen;
}
namedclass = OOB_NAMEDCLASS;
if (!range)
rangebegin = PL_regcomp_parse;
- value = utf8_to_uv_chk((U8*)PL_regcomp_parse,
+ value = utf8_to_uv((U8*)PL_regcomp_parse,
PL_regxend - PL_regcomp_parse,
&numlen, 0);
PL_regcomp_parse += numlen;
if (value == '[')
namedclass = regpposixcc(value);
else if (value == '\\') {
- value = (U32)utf8_to_uv_chk((U8*)PL_regcomp_parse,
+ value = (U32)utf8_to_uv((U8*)PL_regcomp_parse,
PL_regxend - PL_regcomp_parse,
&numlen, 0);
PL_regcomp_parse += numlen;
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case BOUNDUTF8:
- tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1),
+ tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1),
strend - s,
0, 0) : '\n';
tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case NBOUNDUTF8:
- tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1),
+ tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1),
strend - s,
0, 0) : '\n';
tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < e) {
if (l >= PL_regeol)
sayNO;
- if (utf8_to_uv_chk((U8*)s, e - s, 0, 0) != (c1 ?
+ if (utf8_to_uv((U8*)s, e - s, 0, 0) != (c1 ?
toLOWER_utf8((U8*)l) :
toLOWER_LC_utf8((U8*)l)))
{
case NBOUNDUTF8:
/* was last char in word? */
ln = (locinput != PL_regbol)
- ? utf8_to_uv_chk(reghop((U8*)locinput, -1),
+ ? utf8_to_uv(reghop((U8*)locinput, -1),
PL_regeol - locinput, 0, 0) : PL_regprev;
if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
ln = isALNUM_uni(ln);
break;
}
if (utf)
- iv = (IV)utf8_to_uv_chk(vecstr, veclen, &ulen, 0);
+ iv = (IV)utf8_to_uv(vecstr, veclen, &ulen, 0);
else {
iv = *vecstr;
ulen = 1;
break;
}
if (utf)
- uv = utf8_to_uv_chk(vecstr, veclen, &ulen, 0);
+ uv = utf8_to_uv(vecstr, veclen, &ulen, 0);
else {
uv = *vecstr;
ulen = 1;
STRLEN skip;
UV n;
if (utf)
- n = utf8_to_uv_chk((U8*)start, len, &skip, 0);
+ n = utf8_to_uv((U8*)start, len, &skip, 0);
else {
n = *(U8*)start;
skip = 1;
STRLEN len;
UV uv;
- uv = utf8_to_uv_chk((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
+ uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
if (len == 1) {
/* Illegal UTF8 (a high-bit byte), make it valid. */
char *old_pvx = SvPVX(sv);
}
/*
-=for apidoc Am|U8* s|utf8_to_uv_chk|STRLEN curlen|I32 *retlen|U32 flags
+=for apidoc Am|U8* s|utf8_to_uv|STRLEN curlen|I32 *retlen|U32 flags
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding and no longer than C<curlen>;
and the pointer C<s> will be advanced to the end of the character.
If C<s> does not point to a well-formed UTF8 character, the behaviour
-is dependent on the value of C<checking>: if this is true, it is
-assumed that the caller will raise a warning, and this function will
-set C<retlen> to C<-1> and return. If C<checking> is not true, an optional UTF8
-warning is produced.
+is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
+it is assumed that the caller will raise a warning, and this function
+will set C<retlen> to C<-1> and return. The C<flags> can also contain
+various flags to allow deviations from the strict UTF-8 encoding.
-=cut
-*/
+=cut */
UV
-Perl_utf8_to_uv_chk(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
+Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
{
dTHR;
UV uv = *s, ouv;
}
/*
-=for apidoc Am|U8* s|utf8_to_uv|STRLEN *retlen
+=for apidoc Am|U8* s|utf8_to_uv_simple|STRLEN *retlen
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding; C<retlen> will be set to the
*/
UV
-Perl_utf8_to_uv(pTHX_ U8* s, STRLEN* retlen)
+Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen)
{
- return Perl_utf8_to_uv_chk(aTHX_ s, (STRLEN)-1, retlen, 0);
+ return Perl_utf8_to_uv(aTHX_ s, (STRLEN)-1, retlen, 0);
}
/* utf8_distance(a,b) returns the number of UTF8 characters between
U8 *
Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
{
- dTHR;
U8 *send;
U8 *d;
- U8 *save;
-
- send = s + *len;
- d = save = s;
+ U8 *save = s;
/* ensure valid UTF8 and chars < 256 before updating string */
- while (s < send) {
- U8 c = *s++;
+ for (send = s + *len; s < send; ) {
+ U8 c = *s++;
+
if (c >= 0x80 &&
- ( (s >= send) || ((*s++ & 0xc0) != 0x80) || ((c & 0xfe) != 0xc2))) {
- *len = -1;
- return 0;
- }
+ ((s >= send) ||
+ ((*s++ & 0xc0) != 0x80) || ((c & 0xfe) != 0xc2))) {
+ *len = -1;
+ return 0;
+ }
}
- s = save;
+
+ d = s = save;
while (s < send) {
- if (*s < 0x80)
- *d++ = *s++;
+ if (*s < 0x80) {
+ *d++ = *s++;
+ }
else {
STRLEN ulen;
- *d++ = (U8)utf8_to_uv(s, &ulen);
+ *d++ = (U8)utf8_to_uv_simple(s, &ulen);
s += ulen;
}
}
if (!PL_utf8_toupper)
PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_toupper, p);
- return uv ? uv : utf8_to_uv_chk(p,STRLEN_MAX,0,0);
+ return uv ? uv : utf8_to_uv(p,STRLEN_MAX,0,0);
}
UV
if (!PL_utf8_totitle)
PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_totitle, p);
- return uv ? uv : utf8_to_uv_chk(p,STRLEN_MAX,0,0);
+ return uv ? uv : utf8_to_uv(p,STRLEN_MAX,0,0);
}
UV
if (!PL_utf8_tolower)
PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_tolower, p);
- return uv ? uv : utf8_to_uv_chk(p,STRLEN_MAX,0,0);
+ return uv ? uv : utf8_to_uv(p,STRLEN_MAX,0,0);
}
/* a "swash" is a swatch hash */
PUSHMARK(SP);
EXTEND(SP,3);
PUSHs((SV*)sv);
- PUSHs(sv_2mortal(newSViv(utf8_to_uv_chk(ptr, STRLEN_MAX, 0, 0) & ~(needents - 1))));
+ PUSHs(sv_2mortal(newSViv(utf8_to_uv(ptr, STRLEN_MAX, 0, 0) & ~(needents - 1))));
PUSHs(sv_2mortal(newSViv(needents)));
PUTBACK;
if (call_method("SWASHGET", G_SCALAR))