ulen = 1;
/* Need to check this, otherwise 128..255 won't match */
- c = utf8_to_uv(s, &ulen, 0);
+ c = utf8_to_uv_chk(s, &ulen, 0);
if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
matches++;
- if (ch < 0x80)
+ if (ch < 0x80)
*d++ = ch;
- else
+ else
d = uv_to_utf8(d,ch);
s += ulen;
}
I32 ulen;
ulen = 1;
if (hasutf)
- c = utf8_to_uv(s,&ulen, 0);
+ c = utf8_to_uv_chk(s,&ulen, 0);
else
c = *s;
if (c < 0x100 && tbl[c] >= 0)
UV extra = none + 1;
UV final;
UV uv;
- I32 isutf;
+ I32 isutf;
I32 howmany;
isutf = SvUTF8(sv);
i = UTF8SKIP(s);
s += i;
matches++;
- if (i > 1 && !isutf++)
+ if (i > 1 && !isutf++)
HALF_UTF8_UPGRADE(dstart,d);
d = uv_to_utf8(d, final);
}
if (squash) {
UV puv = 0xfeedface;
while (s < send) {
- if (SvUTF8(sv))
+ if (SvUTF8(sv))
uv = swash_fetch(rv, s);
else {
U8 tmpbuf[2];
if (uv < none) {
matches++;
if (uv != puv) {
- if ((uv & 0x80) && !isutf++)
+ if ((uv & 0x80) && !isutf++)
HALF_UTF8_UPGRADE(dst,d);
d = uv_to_utf8(d, uv);
puv = uv;
}
else if (uv == none) { /* "none" is unmapped character */
I32 ulen;
- *d++ = (U8)utf8_to_uv(s, &ulen, 0);
+ *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
s += ulen;
puv = 0xfeedface;
continue;
}
else {
while (s < send) {
- if (SvUTF8(sv))
+ if (SvUTF8(sv))
uv = swash_fetch(rv, s);
else {
U8 tmpbuf[2];
}
else if (uv == none) { /* "none" is unmapped character */
I32 ulen;
- *d++ = (U8)utf8_to_uv(s, &ulen, 0);
+ *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
s += ulen;
continue;
}
{
dTHR;
STRLEN len;
- I32 hasutf = (PL_op->op_private &
+ I32 hasutf = (PL_op->op_private &
(OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
if (offset < 0)
return retnum;
- if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
+ if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
Perl_croak(aTHX_ "Illegal number of bits in vec");
if (SvUTF8(sv)) {
((UV) s[offset + 4] << 24) +
((UV) s[offset + 5] << 16);
else
- retnum =
+ retnum =
((UV) s[offset ] << 56) +
((UV) s[offset + 1] << 48) +
((UV) s[offset + 2] << 40) +
if (offset < 0)
Perl_croak(aTHX_ "Assigning to negative offset in vec");
size = LvTARGLEN(sv);
- if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
+ if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
Perl_croak(aTHX_ "Illegal number of bits in vec");
-
+
offset *= size; /* turn into bit offset */
len = (offset + size + 7) / 8; /* required number of bytes */
if (len > targlen) {
(void)memzero((char *)(s + targlen), len - targlen + 1);
SvCUR_set(targ, len);
}
-
+
if (size < 8) {
mask = (1 << size) - 1;
size = offset & 7;
STRLEN len;
char *s;
dTHR;
-
+
if (SvTYPE(sv) == SVt_PVAV) {
register I32 i;
I32 max;
nope:
SvSETMAGIC(sv);
return count;
-}
+}
void
Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
switch (optype) {
case OP_BIT_AND:
while (lulen && rulen) {
- luc = utf8_to_uv((U8*)lc, &ulen, 0);
+ luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv((U8*)rc, &ulen, 0);
+ ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
rc += ulen;
rulen -= ulen;
duc = luc & ruc;
break;
case OP_BIT_XOR:
while (lulen && rulen) {
- luc = utf8_to_uv((U8*)lc, &ulen, 0);
+ luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv((U8*)rc, &ulen, 0);
+ ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
rc += ulen;
rulen -= ulen;
duc = luc ^ ruc;
goto mop_up_utf;
case OP_BIT_OR:
while (lulen && rulen) {
- luc = utf8_to_uv((U8*)lc, &ulen, 0);
+ luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv((U8*)rc, &ulen, 0);
+ ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
rc += ulen;
rulen -= ulen;
duc = luc | ruc;
I32 dokeys = (PL_op->op_type == OP_KEYS);
I32 dovalues = (PL_op->op_type == OP_VALUES);
I32 realhv = (SvTYPE(hv) == SVt_PVHV);
-
- if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
+
+ if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
dokeys = dovalues = TRUE;
if (!hv) {
#define utf8_to_bytes Perl_utf8_to_bytes
#define bytes_to_utf8 Perl_bytes_to_utf8
#define utf8_to_uv Perl_utf8_to_uv
+#define utf8_to_uv_chk Perl_utf8_to_uv_chk
#define uv_to_utf8 Perl_uv_to_utf8
#define vivify_defelem Perl_vivify_defelem
#define vivify_ref Perl_vivify_ref
#define utf8_hop(a,b) Perl_utf8_hop(aTHX_ a,b)
#define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b)
#define bytes_to_utf8(a,b) Perl_bytes_to_utf8(aTHX_ a,b)
-#define utf8_to_uv(a,b,c) Perl_utf8_to_uv(aTHX_ a,b,c)
+#define utf8_to_uv(a,b) Perl_utf8_to_uv(aTHX_ a,b)
+#define utf8_to_uv_chk(a,b,c) Perl_utf8_to_uv_chk(aTHX_ a,b,c)
#define uv_to_utf8(a,b) Perl_uv_to_utf8(aTHX_ a,b)
#define vivify_defelem(a) Perl_vivify_defelem(aTHX_ a)
#define vivify_ref(a,b) Perl_vivify_ref(aTHX_ a,b)
#define bytes_to_utf8 Perl_bytes_to_utf8
#define Perl_utf8_to_uv CPerlObj::Perl_utf8_to_uv
#define utf8_to_uv Perl_utf8_to_uv
+#define Perl_utf8_to_uv_chk CPerlObj::Perl_utf8_to_uv_chk
+#define utf8_to_uv_chk Perl_utf8_to_uv_chk
#define Perl_uv_to_utf8 CPerlObj::Perl_uv_to_utf8
#define uv_to_utf8 Perl_uv_to_utf8
#define Perl_vivify_defelem CPerlObj::Perl_vivify_defelem
Ap |U8* |utf8_hop |U8 *s|I32 off
ApM |U8* |utf8_to_bytes |U8 *s|STRLEN *len
ApM |U8* |bytes_to_utf8 |U8 *s|STRLEN *len
-Ap |UV |utf8_to_uv |U8 *s|I32* retlen|bool checking
+Ap |UV |utf8_to_uv |U8 *s|I32* retlen
+Ap |UV |utf8_to_uv_chk |U8 *s|I32* retlen|bool checking
Ap |U8* |uv_to_utf8 |U8 *d|UV uv
p |void |vivify_defelem |SV* sv
p |void |vivify_ref |SV* sv|U32 to_what
Perl_utf8_to_bytes
Perl_bytes_to_utf8
Perl_utf8_to_uv
+Perl_utf8_to_uv_chk
Perl_uv_to_utf8
Perl_warn
Perl_vwarn
just figure out all the headers such a test needs.
Andy Dougherty August 1996
*/
-/* bool is built-in for g++-2.6.3 and later, which might be used
+/* bool is built-in for g++-2.6.3 and later, which might be used
for extensions. <_G_config.h> defines _G_HAVE_BOOL, but we can't
be sure _G_config.h will be included before this file. _G_config.h
- also defines _G_HAVE_BOOL for both gcc and g++, but only g++
+ also defines _G_HAVE_BOOL for both gcc and g++, but only g++
actually has bool. Hence, _G_HAVE_BOOL is pretty useless for us.
g++ can be identified by __GNUG__.
Andy Dougherty February 2000
Similarly, there is no guarantee that I16 and U16 have exactly 16
bits.
- For dealing with issues that may arise from various 32/64-bit
- systems, we will ask Configure to check out
+ For dealing with issues that may arise from various 32/64-bit
+ systems, we will ask Configure to check out
SHORTSIZE == sizeof(short)
INTSIZE == sizeof(int)
#define isPSXSPC_utf8(c) (isSPACE_utf8(c) ||(c) == '\f')
#define isBLANK_utf8(c) isBLANK(c) /* could be wrong */
-#define isALNUM_LC_utf8(p) isALNUM_LC_uni(utf8_to_uv(p, 0, 0))
-#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uni(utf8_to_uv(p, 0, 0))
-#define isALPHA_LC_utf8(p) isALPHA_LC_uni(utf8_to_uv(p, 0, 0))
-#define isSPACE_LC_utf8(p) isSPACE_LC_uni(utf8_to_uv(p, 0, 0))
-#define isDIGIT_LC_utf8(p) isDIGIT_LC_uni(utf8_to_uv(p, 0, 0))
-#define isUPPER_LC_utf8(p) isUPPER_LC_uni(utf8_to_uv(p, 0, 0))
-#define isLOWER_LC_utf8(p) isLOWER_LC_uni(utf8_to_uv(p, 0, 0))
-#define isALNUMC_LC_utf8(p) isALNUMC_LC_uni(utf8_to_uv(p, 0, 0))
-#define isCNTRL_LC_utf8(p) isCNTRL_LC_uni(utf8_to_uv(p, 0, 0))
-#define isGRAPH_LC_utf8(p) isGRAPH_LC_uni(utf8_to_uv(p, 0, 0))
-#define isPRINT_LC_utf8(p) isPRINT_LC_uni(utf8_to_uv(p, 0, 0))
-#define isPUNCT_LC_utf8(p) isPUNCT_LC_uni(utf8_to_uv(p, 0, 0))
-#define toUPPER_LC_utf8(p) toUPPER_LC_uni(utf8_to_uv(p, 0, 0))
-#define toTITLE_LC_utf8(p) toTITLE_LC_uni(utf8_to_uv(p, 0, 0))
-#define toLOWER_LC_utf8(p) toLOWER_LC_uni(utf8_to_uv(p, 0, 0))
+#define isALNUM_LC_utf8(p) isALNUM_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isALPHA_LC_utf8(p) isALPHA_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isSPACE_LC_utf8(p) isSPACE_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isDIGIT_LC_utf8(p) isDIGIT_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isUPPER_LC_utf8(p) isUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isLOWER_LC_utf8(p) isLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isALNUMC_LC_utf8(p) isALNUMC_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isCNTRL_LC_utf8(p) isCNTRL_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isGRAPH_LC_utf8(p) isGRAPH_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isPRINT_LC_utf8(p) isPRINT_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isPUNCT_LC_utf8(p) isPUNCT_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define toUPPER_LC_utf8(p) toUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define toTITLE_LC_utf8(p) toTITLE_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define toLOWER_LC_utf8(p) toLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0))
#define isPSXSPC_LC_utf8(c) (isSPACE_LC_utf8(c) ||(c) == '\f')
#define isBLANK_LC_utf8(c) isBLANK(c) /* could be wrong */
#endif
-/*
+/*
XXX LEAKTEST doesn't really work in perl5. There are direct calls to
safemalloc() in the source, so LEAKTEST won't pick them up.
(The main "offenders" are extensions.)
Creates a new SV. A non-zero C<len> parameter indicates the number of
bytes of preallocated string space the SV should have. An extra byte for a
tailing NUL is also reserved. (SvPOK is not set for the SV even if string
-space is allocated.) The reference count for the new SV is set to 1.
+space is allocated.) The reference count for the new SV is set to 1.
C<id> is an integer id between 0 and 1299 (used to identify leaks).
=for apidoc Am|void|New|int id|void* ptr|int nitems|type
#define Perl_utf8_to_uv pPerl->Perl_utf8_to_uv
#undef utf8_to_uv
#define utf8_to_uv Perl_utf8_to_uv
+#undef Perl_utf8_to_uv_chk
+#define Perl_utf8_to_uv_chk pPerl->Perl_utf8_to_uv_chk
+#undef utf8_to_uv_chk
+#define utf8_to_uv_chk Perl_utf8_to_uv_chk
#undef Perl_uv_to_utf8
#define Perl_uv_to_utf8 pPerl->Perl_uv_to_utf8
#undef uv_to_utf8
qsort(cp, i, sizeof(U8*), utf8compare);
for (j = 0; j < i; j++) {
U8 *s = cp[j];
- UV val = utf8_to_uv(s, &ulen, 0);
+ UV val = utf8_to_uv_chk(s, &ulen, 0);
s += ulen;
diff = val - nextmin;
if (diff > 0) {
}
}
if (*s == 0xff)
- val = utf8_to_uv(s+1, &ulen, 0);
+ val = utf8_to_uv_chk(s+1, &ulen, 0);
if (val >= nextmin)
nextmin = val + 1;
}
while (t < tend || tfirst <= tlast) {
/* see if we need more "t" chars */
if (tfirst > tlast) {
- tfirst = (I32)utf8_to_uv(t, &ulen, 0);
+ tfirst = (I32)utf8_to_uv_chk(t, &ulen, 0);
t += ulen;
if (t < tend && *t == 0xff) { /* illegal utf8 val indicates range */
- tlast = (I32)utf8_to_uv(++t, &ulen, 0);
+ tlast = (I32)utf8_to_uv_chk(++t, &ulen, 0);
t += ulen;
}
else
/* now see if we need more "r" chars */
if (rfirst > rlast) {
if (r < rend) {
- rfirst = (I32)utf8_to_uv(r, &ulen, 0);
+ rfirst = (I32)utf8_to_uv_chk(r, &ulen, 0);
r += ulen;
if (r < rend && *r == 0xff) { /* illegal utf8 val indicates range */
- rlast = (I32)utf8_to_uv(++r, &ulen, 0);
+ rlast = (I32)utf8_to_uv_chk(++r, &ulen, 0);
r += ulen;
}
else
#undef Perl_utf8_to_uv
UV
-Perl_utf8_to_uv(pTHXo_ U8 *s, I32* retlen, bool checking)
+Perl_utf8_to_uv(pTHXo_ U8 *s, I32* retlen)
{
- return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, retlen, checking);
+ return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, retlen);
+}
+
+#undef Perl_utf8_to_uv_chk
+UV
+Perl_utf8_to_uv_chk(pTHXo_ U8 *s, I32* retlen, bool checking)
+{
+ return ((CPerlObj*)pPerl)->Perl_utf8_to_uv_chk(s, retlen, checking);
}
#undef Perl_uv_to_utf8
=for hackers
Found in file sv.h
-=item svtype
+=item SvTYPE
-An enum of flags for Perl types. These are found in the file B<sv.h>
-in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV. See C<svtype>.
+
+ svtype SvTYPE(SV* sv)
=for hackers
Found in file sv.h
-=item SvTYPE
-
-Returns the type of the SV. See C<svtype>.
+=item svtype
- svtype SvTYPE(SV* sv)
+An enum of flags for Perl types. These are found in the file B<sv.h>
+in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
=for hackers
Found in file sv.h
I32 retlen;
if ((*tmps & 0x80) && DO_UTF8(tmpsv))
- value = utf8_to_uv(tmps, &retlen, 0);
+ value = utf8_to_uv_chk(tmps, &retlen, 0);
else
value = (UV)(*tmps & 255);
XPUSHu(value);
I32 ulen;
U8 tmpbuf[UTF8_MAXLEN];
U8 *tend;
- UV uv = utf8_to_uv(s, &ulen, 0);
+ UV uv = utf8_to_uv_chk(s, &ulen, 0);
if (PL_op->op_private & OPpLOCALE) {
TAINT;
I32 ulen;
U8 tmpbuf[UTF8_MAXLEN];
U8 *tend;
- UV uv = utf8_to_uv(s, &ulen, 0);
+ UV uv = utf8_to_uv_chk(s, &ulen, 0);
if (PL_op->op_private & OPpLOCALE) {
TAINT;
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, &ulen, 0)));
+ d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv_chk(s, &ulen, 0)));
s += ulen;
}
}
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, &ulen, 0)));
+ d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv_chk(s, &ulen, 0)));
s += ulen;
}
}
len = strend - s;
if (checksum) {
while (len-- > 0 && s < strend) {
- auint = utf8_to_uv((U8*)s, &along, 0);
+ auint = utf8_to_uv_chk((U8*)s, &along, 0);
s += along;
if (checksum > 32)
cdouble += (NV)auint;
EXTEND(SP, len);
EXTEND_MORTAL(len);
while (len-- > 0 && s < strend) {
- auint = utf8_to_uv((U8*)s, &along, 0);
+ auint = utf8_to_uv_chk((U8*)s, &along, 0);
s += along;
sv = NEWSV(37, 0);
sv_setuv(sv, (UV)auint);
U8 *s = (U8*)SvPVX(sv);
U8 *end = (U8*)SvPVX(sv) + SvCUR(sv);
if (s < end) {
- rev = utf8_to_uv(s, &len, 0);
+ rev = utf8_to_uv_chk(s, &len, 0);
s += len;
if (s < end) {
- ver = utf8_to_uv(s, &len, 0);
+ ver = utf8_to_uv_chk(s, &len, 0);
s += len;
if (s < end)
- sver = utf8_to_uv(s, &len, 0);
+ sver = utf8_to_uv_chk(s, &len, 0);
}
}
if (PERL_REVISION < rev
PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off);
PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len);
-PERL_CALLCONV UV Perl_utf8_to_uv(pTHX_ U8 *s, I32* retlen, bool checking);
+PERL_CALLCONV UV Perl_utf8_to_uv(pTHX_ U8 *s, I32* retlen);
+PERL_CALLCONV UV Perl_utf8_to_uv_chk(pTHX_ U8 *s, I32* retlen, bool checking);
PERL_CALLCONV U8* Perl_uv_to_utf8(pTHX_ U8 *d, UV uv);
PERL_CALLCONV void Perl_vivify_defelem(pTHX_ SV* sv);
PERL_CALLCONV void Perl_vivify_ref(pTHX_ SV* sv, U32 to_what);
default:
normal_default:
if ((*p & 0xc0) == 0xc0 && UTF) {
- ender = utf8_to_uv((U8*)p, &numlen, 0);
+ ender = utf8_to_uv_chk((U8*)p, &numlen, 0);
p += numlen;
}
else
namedclass = OOB_NAMEDCLASS;
if (!range)
rangebegin = PL_regcomp_parse;
- value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen, 0);
+ value = utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0);
PL_regcomp_parse += numlen;
if (value == '[')
namedclass = regpposixcc(value);
else if (value == '\\') {
- value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, &numlen, 0);
+ value = (U32)utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0);
PL_regcomp_parse += numlen;
/* Some compilers cannot handle switching on 64-bit integer
* values, therefore value cannot be an UV. Yes, this will
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case BOUNDUTF8:
- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0, 0) : '\n';
+ tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n';
tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == BOUNDUTF8 ?
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case NBOUNDUTF8:
- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0, 0) : '\n';
+ tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n';
tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == NBOUNDUTF8 ?
while (s < e) {
if (l >= PL_regeol)
sayNO;
- if (utf8_to_uv((U8*)s, 0, 0) != (c1 ?
+ if (utf8_to_uv_chk((U8*)s, 0, 0) != (c1 ?
toLOWER_utf8((U8*)l) :
toLOWER_LC_utf8((U8*)l)))
{
case NBOUNDUTF8:
/* was last char in word? */
ln = (locinput != PL_regbol)
- ? utf8_to_uv(reghop((U8*)locinput, -1), 0, 0) : PL_regprev;
+ ? utf8_to_uv_chk(reghop((U8*)locinput, -1), 0, 0) : PL_regprev;
if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
ln = isALNUM_uni(ln);
n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
break;
}
if (utf)
- iv = (IV)utf8_to_uv(vecstr, &ulen, 0);
+ iv = (IV)utf8_to_uv_chk(vecstr, &ulen, 0);
else {
iv = *vecstr;
ulen = 1;
break;
}
if (utf)
- uv = utf8_to_uv(vecstr, &ulen, 0);
+ uv = utf8_to_uv_chk(vecstr, &ulen, 0);
else {
uv = *vecstr;
ulen = 1;
I32 skip;
UV n;
if (utf)
- n = utf8_to_uv((U8*)start, &skip, 0);
+ n = utf8_to_uv_chk((U8*)start, &skip, 0);
else {
n = *(U8*)start;
skip = 1;
/* (now in tr/// code again) */
if (*s & 0x80 && thisutf) {
- (void)utf8_to_uv((U8*)s, &len, 0);
+ (void)utf8_to_uv_chk((U8*)s, &len, 0);
if (len == 1) {
/* illegal UTF8, make it valid */
char *old_pvx = SvPVX(sv);
=cut
*/
-bool
+bool
Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
{
U8* x=s;
}
/*
-=for apidoc Am|U8* s|utf8_to_uv|I32 *retlen|I32 checking
+=for apidoc Am|U8* s|utf8_to_uv_chk|I32 *retlen|I32 checking
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding; C<retlen> will be set to the
*/
UV
-Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking)
+Perl_utf8_to_uv_chk(pTHX_ U8* s, I32* retlen, bool checking)
{
UV uv = *s;
int len;
return 0;
}
- if (ckWARN_d(WARN_UTF8))
+ if (ckWARN_d(WARN_UTF8))
Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character");
if (retlen)
*retlen = 1;
return 0;
}
- if (ckWARN_d(WARN_UTF8))
+ if (ckWARN_d(WARN_UTF8))
Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character");
if (retlen)
*retlen -= len + 1;
return uv;
}
+/*
+=for apidoc Am|U8* s|utf8_to_uv|I32 *retlen
+
+Returns the character value of the first character in the string C<s>
+which is assumed to be in UTF8 encoding; C<retlen> will be set to the
+length, in bytes, of that character, and the pointer C<s> will be
+advanced to the end of the character.
+
+If C<s> does not point to a well-formed UTF8 character, an optional UTF8
+warning is produced.
+
+=cut
+*/
+
+UV
+Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen)
+{
+ return Perl_utf8_to_uv_chk(aTHX_ s, retlen, 0);
+}
+
/* utf8_distance(a,b) returns the number of UTF8 characters between
the pointers a and b */
if (c >= 0x80 &&
( (s >= send) || ((*s++ & 0xc0) != 0x80) || ((c & 0xfe) != 0xc2))) {
*len = -1;
- return 0;
+ return 0;
}
}
s = save;
*d++ = *s++;
else {
I32 ulen;
- *d++ = (U8)utf8_to_uv(s, &ulen, 0);
+ *d++ = (U8)utf8_to_uv(s, &ulen);
s += ulen;
}
}
if (!PL_utf8_toupper)
PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_toupper, p);
- return uv ? uv : utf8_to_uv(p,0,0);
+ return uv ? uv : utf8_to_uv_chk(p,0,0);
}
UV
if (!PL_utf8_totitle)
PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_totitle, p);
- return uv ? uv : utf8_to_uv(p,0,0);
+ return uv ? uv : utf8_to_uv_chk(p,0,0);
}
UV
if (!PL_utf8_tolower)
PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0);
uv = swash_fetch(PL_utf8_tolower, p);
- return uv ? uv : utf8_to_uv(p,0,0);
+ return uv ? uv : utf8_to_uv_chk(p,0,0);
}
/* a "swash" is a swatch hash */
{
SV* retval;
char tmpbuf[256];
- dSP;
+ dSP;
if (!gv_stashpv(pkg, 0)) { /* demand load utf8 */
ENTER;
if (PL_curcop == &PL_compiling) /* XXX ought to be handled by lex_start */
strncpy(tmpbuf, PL_tokenbuf, sizeof tmpbuf);
if (call_method("SWASHNEW", G_SCALAR))
- retval = newSVsv(*PL_stack_sp--);
+ retval = newSVsv(*PL_stack_sp--);
else
retval = &PL_sv_undef;
LEAVE;
PUSHMARK(SP);
EXTEND(SP,3);
PUSHs((SV*)sv);
- PUSHs(sv_2mortal(newSViv(utf8_to_uv(ptr, 0, 0) & ~(needents - 1))));
+ PUSHs(sv_2mortal(newSViv(utf8_to_uv_chk(ptr, 0, 0) & ~(needents - 1))));
PUSHs(sv_2mortal(newSViv(needents)));
PUTBACK;
if (call_method("SWASHGET", G_SCALAR))
- retval = newSVsv(*PL_stack_sp--);
+ retval = newSVsv(*PL_stack_sp--);
else
retval = &PL_sv_undef;
POPSTACK;