From: Nick Ing-Simmons Date: Sat, 30 Sep 2000 12:18:00 +0000 (+0000) Subject: Re-instate Perl_utf8_to_uv without checking parameter - added in change 7075. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=8e84507e42a00e64817e92106359a5275566dc19;p=p5sagit%2Fp5-mst-13.2.git Re-instate Perl_utf8_to_uv without checking parameter - added in change 7075. i.e. rename Simon's function to Perl_utf8_to_uv_chk, change all calls to it to use new name and add Perl_utf8_to_uv() as a wrapper which calls it passing 0 to checking to get the warning. p4raw-id: //depot/perl@7096 --- diff --git a/doop.c b/doop.c index 80cc0f6..b75ffaa 100644 --- a/doop.c +++ b/doop.c @@ -77,12 +77,12 @@ S_do_trans_simple(pTHX_ SV *sv) ulen = 1; /* Need to check this, otherwise 128..255 won't match */ - c = utf8_to_uv(s, &ulen, 0); + c = utf8_to_uv_chk(s, &ulen, 0); if (c < 0x100 && (ch = tbl[(short)c]) >= 0) { matches++; - if (ch < 0x80) + if (ch < 0x80) *d++ = ch; - else + else d = uv_to_utf8(d,ch); s += ulen; } @@ -125,7 +125,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ I32 ulen; ulen = 1; if (hasutf) - c = utf8_to_uv(s,&ulen, 0); + c = utf8_to_uv_chk(s,&ulen, 0); else c = *s; if (c < 0x100 && tbl[c] >= 0) @@ -222,7 +222,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ UV extra = none + 1; UV final; UV uv; - I32 isutf; + I32 isutf; I32 howmany; isutf = SvUTF8(sv); @@ -258,7 +258,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ i = UTF8SKIP(s); s += i; matches++; - if (i > 1 && !isutf++) + if (i > 1 && !isutf++) HALF_UTF8_UPGRADE(dstart,d); d = uv_to_utf8(d, final); } @@ -337,7 +337,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ if (squash) { UV puv = 0xfeedface; while (s < send) { - if (SvUTF8(sv)) + if (SvUTF8(sv)) uv = swash_fetch(rv, s); else { U8 tmpbuf[2]; @@ -354,7 +354,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ if (uv < none) { matches++; if (uv != puv) { - if ((uv & 0x80) && !isutf++) + if ((uv & 0x80) && !isutf++) HALF_UTF8_UPGRADE(dst,d); d = uv_to_utf8(d, uv); puv = uv; @@ -364,7 +364,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ } else if (uv == none) { /* "none" is unmapped character */ I32 ulen; - *d++ = (U8)utf8_to_uv(s, &ulen, 0); + *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0); s += ulen; puv = 0xfeedface; continue; @@ -384,7 +384,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ } else { while (s < send) { - if (SvUTF8(sv)) + if (SvUTF8(sv)) uv = swash_fetch(rv, s); else { U8 tmpbuf[2]; @@ -405,7 +405,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ } else if (uv == none) { /* "none" is unmapped character */ I32 ulen; - *d++ = (U8)utf8_to_uv(s, &ulen, 0); + *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0); s += ulen; continue; } @@ -435,7 +435,7 @@ Perl_do_trans(pTHX_ SV *sv) { dTHR; STRLEN len; - I32 hasutf = (PL_op->op_private & + I32 hasutf = (PL_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)); if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) @@ -547,7 +547,7 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size) if (offset < 0) return retnum; - if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ + if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ Perl_croak(aTHX_ "Illegal number of bits in vec"); if (SvUTF8(sv)) { @@ -625,7 +625,7 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size) ((UV) s[offset + 4] << 24) + ((UV) s[offset + 5] << 16); else - retnum = + retnum = ((UV) s[offset ] << 56) + ((UV) s[offset + 1] << 48) + ((UV) s[offset + 2] << 40) + @@ -708,9 +708,9 @@ Perl_do_vecset(pTHX_ SV *sv) if (offset < 0) Perl_croak(aTHX_ "Assigning to negative offset in vec"); size = LvTARGLEN(sv); - if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ + if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ Perl_croak(aTHX_ "Illegal number of bits in vec"); - + offset *= size; /* turn into bit offset */ len = (offset + size + 7) / 8; /* required number of bytes */ if (len > targlen) { @@ -718,7 +718,7 @@ Perl_do_vecset(pTHX_ SV *sv) (void)memzero((char *)(s + targlen), len - targlen + 1); SvCUR_set(targ, len); } - + if (size < 8) { mask = (1 << size) - 1; size = offset & 7; @@ -767,7 +767,7 @@ Perl_do_chop(pTHX_ register SV *astr, register SV *sv) STRLEN len; char *s; dTHR; - + if (SvTYPE(sv) == SVt_PVAV) { register I32 i; I32 max; @@ -901,7 +901,7 @@ Perl_do_chomp(pTHX_ register SV *sv) nope: SvSETMAGIC(sv); return count; -} +} void Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) @@ -969,10 +969,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) switch (optype) { case OP_BIT_AND: while (lulen && rulen) { - luc = utf8_to_uv((U8*)lc, &ulen, 0); + luc = utf8_to_uv_chk((U8*)lc, &ulen, 0); lc += ulen; lulen -= ulen; - ruc = utf8_to_uv((U8*)rc, &ulen, 0); + ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0); rc += ulen; rulen -= ulen; duc = luc & ruc; @@ -984,10 +984,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) break; case OP_BIT_XOR: while (lulen && rulen) { - luc = utf8_to_uv((U8*)lc, &ulen, 0); + luc = utf8_to_uv_chk((U8*)lc, &ulen, 0); lc += ulen; lulen -= ulen; - ruc = utf8_to_uv((U8*)rc, &ulen, 0); + ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0); rc += ulen; rulen -= ulen; duc = luc ^ ruc; @@ -996,10 +996,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) goto mop_up_utf; case OP_BIT_OR: while (lulen && rulen) { - luc = utf8_to_uv((U8*)lc, &ulen, 0); + luc = utf8_to_uv_chk((U8*)lc, &ulen, 0); lc += ulen; lulen -= ulen; - ruc = utf8_to_uv((U8*)rc, &ulen, 0); + ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0); rc += ulen; rulen -= ulen; duc = luc | ruc; @@ -1107,8 +1107,8 @@ Perl_do_kv(pTHX) I32 dokeys = (PL_op->op_type == OP_KEYS); I32 dovalues = (PL_op->op_type == OP_VALUES); I32 realhv = (SvTYPE(hv) == SVt_PVHV); - - if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV) + + if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV) dokeys = dovalues = TRUE; if (!hv) { diff --git a/embed.h b/embed.h index 7e030a9..404acfa 100644 --- a/embed.h +++ b/embed.h @@ -730,6 +730,7 @@ #define utf8_to_bytes Perl_utf8_to_bytes #define bytes_to_utf8 Perl_bytes_to_utf8 #define utf8_to_uv Perl_utf8_to_uv +#define utf8_to_uv_chk Perl_utf8_to_uv_chk #define uv_to_utf8 Perl_uv_to_utf8 #define vivify_defelem Perl_vivify_defelem #define vivify_ref Perl_vivify_ref @@ -2186,7 +2187,8 @@ #define utf8_hop(a,b) Perl_utf8_hop(aTHX_ a,b) #define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b) #define bytes_to_utf8(a,b) Perl_bytes_to_utf8(aTHX_ a,b) -#define utf8_to_uv(a,b,c) Perl_utf8_to_uv(aTHX_ a,b,c) +#define utf8_to_uv(a,b) Perl_utf8_to_uv(aTHX_ a,b) +#define utf8_to_uv_chk(a,b,c) Perl_utf8_to_uv_chk(aTHX_ a,b,c) #define uv_to_utf8(a,b) Perl_uv_to_utf8(aTHX_ a,b) #define vivify_defelem(a) Perl_vivify_defelem(aTHX_ a) #define vivify_ref(a,b) Perl_vivify_ref(aTHX_ a,b) @@ -4286,6 +4288,8 @@ #define bytes_to_utf8 Perl_bytes_to_utf8 #define Perl_utf8_to_uv CPerlObj::Perl_utf8_to_uv #define utf8_to_uv Perl_utf8_to_uv +#define Perl_utf8_to_uv_chk CPerlObj::Perl_utf8_to_uv_chk +#define utf8_to_uv_chk Perl_utf8_to_uv_chk #define Perl_uv_to_utf8 CPerlObj::Perl_uv_to_utf8 #define uv_to_utf8 Perl_uv_to_utf8 #define Perl_vivify_defelem CPerlObj::Perl_vivify_defelem diff --git a/embed.pl b/embed.pl index f84be4a..5ae80eb 100755 --- a/embed.pl +++ b/embed.pl @@ -2074,7 +2074,8 @@ Ap |I32 |utf8_distance |U8 *a|U8 *b Ap |U8* |utf8_hop |U8 *s|I32 off ApM |U8* |utf8_to_bytes |U8 *s|STRLEN *len ApM |U8* |bytes_to_utf8 |U8 *s|STRLEN *len -Ap |UV |utf8_to_uv |U8 *s|I32* retlen|bool checking +Ap |UV |utf8_to_uv |U8 *s|I32* retlen +Ap |UV |utf8_to_uv_chk |U8 *s|I32* retlen|bool checking Ap |U8* |uv_to_utf8 |U8 *d|UV uv p |void |vivify_defelem |SV* sv p |void |vivify_ref |SV* sv|U32 to_what diff --git a/global.sym b/global.sym index 080d78c..0dea03e 100644 --- a/global.sym +++ b/global.sym @@ -466,6 +466,7 @@ Perl_utf8_hop Perl_utf8_to_bytes Perl_bytes_to_utf8 Perl_utf8_to_uv +Perl_utf8_to_uv_chk Perl_uv_to_utf8 Perl_warn Perl_vwarn diff --git a/handy.h b/handy.h index c240c42..f0e39af 100644 --- a/handy.h +++ b/handy.h @@ -48,10 +48,10 @@ Null SV pointer. just figure out all the headers such a test needs. Andy Dougherty August 1996 */ -/* bool is built-in for g++-2.6.3 and later, which might be used +/* bool is built-in for g++-2.6.3 and later, which might be used for extensions. <_G_config.h> defines _G_HAVE_BOOL, but we can't be sure _G_config.h will be included before this file. _G_config.h - also defines _G_HAVE_BOOL for both gcc and g++, but only g++ + also defines _G_HAVE_BOOL for both gcc and g++, but only g++ actually has bool. Hence, _G_HAVE_BOOL is pretty useless for us. g++ can be identified by __GNUG__. Andy Dougherty February 2000 @@ -101,8 +101,8 @@ Null SV pointer. Similarly, there is no guarantee that I16 and U16 have exactly 16 bits. - For dealing with issues that may arise from various 32/64-bit - systems, we will ask Configure to check out + For dealing with issues that may arise from various 32/64-bit + systems, we will ask Configure to check out SHORTSIZE == sizeof(short) INTSIZE == sizeof(int) @@ -448,21 +448,21 @@ Converts the specified character to lowercase. #define isPSXSPC_utf8(c) (isSPACE_utf8(c) ||(c) == '\f') #define isBLANK_utf8(c) isBLANK(c) /* could be wrong */ -#define isALNUM_LC_utf8(p) isALNUM_LC_uni(utf8_to_uv(p, 0, 0)) -#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uni(utf8_to_uv(p, 0, 0)) -#define isALPHA_LC_utf8(p) isALPHA_LC_uni(utf8_to_uv(p, 0, 0)) -#define isSPACE_LC_utf8(p) isSPACE_LC_uni(utf8_to_uv(p, 0, 0)) -#define isDIGIT_LC_utf8(p) isDIGIT_LC_uni(utf8_to_uv(p, 0, 0)) -#define isUPPER_LC_utf8(p) isUPPER_LC_uni(utf8_to_uv(p, 0, 0)) -#define isLOWER_LC_utf8(p) isLOWER_LC_uni(utf8_to_uv(p, 0, 0)) -#define isALNUMC_LC_utf8(p) isALNUMC_LC_uni(utf8_to_uv(p, 0, 0)) -#define isCNTRL_LC_utf8(p) isCNTRL_LC_uni(utf8_to_uv(p, 0, 0)) -#define isGRAPH_LC_utf8(p) isGRAPH_LC_uni(utf8_to_uv(p, 0, 0)) -#define isPRINT_LC_utf8(p) isPRINT_LC_uni(utf8_to_uv(p, 0, 0)) -#define isPUNCT_LC_utf8(p) isPUNCT_LC_uni(utf8_to_uv(p, 0, 0)) -#define toUPPER_LC_utf8(p) toUPPER_LC_uni(utf8_to_uv(p, 0, 0)) -#define toTITLE_LC_utf8(p) toTITLE_LC_uni(utf8_to_uv(p, 0, 0)) -#define toLOWER_LC_utf8(p) toLOWER_LC_uni(utf8_to_uv(p, 0, 0)) +#define isALNUM_LC_utf8(p) isALNUM_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isALPHA_LC_utf8(p) isALPHA_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isSPACE_LC_utf8(p) isSPACE_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isDIGIT_LC_utf8(p) isDIGIT_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isUPPER_LC_utf8(p) isUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isLOWER_LC_utf8(p) isLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isALNUMC_LC_utf8(p) isALNUMC_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isCNTRL_LC_utf8(p) isCNTRL_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isGRAPH_LC_utf8(p) isGRAPH_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isPRINT_LC_utf8(p) isPRINT_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define isPUNCT_LC_utf8(p) isPUNCT_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define toUPPER_LC_utf8(p) toUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define toTITLE_LC_utf8(p) toTITLE_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define toLOWER_LC_utf8(p) toLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0)) #define isPSXSPC_LC_utf8(c) (isSPACE_LC_utf8(c) ||(c) == '\f') #define isBLANK_LC_utf8(c) isBLANK(c) /* could be wrong */ @@ -484,7 +484,7 @@ typedef U16 line_t; #endif -/* +/* XXX LEAKTEST doesn't really work in perl5. There are direct calls to safemalloc() in the source, so LEAKTEST won't pick them up. (The main "offenders" are extensions.) @@ -501,7 +501,7 @@ typedef U16 line_t; Creates a new SV. A non-zero C parameter indicates the number of bytes of preallocated string space the SV should have. An extra byte for a tailing NUL is also reserved. (SvPOK is not set for the SV even if string -space is allocated.) The reference count for the new SV is set to 1. +space is allocated.) The reference count for the new SV is set to 1. C is an integer id between 0 and 1299 (used to identify leaks). =for apidoc Am|void|New|int id|void* ptr|int nitems|type diff --git a/objXSUB.h b/objXSUB.h index 00184c9..bc04f03 100644 --- a/objXSUB.h +++ b/objXSUB.h @@ -1873,6 +1873,10 @@ #define Perl_utf8_to_uv pPerl->Perl_utf8_to_uv #undef utf8_to_uv #define utf8_to_uv Perl_utf8_to_uv +#undef Perl_utf8_to_uv_chk +#define Perl_utf8_to_uv_chk pPerl->Perl_utf8_to_uv_chk +#undef utf8_to_uv_chk +#define utf8_to_uv_chk Perl_utf8_to_uv_chk #undef Perl_uv_to_utf8 #define Perl_uv_to_utf8 pPerl->Perl_uv_to_utf8 #undef uv_to_utf8 diff --git a/op.c b/op.c index d24396a..4856d98 100644 --- a/op.c +++ b/op.c @@ -2656,7 +2656,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) qsort(cp, i, sizeof(U8*), utf8compare); for (j = 0; j < i; j++) { U8 *s = cp[j]; - UV val = utf8_to_uv(s, &ulen, 0); + UV val = utf8_to_uv_chk(s, &ulen, 0); s += ulen; diff = val - nextmin; if (diff > 0) { @@ -2669,7 +2669,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } } if (*s == 0xff) - val = utf8_to_uv(s+1, &ulen, 0); + val = utf8_to_uv_chk(s+1, &ulen, 0); if (val >= nextmin) nextmin = val + 1; } @@ -2696,10 +2696,10 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) while (t < tend || tfirst <= tlast) { /* see if we need more "t" chars */ if (tfirst > tlast) { - tfirst = (I32)utf8_to_uv(t, &ulen, 0); + tfirst = (I32)utf8_to_uv_chk(t, &ulen, 0); t += ulen; if (t < tend && *t == 0xff) { /* illegal utf8 val indicates range */ - tlast = (I32)utf8_to_uv(++t, &ulen, 0); + tlast = (I32)utf8_to_uv_chk(++t, &ulen, 0); t += ulen; } else @@ -2709,10 +2709,10 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) /* now see if we need more "r" chars */ if (rfirst > rlast) { if (r < rend) { - rfirst = (I32)utf8_to_uv(r, &ulen, 0); + rfirst = (I32)utf8_to_uv_chk(r, &ulen, 0); r += ulen; if (r < rend && *r == 0xff) { /* illegal utf8 val indicates range */ - rlast = (I32)utf8_to_uv(++r, &ulen, 0); + rlast = (I32)utf8_to_uv_chk(++r, &ulen, 0); r += ulen; } else diff --git a/perlapi.c b/perlapi.c index 614f94f..39a13ba 100644 --- a/perlapi.c +++ b/perlapi.c @@ -3380,9 +3380,16 @@ Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN *len) #undef Perl_utf8_to_uv UV -Perl_utf8_to_uv(pTHXo_ U8 *s, I32* retlen, bool checking) +Perl_utf8_to_uv(pTHXo_ U8 *s, I32* retlen) { - return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, retlen, checking); + return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, retlen); +} + +#undef Perl_utf8_to_uv_chk +UV +Perl_utf8_to_uv_chk(pTHXo_ U8 *s, I32* retlen, bool checking) +{ + return ((CPerlObj*)pPerl)->Perl_utf8_to_uv_chk(s, retlen, checking); } #undef Perl_uv_to_utf8 diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 5d5bc5f..78d6fa4 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -2355,19 +2355,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B -in the C enum. Test these flags with the C macro. +Returns the type of the SV. See C. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B +in the C enum. Test these flags with the C macro. =for hackers Found in file sv.h diff --git a/pp.c b/pp.c index d4a1df0..01cb070 100644 --- a/pp.c +++ b/pp.c @@ -2195,7 +2195,7 @@ PP(pp_ord) I32 retlen; if ((*tmps & 0x80) && DO_UTF8(tmpsv)) - value = utf8_to_uv(tmps, &retlen, 0); + value = utf8_to_uv_chk(tmps, &retlen, 0); else value = (UV)(*tmps & 255); XPUSHu(value); @@ -2262,7 +2262,7 @@ PP(pp_ucfirst) I32 ulen; U8 tmpbuf[UTF8_MAXLEN]; U8 *tend; - UV uv = utf8_to_uv(s, &ulen, 0); + UV uv = utf8_to_uv_chk(s, &ulen, 0); if (PL_op->op_private & OPpLOCALE) { TAINT; @@ -2321,7 +2321,7 @@ PP(pp_lcfirst) I32 ulen; U8 tmpbuf[UTF8_MAXLEN]; U8 *tend; - UV uv = utf8_to_uv(s, &ulen, 0); + UV uv = utf8_to_uv_chk(s, &ulen, 0); if (PL_op->op_private & OPpLOCALE) { TAINT; @@ -2398,7 +2398,7 @@ PP(pp_uc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, &ulen, 0))); + d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv_chk(s, &ulen, 0))); s += ulen; } } @@ -2472,7 +2472,7 @@ PP(pp_lc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, &ulen, 0))); + d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv_chk(s, &ulen, 0))); s += ulen; } } @@ -3614,7 +3614,7 @@ PP(pp_unpack) len = strend - s; if (checksum) { while (len-- > 0 && s < strend) { - auint = utf8_to_uv((U8*)s, &along, 0); + auint = utf8_to_uv_chk((U8*)s, &along, 0); s += along; if (checksum > 32) cdouble += (NV)auint; @@ -3626,7 +3626,7 @@ PP(pp_unpack) EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0 && s < strend) { - auint = utf8_to_uv((U8*)s, &along, 0); + auint = utf8_to_uv_chk((U8*)s, &along, 0); s += along; sv = NEWSV(37, 0); sv_setuv(sv, (UV)auint); diff --git a/pp_ctl.c b/pp_ctl.c index 3cc74e5..254cce8 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -2959,13 +2959,13 @@ PP(pp_require) U8 *s = (U8*)SvPVX(sv); U8 *end = (U8*)SvPVX(sv) + SvCUR(sv); if (s < end) { - rev = utf8_to_uv(s, &len, 0); + rev = utf8_to_uv_chk(s, &len, 0); s += len; if (s < end) { - ver = utf8_to_uv(s, &len, 0); + ver = utf8_to_uv_chk(s, &len, 0); s += len; if (s < end) - sver = utf8_to_uv(s, &len, 0); + sver = utf8_to_uv_chk(s, &len, 0); } } if (PERL_REVISION < rev diff --git a/proto.h b/proto.h index 79b584d..604a664 100644 --- a/proto.h +++ b/proto.h @@ -809,7 +809,8 @@ PERL_CALLCONV I32 Perl_utf8_distance(pTHX_ U8 *a, U8 *b); PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off); PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len); PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len); -PERL_CALLCONV UV Perl_utf8_to_uv(pTHX_ U8 *s, I32* retlen, bool checking); +PERL_CALLCONV UV Perl_utf8_to_uv(pTHX_ U8 *s, I32* retlen); +PERL_CALLCONV UV Perl_utf8_to_uv_chk(pTHX_ U8 *s, I32* retlen, bool checking); PERL_CALLCONV U8* Perl_uv_to_utf8(pTHX_ U8 *d, UV uv); PERL_CALLCONV void Perl_vivify_defelem(pTHX_ SV* sv); PERL_CALLCONV void Perl_vivify_ref(pTHX_ SV* sv, U32 to_what); diff --git a/regcomp.c b/regcomp.c index 766b84c..e7042ea 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2884,7 +2884,7 @@ tryagain: default: normal_default: if ((*p & 0xc0) == 0xc0 && UTF) { - ender = utf8_to_uv((U8*)p, &numlen, 0); + ender = utf8_to_uv_chk((U8*)p, &numlen, 0); p += numlen; } else @@ -3638,12 +3638,12 @@ S_regclassutf8(pTHX) namedclass = OOB_NAMEDCLASS; if (!range) rangebegin = PL_regcomp_parse; - value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen, 0); + value = utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0); PL_regcomp_parse += numlen; if (value == '[') namedclass = regpposixcc(value); else if (value == '\\') { - value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, &numlen, 0); + value = (U32)utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0); PL_regcomp_parse += numlen; /* Some compilers cannot handle switching on 64-bit integer * values, therefore value cannot be an UV. Yes, this will diff --git a/regexec.c b/regexec.c index 990791b..ea52383 100644 --- a/regexec.c +++ b/regexec.c @@ -914,7 +914,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUNDUTF8: - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0, 0) : '\n'; + tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n'; tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == BOUNDUTF8 ? @@ -950,7 +950,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUNDUTF8: - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0, 0) : '\n'; + tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n'; tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == NBOUNDUTF8 ? @@ -1995,7 +1995,7 @@ S_regmatch(pTHX_ regnode *prog) while (s < e) { if (l >= PL_regeol) sayNO; - if (utf8_to_uv((U8*)s, 0, 0) != (c1 ? + if (utf8_to_uv_chk((U8*)s, 0, 0) != (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l))) { @@ -2133,7 +2133,7 @@ S_regmatch(pTHX_ regnode *prog) case NBOUNDUTF8: /* was last char in word? */ ln = (locinput != PL_regbol) - ? utf8_to_uv(reghop((U8*)locinput, -1), 0, 0) : PL_regprev; + ? utf8_to_uv_chk(reghop((U8*)locinput, -1), 0, 0) : PL_regprev; if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) { ln = isALNUM_uni(ln); n = swash_fetch(PL_utf8_alnum, (U8*)locinput); diff --git a/sv.c b/sv.c index 561d9d9..d584c54 100644 --- a/sv.c +++ b/sv.c @@ -6318,7 +6318,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV break; } if (utf) - iv = (IV)utf8_to_uv(vecstr, &ulen, 0); + iv = (IV)utf8_to_uv_chk(vecstr, &ulen, 0); else { iv = *vecstr; ulen = 1; @@ -6400,7 +6400,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV break; } if (utf) - uv = utf8_to_uv(vecstr, &ulen, 0); + uv = utf8_to_uv_chk(vecstr, &ulen, 0); else { uv = *vecstr; ulen = 1; diff --git a/toke.c b/toke.c index 783f282..9834d90 100644 --- a/toke.c +++ b/toke.c @@ -812,7 +812,7 @@ Perl_str_to_version(pTHX_ SV *sv) I32 skip; UV n; if (utf) - n = utf8_to_uv((U8*)start, &skip, 0); + n = utf8_to_uv_chk((U8*)start, &skip, 0); else { n = *(U8*)start; skip = 1; @@ -1323,7 +1323,7 @@ S_scan_const(pTHX_ char *start) /* (now in tr/// code again) */ if (*s & 0x80 && thisutf) { - (void)utf8_to_uv((U8*)s, &len, 0); + (void)utf8_to_uv_chk((U8*)s, &len, 0); if (len == 1) { /* illegal UTF8, make it valid */ char *old_pvx = SvPVX(sv); diff --git a/utf8.c b/utf8.c index d23c9f7..3ab402c 100644 --- a/utf8.c +++ b/utf8.c @@ -143,7 +143,7 @@ string, false otherwise. =cut */ -bool +bool Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) { U8* x=s; @@ -159,7 +159,7 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) } /* -=for apidoc Am|U8* s|utf8_to_uv|I32 *retlen|I32 checking +=for apidoc Am|U8* s|utf8_to_uv_chk|I32 *retlen|I32 checking Returns the character value of the first character in the string C which is assumed to be in UTF8 encoding; C will be set to the @@ -176,7 +176,7 @@ warning is produced. */ UV -Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking) +Perl_utf8_to_uv_chk(pTHX_ U8* s, I32* retlen, bool checking) { UV uv = *s; int len; @@ -192,7 +192,7 @@ Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking) return 0; } - if (ckWARN_d(WARN_UTF8)) + if (ckWARN_d(WARN_UTF8)) Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character"); if (retlen) *retlen = 1; @@ -219,7 +219,7 @@ Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking) return 0; } - if (ckWARN_d(WARN_UTF8)) + if (ckWARN_d(WARN_UTF8)) Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character"); if (retlen) *retlen -= len + 1; @@ -231,6 +231,26 @@ Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking) return uv; } +/* +=for apidoc Am|U8* s|utf8_to_uv|I32 *retlen + +Returns the character value of the first character in the string C +which is assumed to be in UTF8 encoding; C will be set to the +length, in bytes, of that character, and the pointer C will be +advanced to the end of the character. + +If C does not point to a well-formed UTF8 character, an optional UTF8 +warning is produced. + +=cut +*/ + +UV +Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen) +{ + return Perl_utf8_to_uv_chk(aTHX_ s, retlen, 0); +} + /* utf8_distance(a,b) returns the number of UTF8 characters between the pointers a and b */ @@ -302,7 +322,7 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len) if (c >= 0x80 && ( (s >= send) || ((*s++ & 0xc0) != 0x80) || ((c & 0xfe) != 0xc2))) { *len = -1; - return 0; + return 0; } } s = save; @@ -311,7 +331,7 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len) *d++ = *s++; else { I32 ulen; - *d++ = (U8)utf8_to_uv(s, &ulen, 0); + *d++ = (U8)utf8_to_uv(s, &ulen); s += ulen; } } @@ -839,7 +859,7 @@ Perl_to_utf8_upper(pTHX_ U8 *p) if (!PL_utf8_toupper) PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0); uv = swash_fetch(PL_utf8_toupper, p); - return uv ? uv : utf8_to_uv(p,0,0); + return uv ? uv : utf8_to_uv_chk(p,0,0); } UV @@ -850,7 +870,7 @@ Perl_to_utf8_title(pTHX_ U8 *p) if (!PL_utf8_totitle) PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0); uv = swash_fetch(PL_utf8_totitle, p); - return uv ? uv : utf8_to_uv(p,0,0); + return uv ? uv : utf8_to_uv_chk(p,0,0); } UV @@ -861,7 +881,7 @@ Perl_to_utf8_lower(pTHX_ U8 *p) if (!PL_utf8_tolower) PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0); uv = swash_fetch(PL_utf8_tolower, p); - return uv ? uv : utf8_to_uv(p,0,0); + return uv ? uv : utf8_to_uv_chk(p,0,0); } /* a "swash" is a swatch hash */ @@ -871,7 +891,7 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none) { SV* retval; char tmpbuf[256]; - dSP; + dSP; if (!gv_stashpv(pkg, 0)) { /* demand load utf8 */ ENTER; @@ -895,7 +915,7 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none) if (PL_curcop == &PL_compiling) /* XXX ought to be handled by lex_start */ strncpy(tmpbuf, PL_tokenbuf, sizeof tmpbuf); if (call_method("SWASHNEW", G_SCALAR)) - retval = newSVsv(*PL_stack_sp--); + retval = newSVsv(*PL_stack_sp--); else retval = &PL_sv_undef; LEAVE; @@ -951,11 +971,11 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr) PUSHMARK(SP); EXTEND(SP,3); PUSHs((SV*)sv); - PUSHs(sv_2mortal(newSViv(utf8_to_uv(ptr, 0, 0) & ~(needents - 1)))); + PUSHs(sv_2mortal(newSViv(utf8_to_uv_chk(ptr, 0, 0) & ~(needents - 1)))); PUSHs(sv_2mortal(newSViv(needents))); PUTBACK; if (call_method("SWASHGET", G_SCALAR)) - retval = newSVsv(*PL_stack_sp--); + retval = newSVsv(*PL_stack_sp--); else retval = &PL_sv_undef; POPSTACK;