X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=sv.c;h=c2c1cc03d073c8d2dfb56dbfd010e8fc4d52e5e3;hb=917b24923c0362e8f2d8d1f3f612150902a8f3eb;hp=5f5d2819e4708e55bcd9708dbe10039ca3930275;hpb=bb2899ba824e18be66f3ca9d67696457ff235d98;p=p5sagit%2Fp5-mst-13.2.git diff --git a/sv.c b/sv.c index 5f5d281..c2c1cc0 100644 --- a/sv.c +++ b/sv.c @@ -1,6 +1,6 @@ /* sv.c * - * Copyright (c) 1991-2000, Larry Wall + * Copyright (c) 1991-2001, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -834,7 +834,7 @@ S_more_xpvbm(pTHX) #define new_XPVHV() my_safemalloc(sizeof(XPVHV)) #define del_XPVHV(p) my_safefree(p) - + #define new_XPVMG() my_safemalloc(sizeof(XPVMG)) #define del_XPVMG(p) my_safefree(p) @@ -872,7 +872,7 @@ S_more_xpvbm(pTHX) #define new_XPVHV() (void*)new_xpvhv() #define del_XPVHV(p) del_xpvhv((XPVHV *)p) - + #define new_XPVMG() (void*)new_xpvmg() #define del_XPVMG(p) del_xpvmg((XPVMG *)p) @@ -886,10 +886,10 @@ S_more_xpvbm(pTHX) #define new_XPVGV() my_safemalloc(sizeof(XPVGV)) #define del_XPVGV(p) my_safefree(p) - + #define new_XPVFM() my_safemalloc(sizeof(XPVFM)) #define del_XPVFM(p) my_safefree(p) - + #define new_XPVIO() my_safemalloc(sizeof(XPVIO)) #define del_XPVIO(p) my_safefree(p) @@ -913,6 +913,10 @@ Perl_sv_upgrade(pTHX_ register SV *sv, U32 mt) MAGIC* magic; HV* stash; + if (mt != SVt_PV && SvREADONLY(sv) && SvFAKE(sv)) { + sv_force_normal(sv); + } + if (SvTYPE(sv) == mt) return TRUE; @@ -1281,11 +1285,8 @@ Perl_sv_setiv(pTHX_ register SV *sv, IV i) case SVt_PVCV: case SVt_PVFM: case SVt_PVIO: - { - dTHR; - Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0), - PL_op_desc[PL_op->op_type]); - } + Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0), + PL_op_desc[PL_op->op_type]); } (void)SvIOK_only(sv); /* validate number */ SvIVX(sv) = i; @@ -1319,6 +1320,18 @@ See C. void Perl_sv_setuv(pTHX_ register SV *sv, UV u) { + /* With these two if statements: + u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 + + without + u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 + + If you wish to remove them, please benchmark to see what the effect is + */ + if (u <= (UV)IV_MAX) { + sv_setiv(sv, (IV)u); + return; + } sv_setiv(sv, 0); SvIsUV_on(sv); SvUVX(sv) = u; @@ -1335,7 +1348,21 @@ Like C, but also handles 'set' magic. void Perl_sv_setuv_mg(pTHX_ register SV *sv, UV u) { - sv_setuv(sv,u); + /* With these two if statements: + u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 + + without + u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 + + If you wish to remove them, please benchmark to see what the effect is + */ + if (u <= (UV)IV_MAX) { + sv_setiv(sv, (IV)u); + } else { + sv_setiv(sv, 0); + SvIsUV_on(sv); + sv_setuv(sv,u); + } SvSETMAGIC(sv); } @@ -1369,11 +1396,8 @@ Perl_sv_setnv(pTHX_ register SV *sv, NV num) case SVt_PVCV: case SVt_PVFM: case SVt_PVIO: - { - dTHR; - Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0), - PL_op_name[PL_op->op_type]); - } + Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0), + PL_op_name[PL_op->op_type]); } SvNVX(sv) = num; (void)SvNOK_only(sv); /* validate number */ @@ -1398,7 +1422,6 @@ Perl_sv_setnv_mg(pTHX_ register SV *sv, NV num) STATIC void S_not_a_number(pTHX_ SV *sv) { - dTHR; char tmpbuf[64]; char *d = tmpbuf; char *s; @@ -1452,16 +1475,225 @@ S_not_a_number(pTHX_ SV *sv) "Argument \"%s\" isn't numeric", tmpbuf); } -/* the number can be converted to integer with atol() or atoll() */ -#define IS_NUMBER_TO_INT_BY_ATOL 0x01 -#define IS_NUMBER_TO_INT_BY_ATOF 0x02 /* atol() may be != atof() */ -#define IS_NUMBER_NOT_IV 0x04 /* (IV)atof() may be != atof() */ -#define IS_NUMBER_NEG 0x08 /* not good to cache UV */ -#define IS_NUMBER_INFINITY 0x10 /* this is big */ +/* the number can be converted to integer with atol() or atoll() although */ +#define IS_NUMBER_TO_INT_BY_ATOL 0x01 /* integer (may have decimals) */ +#define IS_NUMBER_TO_INT_BY_STRTOL 0x02 /* it may exceed IV_MAX */ +#define IS_NUMBER_TO_INT_BY_ATOF 0x04 /* seen something like 123e4 */ +#define IS_NUMBER_LONGER_THAN_IV_MAX 0x08 /* more digits than IV_MAX */ +#define IS_NUMBER_AS_LONG_AS_IV_MAX 0x10 /* may(be not) larger than IV_MAX */ +#define IS_NUMBER_NOT_INT 0x20 /* seen a decimal point or e */ +#define IS_NUMBER_NEG 0x40 /* seen a leading - */ +#define IS_NUMBER_INFINITY 0x80 /* /^\s*-?Infinity\s*$/i */ /* Actually, ISO C leaves conversion of UV to IV undefined, but until proven guilty, assume that things are not that bad... */ +/* As 64 bit platforms often have an NV that doesn't preserve all bits of + an IV (an assumption perl has been based on to date) it becomes necessary + to remove the assumption that the NV always carries enough precision to + recreate the IV whenever needed, and that the NV is the canonical form. + Instead, IV/UV and NV need to be given equal rights. So as to not lose + precision as an side effect of conversion (which would lead to insanity + and the dragon(s) in t/op/numconvert.t getting very angry) the intent is + 1) to distinguish between IV/UV/NV slots that have cached a valid + conversion where precision was lost and IV/UV/NV slots that have a + valid conversion which has lost no precision + 2) to ensure that if a numeric conversion to one form is request that + would lose precision, the precise conversion (or differently + imprecise conversion) is also performed and cached, to prevent + requests for different numeric formats on the same SV causing + lossy conversion chains. (lossless conversion chains are perfectly + acceptable (still)) + + + flags are used: + SvIOKp is true if the IV slot contains a valid value + SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true) + SvNOKp is true if the NV slot contains a valid value + SvNOK is true only if the NV value is accurate + + so + while converting from PV to NV check to see if converting that NV to an + IV(or UV) would lose accuracy over a direct conversion from PV to + IV(or UV). If it would, cache both conversions, return NV, but mark + SV as IOK NOKp (ie not NOK). + + while converting from PV to IV check to see if converting that IV to an + NV would lose accuracy over a direct conversion from PV to NV. If it + would, cache both conversions, flag similarly. + + Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite + correctly because if IV & NV were set NV *always* overruled. + Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flags meaning + changes - now IV and NV together means that the two are interchangeable + SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX; + + The benefit of this is operations such as pp_add know that if SvIOK is + true for both left and right operands, then integer addition can be + used instead of floating point. (for cases where the result won't + overflow) Before, floating point was always used, which could lead to + loss of precision compared with integer addition. + + * making IV and NV equal status should make maths accurate on 64 bit + platforms + * may speed up maths somewhat if pp_add and friends start to use + integers when possible instead of fp. (hopefully the overhead in + looking for SvIOK and checking for overflow will not outweigh the + fp to integer speedup) + * will slow down integer operations (callers of SvIV) on "inaccurate" + values, as the change from SvIOK to SvIOKp will cause a call into + sv_2iv each time rather than a macro access direct to the IV slot + * should speed up number->string conversion on integers as IV is + favoured when IV and NV equally accurate + + #################################################################### + You had better be using SvIOK_notUV if you want an IV for arithmetic + SvIOK is true if (IV or UV), so you might be getting (IV)SvUV + SvUOK is true iff UV. + #################################################################### + + Your mileage will vary depending your CPUs relative fp to integer + performance ratio. +*/ + +#ifndef NV_PRESERVES_UV +#define IS_NUMBER_UNDERFLOW_IV 1 +#define IS_NUMBER_UNDERFLOW_UV 2 +#define IS_NUMBER_IV_AND_UV 2 +#define IS_NUMBER_OVERFLOW_IV 4 +#define IS_NUMBER_OVERFLOW_UV 5 +/* Hopefully your optimiser will consider inlining these two functions. */ +STATIC int +S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { + NV nv = SvNVX(sv); /* Code simpler and had compiler problems if */ + UV nv_as_uv = U_V(nv); /* these are not in simple variables. */ + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2inuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), nv, (UV)numtype)); + if (nv_as_uv <= (UV)IV_MAX) { + (void)SvIOKp_on(sv); + (void)SvNOKp_on(sv); + /* Within suitable range to fit in an IV, atol won't overflow */ + /* XXX quite sure? Is that your final answer? not really, I'm + trusting that nv_as_uv to round down if NV is (IV_MAX + 1) */ + SvIVX(sv) = (IV)Atol(SvPVX(sv)); + if (numtype & IS_NUMBER_NOT_INT) { + /* I believe that even if the original PV had decimals, they + are lost beyond the limit of the FP precision. + However, neither is canonical, so both only get p flags. + NWC, 2000/11/25 */ + /* Both already have p flags, so do nothing */ + } else if (SvIVX(sv) == I_V(nv)) { + SvNOK_on(sv); + SvIOK_on(sv); + } else { + SvIOK_on(sv); + /* It had no "." so it must be integer. assert (get in here from + sv_2iv and sv_2uv only for ndef HAS_STRTOL and + IS_NUMBER_AS_LONG_AS_IV_MAX) or my logic is faulty and all + conversion routines need audit. */ + } + return nv < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV; + } + /* between IV_MAX and NV(UV_MAX). Could be slightly> UV_MAX */ + (void)SvIOKp_on(sv); + (void)SvNOKp_on(sv); +#ifdef HAS_STRTOUL + { + int save_errno = errno; + errno = 0; + SvUVX(sv) = Strtoul(SvPVX(sv), Null(char**), 10); + if (errno == 0) { + if (numtype & IS_NUMBER_NOT_INT) { + /* UV and NV both imprecise. */ + SvIsUV_on(sv); + } else if (SvUVX(sv) == nv_as_uv && SvUVX(sv) != UV_MAX) { + SvNOK_on(sv); + SvIOK_on(sv); + SvIsUV_on(sv); + } else { + SvIOK_on(sv); + SvIsUV_on(sv); + } + errno = save_errno; + return IS_NUMBER_OVERFLOW_IV; + } + errno = save_errno; + SvNOK_on(sv); + /* Must have just overflowed UV, but not enough that an NV could spot + this.. */ + return IS_NUMBER_OVERFLOW_UV; + } +#else + /* We've just lost integer precision, nothing we could do. */ + SvUVX(sv) = nv_as_uv; + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2niuv_non UV? '%s', UV=0x%"UVxf" NV=%g U_V(NV)=0x%"UVxf" inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), nv, nv_as_uv, (UV)numtype)); + /* UV and NV slots equally valid only if we have casting symmetry. */ + if (numtype & IS_NUMBER_NOT_INT) { + SvIsUV_on(sv); + } else if (SvUVX(sv) == nv_as_uv && SvUVX(sv) != UV_MAX) { + /* UV_MAX can cast up to NV (UV_MAX+1), that NV casts down to UV_MAX + UV_MAX ought to be 0xFF...FFF which won't preserve (We only + get to this point if NVs don't preserve UVs) */ + SvNOK_on(sv); + SvIOK_on(sv); + SvIsUV_on(sv); + } else { + /* As above, I believe UV at least as good as NV */ + SvIsUV_on(sv); + } +#endif /* HAS_STRTOUL */ + return IS_NUMBER_OVERFLOW_IV; +} + +/* For sv_2nv these three cases are "SvNOK and don't bother casting" */ +STATIC int +S_sv_2iuv_non_preserve (pTHX_ register SV *sv, I32 numtype) +{ + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); + if (SvNVX(sv) < (NV)IV_MIN) { + (void)SvIOKp_on(sv); + (void)SvNOK_on(sv); + SvIVX(sv) = IV_MIN; + return IS_NUMBER_UNDERFLOW_IV; + } + if (SvNVX(sv) > (NV)UV_MAX) { + (void)SvIOKp_on(sv); + (void)SvNOK_on(sv); + SvIsUV_on(sv); + SvUVX(sv) = UV_MAX; + return IS_NUMBER_OVERFLOW_UV; + } + if (!(numtype & (IS_NUMBER_TO_INT_BY_ATOL | IS_NUMBER_TO_INT_BY_STRTOL))) { + (void)SvIOKp_on(sv); + (void)SvNOK_on(sv); + /* Can't use strtol etc to convert this string */ + if (SvNVX(sv) <= (UV)IV_MAX) { + SvIVX(sv) = I_V(SvNVX(sv)); + if ((NV)(SvIVX(sv)) == SvNVX(sv)) { + SvIOK_on(sv); /* Integer is precise. NOK, IOK */ + } else { + /* Integer is imprecise. NOK, IOKp */ + } + return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV; + } + SvIsUV_on(sv); + SvUVX(sv) = U_V(SvNVX(sv)); + if ((NV)(SvUVX(sv)) == SvNVX(sv)) { + if (SvUVX(sv) == UV_MAX) { + /* As we know that NVs don't preserve UVs, UV_MAX cannot + possibly be preserved by NV. Hence, it must be overflow. + NOK, IOKp */ + return IS_NUMBER_OVERFLOW_UV; + } + SvIOK_on(sv); /* Integer is precise. NOK, UOK */ + } else { + /* Integer is imprecise. NOK, IOKp */ + } + return IS_NUMBER_OVERFLOW_IV; + } + return S_sv_2inuv_non_preserve(aTHX_ sv, numtype); +} +#endif /* NV_PRESERVES_UV*/ + IV Perl_sv_2iv(pTHX_ register SV *sv) { @@ -1478,7 +1710,6 @@ Perl_sv_2iv(pTHX_ register SV *sv) return asIV(sv); if (!SvROK(sv)) { if (!(SvFLAGS(sv) & SVs_PADTMP)) { - dTHR; if (ckWARN(WARN_UNINITIALIZED) && !PL_localizing) report_uninit(); } @@ -1488,12 +1719,15 @@ Perl_sv_2iv(pTHX_ register SV *sv) if (SvTHINKFIRST(sv)) { if (SvROK(sv)) { SV* tmpstr; - if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv, numer))) + if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,numer)) && + (SvRV(tmpstr) != SvRV(sv))) return SvIV(tmpstr); return PTR2IV(SvRV(sv)); } + if (SvREADONLY(sv) && SvFAKE(sv)) { + sv_force_normal(sv); + } if (SvREADONLY(sv) && !SvOK(sv)) { - dTHR; if (ckWARN(WARN_UNINITIALIZED)) report_uninit(); return 0; @@ -1508,22 +1742,74 @@ Perl_sv_2iv(pTHX_ register SV *sv) } } if (SvNOKp(sv)) { - /* We can cache the IV/UV value even if it not good enough - * to reconstruct NV, since the conversion to PV will prefer - * NV over IV/UV. - */ + /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv + * without also getting a cached IV/UV from it at the same time + * (ie PV->NV conversion should detect loss of accuracy and cache + * IV or UV at same time to avoid this. NWC */ if (SvTYPE(sv) == SVt_NV) sv_upgrade(sv, SVt_PVNV); - (void)SvIOK_on(sv); - if (SvNVX(sv) < (NV)IV_MAX + 0.5) + (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */ + /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost + certainly cast into the IV range at IV_MAX, whereas the correct + answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary + cases go to UV */ + if (SvNVX(sv) < (NV)IV_MAX + 0.5) { SvIVX(sv) = I_V(SvNVX(sv)); + if (SvNVX(sv) == (NV) SvIVX(sv) +#ifndef NV_PRESERVES_UV + && (((UV)1 << NV_PRESERVES_UV_BITS) > + (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv))) + /* Don't flag it as "accurately an integer" if the number + came from a (by definition imprecise) NV operation, and + we're outside the range of NV integer precision */ +#endif + ) { + SvIOK_on(sv); /* Can this go wrong with rounding? NWC */ + DEBUG_c(PerlIO_printf(Perl_debug_log, + "0x%"UVxf" iv(%g => %"IVdf") (precise)\n", + PTR2UV(sv), + SvNVX(sv), + SvIVX(sv))); + + } else { + /* IV not precise. No need to convert from PV, as NV + conversion would already have cached IV if it detected + that PV->IV would be better than PV->NV->IV + flags already correct - don't set public IOK. */ + DEBUG_c(PerlIO_printf(Perl_debug_log, + "0x%"UVxf" iv(%g => %"IVdf") (imprecise)\n", + PTR2UV(sv), + SvNVX(sv), + SvIVX(sv))); + } + /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN, + but the cast (NV)IV_MIN rounds to a the value less (more + negative) than IV_MIN which happens to be equal to SvNVX ?? + Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and + NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and + (NV)UVX == NVX are both true, but the values differ. :-( + Hopefully for 2s complement IV_MIN is something like + 0x8000000000000000 which will be exact. NWC */ + } else { SvUVX(sv) = U_V(SvNVX(sv)); + if ( + (SvNVX(sv) == (NV) SvUVX(sv)) +#ifndef NV_PRESERVES_UV + /* Make sure it's not 0xFFFFFFFFFFFFFFFF */ + /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */ + && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv)) + /* Don't flag it as "accurately an integer" if the number + came from a (by definition imprecise) NV operation, and + we're outside the range of NV integer precision */ +#endif + ) + SvIOK_on(sv); SvIsUV_on(sv); ret_iv_max: - DEBUG_c(PerlIO_printf(Perl_debug_log, + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"UVuf" => %"IVdf") (as unsigned)\n", PTR2UV(sv), SvUVX(sv), @@ -1537,59 +1823,120 @@ Perl_sv_2iv(pTHX_ register SV *sv) /* We want to avoid a possible problem when we cache an IV which may be later translated to an NV, and the resulting NV is not the translation of the initial data. - + This means that if we cache such an IV, we need to cache the NV as well. Moreover, we trade speed for space, and do not - cache the NV if not needed. + cache the NV if we are sure it's not needed. */ - if (numtype & IS_NUMBER_NOT_IV) { - /* May be not an integer. Need to cache NV if we cache IV - * - otherwise future conversion to NV will be wrong. */ - NV d; - d = Atof(SvPVX(sv)); - - if (SvTYPE(sv) < SVt_PVNV) - sv_upgrade(sv, SVt_PVNV); - SvNVX(sv) = d; - (void)SvNOK_on(sv); - (void)SvIOK_on(sv); -#if defined(USE_LONG_DOUBLE) - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n", - PTR2UV(sv), SvNVX(sv))); -#else - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%g)\n", - PTR2UV(sv), SvNVX(sv))); -#endif - if (SvNVX(sv) < (NV)IV_MAX + 0.5) - SvIVX(sv) = I_V(SvNVX(sv)); - else { - SvUVX(sv) = U_V(SvNVX(sv)); - SvIsUV_on(sv); - goto ret_iv_max; - } - } - else if (numtype) { + if ((numtype & ~IS_NUMBER_NEG) == IS_NUMBER_TO_INT_BY_ATOL) { /* The NV may be reconstructed from IV - safe to cache IV, - which may be calculated by atol(). */ - if (SvTYPE(sv) == SVt_PV) + which may be calculated by atol(). */ + if (SvTYPE(sv) < SVt_PVIV) sv_upgrade(sv, SVt_PVIV); (void)SvIOK_on(sv); SvIVX(sv) = Atol(SvPVX(sv)); - } - else { /* Not a number. Cache 0. */ - dTHR; + } else { +#ifdef HAS_STRTOL + IV i; + int save_errno = errno; + /* Is it an integer that we could convert with strtol? + So try it, and if it doesn't set errno then it's pukka. + This should be faster than going atof and then thinking. */ + if (((numtype & (IS_NUMBER_TO_INT_BY_STRTOL | IS_NUMBER_NOT_INT)) + == IS_NUMBER_TO_INT_BY_STRTOL) + /* && is a sequence point. Without it not sure if I'm trying + to do too much between sequence points and hence going + undefined */ + && ((errno = 0), 1) /* , 1 so always true */ + && ((i = Strtol(SvPVX(sv), Null(char**), 10)), 1) + && (errno == 0)) { + if (SvTYPE(sv) < SVt_PVIV) + sv_upgrade(sv, SVt_PVIV); + (void)SvIOK_on(sv); + SvIVX(sv) = i; + errno = save_errno; + } else +#endif + { + NV d; +#ifdef HAS_STRTOL + /* Hopefully trace flow will optimise this away where possible + */ + errno = save_errno; +#endif + /* It wasn't an integer, or it overflowed, or we don't have + strtol. Do things the slow way - check if it's a UV etc. */ + d = Atof(SvPVX(sv)); - if (SvTYPE(sv) < SVt_PVIV) - sv_upgrade(sv, SVt_PVIV); - (void)SvIOK_on(sv); - SvIVX(sv) = 0; - if (ckWARN(WARN_NUMERIC)) - not_a_number(sv); + if (SvTYPE(sv) < SVt_PVNV) + sv_upgrade(sv, SVt_PVNV); + SvNVX(sv) = d; + + if (! numtype && ckWARN(WARN_NUMERIC)) + not_a_number(sv); + +#if defined(USE_LONG_DOUBLE) + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" PERL_PRIgldbl ")\n", + PTR2UV(sv), SvNVX(sv))); +#else + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%g)\n", + PTR2UV(sv), SvNVX(sv))); +#endif + + +#ifdef NV_PRESERVES_UV + (void)SvIOKp_on(sv); + (void)SvNOK_on(sv); + if (SvNVX(sv) < (NV)IV_MAX + 0.5) { + SvIVX(sv) = I_V(SvNVX(sv)); + if ((NV)(SvIVX(sv)) == SvNVX(sv)) { + SvIOK_on(sv); + } else { + /* Integer is imprecise. NOK, IOKp */ + } + /* UV will not work better than IV */ + } else { + if (SvNVX(sv) > (NV)UV_MAX) { + SvIsUV_on(sv); + /* Integer is inaccurate. NOK, IOKp, is UV */ + SvUVX(sv) = UV_MAX; + SvIsUV_on(sv); + } else { + SvUVX(sv) = U_V(SvNVX(sv)); + /* 0xFFFFFFFFFFFFFFFF not an issue in here */ + if ((NV)(SvUVX(sv)) == SvNVX(sv)) { + SvIOK_on(sv); + SvIsUV_on(sv); + } else { + /* Integer is imprecise. NOK, IOKp, is UV */ + SvIsUV_on(sv); + } + } + goto ret_iv_max; + } +#else /* NV_PRESERVES_UV */ + if (((UV)1 << NV_PRESERVES_UV_BITS) > + U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) { + /* Small enough to preserve all bits. */ + (void)SvIOKp_on(sv); + SvNOK_on(sv); + SvIVX(sv) = I_V(SvNVX(sv)); + if ((NV)(SvIVX(sv)) == SvNVX(sv)) + SvIOK_on(sv); + /* Assumption: first non-preserved integer is < IV_MAX, + this NV is in the preserved range, therefore: */ + if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv)) + < (UV)IV_MAX)) { + Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs(SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%g U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); + } + } else if (sv_2iuv_non_preserve (sv, numtype) + >= IS_NUMBER_OVERFLOW_IV) + goto ret_iv_max; +#endif /* NV_PRESERVES_UV */ + } } - } - else { - dTHR; + } else { if (ckWARN(WARN_UNINITIALIZED) && !PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP)) report_uninit(); if (SvTYPE(sv) < SVt_IV) @@ -1617,7 +1964,6 @@ Perl_sv_2uv(pTHX_ register SV *sv) return asUV(sv); if (!SvROK(sv)) { if (!(SvFLAGS(sv) & SVs_PADTMP)) { - dTHR; if (ckWARN(WARN_UNINITIALIZED) && !PL_localizing) report_uninit(); } @@ -1627,12 +1973,15 @@ Perl_sv_2uv(pTHX_ register SV *sv) if (SvTHINKFIRST(sv)) { if (SvROK(sv)) { SV* tmpstr; - if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv, numer))) + if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,numer)) && + (SvRV(tmpstr) != SvRV(sv))) return SvUV(tmpstr); return PTR2UV(SvRV(sv)); } + if (SvREADONLY(sv) && SvFAKE(sv)) { + sv_force_normal(sv); + } if (SvREADONLY(sv) && !SvOK(sv)) { - dTHR; if (ckWARN(WARN_UNINITIALIZED)) report_uninit(); return 0; @@ -1647,26 +1996,74 @@ Perl_sv_2uv(pTHX_ register SV *sv) } } if (SvNOKp(sv)) { - /* We can cache the IV/UV value even if it not good enough - * to reconstruct NV, since the conversion to PV will prefer - * NV over IV/UV. - */ + /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv + * without also getting a cached IV/UV from it at the same time + * (ie PV->NV conversion should detect loss of accuracy and cache + * IV or UV at same time to avoid this. */ + /* IV-over-UV optimisation - choose to cache IV if possible */ + if (SvTYPE(sv) == SVt_NV) sv_upgrade(sv, SVt_PVNV); - (void)SvIOK_on(sv); - if (SvNVX(sv) >= -0.5) { - SvIsUV_on(sv); - SvUVX(sv) = U_V(SvNVX(sv)); + + (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */ + if (SvNVX(sv) < (NV)IV_MAX + 0.5) { + SvIVX(sv) = I_V(SvNVX(sv)); + if (SvNVX(sv) == (NV) SvIVX(sv) +#ifndef NV_PRESERVES_UV + && (((UV)1 << NV_PRESERVES_UV_BITS) > + (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv))) + /* Don't flag it as "accurately an integer" if the number + came from a (by definition imprecise) NV operation, and + we're outside the range of NV integer precision */ +#endif + ) { + SvIOK_on(sv); /* Can this go wrong with rounding? NWC */ + DEBUG_c(PerlIO_printf(Perl_debug_log, + "0x%"UVxf" uv(%g => %"IVdf") (precise)\n", + PTR2UV(sv), + SvNVX(sv), + SvIVX(sv))); + + } else { + /* IV not precise. No need to convert from PV, as NV + conversion would already have cached IV if it detected + that PV->IV would be better than PV->NV->IV + flags already correct - don't set public IOK. */ + DEBUG_c(PerlIO_printf(Perl_debug_log, + "0x%"UVxf" uv(%g => %"IVdf") (imprecise)\n", + PTR2UV(sv), + SvNVX(sv), + SvIVX(sv))); + } + /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN, + but the cast (NV)IV_MIN rounds to a the value less (more + negative) than IV_MIN which happens to be equal to SvNVX ?? + Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and + NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and + (NV)UVX == NVX are both true, but the values differ. :-( + Hopefully for 2s complement IV_MIN is something like + 0x8000000000000000 which will be exact. NWC */ } else { - SvIVX(sv) = I_V(SvNVX(sv)); - ret_zero: - DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" 2uv(%"IVdf" => %"IVdf") (as signed)\n", + SvUVX(sv) = U_V(SvNVX(sv)); + if ( + (SvNVX(sv) == (NV) SvUVX(sv)) +#ifndef NV_PRESERVES_UV + /* Make sure it's not 0xFFFFFFFFFFFFFFFF */ + /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */ + && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv)) + /* Don't flag it as "accurately an integer" if the number + came from a (by definition imprecise) NV operation, and + we're outside the range of NV integer precision */ +#endif + ) + SvIOK_on(sv); + SvIsUV_on(sv); + DEBUG_c(PerlIO_printf(Perl_debug_log, + "0x%"UVxf" 2uv(%"UVuf" => %"IVdf") (as unsigned)\n", PTR2UV(sv), - SvIVX(sv), - (IV)(UV)SvIVX(sv))); - return (UV)SvIVX(sv); + SvUVX(sv), + SvUVX(sv))); } } else if (SvPOKp(sv) && SvLEN(sv)) { @@ -1675,78 +2072,146 @@ Perl_sv_2uv(pTHX_ register SV *sv) /* We want to avoid a possible problem when we cache a UV which may be later translated to an NV, and the resulting NV is not the translation of the initial data. - + This means that if we cache such a UV, we need to cache the NV as well. Moreover, we trade speed for space, and do not cache the NV if not needed. */ - if (numtype & IS_NUMBER_NOT_IV) { - /* May be not an integer. Need to cache NV if we cache IV - * - otherwise future conversion to NV will be wrong. */ - NV d; - d = Atof(SvPVX(sv)); - - if (SvTYPE(sv) < SVt_PVNV) - sv_upgrade(sv, SVt_PVNV); - SvNVX(sv) = d; - (void)SvNOK_on(sv); - (void)SvIOK_on(sv); -#if defined(USE_LONG_DOUBLE) - DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n", - PTR2UV(sv), SvNVX(sv))); -#else - DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" 2nv(%g)\n", - PTR2UV(sv), SvNVX(sv))); -#endif - if (SvNVX(sv) < -0.5) { - SvIVX(sv) = I_V(SvNVX(sv)); - goto ret_zero; - } else { - SvUVX(sv) = U_V(SvNVX(sv)); - SvIsUV_on(sv); - } - } - else if (numtype & IS_NUMBER_NEG) { + if ((numtype & ~IS_NUMBER_NEG) == IS_NUMBER_TO_INT_BY_ATOL) { /* The NV may be reconstructed from IV - safe to cache IV, - which may be calculated by atol(). */ - if (SvTYPE(sv) == SVt_PV) - sv_upgrade(sv, SVt_PVIV); - (void)SvIOK_on(sv); - SvIVX(sv) = (IV)Atol(SvPVX(sv)); - } - else if (numtype) { /* Non-negative */ - /* The NV may be reconstructed from UV - safe to cache UV, - which may be calculated by strtoul()/atol. */ - if (SvTYPE(sv) == SVt_PV) + which may be calculated by atol(). */ + if (SvTYPE(sv) < SVt_PVIV) sv_upgrade(sv, SVt_PVIV); (void)SvIOK_on(sv); - (void)SvIsUV_on(sv); + SvIVX(sv) = Atol(SvPVX(sv)); + } else { #ifdef HAS_STRTOUL - SvUVX(sv) = Strtoul(SvPVX(sv), Null(char**), 10); -#else /* no atou(), but we know the number fits into IV... */ - /* The only problem may be if it is negative... */ - SvUVX(sv) = (UV)Atol(SvPVX(sv)); + UV u; + char *num_begin = SvPVX(sv); + int save_errno = errno; + + /* seems that strtoul taking numbers that start with - is + implementation dependant, and can't be relied upon. */ + if (numtype & IS_NUMBER_NEG) { + /* Not totally defensive. assumine that looks_like_num + didn't lie about a - sign */ + while (isSPACE(*num_begin)) + num_begin++; + if (*num_begin == '-') + num_begin++; + } + + /* Is it an integer that we could convert with strtoul? + So try it, and if it doesn't set errno then it's pukka. + This should be faster than going atof and then thinking. */ + if (((numtype & (IS_NUMBER_TO_INT_BY_STRTOL | IS_NUMBER_NOT_INT)) + == IS_NUMBER_TO_INT_BY_STRTOL) + && ((errno = 0), 1) /* always true */ + && ((u = Strtoul(num_begin, Null(char**), 10)), 1) /* ditto */ + && (errno == 0) + /* If known to be negative, check it didn't undeflow IV + XXX possibly we should put more negative values as NVs + direct rather than go via atof below */ + && ((numtype & IS_NUMBER_NEG) ? (u <= (UV)IV_MIN) : 1)) { + errno = save_errno; + + if (SvTYPE(sv) < SVt_PVIV) + sv_upgrade(sv, SVt_PVIV); + (void)SvIOK_on(sv); + + /* If it's negative must use IV. + IV-over-UV optimisation */ + if (numtype & IS_NUMBER_NEG) { + SvIVX(sv) = -(IV)u; + } else if (u <= (UV) IV_MAX) { + SvIVX(sv) = (IV)u; + } else { + /* it didn't overflow, and it was positive. */ + SvUVX(sv) = u; + SvIsUV_on(sv); + } + } else #endif - } - else { /* Not a number. Cache 0. */ - dTHR; + { + NV d; +#ifdef HAS_STRTOUL + /* Hopefully trace flow will optimise this away where possible + */ + errno = save_errno; +#endif + /* It wasn't an integer, or it overflowed, or we don't have + strtol. Do things the slow way - check if it's a IV etc. */ + d = Atof(SvPVX(sv)); - if (SvTYPE(sv) < SVt_PVIV) - sv_upgrade(sv, SVt_PVIV); - (void)SvIOK_on(sv); - (void)SvIsUV_on(sv); - SvUVX(sv) = 0; /* We assume that 0s have the - same bitmap in IV and UV. */ - if (ckWARN(WARN_NUMERIC)) - not_a_number(sv); + if (SvTYPE(sv) < SVt_PVNV) + sv_upgrade(sv, SVt_PVNV); + SvNVX(sv) = d; + + if (! numtype && ckWARN(WARN_NUMERIC)) + not_a_number(sv); + +#if defined(USE_LONG_DOUBLE) + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%" PERL_PRIgldbl ")\n", + PTR2UV(sv), SvNVX(sv))); +#else + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%g)\n", + PTR2UV(sv), SvNVX(sv))); +#endif + +#ifdef NV_PRESERVES_UV + (void)SvIOKp_on(sv); + (void)SvNOK_on(sv); + if (SvNVX(sv) < (NV)IV_MAX + 0.5) { + SvIVX(sv) = I_V(SvNVX(sv)); + if ((NV)(SvIVX(sv)) == SvNVX(sv)) { + SvIOK_on(sv); + } else { + /* Integer is imprecise. NOK, IOKp */ + } + /* UV will not work better than IV */ + } else { + if (SvNVX(sv) > (NV)UV_MAX) { + SvIsUV_on(sv); + /* Integer is inaccurate. NOK, IOKp, is UV */ + SvUVX(sv) = UV_MAX; + SvIsUV_on(sv); + } else { + SvUVX(sv) = U_V(SvNVX(sv)); + /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs + NV preservse UV so can do correct comparison. */ + if ((NV)(SvUVX(sv)) == SvNVX(sv)) { + SvIOK_on(sv); + SvIsUV_on(sv); + } else { + /* Integer is imprecise. NOK, IOKp, is UV */ + SvIsUV_on(sv); + } + } + } +#else /* NV_PRESERVES_UV */ + if (((UV)1 << NV_PRESERVES_UV_BITS) > + U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) { + /* Small enough to preserve all bits. */ + (void)SvIOKp_on(sv); + SvNOK_on(sv); + SvIVX(sv) = I_V(SvNVX(sv)); + if ((NV)(SvIVX(sv)) == SvNVX(sv)) + SvIOK_on(sv); + /* Assumption: first non-preserved integer is < IV_MAX, + this NV is in the preserved range, therefore: */ + if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv)) + < (UV)IV_MAX)) { + Perl_croak(aTHX_ "sv_2uv assumed (U_V(fabs(SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%g U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); + } + } else + sv_2iuv_non_preserve (sv, numtype); +#endif /* NV_PRESERVES_UV */ + } } } else { if (!(SvFLAGS(sv) & SVs_PADTMP)) { - dTHR; if (ckWARN(WARN_UNINITIALIZED) && !PL_localizing) report_uninit(); } @@ -1771,20 +2236,18 @@ Perl_sv_2nv(pTHX_ register SV *sv) if (SvNOKp(sv)) return SvNVX(sv); if (SvPOKp(sv) && SvLEN(sv)) { - dTHR; if (ckWARN(WARN_NUMERIC) && !SvIOKp(sv) && !looks_like_number(sv)) not_a_number(sv); return Atof(SvPVX(sv)); } if (SvIOKp(sv)) { - if (SvIsUV(sv)) + if (SvIsUV(sv)) return (NV)SvUVX(sv); else return (NV)SvIVX(sv); } if (!SvROK(sv)) { if (!(SvFLAGS(sv) & SVs_PADTMP)) { - dTHR; if (ckWARN(WARN_UNINITIALIZED) && !PL_localizing) report_uninit(); } @@ -1794,12 +2257,15 @@ Perl_sv_2nv(pTHX_ register SV *sv) if (SvTHINKFIRST(sv)) { if (SvROK(sv)) { SV* tmpstr; - if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,numer))) + if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,numer)) && + (SvRV(tmpstr) != SvRV(sv))) return SvNV(tmpstr); return PTR2NV(SvRV(sv)); } + if (SvREADONLY(sv) && SvFAKE(sv)) { + sv_force_normal(sv); + } if (SvREADONLY(sv) && !SvOK(sv)) { - dTHR; if (ckWARN(WARN_UNINITIALIZED)) report_uninit(); return 0.0; @@ -1812,7 +2278,7 @@ Perl_sv_2nv(pTHX_ register SV *sv) sv_upgrade(sv, SVt_NV); #if defined(USE_LONG_DOUBLE) DEBUG_c({ - RESTORE_NUMERIC_STANDARD(); + STORE_NUMERIC_LOCAL_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%" PERL_PRIgldbl ")\n", PTR2UV(sv), SvNVX(sv)); @@ -1820,7 +2286,7 @@ Perl_sv_2nv(pTHX_ register SV *sv) }); #else DEBUG_c({ - RESTORE_NUMERIC_STANDARD(); + STORE_NUMERIC_LOCAL_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%g)\n", PTR2UV(sv), SvNVX(sv)); RESTORE_NUMERIC_LOCAL(); @@ -1833,33 +2299,73 @@ Perl_sv_2nv(pTHX_ register SV *sv) (!SvPOKp(sv) || !strchr(SvPVX(sv),'.') || !looks_like_number(sv))) { SvNVX(sv) = SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv); +#ifdef NV_PRESERVES_UV + SvNOK_on(sv); +#else + /* Only set the public NV OK flag if this NV preserves the IV */ + /* Check it's not 0xFFFFFFFFFFFFFFFF */ + if (SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv)))) + : (SvIVX(sv) == I_V(SvNVX(sv)))) + SvNOK_on(sv); + else + SvNOKp_on(sv); +#endif } else if (SvPOKp(sv) && SvLEN(sv)) { - dTHR; if (ckWARN(WARN_NUMERIC) && !SvIOKp(sv) && !looks_like_number(sv)) not_a_number(sv); SvNVX(sv) = Atof(SvPVX(sv)); +#ifdef NV_PRESERVES_UV + SvNOK_on(sv); +#else + /* Only set the public NV OK flag if this NV preserves the value in + the PV at least as well as an IV/UV would. + Not sure how to do this 100% reliably. */ + /* if that shift count is out of range then Configure's test is + wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS == + UV_BITS */ + if (((UV)1 << NV_PRESERVES_UV_BITS) > + U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) + SvNOK_on(sv); /* Definitely small enough to preserve all bits */ + else if (SvNVX(sv) < (NV)IV_MIN || SvNVX(sv) > (NV)UV_MAX) { + /* Definitely too large/small to fit in an integer, so no loss + of precision going to integer in the future via NV */ + SvNOK_on(sv); + } else { + /* Is it something we can run through strtol etc (ie no + trailing exponent part)? */ + int numtype = looks_like_number(sv); + /* XXX probably should cache this if called above */ + + if (!(numtype & + (IS_NUMBER_TO_INT_BY_ATOL | IS_NUMBER_TO_INT_BY_STRTOL))) { + /* Can't use strtol etc to convert this string, so don't try */ + SvNOK_on(sv); + } else + sv_2inuv_non_preserve (sv, numtype); + } +#endif /* NV_PRESERVES_UV */ } else { - dTHR; if (ckWARN(WARN_UNINITIALIZED) && !PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP)) report_uninit(); if (SvTYPE(sv) < SVt_NV) /* Typically the caller expects that sv_any is not NULL now. */ + /* XXX Ilya implies that this is a bug in callers that assume this + and ideally should be fixed. */ sv_upgrade(sv, SVt_NV); return 0.0; } - SvNOK_on(sv); #if defined(USE_LONG_DOUBLE) DEBUG_c({ - RESTORE_NUMERIC_STANDARD(); + STORE_NUMERIC_LOCAL_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n", PTR2UV(sv), SvNVX(sv)); RESTORE_NUMERIC_LOCAL(); }); #else DEBUG_c({ - RESTORE_NUMERIC_STANDARD(); + STORE_NUMERIC_LOCAL_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%"UVxf" 1nv(%g)\n", PTR2UV(sv), SvNVX(sv)); RESTORE_NUMERIC_LOCAL(); @@ -1877,7 +2383,6 @@ S_asIV(pTHX_ SV *sv) if (numtype & IS_NUMBER_TO_INT_BY_ATOL) return Atol(SvPVX(sv)); if (!numtype) { - dTHR; if (ckWARN(WARN_NUMERIC)) not_a_number(sv); } @@ -1895,7 +2400,6 @@ S_asUV(pTHX_ SV *sv) return Strtoul(SvPVX(sv), Null(char**), 10); #endif if (!numtype) { - dTHR; if (ckWARN(WARN_NUMERIC)) not_a_number(sv); } @@ -1904,23 +2408,32 @@ S_asUV(pTHX_ SV *sv) /* * Returns a combination of (advisory only - can get false negatives) - * IS_NUMBER_TO_INT_BY_ATOL, IS_NUMBER_TO_INT_BY_ATOF, IS_NUMBER_NOT_IV, - * IS_NUMBER_NEG + * IS_NUMBER_TO_INT_BY_ATOL, IS_NUMBER_TO_INT_BY_ATOF + * IS_NUMBER_LONGER_THAN_IV_MAX, IS_NUMBER_AS_LONG_AS_IV_MAX + * IS_NUMBER_NOT_INT, IS_NUMBER_NEG, IS_NUMBER_INFINITY * 0 if does not look like number. * - * In fact possible values are 0 and - * IS_NUMBER_TO_INT_BY_ATOL 123 - * IS_NUMBER_TO_INT_BY_ATOL | IS_NUMBER_NOT_IV 123.1 - * IS_NUMBER_TO_INT_BY_ATOF | IS_NUMBER_NOT_IV 123e0 + * (atol and strtol stop when they hit a decimal point. strtol will return + * LONG_MAX and LONG_MIN when given out of range values. ANSI says they should + * do this, and vendors have had 11 years to get it right. + * However, will try to make it still work with only atol + * + * IS_NUMBER_TO_INT_BY_ATOL 123456789 or 123456789.3 definitely < IV_MAX + * IS_NUMBER_TO_INT_BY_STRTOL 123456789 or 123456789.3 if digits = IV_MAX + * IS_NUMBER_TO_INT_BY_ATOF 123456789e0 or >> IV_MAX + * IS_NUMBER_LONGER_THAN_IV_MAX lots of digits, don't bother with atol + * IS_NUMBER_AS_LONG_AS_IV_MAX atol might hit LONG_MAX, might not. + * IS_NUMBER_NOT_INT saw "." or "e" + * IS_NUMBER_NEG * IS_NUMBER_INFINITY - * with a possible addition of IS_NUMBER_NEG. */ /* =for apidoc looks_like_number Test if an the content of an SV looks like a number (or is a -number). +number). C and C are treated as numbers (so will not +issue a non-numeric warning), even if your atof() doesn't grok them. =cut */ @@ -1935,9 +2448,12 @@ Perl_looks_like_number(pTHX_ SV *sv) I32 numtype = 0; I32 sawinf = 0; STRLEN len; +#ifdef USE_LOCALE_NUMERIC + bool specialradix = FALSE; +#endif if (SvPOK(sv)) { - sbegin = SvPVX(sv); + sbegin = SvPVX(sv); len = SvCUR(sv); } else if (SvPOKp(sv)) @@ -1958,9 +2474,10 @@ Perl_looks_like_number(pTHX_ SV *sv) nbegin = s; /* - * we return IS_NUMBER_TO_INT_BY_ATOL if the number can be converted - * to _integer_ with atol() and IS_NUMBER_TO_INT_BY_ATOF if you need - * (int)atof(). + * we return IS_NUMBER_TO_INT_BY_ATOL if the number can converted to + * integer with atol() without overflow, IS_NUMBER_TO_INT_BY_STRTOL if + * possibly slightly larger than max int, IS_NUMBER_TO_INT_BY_ATOF if you + * will need (int)atof(). */ /* next must be digit or the radix separator or beginning of infinity */ @@ -1969,29 +2486,63 @@ Perl_looks_like_number(pTHX_ SV *sv) s++; } while (isDIGIT(*s)); - if (s - nbegin >= TYPE_DIGITS(IV)) /* Cannot cache ato[ul]() */ - numtype |= IS_NUMBER_TO_INT_BY_ATOF | IS_NUMBER_NOT_IV; - else + /* Aaargh. long long really is irritating. + In the gospel according to ANSI 1989, it is an axiom that "long" + is the longest integer type, and that if you don't know how long + something is you can cast it to long, and nothing will be lost + (except possibly speed of execution if long is slower than the + type is was). + Now, one can't be sure if the old rules apply, or long long + (or some other newfangled thing) is actually longer than the + (formerly) longest thing. + */ + /* This lot will work for 64 bit *as long as* either + either long is 64 bit + or we can find both strtol/strtoq and strtoul/strtouq + If not, we really should refuse to let the user use 64 bit IVs + By "64 bit" I really mean IVs that don't get preserved by NVs + It also should work for 128 bit IVs. Can any lend me a machine to + test this? + */ + if (s - nbegin > TYPE_DIGITS(UV)) /* Cannot cache ato[ul]() */ + numtype |= IS_NUMBER_TO_INT_BY_ATOF | IS_NUMBER_LONGER_THAN_IV_MAX; + else if (s - nbegin < BIT_DIGITS(((sizeof (IV)>sizeof (long)) + ? sizeof(long) : sizeof (IV))*8-1)) numtype |= IS_NUMBER_TO_INT_BY_ATOL; + else + /* Can't be sure either way. (For 64 bit UV, 63 bit IV is 1 decimal + digit less (IV_MAX= 9223372036854775807, + UV_MAX= 18446744073709551615) so be cautious */ + numtype |= IS_NUMBER_TO_INT_BY_STRTOL | IS_NUMBER_AS_LONG_AS_IV_MAX; if (*s == '.' -#ifdef USE_LOCALE_NUMERIC - || IS_NUMERIC_RADIX(*s) +#ifdef USE_LOCALE_NUMERIC + || (specialradix = IS_NUMERIC_RADIX(s)) #endif ) { - s++; - numtype |= IS_NUMBER_NOT_IV; +#ifdef USE_LOCALE_NUMERIC + if (specialradix) + s += SvCUR(PL_numeric_radix); + else +#endif + s++; + numtype |= IS_NUMBER_NOT_INT; while (isDIGIT(*s)) /* optional digits after the radix */ s++; } } else if (*s == '.' -#ifdef USE_LOCALE_NUMERIC - || IS_NUMERIC_RADIX(*s) +#ifdef USE_LOCALE_NUMERIC + || (specialradix = IS_NUMERIC_RADIX(s)) #endif ) { - s++; - numtype |= IS_NUMBER_TO_INT_BY_ATOL | IS_NUMBER_NOT_IV; +#ifdef USE_LOCALE_NUMERIC + if (specialradix) + s += SvCUR(PL_numeric_radix); + else +#endif + s++; + numtype |= IS_NUMBER_TO_INT_BY_ATOL | IS_NUMBER_NOT_INT; /* no digits before the radix means we need digits after it */ if (isDIGIT(*s)) { do { @@ -2009,6 +2560,7 @@ Perl_looks_like_number(pTHX_ SV *sv) s++; if (*s != 'I' && *s != 'i') return 0; s++; if (*s != 'T' && *s != 't') return 0; s++; if (*s != 'Y' && *s != 'y') return 0; + s++; } sawinf = 1; } @@ -2016,12 +2568,13 @@ Perl_looks_like_number(pTHX_ SV *sv) return 0; if (sawinf) - numtype = IS_NUMBER_INFINITY; + numtype = (numtype & IS_NUMBER_NEG) /* Keep track of sign */ + | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; else { /* we can have an optional exponent part */ if (*s == 'e' || *s == 'E') { - numtype &= ~IS_NUMBER_NEG; - numtype |= IS_NUMBER_TO_INT_BY_ATOF | IS_NUMBER_NOT_IV; + numtype &= IS_NUMBER_NEG; + numtype |= IS_NUMBER_TO_INT_BY_ATOF | IS_NUMBER_NOT_INT; s++; if (*s == '+' || *s == '-') s++; @@ -2096,7 +2649,7 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) return SvPVX(sv); } if (SvIOKp(sv)) { - if (SvIsUV(sv)) + if (SvIsUV(sv)) (void)sprintf(tmpbuf,"%"UVuf, (UV)SvUVX(sv)); else (void)sprintf(tmpbuf,"%"IVdf, (IV)SvIVX(sv)); @@ -2110,7 +2663,6 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) } if (!SvROK(sv)) { if (!(SvFLAGS(sv) & SVs_PADTMP)) { - dTHR; if (ckWARN(WARN_UNINITIALIZED) && !PL_localizing) report_uninit(); } @@ -2121,7 +2673,8 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) if (SvTHINKFIRST(sv)) { if (SvROK(sv)) { SV* tmpstr; - if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,string))) + if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,string)) && + (SvRV(tmpstr) != SvRV(sv))) return SvPV(tmpstr,*lp); sv = (SV*)SvRV(sv); if (!sv) @@ -2132,11 +2685,10 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) switch (SvTYPE(sv)) { case SVt_PVMG: if ( ((SvFLAGS(sv) & - (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG)) + (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG)) == (SVs_OBJECT|SVs_RMG)) && strEQ(s=HvNAME(SvSTASH(sv)), "Regexp") && (mg = mg_find(sv, 'r'))) { - dTHR; regexp *re = (regexp *)mg->mg_obj; if (!mg->mg_ptr) { @@ -2182,7 +2734,10 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) case SVt_PV: case SVt_PVIV: case SVt_PVNV: - case SVt_PVBM: s = "SCALAR"; break; + case SVt_PVBM: if (SvROK(sv)) + s = "REF"; + else + s = "SCALAR"; break; case SVt_PVLV: s = "LVALUE"; break; case SVt_PVAV: s = "ARRAY"; break; case SVt_PVHV: s = "HASH"; break; @@ -2204,43 +2759,15 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) return s; } if (SvREADONLY(sv) && !SvOK(sv)) { - dTHR; if (ckWARN(WARN_UNINITIALIZED)) report_uninit(); *lp = 0; return ""; } } - if (SvNOKp(sv)) { /* See note in sv_2uv() */ - /* XXXX 64-bit? IV may have better precision... */ - /* I tried changing this for to be 64-bit-aware and - * the t/op/numconvert.t became very, very, angry. - * --jhi Sep 1999 */ - if (SvTYPE(sv) < SVt_PVNV) - sv_upgrade(sv, SVt_PVNV); - SvGROW(sv, 28); - s = SvPVX(sv); - olderrno = errno; /* some Xenix systems wipe out errno here */ -#ifdef apollo - if (SvNVX(sv) == 0.0) - (void)strcpy(s,"0"); - else -#endif /*apollo*/ - { - Gconvert(SvNVX(sv), NV_DIG, 0, s); - } - errno = olderrno; -#ifdef FIXNEGATIVEZERO - if (*s == '-' && s[1] == '0' && !s[2]) - strcpy(s,"0"); -#endif - while (*s) s++; -#ifdef hcx - if (s[-1] == '.') - *--s = '\0'; -#endif - } - else if (SvIOKp(sv)) { + if (SvIOK(sv) || ((SvIOKp(sv) && !SvNOKp(sv)))) { + /* I'm assuming that if both IV and NV are equally valid then + converting the IV is going to be more efficient */ U32 isIOK = SvIOK(sv); U32 isUIOK = SvIsUV(sv); char buf[TYPE_CHARS(UV)]; @@ -2263,15 +2790,37 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) SvIOKp_on(sv); if (isUIOK) SvIsUV_on(sv); - SvPOK_on(sv); + } + else if (SvNOKp(sv)) { + if (SvTYPE(sv) < SVt_PVNV) + sv_upgrade(sv, SVt_PVNV); + /* The +20 is pure guesswork. Configure test needed. --jhi */ + SvGROW(sv, NV_DIG + 20); + s = SvPVX(sv); + olderrno = errno; /* some Xenix systems wipe out errno here */ +#ifdef apollo + if (SvNVX(sv) == 0.0) + (void)strcpy(s,"0"); + else +#endif /*apollo*/ + { + Gconvert(SvNVX(sv), NV_DIG, 0, s); + } + errno = olderrno; +#ifdef FIXNEGATIVEZERO + if (*s == '-' && s[1] == '0' && !s[2]) + strcpy(s,"0"); +#endif + while (*s) s++; +#ifdef hcx + if (s[-1] == '.') + *--s = '\0'; +#endif } else { - dTHR; if (ckWARN(WARN_UNINITIALIZED) && !PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP)) - { report_uninit(); - } *lp = 0; if (SvTYPE(sv) < SVt_PV) /* Typically the caller expects that sv_any is not NULL now. */ @@ -2349,9 +2898,9 @@ char * Perl_sv_2pvutf8(pTHX_ register SV *sv, STRLEN *lp) { sv_utf8_upgrade(sv); - return sv_2pv(sv,lp); + return SvPV(sv,*lp); } - + /* This function is only called on magical items */ bool Perl_sv_2bool(pTHX_ register SV *sv) @@ -2362,9 +2911,9 @@ Perl_sv_2bool(pTHX_ register SV *sv) if (!SvOK(sv)) return 0; if (SvROK(sv)) { - dTHR; SV* tmpsv; - if (SvAMAGIC(sv) && (tmpsv = AMG_CALLun(sv,bool_))) + if (SvAMAGIC(sv) && (tmpsv=AMG_CALLun(sv,bool_)) && + (SvRV(tmpsv) != SvRV(sv))) return SvTRUE(tmpsv); return SvRV(sv) != 0; } @@ -2390,109 +2939,141 @@ Perl_sv_2bool(pTHX_ register SV *sv) } } -void +/* +=for apidoc sv_utf8_upgrade + +Convert the PV of an SV to its UTF8-encoded form. +Forces the SV to string form it it is not already. +Always sets the SvUTF8 flag to avoid future validity checks even +if all the bytes have hibit clear. + +=cut +*/ + +STRLEN Perl_sv_utf8_upgrade(pTHX_ register SV *sv) { - int hicount; - char *c; + char *s, *t, *e; + int hibit = 0; - if (!sv || !SvPOK(sv) || SvUTF8(sv)) - return; + if (!sv) + return 0; + + if (!SvPOK(sv)) + (void) SvPV_nolen(sv); - /* This function could be much more efficient if we had a FLAG - * to signal if there are any hibit chars in the string + if (SvUTF8(sv)) + return SvCUR(sv); + + /* This function could be much more efficient if we had a FLAG in SVs + * to signal if there are any hibit chars in the PV. + * Given that there isn't make loop fast as possible */ - hicount = 0; - for (c = SvPVX(sv); c < SvEND(sv); c++) { - if (*c & 0x80) - hicount++; + s = SvPVX(sv); + e = SvEND(sv); + t = s; + while (t < e) { + if ((hibit = UTF8_IS_CONTINUED(*t++))) + break; } - if (hicount) { - char *src, *dst; - SvGROW(sv, SvCUR(sv) + hicount + 1); - - src = SvEND(sv) - 1; - SvCUR_set(sv, SvCUR(sv) + hicount); - dst = SvEND(sv) - 1; + if (hibit) { + STRLEN len; - while (src < dst) { - if (*src & 0x80) { - dst--; - uv_to_utf8((U8*)dst, (U8)*src--); - dst--; - } - else { - *dst-- = *src--; - } + if (SvREADONLY(sv) && SvFAKE(sv)) { + sv_force_normal(sv); + s = SvPVX(sv); } - - SvUTF8_on(sv); + len = SvCUR(sv) + 1; /* Plus the \0 */ + SvPVX(sv) = (char*)bytes_to_utf8((U8*)s, &len); + SvCUR(sv) = len - 1; + if (SvLEN(sv) != 0) + Safefree(s); /* No longer using what was there before. */ + SvLEN(sv) = len; /* No longer know the real size. */ } + /* Mark as UTF-8 even if no hibit - saves scanning loop */ + SvUTF8_on(sv); + return SvCUR(sv); } +/* +=for apidoc sv_utf8_downgrade + +Attempt to convert the PV of an SV from UTF8-encoded to byte encoding. +This may not be possible if the PV contains non-byte encoding characters; +if this is the case, either returns false or, if C is not +true, croaks. + +=cut +*/ + bool Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok) { if (SvPOK(sv) && SvUTF8(sv)) { - char *c = SvPVX(sv); - char *first_hi = 0; - /* need to figure out if this is possible at all first */ - while (c < SvEND(sv)) { - if (*c & 0x80) { - I32 len; - UV uv = utf8_to_uv((U8*)c, &len); - if (uv >= 256) { - if (fail_ok) - return FALSE; - else { - /* XXX might want to make a callback here instead */ - Perl_croak(aTHX_ "Big byte"); - } + if (SvCUR(sv)) { + char *s; + STRLEN len; + + if (SvREADONLY(sv) && SvFAKE(sv)) + sv_force_normal(sv); + s = SvPV(sv, len); + if (!utf8_to_bytes((U8*)s, &len)) { + if (fail_ok) + return FALSE; + else { + if (PL_op) + Perl_croak(aTHX_ "Wide character in %s", + PL_op_desc[PL_op->op_type]); + else + Perl_croak(aTHX_ "Wide character"); } - if (!first_hi) - first_hi = c; - c += len; - } - else { - c++; - } - } - - if (first_hi) { - char *src = first_hi; - char *dst = first_hi; - while (src < SvEND(sv)) { - if (*src & 0x80) { - I32 len; - U8 u = (U8)utf8_to_uv((U8*)src, &len); - *dst++ = u; - src += len; - } - else { - *dst++ = *src++; - } - } - SvCUR_set(sv, dst - SvPVX(sv)); - } - SvUTF8_off(sv); + } + SvCUR(sv) = len; + } + SvUTF8_off(sv); } + return TRUE; } +/* +=for apidoc sv_utf8_encode + +Convert the PV of an SV to UTF8-encoded, but then turn off the C +flag so that it looks like octets again. Used as a building block +for encode_utf8 in Encode.xs + +=cut +*/ + void Perl_sv_utf8_encode(pTHX_ register SV *sv) { - sv_utf8_upgrade(sv); + (void) sv_utf8_upgrade(sv); SvUTF8_off(sv); } +/* +=for apidoc sv_utf8_decode + +Convert the octets in the PV from UTF-8 to chars. Scan for validity and then +turn of SvUTF8 if needed so that we see characters. Used as a building block +for decode_utf8 in Encode.xs + +=cut +*/ + + + bool Perl_sv_utf8_decode(pTHX_ register SV *sv) { if (SvPOK(sv)) { char *c; - bool has_utf = FALSE; + char *e; + + /* The octets may have got themselves encoded - get them back as bytes */ if (!sv_utf8_downgrade(sv, TRUE)) return FALSE; @@ -2500,24 +3081,15 @@ Perl_sv_utf8_decode(pTHX_ register SV *sv) * we want to make sure everything inside is valid utf8 first. */ c = SvPVX(sv); - while (c < SvEND(sv)) { - if (*c & 0x80) { - I32 len; - (void)utf8_to_uv((U8*)c, &len); - if (len == 1) { - /* bad utf8 */ - return FALSE; - } - c += len; - has_utf = TRUE; - } - else { - c++; - } + if (!is_utf8_string((U8*)c, SvCUR(sv)+1)) + return FALSE; + e = SvEND(sv); + while (c < e) { + if (UTF8_IS_CONTINUED(*c++)) { + SvUTF8_on(sv); + break; + } } - - if (has_utf) - SvUTF8_on(sv); } return TRUE; } @@ -2542,7 +3114,6 @@ C. void Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) { - dTHR; register U32 sflags; register int dtype; register int stype; @@ -2585,7 +3156,8 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) SvIVX(dstr) = SvIVX(sstr); if (SvIsUV(sstr)) SvIsUV_on(dstr); - SvTAINT(dstr); + if (SvTAINTED(sstr)) + SvTAINT(dstr); return; } goto undef_sstr; @@ -2605,7 +3177,8 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) } SvNVX(dstr) = SvNVX(sstr); (void)SvNOK_only(dstr); - SvTAINT(dstr); + if (SvTAINTED(sstr)) + SvTAINT(dstr); return; } goto undef_sstr; @@ -2670,11 +3243,19 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) && GvCV(dstr) && PL_sortcop == CvSTART(GvCV(dstr))) Perl_croak(aTHX_ "Can't redefine active sort subroutine %s", GvNAME(dstr)); + +#ifdef GV_SHARED_CHECK + if (GvSHARED((GV*)dstr)) { + Perl_croak(aTHX_ PL_no_modify); + } +#endif + (void)SvOK_off(dstr); GvINTRO_off(dstr); /* one-shot flag */ gp_free((GV*)dstr); GvGP(dstr) = gp_ref(GvGP(sstr)); - SvTAINT(dstr); + if (SvTAINTED(sstr)) + SvTAINT(dstr); if (GvIMPORTED(dstr) != GVf_IMPORTED && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) { @@ -2709,6 +3290,12 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) SV *dref = 0; int intro = GvINTRO(dstr); +#ifdef GV_SHARED_CHECK + if (GvSHARED((GV*)dstr)) { + Perl_croak(aTHX_ PL_no_modify); + } +#endif + if (intro) { GP *gp; gp_free((GV*)dstr); @@ -2763,24 +3350,27 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) if (!GvCVGEN((GV*)dstr) && (CvROOT(cv) || CvXSUB(cv))) { - SV *const_sv = cv_const_sv(cv); - bool const_changed = TRUE; - if(const_sv) - const_changed = sv_cmp(const_sv, - op_const_sv(CvSTART((CV*)sref), - (CV*)sref)); /* ahem, death to those who redefine * active sort subs */ if (PL_curstackinfo->si_type == PERLSI_SORT && PL_sortcop == CvSTART(cv)) - Perl_croak(aTHX_ + Perl_croak(aTHX_ "Can't redefine active sort subroutine %s", GvENAME((GV*)dstr)); - if ((const_changed && const_sv) || ckWARN(WARN_REDEFINE)) - Perl_warner(aTHX_ WARN_REDEFINE, const_sv ? - "Constant subroutine %s redefined" - : "Subroutine %s redefined", - GvENAME((GV*)dstr)); + /* Redefining a sub - warning is mandatory if + it was a const and its value changed. */ + if (ckWARN(WARN_REDEFINE) + || (CvCONST(cv) + && (!CvCONST((CV*)sref) + || sv_cmp(cv_const_sv(cv), + cv_const_sv((CV*)sref))))) + { + Perl_warner(aTHX_ WARN_REDEFINE, + CvCONST(cv) + ? "Constant subroutine %s redefined" + : "Subroutine %s redefined", + GvENAME((GV*)dstr)); + } } cv_ckproto(cv, (GV*)dstr, SvPOK(sref) ? SvPVX(sref) : Nullch); @@ -2803,6 +3393,13 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) dref = (SV*)GvIOp(dstr); GvIOp(dstr) = (IO*)sref; break; + case SVt_PVFM: + if (intro) + SAVESPTR(GvFORM(dstr)); + else + dref = (SV*)GvFORM(dstr); + GvFORM(dstr) = (CV*)sref; + break; default: if (intro) SAVESPTR(GvSV(dstr)); @@ -2820,7 +3417,8 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) SvREFCNT_dec(dref); if (intro) SAVEFREESV(sref); - SvTAINT(dstr); + if (SvTAINTED(sstr)) + SvTAINT(dstr); return; } if (SvPVX(dstr)) { @@ -2834,14 +3432,19 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) SvRV(dstr) = SvREFCNT_inc(SvRV(sstr)); SvROK_on(dstr); if (sflags & SVp_NOK) { - SvNOK_on(dstr); + SvNOKp_on(dstr); + /* Only set the public OK flag if the source has public OK. */ + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; SvNVX(dstr) = SvNVX(sstr); } if (sflags & SVp_IOK) { - (void)SvIOK_on(dstr); - SvIVX(dstr) = SvIVX(sstr); + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; if (sflags & SVf_IVisUV) SvIsUV_on(dstr); + SvIVX(dstr) = SvIVX(sstr); } if (SvAMAGIC(sstr)) { SvAMAGIC_on(dstr); @@ -2858,7 +3461,9 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) if (SvTEMP(sstr) && /* slated for free anyway? */ SvREFCNT(sstr) == 1 && /* and no other references to it? */ - !(sflags & SVf_OOK)) /* and not involved in OOK hack? */ + !(sflags & SVf_OOK) && /* and not involved in OOK hack? */ + SvLEN(sstr) && /* and really is a string */ + !(PL_op && PL_op->op_type == OP_AASSIGN)) /* and won't be needed again, potentially */ { if (SvPVX(dstr)) { /* we know that dtype >= SVt_PV */ if (SvOOK(dstr)) { @@ -2889,36 +3494,51 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) *SvEND(dstr) = '\0'; (void)SvPOK_only(dstr); } - if ((sflags & SVf_UTF8) && !IN_BYTE) + if (sflags & SVf_UTF8) SvUTF8_on(dstr); /*SUPPRESS 560*/ if (sflags & SVp_NOK) { - SvNOK_on(dstr); + SvNOKp_on(dstr); + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; SvNVX(dstr) = SvNVX(sstr); } if (sflags & SVp_IOK) { - (void)SvIOK_on(dstr); - SvIVX(dstr) = SvIVX(sstr); + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; if (sflags & SVf_IVisUV) SvIsUV_on(dstr); - } - } - else if (sflags & SVp_NOK) { - SvNVX(dstr) = SvNVX(sstr); - (void)SvNOK_only(dstr); - if (sflags & SVf_IOK) { - (void)SvIOK_on(dstr); SvIVX(dstr) = SvIVX(sstr); - /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ - if (sflags & SVf_IVisUV) - SvIsUV_on(dstr); } } else if (sflags & SVp_IOK) { - (void)SvIOK_only(dstr); - SvIVX(dstr) = SvIVX(sstr); + if (sflags & SVf_IOK) + (void)SvIOK_only(dstr); + else { + (void)SvOK_off(dstr); + (void)SvIOKp_on(dstr); + } + /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ if (sflags & SVf_IVisUV) SvIsUV_on(dstr); + SvIVX(dstr) = SvIVX(sstr); + if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_on(dstr); + else + (void)SvNOKp_on(dstr); + SvNVX(dstr) = SvNVX(sstr); + } + } + else if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_only(dstr); + else { + (void)SvOK_off(dstr); + SvNOKp_on(dstr); + } + SvNVX(dstr) = SvNVX(sstr); } else { if (dtype == SVt_PVGV) { @@ -2928,7 +3548,8 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) else (void)SvOK_off(dstr); } - SvTAINT(dstr); + if (SvTAINTED(sstr)) + SvTAINT(dstr); } /* @@ -2959,13 +3580,17 @@ void Perl_sv_setpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN len) { register char *dptr; - assert(len >= 0); /* STRLEN is probably unsigned, so this may - elicit a warning, but it won't hurt. */ + SV_CHECK_THINKFIRST(sv); if (!ptr) { (void)SvOK_off(sv); return; } + else { + /* len is STRLEN which is unsigned, need to copy to signed */ + IV iv = len; + assert(iv >= 0); + } (void)SvUPGRADE(sv, SVt_PV); SvGROW(sv, len + 1); @@ -2973,7 +3598,7 @@ Perl_sv_setpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN Move(ptr,dptr,len,char); dptr[len] = '\0'; SvCUR_set(sv, len); - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3017,7 +3642,7 @@ Perl_sv_setpv(pTHX_ register SV *sv, register const char *ptr) SvGROW(sv, len + 1); Move(ptr,SvPVX(sv),len+1,char); SvCUR_set(sv, len); - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3040,7 +3665,7 @@ Perl_sv_setpv_mg(pTHX_ register SV *sv, register const char *ptr) =for apidoc sv_usepvn Tells an SV to use C to find its string value. Normally the string is -stored inside the SV but sv_usepvn allows the SV to use an outside string. +stored inside the SV but sv_usepvn allows the SV to use an outside string. The C should point to memory that was allocated by C. The string length, C, must be supplied. This function will realloc the memory pointed to by C, so that pointer should not be freed or used by @@ -3067,7 +3692,7 @@ Perl_sv_usepvn(pTHX_ register SV *sv, register char *ptr, register STRLEN len) SvCUR_set(sv, len); SvLEN_set(sv, len+1); *SvEND(sv) = '\0'; - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3087,23 +3712,39 @@ Perl_sv_usepvn_mg(pTHX_ register SV *sv, register char *ptr, register STRLEN len } void -Perl_sv_force_normal(pTHX_ register SV *sv) +Perl_sv_force_normal_flags(pTHX_ register SV *sv, U32 flags) { if (SvREADONLY(sv)) { - dTHR; - if (PL_curcop != &PL_compiling) + if (SvFAKE(sv)) { + char *pvx = SvPVX(sv); + STRLEN len = SvCUR(sv); + U32 hash = SvUVX(sv); + SvGROW(sv, len + 1); + Move(pvx,SvPVX(sv),len,char); + *SvEND(sv) = '\0'; + SvFAKE_off(sv); + SvREADONLY_off(sv); + unsharepvn(pvx,SvUTF8(sv)?-len:len,hash); + } + else if (PL_curcop != &PL_compiling) Perl_croak(aTHX_ PL_no_modify); } if (SvROK(sv)) - sv_unref(sv); + sv_unref_flags(sv, flags); else if (SvFAKE(sv) && SvTYPE(sv) == SVt_PVGV) sv_unglob(sv); } - + +void +Perl_sv_force_normal(pTHX_ register SV *sv) +{ + sv_force_normal_flags(sv, 0); +} + /* =for apidoc sv_chop -Efficient removal of characters from the beginning of the string buffer. +Efficient removal of characters from the beginning of the string buffer. SvPOK(sv) must be true and the C must be a pointer to somewhere inside the string buffer. The C becomes the first character of the adjusted string. @@ -3113,8 +3754,8 @@ string. void Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) /* like set but assuming ptr is in sv */ - - + + { register STRLEN delta; @@ -3188,27 +3829,42 @@ Perl_sv_catpvn_mg(pTHX_ register SV *sv, register const char *ptr, register STRL /* =for apidoc sv_catsv -Concatenates the string from SV C onto the end of the string in SV -C. Handles 'get' magic, but not 'set' magic. See C. +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. Handles 'get' magic, but +not 'set' magic. See C. -=cut -*/ +=cut */ void -Perl_sv_catsv(pTHX_ SV *dstr, register SV *sstr) +Perl_sv_catsv(pTHX_ SV *dsv, register SV *ssv) { - char *s; - STRLEN len; - if (!sstr) + char *spv; + STRLEN slen; + if (!ssv) return; - if ((s = SvPV(sstr, len))) { - if (DO_UTF8(sstr)) { - sv_utf8_upgrade(dstr); - sv_catpvn(dstr,s,len); - SvUTF8_on(dstr); + if ((spv = SvPV(ssv, slen))) { + bool dutf8 = DO_UTF8(dsv); + bool sutf8 = DO_UTF8(ssv); + + if (dutf8 == sutf8) + sv_catpvn(dsv,spv,slen); + else { + if (dutf8) { + /* Not modifying source SV, so taking a temporary copy. */ + SV* csv = sv_2mortal(newSVsv(ssv)); + char *cpv; + STRLEN clen; + + sv_utf8_upgrade(csv); + cpv = SvPV(csv,clen); + sv_catpvn(dsv,cpv,clen); + } + else { + sv_utf8_upgrade(dsv); + sv_catpvn(dsv,spv,slen); + SvUTF8_on(dsv); /* If dsv has no wide characters. */ + } } - else - sv_catpvn(dstr,s,len); } } @@ -3221,10 +3877,10 @@ Like C, but also handles 'set' magic. */ void -Perl_sv_catsv_mg(pTHX_ SV *dstr, register SV *sstr) +Perl_sv_catsv_mg(pTHX_ SV *dsv, register SV *ssv) { - sv_catsv(dstr,sstr); - SvSETMAGIC(dstr); + sv_catsv(dsv,ssv); + SvSETMAGIC(dsv); } /* @@ -3275,7 +3931,7 @@ SV * Perl_newSV(pTHX_ STRLEN len) { register SV *sv; - + new_SV(sv); if (len) { sv_upgrade(sv, SVt_PV); @@ -3298,9 +3954,8 @@ void Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 namlen) { MAGIC* mg; - + if (SvREADONLY(sv)) { - dTHR; if (PL_curcop != &PL_compiling && !strchr("gBf", how)) Perl_croak(aTHX_ PL_no_modify); } @@ -3316,23 +3971,33 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam } Newz(702,mg, 1, MAGIC); mg->mg_moremagic = SvMAGIC(sv); - SvMAGIC(sv) = mg; - if (!obj || obj == sv || how == '#' || how == 'r') + + /* Some magic sontains a reference loop, where the sv and object refer to + each other. To prevent a avoid a reference loop that would prevent such + objects being freed, we look for such loops and if we find one we avoid + incrementing the object refcount. */ + if (!obj || obj == sv || how == '#' || how == 'r' || + (SvTYPE(obj) == SVt_PVGV && + (GvSV(obj) == sv || GvHV(obj) == (HV*)sv || GvAV(obj) == (AV*)sv || + GvCV(obj) == (CV*)sv || GvIOp(obj) == (IO*)sv || + GvFORM(obj) == (CV*)sv))) + { mg->mg_obj = obj; + } else { - dTHR; mg->mg_obj = SvREFCNT_inc(obj); mg->mg_flags |= MGf_REFCOUNTED; } mg->mg_type = how; mg->mg_len = namlen; - if (name) + if (name) { if (namlen >= 0) mg->mg_ptr = savepvn(name, namlen); else if (namlen == HEf_SVKEY) mg->mg_ptr = (char*)SvREFCNT_inc((SV*)name); - + } + switch (how) { case 0: mg->mg_virtual = &PL_vtbl_sv; @@ -3344,7 +4009,7 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam mg->mg_virtual = &PL_vtbl_amagicelem; break; case 'c': - mg->mg_virtual = 0; + mg->mg_virtual = &PL_vtbl_ovrld; break; case 'B': mg->mg_virtual = &PL_vtbl_bm; @@ -3451,6 +4116,14 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK|SVf_POK); } +/* +=for apidoc sv_unmagic + +Removes magic from an SV. + +=cut +*/ + int Perl_sv_unmagic(pTHX_ SV *sv, int type) { @@ -3465,11 +4138,12 @@ Perl_sv_unmagic(pTHX_ SV *sv, int type) *mgp = mg->mg_moremagic; if (vtbl && vtbl->svt_free) CALL_FPTR(vtbl->svt_free)(aTHX_ sv, mg); - if (mg->mg_ptr && mg->mg_type != 'g') + if (mg->mg_ptr && mg->mg_type != 'g') { if (mg->mg_len >= 0) Safefree(mg->mg_ptr); else if (mg->mg_len == HEf_SVKEY) SvREFCNT_dec((SV*)mg->mg_ptr); + } if (mg->mg_flags & MGf_REFCOUNTED) SvREFCNT_dec(mg->mg_obj); Safefree(mg); @@ -3485,6 +4159,14 @@ Perl_sv_unmagic(pTHX_ SV *sv, int type) return 0; } +/* +=for apidoc sv_rvweaken + +Weaken a reference. + +=cut +*/ + SV * Perl_sv_rvweaken(pTHX_ SV *sv) { @@ -3494,7 +4176,6 @@ Perl_sv_rvweaken(pTHX_ SV *sv) if (!SvROK(sv)) Perl_croak(aTHX_ "Can't weaken a nonreference"); else if (SvWEAKREF(sv)) { - dTHR; if (ckWARN(WARN_MISC)) Perl_warner(aTHX_ WARN_MISC, "Reference is already weak"); return sv; @@ -3502,7 +4183,7 @@ Perl_sv_rvweaken(pTHX_ SV *sv) tsv = SvRV(sv); sv_add_backref(tsv, sv); SvWEAKREF_on(sv); - SvREFCNT_dec(tsv); + SvREFCNT_dec(tsv); return sv; } @@ -3521,7 +4202,7 @@ S_sv_add_backref(pTHX_ SV *tsv, SV *sv) av_push(av,sv); } -STATIC void +STATIC void S_sv_del_backref(pTHX_ SV *sv) { AV *av; @@ -3560,7 +4241,7 @@ Perl_sv_insert(pTHX_ SV *bigstr, STRLEN offset, STRLEN len, char *little, STRLEN register char *bigend; register I32 i; STRLEN curlen; - + if (!bigstr) Perl_croak(aTHX_ "Can't modify non-existent substring"); @@ -3636,12 +4317,17 @@ Perl_sv_insert(pTHX_ SV *bigstr, STRLEN offset, STRLEN len, char *little, STRLEN SvSETMAGIC(bigstr); } -/* make sv point to what nstr did */ +/* +=for apidoc sv_replace + +Make the first argument a copy of the second, then delete the original. + +=cut +*/ void Perl_sv_replace(pTHX_ register SV *sv, register SV *nsv) { - dTHR; U32 refcnt = SvREFCNT(sv); SV_CHECK_THINKFIRST(sv); if (SvREFCNT(nsv) != 1 && ckWARN_d(WARN_INTERNAL)) @@ -3665,6 +4351,15 @@ Perl_sv_replace(pTHX_ register SV *sv, register SV *nsv) del_SV(nsv); } +/* +=for apidoc sv_clear + +Clear an SV, making it empty. Does not free the memory used by the SV +itself. + +=cut +*/ + void Perl_sv_clear(pTHX_ register SV *sv) { @@ -3673,10 +4368,9 @@ Perl_sv_clear(pTHX_ register SV *sv) assert(SvREFCNT(sv) == 0); if (SvOBJECT(sv)) { - dTHR; if (PL_defstash) { /* Still have a symbol table? */ djSP; - GV* destructor; + CV* destructor; SV tmpref; Zero(&tmpref, 1, SV); @@ -3685,9 +4379,9 @@ Perl_sv_clear(pTHX_ register SV *sv) SvREADONLY_on(&tmpref); /* DESTROY() could be naughty */ SvREFCNT(&tmpref) = 1; - do { + do { stash = SvSTASH(sv); - destructor = gv_fetchmethod(SvSTASH(sv), "DESTROY"); + destructor = StashHANDLER(stash,DESTROY); if (destructor) { ENTER; PUSHSTACKi(PERLSI_DESTROY); @@ -3696,8 +4390,7 @@ Perl_sv_clear(pTHX_ register SV *sv) PUSHMARK(SP); PUSHs(&tmpref); PUTBACK; - call_sv((SV*)GvCV(destructor), - G_DISCARD|G_EVAL|G_KEEPERR); + call_sv((SV*)destructor, G_DISCARD|G_EVAL|G_KEEPERR); SvREFCNT(sv)--; POPSTACK; SPAGAIN; @@ -3782,6 +4475,10 @@ Perl_sv_clear(pTHX_ register SV *sv) } else if (SvPVX(sv) && SvLEN(sv)) Safefree(SvPVX(sv)); + else if (SvPVX(sv) && SvREADONLY(sv) && SvFAKE(sv)) { + unsharepvn(SvPVX(sv),SvUTF8(sv)?-SvCUR(sv):SvCUR(sv),SvUVX(sv)); + SvFAKE_off(sv); + } break; /* case SVt_NV: @@ -3858,10 +4555,17 @@ Perl_sv_newref(pTHX_ SV *sv) return sv; } +/* +=for apidoc sv_free + +Free the memory used by an SV. + +=cut +*/ + void Perl_sv_free(pTHX_ SV *sv) { - dTHR; int refcount_is_zero; if (!sv) @@ -3926,29 +4630,30 @@ Perl_sv_len(pTHX_ register SV *sv) return len; } +/* +=for apidoc sv_len_utf8 + +Returns the number of characters in the string in an SV, counting wide +UTF8 bytes as a single character. + +=cut +*/ + STRLEN Perl_sv_len_utf8(pTHX_ register SV *sv) { - U8 *s; - U8 *send; - STRLEN len; - if (!sv) return 0; -#ifdef NOTYET if (SvGMAGICAL(sv)) - len = mg_length(sv); + return mg_length(sv); else -#endif - s = (U8*)SvPV(sv, len); - send = s + len; - len = 0; - while (s < send) { - s += UTF8SKIP(s); - len++; + { + STRLEN len; + U8 *s = (U8*)SvPV(sv, len); + + return Perl_utf8_length(aTHX_ s, s + len); } - return len; } void @@ -3994,18 +4699,18 @@ Perl_sv_pos_b2u(pTHX_ register SV *sv, I32* offsetp) s = (U8*)SvPV(sv, len); if (len < *offsetp) - Perl_croak(aTHX_ "panic: bad byte offset"); + Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset"); send = s + *offsetp; len = 0; while (s < send) { - s += UTF8SKIP(s); - ++len; - } - if (s != send) { - dTHR; - if (ckWARN_d(WARN_UTF8)) - Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character"); - --len; + STRLEN n; + + if (utf8_to_uv(s, UTF8SKIP(s), &n, 0)) { + s += n; + len++; + } + else + break; } *offsetp = len; return; @@ -4021,38 +4726,60 @@ identical. */ I32 -Perl_sv_eq(pTHX_ register SV *str1, register SV *str2) +Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) { char *pv1; STRLEN cur1; char *pv2; STRLEN cur2; + I32 eq = 0; + bool pv1tmp = FALSE; + bool pv2tmp = FALSE; - if (!str1) { + if (!sv1) { pv1 = ""; cur1 = 0; } else - pv1 = SvPV(str1, cur1); + pv1 = SvPV(sv1, cur1); - if (cur1) { - if (!str2) - return 0; - if (SvUTF8(str1) != SvUTF8(str2) && !IN_BYTE) { - if (SvUTF8(str1)) { - sv_utf8_upgrade(str2); - } - else { - sv_utf8_upgrade(str1); - } + if (!sv2){ + pv2 = ""; + cur2 = 0; + } + else + pv2 = SvPV(sv2, cur2); + + /* do not utf8ize the comparands as a side-effect */ + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + bool is_utf8 = TRUE; + + if (PL_hints & HINT_UTF8_DISTINCT) + return FALSE; + + if (SvUTF8(sv1)) { + char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8); + + if ((pv1tmp = (pv != pv1))) + pv1 = pv; + } + else { + char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8); + + if ((pv2tmp = (pv != pv2))) + pv2 = pv; } } - pv2 = SvPV(str2, cur2); - if (cur1 != cur2) - return 0; + if (cur1 == cur2) + eq = memEQ(pv1, pv2, cur1); + + if (pv1tmp) + Safefree(pv1); + if (pv2tmp) + Safefree(pv2); - return memEQ(pv1, pv2, cur1); + return eq; } /* @@ -4066,60 +4793,75 @@ C. */ I32 -Perl_sv_cmp(pTHX_ register SV *str1, register SV *str2) +Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) { STRLEN cur1, cur2; char *pv1, *pv2; - I32 retval; + I32 cmp; + bool pv1tmp = FALSE; + bool pv2tmp = FALSE; - if (str1) { - pv1 = SvPV(str1, cur1); - } - else { + if (!sv1) { + pv1 = ""; cur1 = 0; } + else + pv1 = SvPV(sv1, cur1); - if (str2) { - if (SvPOK(str2)) { - if (SvPOK(str1) && SvUTF8(str1) != SvUTF8(str2) && !IN_BYTE) { - /* must upgrade other to UTF8 first */ - if (SvUTF8(str1)) { - sv_utf8_upgrade(str2); - } - else { - sv_utf8_upgrade(str1); - /* refresh pointer and length */ - pv1 = SvPVX(str1); - cur1 = SvCUR(str1); - } - } - pv2 = SvPVX(str2); - cur2 = SvCUR(str2); - } + if (!sv2){ + pv2 = ""; + cur2 = 0; + } + else + pv2 = SvPV(sv2, cur2); + + /* do not utf8ize the comparands as a side-effect */ + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + if (PL_hints & HINT_UTF8_DISTINCT) + return SvUTF8(sv1) ? 1 : -1; + + if (SvUTF8(sv1)) { + pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); + pv2tmp = TRUE; + } else { - pv2 = sv_2pv(str2, &cur2); + pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1); + pv1tmp = TRUE; } } - else { - cur2 = 0; + + if (!cur1) { + cmp = cur2 ? -1 : 0; + } else if (!cur2) { + cmp = 1; + } else { + I32 retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2); + + if (retval) { + cmp = retval < 0 ? -1 : 1; + } else if (cur1 == cur2) { + cmp = 0; + } else { + cmp = cur1 < cur2 ? -1 : 1; + } } - if (!cur1) - return cur2 ? -1 : 0; + if (pv1tmp) + Safefree(pv1); + if (pv2tmp) + Safefree(pv2); - if (!cur2) - return 1; + return cmp; +} - retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2); +/* +=for apidoc sv_cmp_locale - if (retval) - return retval < 0 ? -1 : 1; +Compares the strings in two SVs in a locale-aware manner. See +L - if (cur1 == cur2) - return 0; - else - return cur1 < cur2 ? -1 : 1; -} +=cut +*/ I32 Perl_sv_cmp_locale(pTHX_ register SV *sv1, register SV *sv2) @@ -4222,10 +4964,18 @@ Perl_sv_collxfrm(pTHX_ SV *sv, STRLEN *nxp) #endif /* USE_LOCALE_COLLATE */ +/* +=for apidoc sv_gets + +Get a line from the filehandle and store it into the SV, optionally +appending to the currently-stored string. + +=cut +*/ + char * Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) { - dTHR; char *rsptr; STRLEN rslen; register STDCHAR rslast; @@ -4261,14 +5011,31 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) #endif SvCUR_set(sv, bytesread); buffer[bytesread] = '\0'; + if (PerlIO_isutf8(fp)) + SvUTF8_on(sv); + else + SvUTF8_off(sv); return(SvCUR(sv) ? SvPVX(sv) : Nullch); } else if (RsPARA(PL_rs)) { rsptr = "\n\n"; rslen = 2; } - else - rsptr = SvPV(PL_rs, rslen); + else { + /* Get $/ i.e. PL_rs into same encoding as stream wants */ + if (PerlIO_isutf8(fp)) { + rsptr = SvPVutf8(PL_rs, rslen); + } + else { + if (SvUTF8(PL_rs)) { + if (!sv_utf8_downgrade(PL_rs, TRUE)) { + Perl_croak(aTHX_ "Wide character in $/"); + } + } + rsptr = SvPV(PL_rs, rslen); + } + } + rslast = rslen ? rsptr[rslen - 1] : '\0'; if (RsPARA(PL_rs)) { /* have to do this both before and after */ @@ -4288,7 +5055,7 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) /* See if we know enough about I/O mechanism to cheat it ! */ /* This used to be #ifdef test - it is made run-time test for ease - of abstracting out stdio interface. One call should be cheap + of abstracting out stdio interface. One call should be cheap enough here - and may even be a macro allowing compile time optimization. */ @@ -4336,7 +5103,7 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) "Screamer: entering, ptr=%"UVuf", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); DEBUG_P(PerlIO_printf(Perl_debug_log, "Screamer: entering: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", - PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), + PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), PTR2UV(PerlIO_has_base(fp) ? PerlIO_get_base(fp) : 0))); for (;;) { screamer: @@ -4349,8 +5116,8 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) } } else { - Copy(ptr, bp, cnt, char); /* this | eat */ - bp += cnt; /* screams | dust */ + Copy(ptr, bp, cnt, char); /* this | eat */ + bp += cnt; /* screams | dust */ ptr += cnt; /* louder | sed :-) */ cnt = 0; } @@ -4372,15 +5139,15 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) PerlIO_set_ptrcnt(fp, ptr, cnt); /* deregisterize cnt and ptr */ DEBUG_P(PerlIO_printf(Perl_debug_log, "Screamer: pre: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", - PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), + PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); - /* This used to call 'filbuf' in stdio form, but as that behaves like + /* This used to call 'filbuf' in stdio form, but as that behaves like getc when cnt <= 0 we use PerlIO_getc here to avoid introducing another abstraction. */ i = PerlIO_getc(fp); /* get more characters */ DEBUG_P(PerlIO_printf(Perl_debug_log, "Screamer: post: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", - PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), + PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); cnt = PerlIO_get_cnt(fp); ptr = (STDCHAR*)PerlIO_get_ptr(fp); /* reregisterize cnt and ptr */ @@ -4413,7 +5180,7 @@ thats_really_all_folks: PerlIO_set_ptrcnt(fp, ptr, cnt); /* put these back or we're in trouble */ DEBUG_P(PerlIO_printf(Perl_debug_log, "Screamer: end: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", - PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), + PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); *bp = '\0'; SvCUR_set(sv, bp - (STDCHAR*)SvPVX(sv)); /* set length */ @@ -4477,7 +5244,7 @@ screamer2: } } - if (RsPARA(PL_rs)) { /* have to do this both before and after */ + if (RsPARA(PL_rs)) { /* have to do this both before and after */ while (i != EOF) { /* to make sure file boundaries work right */ i = PerlIO_getc(fp); if (i != '\n') { @@ -4487,6 +5254,11 @@ screamer2: } } + if (PerlIO_isutf8(fp)) + SvUTF8_on(sv); + else + SvUTF8_off(sv); + return (SvCUR(sv) - append) ? SvPVX(sv) : Nullch; } @@ -4511,7 +5283,6 @@ Perl_sv_inc(pTHX_ register SV *sv) mg_get(sv); if (SvTHINKFIRST(sv)) { if (SvREADONLY(sv)) { - dTHR; if (PL_curcop != &PL_compiling) Perl_croak(aTHX_ PL_no_modify); } @@ -4525,12 +5296,15 @@ Perl_sv_inc(pTHX_ register SV *sv) } } flags = SvFLAGS(sv); - if (flags & SVp_NOK) { - (void)SvNOK_only(sv); - SvNVX(sv) += 1.0; - return; - } - if (flags & SVp_IOK) { + if ((flags & (SVp_NOK|SVp_IOK)) == SVp_NOK) { + /* It's (privately or publicly) a float, but not tested as an + integer, so test it to see. */ + (void) SvIV(sv); + flags = SvFLAGS(sv); + } + if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { + /* It's publicly an integer, or privately an integer-not-float */ + oops_its_int: if (SvIsUV(sv)) { if (SvUVX(sv) == UV_MAX) sv_setnv(sv, (NV)UV_MAX + 1.0); @@ -4539,26 +5313,67 @@ Perl_sv_inc(pTHX_ register SV *sv) ++SvUVX(sv); } else { if (SvIVX(sv) == IV_MAX) - sv_setnv(sv, (NV)IV_MAX + 1.0); + sv_setuv(sv, (UV)IV_MAX + 1); else { (void)SvIOK_only(sv); ++SvIVX(sv); - } + } } return; } - if (!(flags & SVp_POK) || !*SvPVX(sv)) { - if ((flags & SVTYPEMASK) < SVt_PVNV) - sv_upgrade(sv, SVt_NV); - SvNVX(sv) = 1.0; + if (flags & SVp_NOK) { (void)SvNOK_only(sv); + SvNVX(sv) += 1.0; + return; + } + + if (!(flags & SVp_POK) || !*SvPVX(sv)) { + if ((flags & SVTYPEMASK) < SVt_PVIV) + sv_upgrade(sv, SVt_IV); + (void)SvIOK_only(sv); + SvIVX(sv) = 1; return; } d = SvPVX(sv); while (isALPHA(*d)) d++; while (isDIGIT(*d)) d++; if (*d) { - sv_setnv(sv,Atof(SvPVX(sv)) + 1.0); /* punt */ +#ifdef PERL_PRESERVE_IVUV + /* Got to punt this an an integer if needs be, but we don't issue + warnings. Probably ought to make the sv_iv_please() that does + the conversion if possible, and silently. */ + I32 numtype = looks_like_number(sv); + if (numtype && !(numtype & IS_NUMBER_INFINITY)) { + /* Need to try really hard to see if it's an integer. + 9.22337203685478e+18 is an integer. + but "9.22337203685478e+18" + 0 is UV=9223372036854779904 + so $a="9.22337203685478e+18"; $a+0; $a++ + needs to be the same as $a="9.22337203685478e+18"; $a++ + or we go insane. */ + + (void) sv_2iv(sv); + if (SvIOK(sv)) + goto oops_its_int; + + /* sv_2iv *should* have made this an NV */ + if (flags & SVp_NOK) { + (void)SvNOK_only(sv); + SvNVX(sv) += 1.0; + return; + } + /* I don't think we can get here. Maybe I should assert this + And if we do get here I suspect that sv_setnv will croak. NWC + Fall through. */ +#if defined(USE_LONG_DOUBLE) + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n", + SvPVX(sv), SvIVX(sv), SvNVX(sv))); +#else + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%g\n", + SvPVX(sv), SvIVX(sv), SvNVX(sv))); +#endif + } +#endif /* PERL_PRESERVE_IVUV */ + sv_setnv(sv,Atof(SvPVX(sv)) + 1.0); return; } d--; @@ -4573,7 +5388,7 @@ Perl_sv_inc(pTHX_ register SV *sv) /* MKS: The original code here died if letters weren't consecutive. * at least it didn't have to worry about non-C locales. The * new code assumes that ('z'-'a')==('Z'-'A'), letters are - * arranged in order (although not consecutively) and that only + * arranged in order (although not consecutively) and that only * [A-Za-z] are accepted by isALPHA in the C locale. */ if (*d != 'z' && *d != 'Z') { @@ -4619,7 +5434,6 @@ Perl_sv_dec(pTHX_ register SV *sv) mg_get(sv); if (SvTHINKFIRST(sv)) { if (SvREADONLY(sv)) { - dTHR; if (PL_curcop != &PL_compiling) Perl_croak(aTHX_ PL_no_modify); } @@ -4632,13 +5446,12 @@ Perl_sv_dec(pTHX_ register SV *sv) sv_setiv(sv, i); } } + /* Unlike sv_inc we don't have to worry about string-never-numbers + and keeping them magic. But we mustn't warn on punting */ flags = SvFLAGS(sv); - if (flags & SVp_NOK) { - SvNVX(sv) -= 1.0; - (void)SvNOK_only(sv); - return; - } - if (flags & SVp_IOK) { + if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { + /* It's publicly an integer, or privately an integer-not-float */ + oops_its_int: if (SvIsUV(sv)) { if (SvUVX(sv) == 0) { (void)SvIOK_only(sv); @@ -4647,17 +5460,22 @@ Perl_sv_dec(pTHX_ register SV *sv) else { (void)SvIOK_only_UV(sv); --SvUVX(sv); - } + } } else { if (SvIVX(sv) == IV_MIN) sv_setnv(sv, (NV)IV_MIN - 1.0); else { (void)SvIOK_only(sv); --SvIVX(sv); - } + } } return; } + if (flags & SVp_NOK) { + SvNVX(sv) -= 1.0; + (void)SvNOK_only(sv); + return; + } if (!(flags & SVp_POK)) { if ((flags & SVTYPEMASK) < SVt_PVNV) sv_upgrade(sv, SVt_NV); @@ -4665,6 +5483,40 @@ Perl_sv_dec(pTHX_ register SV *sv) (void)SvNOK_only(sv); return; } +#ifdef PERL_PRESERVE_IVUV + { + I32 numtype = looks_like_number(sv); + if (numtype && !(numtype & IS_NUMBER_INFINITY)) { + /* Need to try really hard to see if it's an integer. + 9.22337203685478e+18 is an integer. + but "9.22337203685478e+18" + 0 is UV=9223372036854779904 + so $a="9.22337203685478e+18"; $a+0; $a-- + needs to be the same as $a="9.22337203685478e+18"; $a-- + or we go insane. */ + + (void) sv_2iv(sv); + if (SvIOK(sv)) + goto oops_its_int; + + /* sv_2iv *should* have made this an NV */ + if (flags & SVp_NOK) { + (void)SvNOK_only(sv); + SvNVX(sv) -= 1.0; + return; + } + /* I don't think we can get here. Maybe I should assert this + And if we do get here I suspect that sv_setnv will croak. NWC + Fall through. */ +#if defined(USE_LONG_DOUBLE) + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n", + SvPVX(sv), SvIVX(sv), SvNVX(sv))); +#else + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%g\n", + SvPVX(sv), SvIVX(sv), SvNVX(sv))); +#endif + } + } +#endif /* PERL_PRESERVE_IVUV */ sv_setnv(sv,Atof(SvPVX(sv)) - 1.0); /* punt */ } @@ -4685,7 +5537,6 @@ as mortal. SV * Perl_sv_mortalcopy(pTHX_ SV *oldstr) { - dTHR; register SV *sv; new_SV(sv); @@ -4707,7 +5558,6 @@ Creates a new SV which is mortal. The reference count of the SV is set to 1. SV * Perl_sv_newmortal(pTHX) { - dTHR; register SV *sv; new_SV(sv); @@ -4731,7 +5581,6 @@ ends. SV * Perl_sv_2mortal(pTHX_ register SV *sv) { - dTHR; if (!sv) return sv; if (SvREADONLY(sv) && SvIMMORTAL(sv)) @@ -4768,7 +5617,7 @@ Perl_newSVpv(pTHX_ const char *s, STRLEN len) =for apidoc newSVpvn Creates a new SV and copies a string into it. The reference count for the -SV is set to 1. Note that if C is zero, Perl will create a zero length +SV is set to 1. Note that if C is zero, Perl will create a zero length string. You are responsible for ensuring that the source string is at least C bytes long. @@ -4785,6 +5634,49 @@ Perl_newSVpvn(pTHX_ const char *s, STRLEN len) return sv; } +/* +=for apidoc newSVpvn_share + +Creates a new SV and populates it with a string from +the string table. Turns on READONLY and FAKE. +The idea here is that as string table is used for shared hash +keys these strings will have SvPVX == HeKEY and hash lookup +will avoid string compare. + +=cut +*/ + +SV * +Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash) +{ + register SV *sv; + bool is_utf8 = FALSE; + if (len < 0) { + len = -len; + is_utf8 = TRUE; + } + if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT)) { + STRLEN tmplen = len; + /* See the note in hv.c:hv_fetch() --jhi */ + src = (char*)bytes_from_utf8((U8*)src, &tmplen, &is_utf8); + len = tmplen; + } + if (!hash) + PERL_HASH(hash, src, len); + new_SV(sv); + sv_upgrade(sv, SVt_PVIV); + SvPVX(sv) = sharepvn(src, is_utf8?-len:len, hash); + SvCUR(sv) = len; + SvUVX(sv) = hash; + SvLEN(sv) = 0; + SvREADONLY_on(sv); + SvFAKE_on(sv); + SvPOK_on(sv); + if (is_utf8) + SvUTF8_on(sv); + return sv; +} + #if defined(PERL_IMPLICIT_CONTEXT) SV * Perl_newSVpvf_nocontext(const char* pat, ...) @@ -4897,7 +5789,6 @@ SV is B incremented. SV * Perl_newRV_noinc(pTHX_ SV *tmpRef) { - dTHR; register SV *sv; new_SV(sv); @@ -4928,7 +5819,6 @@ Creates a new SV which is an exact duplicate of the original SV. SV * Perl_newSVsv(pTHX_ register SV *old) { - dTHR; register SV *sv; if (!old) @@ -5011,7 +5901,7 @@ Perl_sv_reset(pTHX_ register char *s, HV *stash) } if (GvHV(gv) && !HvNAME(GvHV(gv))) { hv_clear(GvHV(gv)); -#ifndef VMS /* VMS has no environ array */ +#ifdef USE_ENVIRON_ARRAY if (gv == PL_envgv) environ[0] = Nullch; #endif @@ -5083,7 +5973,6 @@ Perl_sv_2cv(pTHX_ SV *sv, HV **st, GV **gvp, I32 lref) if (SvGMAGICAL(sv)) mg_get(sv); if (SvROK(sv)) { - dTHR; SV **sp = &sv; /* Used in tryAMAGICunDEREF macro. */ tryAMAGICunDEREF(to_cv); @@ -5128,10 +6017,17 @@ Perl_sv_2cv(pTHX_ SV *sv, HV **st, GV **gvp, I32 lref) } } +/* +=for apidoc sv_true + +Returns true if the SV has a true value by Perl's rules. + +=cut +*/ + I32 Perl_sv_true(pTHX_ register SV *sv) { - dTHR; if (!sv) return 0; if (SvPOK(sv)) { @@ -5206,6 +6102,14 @@ Perl_sv_pvn(pTHX_ SV *sv, STRLEN *lp) return sv_2pv(sv, lp); } +/* +=for apidoc sv_pvn_force + +Get a sensible string out of the SV somehow. + +=cut +*/ + char * Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) { @@ -5213,13 +6117,12 @@ Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) if (SvTHINKFIRST(sv) && !SvROK(sv)) sv_force_normal(sv); - + if (SvPOK(sv)) { *lp = SvCUR(sv); } else { if (SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM) { - dTHR; Perl_croak(aTHX_ "Can't coerce %s to string in %s", sv_reftype(sv,0), PL_op_name[PL_op->op_type]); } @@ -5227,7 +6130,7 @@ Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) s = sv_2pv(sv, lp); if (s != SvPVX(sv)) { /* Almost, but not quite, sv_setpvn() */ STRLEN len = *lp; - + if (SvROK(sv)) sv_unref(sv); (void)SvUPGRADE(sv, SVt_PV); /* Never FALSE */ @@ -5278,6 +6181,15 @@ Perl_sv_pvutf8n(pTHX_ SV *sv, STRLEN *lp) return sv_pvn(sv,lp); } +/* +=for apidoc sv_pvutf8n_force + +Get a sensible UTF8-encoded string out of the SV somehow. See +L. + +=cut +*/ + char * Perl_sv_pvutf8n_force(pTHX_ SV *sv, STRLEN *lp) { @@ -5285,6 +6197,14 @@ Perl_sv_pvutf8n_force(pTHX_ SV *sv, STRLEN *lp) return sv_pvn_force(sv,lp); } +/* +=for apidoc sv_reftype + +Returns a string describing what the SV is a reference to. + +=cut +*/ + char * Perl_sv_reftype(pTHX_ SV *sv, int ob) { @@ -5382,7 +6302,6 @@ reference count is 1. SV* Perl_newSVrv(pTHX_ SV *rv, const char *classname) { - dTHR; SV *sv; new_SV(sv); @@ -5390,8 +6309,23 @@ Perl_newSVrv(pTHX_ SV *rv, const char *classname) SV_CHECK_THINKFIRST(rv); SvAMAGIC_off(rv); + if (SvTYPE(rv) >= SVt_PVMG) { + U32 refcnt = SvREFCNT(rv); + SvREFCNT(rv) = 0; + sv_clear(rv); + SvFLAGS(rv) = 0; + SvREFCNT(rv) = refcnt; + } + if (SvTYPE(rv) < SVt_RV) - sv_upgrade(rv, SVt_RV); + sv_upgrade(rv, SVt_RV); + else if (SvTYPE(rv) > SVt_RV) { + (void)SvOOK_off(rv); + if (SvPVX(rv) && SvLEN(rv)) + Safefree(SvPVX(rv)); + SvCUR_set(rv, 0); + SvLEN_set(rv, 0); + } (void)SvOK_off(rv); SvRV(rv) = sv; @@ -5454,6 +6388,25 @@ Perl_sv_setref_iv(pTHX_ SV *rv, const char *classname, IV iv) } /* +=for apidoc sv_setref_uv + +Copies an unsigned integer into a new SV, optionally blessing the SV. The C +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C argument indicates the package for the +blessing. Set C to C to avoid the blessing. The new SV +will be returned and will have a reference count of 1. + +=cut +*/ + +SV* +Perl_sv_setref_uv(pTHX_ SV *rv, const char *classname, UV uv) +{ + sv_setuv(newSVrv(rv,classname), uv); + return rv; +} + +/* =for apidoc sv_setref_nv Copies a double into a new SV, optionally blessing the SV. The C @@ -5507,7 +6460,6 @@ of the SV is unaffected. SV* Perl_sv_bless(pTHX_ SV *sv, HV *stash) { - dTHR; SV *tmpRef; if (!SvROK(sv)) Perl_croak(aTHX_ "Can't bless non-reference value"); @@ -5563,17 +6515,21 @@ S_sv_unglob(pTHX_ SV *sv) } /* -=for apidoc sv_unref +=for apidoc sv_unref_flags Unsets the RV status of the SV, and decrements the reference count of whatever was being referenced by the RV. This can almost be thought of -as a reversal of C. See C. +as a reversal of C. The C argument can contain +C to force the reference count to be decremented +(otherwise the decrementing is conditional on the reference count being +different from one or the reference being a readonly SV). +See C. =cut */ void -Perl_sv_unref(pTHX_ SV *sv) +Perl_sv_unref_flags(pTHX_ SV *sv, U32 flags) { SV* rv = SvRV(sv); @@ -5585,12 +6541,29 @@ Perl_sv_unref(pTHX_ SV *sv) } SvRV(sv) = 0; SvROK_off(sv); - if (SvREFCNT(rv) != 1 || SvREADONLY(rv)) + if (SvREFCNT(rv) != 1 || SvREADONLY(rv) || flags) /* SV_IMMEDIATE_UNREF */ SvREFCNT_dec(rv); - else + else /* XXX Hack, but hard to make $a=$a->[1] work otherwise */ sv_2mortal(rv); /* Schedule for freeing later */ } +/* +=for apidoc sv_unref + +Unsets the RV status of the SV, and decrements the reference count of +whatever was being referenced by the RV. This can almost be thought of +as a reversal of C. This is C with the C +being zero. See C. + +=cut +*/ + +void +Perl_sv_unref(pTHX_ SV *sv) +{ + sv_unref_flags(sv, 0); +} + void Perl_sv_taint(pTHX_ SV *sv) { @@ -5815,6 +6788,21 @@ Perl_sv_vsetpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV sv_vcatpvfn(sv, pat, patlen, args, svargs, svmax, maybe_tainted); } +STATIC I32 +S_expect_number(pTHX_ char** pattern) +{ + I32 var = 0; + switch (**pattern) { + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + while (isDIGIT(**pattern)) + var = var * 10 + (*(*pattern)++ - '0'); + } + return var; +} +#define EXPECT_NUMBER(pattern, var) (var = S_expect_number(aTHX_ &pattern)) + /* =for apidoc sv_vcatpvfn @@ -5830,7 +6818,6 @@ locales). void Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV **svargs, I32 svmax, bool *maybe_tainted) { - dTHR; char *p; char *q; char *patend; @@ -5876,7 +6863,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV bool alt = FALSE; bool left = FALSE; bool vectorize = FALSE; - bool utf = FALSE; + bool vectorarg = FALSE; + bool vec_utf = FALSE; char fill = ' '; char plus = 0; char intsize = 0; @@ -5885,9 +6873,9 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV bool has_precis = FALSE; STRLEN precis = 0; bool is_utf = FALSE; - + char esignbuf[4]; - U8 utf8buf[UTF8_MAXLEN]; + U8 utf8buf[UTF8_MAXLEN+1]; STRLEN esignlen = 0; char *eptr = Nullch; @@ -5913,7 +6901,13 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN gap; char *dotstr = "."; STRLEN dotstrlen = 1; + I32 efix = 0; /* explicit format parameter index */ + I32 ewix = 0; /* explicit width index */ + I32 epix = 0; /* explicit precision index */ + I32 evix = 0; /* explicit vector index */ + bool asterisk = FALSE; + /* echo everything up to the next format specification */ for (q = p; q < patend && *q != '%'; ++q) ; if (q > p) { sv_catpvn(sv, p, q - p); @@ -5922,6 +6916,25 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV if (q++ >= patend) break; +/* + We allow format specification elements in this order: + \d+\$ explicit format parameter index + [-+ 0#]+ flags + \*?(\d+\$)?v vector with optional (optionally specified) arg + \d+|\*(\d+\$)? width using optional (optionally specified) arg + \.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg + [hlqLV] size + [%bcdefginopsux_DFOUX] format (mandatory) +*/ + if (EXPECT_NUMBER(q, width)) { + if (*q == '$') { + ++q; + efix = width; + } else { + goto gotwidth; + } + } + /* FLAGS */ while (*q) { @@ -5945,65 +6958,88 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV q++; continue; - case '*': /* printf("%*vX",":",$ipv6addr) */ - if (q[1] != 'v') - break; - q++; - if (args) - vecsv = va_arg(*args, SV*); - else if (svix < svmax) - vecsv = svargs[svix++]; - else - continue; - dotstr = SvPVx(vecsv,dotstrlen); - if (DO_UTF8(vecsv)) - is_utf = TRUE; - /* FALL THROUGH */ - - case 'v': - vectorize = TRUE; - q++; - continue; - default: break; } break; } - /* WIDTH */ + tryasterisk: + if (*q == '*') { + q++; + if (EXPECT_NUMBER(q, ewix)) + if (*q++ != '$') + goto unknown; + asterisk = TRUE; + } + if (*q == 'v') { + q++; + if (vectorize) + goto unknown; + if ((vectorarg = asterisk)) { + evix = ewix; + ewix = 0; + asterisk = FALSE; + } + vectorize = TRUE; + goto tryasterisk; + } + + if (!asterisk) + EXPECT_NUMBER(q, width); - switch (*q) { - case '1': case '2': case '3': - case '4': case '5': case '6': - case '7': case '8': case '9': - width = 0; - while (isDIGIT(*q)) - width = width * 10 + (*q++ - '0'); - break; + if (vectorize) { + if (vectorarg) { + if (args) + vecsv = va_arg(*args, SV*); + else + vecsv = (evix ? evix <= svmax : svix < svmax) ? + svargs[ewix ? ewix-1 : svix++] : &PL_sv_undef; + dotstr = SvPVx(vecsv, dotstrlen); + if (DO_UTF8(vecsv)) + is_utf = TRUE; + } + if (args) { + vecsv = va_arg(*args, SV*); + vecstr = (U8*)SvPVx(vecsv,veclen); + vec_utf = DO_UTF8(vecsv); + } + else if (efix ? efix <= svmax : svix < svmax) { + vecsv = svargs[efix ? efix-1 : svix++]; + vecstr = (U8*)SvPVx(vecsv,veclen); + vec_utf = DO_UTF8(vecsv); + } + else { + vecstr = (U8*)""; + veclen = 0; + } + } - case '*': + if (asterisk) { if (args) i = va_arg(*args, int); else - i = (svix < svmax) ? SvIVx(svargs[svix++]) : 0; + i = (ewix ? ewix <= svmax : svix < svmax) ? + SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; left |= (i < 0); width = (i < 0) ? -i : i; - q++; - break; } + gotwidth: /* PRECISION */ if (*q == '.') { q++; if (*q == '*') { + q++; + if (EXPECT_NUMBER(q, epix) && *q++ != '$') + goto unknown; if (args) i = va_arg(*args, int); else - i = (svix < svmax) ? SvIVx(svargs[svix++]) : 0; + i = (ewix ? ewix <= svmax : svix < svmax) + ? SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; precis = (i < 0) ? 0 : i; - q++; } else { precis = 0; @@ -6013,36 +7049,22 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV has_precis = TRUE; } - if (vectorize) { - if (args) { - vecsv = va_arg(*args, SV*); - vecstr = (U8*)SvPVx(vecsv,veclen); - utf = DO_UTF8(vecsv); - } - else if (svix < svmax) { - vecsv = svargs[svix++]; - vecstr = (U8*)SvPVx(vecsv,veclen); - utf = DO_UTF8(vecsv); - } - else { - vecstr = (U8*)""; - veclen = 0; - } - } - /* SIZE */ switch (*q) { -#ifdef HAS_QUAD +#if defined(HAS_QUAD) || (defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)) case 'L': /* Ld */ + /* FALL THROUGH */ +#endif +#ifdef HAS_QUAD case 'q': /* qd */ intsize = 'q'; q++; break; #endif case 'l': -#ifdef HAS_QUAD - if (*(q + 1) == 'l') { /* lld */ +#if defined(HAS_QUAD) || (defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)) + if (*(q + 1) == 'l') { /* lld, llf */ intsize = 'q'; q += 2; break; @@ -6058,20 +7080,22 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* CONVERSION */ + if (*q == '%') { + eptr = q++; + elen = 1; + goto string; + } + + if (!args) + argsv = (efix ? efix <= svmax : svix < svmax) ? + svargs[efix ? efix-1 : svix++] : &PL_sv_undef; + switch (c = *q++) { /* STRINGS */ - case '%': - eptr = q - 1; - elen = 1; - goto string; - case 'c': - if (args) - uv = va_arg(*args, int); - else - uv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0; + uv = args ? va_arg(*args, int) : SvIVx(argsv); if ((uv > 255 || (uv > 127 && SvUTF8(sv))) && !IN_BYTE) { eptr = (char*)utf8buf; elen = uv_to_utf8((U8*)eptr, uv) - utf8buf; @@ -6100,8 +7124,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV elen = sizeof nullstr - 1; } } - else if (svix < svmax) { - argsv = svargs[svix++]; + else { eptr = SvPVx(argsv, elen); if (DO_UTF8(argsv)) { if (has_precis && precis < elen) { @@ -6125,7 +7148,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV */ if (!args) goto unknown; - argsv = va_arg(*args,SV*); + argsv = va_arg(*args, SV*); eptr = SvPVx(argsv, elen); if (DO_UTF8(argsv)) is_utf = TRUE; @@ -6141,10 +7164,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV case 'p': if (alt) goto unknown; - if (args) - uv = PTR2UV(va_arg(*args, void*)); - else - uv = (svix < svmax) ? PTR2UV(svargs[svix++]) : 0; + uv = PTR2UV(args ? va_arg(*args, void*) : argsv); base = 16; goto integer; @@ -6158,13 +7178,11 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV case 'd': case 'i': if (vectorize) { - I32 ulen; - if (!veclen) { - vectorize = FALSE; - break; - } - if (utf) - iv = (IV)utf8_to_uv(vecstr, &ulen); + STRLEN ulen; + if (!veclen) + continue; + if (vec_utf) + iv = (IV)utf8_to_uv(vecstr, veclen, &ulen, 0); else { iv = *vecstr; ulen = 1; @@ -6184,7 +7202,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } } else { - iv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0; + iv = SvIVx(argsv); switch (intsize) { case 'h': iv = (short)iv; break; default: break; @@ -6239,14 +7257,12 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV uns_integer: if (vectorize) { - I32 ulen; + STRLEN ulen; vector: - if (!veclen) { - vectorize = FALSE; - break; - } - if (utf) - uv = utf8_to_uv(vecstr, &ulen); + if (!veclen) + continue; + if (vec_utf) + uv = utf8_to_uv(vecstr, veclen, &ulen, 0); else { uv = *vecstr; ulen = 1; @@ -6266,7 +7282,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } } else { - uv = (svix < svmax) ? SvUVx(svargs[svix++]) : 0; + uv = SvUVx(argsv); switch (intsize) { case 'h': uv = (unsigned short)uv; break; default: break; @@ -6355,10 +7371,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* This is evil, but floating point is even more evil */ vectorize = FALSE; - if (args) - nv = va_arg(*args, NV); - else - nv = (svix < svmax) ? SvNVx(svargs[svix++]) : 0.0; + nv = args ? va_arg(*args, NV) : SvNVx(argsv); need = 0; if (c != 'e' && c != 'E') { @@ -6384,11 +7397,14 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV eptr = ebuf + sizeof ebuf; *--eptr = '\0'; *--eptr = c; -#ifdef USE_LONG_DOUBLE +#if defined(USE_LONG_DOUBLE) && defined(PERL_PRIfldbl) { - static char const my_prifldbl[] = PERL_PRIfldbl; - char const *p = my_prifldbl + sizeof my_prifldbl - 3; - while (p >= my_prifldbl) { *--eptr = *p--; } + /* Copy the one or more characters in a long double + * format before the 'base' ([efgEFG]) character to + * the format string. */ + static char const prifldbl[] = PERL_PRIfldbl; + char const *p = prifldbl + sizeof(prifldbl) - 3; + while (p >= prifldbl) { *--eptr = *p--; } } #endif if (has_precis) { @@ -6410,11 +7426,10 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV *--eptr = '#'; *--eptr = '%'; - { - RESTORE_NUMERIC_STANDARD(); - (void)sprintf(PL_efloatbuf, eptr, nv); - RESTORE_NUMERIC_LOCAL(); - } + /* No taint. Otherwise we are in the strange situation + * where printf() taints but print($float) doesn't. + * --jhi */ + (void)sprintf(PL_efloatbuf, eptr, nv); eptr = PL_efloatbuf; elen = strlen(PL_efloatbuf); @@ -6436,8 +7451,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV #endif } } - else if (svix < svmax) - sv_setuv_mg(svargs[svix++], (UV)i); + else + sv_setuv_mg(argsv, (UV)i); continue; /* not "break" */ /* UNKNOWN */ @@ -6452,7 +7467,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV (PL_op->op_type == OP_PRTF) ? "printf" : "sprintf"); if (c) { if (isPRINT(c)) - Perl_sv_catpvf(aTHX_ msg, + Perl_sv_catpvf(aTHX_ msg, "\"%%%c\"", c & 0xFF); else Perl_sv_catpvf(aTHX_ msg, @@ -6472,7 +7487,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* ... right here, because formatting flags should not apply */ SvGROW(sv, SvCUR(sv) + elen + 1); p = SvEND(sv); - memcpy(p, eptr, elen); + Copy(eptr, p, elen, char); p += elen; *p = '\0'; SvCUR(sv) = p - SvPVX(sv); @@ -6502,7 +7517,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV *p++ = '0'; } if (elen) { - memcpy(p, eptr, elen); + Copy(eptr, p, elen, char); p += elen; } if (gap && left) { @@ -6511,7 +7526,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } if (vectorize) { if (veclen) { - memcpy(p, dotstr, dotstrlen); + Copy(dotstr, p, dotstrlen, char); p += dotstrlen; } else @@ -6573,7 +7588,7 @@ Perl_fp_dup(pTHX_ PerlIO *fp, char type) return ret; /* create anew and remember what it is */ - ret = PerlIO_fdupopen(fp); + ret = PerlIO_fdupopen(aTHX_ fp); ptr_table_store(PL_ptr_table, fp, ret); return ret; } @@ -6755,10 +7770,110 @@ Perl_ptr_table_split(pTHX_ PTR_TBL_t *tbl) } } +void +Perl_ptr_table_clear(pTHX_ PTR_TBL_t *tbl) +{ + register PTR_TBL_ENT_t **array; + register PTR_TBL_ENT_t *entry; + register PTR_TBL_ENT_t *oentry = Null(PTR_TBL_ENT_t*); + UV riter = 0; + UV max; + + if (!tbl || !tbl->tbl_items) { + return; + } + + array = tbl->tbl_ary; + entry = array[0]; + max = tbl->tbl_max; + + for (;;) { + if (entry) { + oentry = entry; + entry = entry->next; + Safefree(oentry); + } + if (!entry) { + if (++riter > max) { + break; + } + entry = array[riter]; + } + } + + tbl->tbl_items = 0; +} + +void +Perl_ptr_table_free(pTHX_ PTR_TBL_t *tbl) +{ + if (!tbl) { + return; + } + ptr_table_clear(tbl); + Safefree(tbl->tbl_ary); + Safefree(tbl); +} + #ifdef DEBUGGING char *PL_watch_pvx; #endif +STATIC SV * +S_gv_share(pTHX_ SV *sstr) +{ + GV *gv = (GV*)sstr; + SV *sv = &PL_sv_no; /* just need SvREADONLY-ness */ + + if (GvIO(gv) || GvFORM(gv)) { + GvSHARED_off(gv); /* GvIOs cannot be shared. nor can GvFORMs */ + } + else if (!GvCV(gv)) { + GvCV(gv) = (CV*)sv; + } + else { + /* CvPADLISTs cannot be shared */ + if (!CvXSUB(GvCV(gv))) { + GvSHARED_off(gv); + } + } + + if (!GvSHARED(gv)) { +#if 0 + PerlIO_printf(Perl_debug_log, "gv_share: unable to share %s::%s\n", + HvNAME(GvSTASH(gv)), GvNAME(gv)); +#endif + return Nullsv; + } + + /* + * write attempts will die with + * "Modification of a read-only value attempted" + */ + if (!GvSV(gv)) { + GvSV(gv) = sv; + } + else { + SvREADONLY_on(GvSV(gv)); + } + + if (!GvAV(gv)) { + GvAV(gv) = (AV*)sv; + } + else { + SvREADONLY_on(GvAV(gv)); + } + + if (!GvHV(gv)) { + GvHV(gv) = (HV*)sv; + } + else { + SvREADONLY_on(GvAV(gv)); + } + + return sstr; /* he_dup() will SvREFCNT_inc() */ +} + SV * Perl_sv_dup(pTHX_ SV *sstr) { @@ -6891,6 +8006,18 @@ Perl_sv_dup(pTHX_ SV *sstr) LvTYPE(dstr) = LvTYPE(sstr); break; case SVt_PVGV: + if (GvSHARED((GV*)sstr)) { + SV *share; + if ((share = gv_share(sstr))) { + del_SV(dstr); + dstr = share; +#if 0 + PerlIO_printf(Perl_debug_log, "sv_dup: sharing %s::%s\n", + HvNAME(GvSTASH(share)), GvNAME(share)); +#endif + break; + } + } SvANY(dstr) = new_XPVGV(); SvCUR(dstr) = SvCUR(sstr); SvLEN(dstr) = SvLEN(sstr); @@ -7449,6 +8576,14 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl) av = (AV*)POPPTR(ss,ix); TOPPTR(nss,ix) = av_dup(av); break; + case SAVEt_PADSV: + longval = (long)POPLONG(ss,ix); + TOPLONG(nss,ix) = longval; + ptr = POPPTR(ss,ix); + TOPPTR(nss,ix) = any_dup(ptr, proto_perl); + sv = (SV*)POPPTR(ss,ix); + TOPPTR(nss,ix) = sv_dup(sv); + break; default: Perl_croak(aTHX_ "panic: ss_dup inconsistency"); } @@ -7508,6 +8643,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_scopestack = 0; PL_savestack = 0; PL_retstack = 0; + PL_sig_pending = 0; # else /* !DEBUGGING */ Zero(my_perl, 1, PerlInterpreter); # endif /* DEBUGGING */ @@ -7534,6 +8670,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_scopestack = 0; PL_savestack = 0; PL_retstack = 0; + PL_sig_pending = 0; # else /* !DEBUGGING */ Zero(my_perl, 1, PerlInterpreter); # endif /* DEBUGGING */ @@ -7622,6 +8759,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, ptr_table_store(PL_ptr_table, &proto_perl->Icompiling, &PL_compiling); if (!specialWARN(PL_compiling.cop_warnings)) PL_compiling.cop_warnings = sv_dup_inc(PL_compiling.cop_warnings); + if (!specialCopIO(PL_compiling.cop_io)) + PL_compiling.cop_io = sv_dup_inc(PL_compiling.cop_io); PL_curcop = (COP*)any_dup(proto_perl->Tcurcop, proto_perl); /* pseudo environmental stuff */ @@ -7747,8 +8886,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_laststype = proto_perl->Ilaststype; PL_mess_sv = Nullsv; - PL_orslen = proto_perl->Iorslen; - PL_ors = SAVEPVN(proto_perl->Iors, PL_orslen); + PL_ors_sv = sv_dup_inc(proto_perl->Iors_sv); PL_ofmt = SAVEPV(proto_perl->Iofmt); /* interpreter atexit processing */ @@ -7890,7 +9028,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_numeric_name = SAVEPV(proto_perl->Inumeric_name); PL_numeric_standard = proto_perl->Inumeric_standard; PL_numeric_local = proto_perl->Inumeric_local; - PL_numeric_radix = proto_perl->Inumeric_radix; + PL_numeric_radix = sv_dup_inc(proto_perl->Inumeric_radix); #endif /* !USE_LOCALE_NUMERIC */ /* utf8 character classes */ @@ -7932,12 +9070,18 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_uudmap['M'] = 0; /* reinits on demand */ PL_bitcount = Nullch; /* reinits on demand */ + if (proto_perl->Ipsig_pend) { + Newz(0, PL_psig_pend, SIG_SIZE, int); + } + else { + PL_psig_pend = (int*)NULL; + } + if (proto_perl->Ipsig_ptr) { - int sig_num[] = { SIG_NUM }; - Newz(0, PL_psig_ptr, sizeof(sig_num)/sizeof(*sig_num), SV*); - Newz(0, PL_psig_name, sizeof(sig_num)/sizeof(*sig_num), SV*); - for (i = 1; PL_sig_name[i]; i++) { - PL_psig_ptr[i] = sv_dup_inc(proto_perl->Ipsig_ptr[i]); + Newz(0, PL_psig_ptr, SIG_SIZE, SV*); + Newz(0, PL_psig_name, SIG_SIZE, SV*); + for (i = 1; i < SIG_SIZE; i++) { + PL_psig_ptr[i] = sv_dup_inc(proto_perl->Ipsig_ptr[i]); PL_psig_name[i] = sv_dup_inc(proto_perl->Ipsig_name[i]); } } @@ -7948,7 +9092,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* thrdvar.h stuff */ - if (flags & 1) { + if (flags & CLONEf_COPY_STACKS) { /* next allocation will be PL_tmps_stack[PL_tmps_ix+1] */ PL_tmps_ix = proto_perl->Ttmps_ix; PL_tmps_max = proto_perl->Ttmps_max; @@ -8031,8 +9175,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_nrs = sv_dup_inc(proto_perl->Tnrs); PL_rs = sv_dup_inc(proto_perl->Trs); PL_last_in_gv = gv_dup(proto_perl->Tlast_in_gv); - PL_ofslen = proto_perl->Tofslen; - PL_ofs = SAVEPVN(proto_perl->Tofs, PL_ofslen); + PL_ofs_sv = sv_dup_inc(proto_perl->Tofs_sv); PL_defoutgv = gv_dup_inc(proto_perl->Tdefoutgv); PL_chopset = proto_perl->Tchopset; /* XXX never deallocated */ PL_toptarget = sv_dup_inc(proto_perl->Ttoptarget); @@ -8135,6 +9278,11 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_reginterp_cnt = 0; PL_reg_starttry = 0; + if (!(flags & CLONEf_KEEP_PTR_TABLE)) { + ptr_table_free(PL_ptr_table); + PL_ptr_table = NULL; + } + #ifdef PERL_OBJECT return (PerlInterpreter*)pPerl; #else @@ -8166,9 +9314,15 @@ do_clean_objs(pTHXo_ SV *sv) if (SvROK(sv) && SvOBJECT(rv = SvRV(sv))) { DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(sv));) - SvROK_off(sv); - SvRV(sv) = 0; - SvREFCNT_dec(rv); + if (SvWEAKREF(sv)) { + sv_del_backref(sv); + SvWEAKREF_off(sv); + SvRV(sv) = 0; + } else { + SvROK_off(sv); + SvRV(sv) = 0; + SvREFCNT_dec(rv); + } } /* XXX Might want to check arrays, etc. */