X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=sv.c;h=341792412b0e0ca51efc553005ef7ddb5d56f04c;hb=9aa983d27b0af31badfcbbb76567f6e557076b41;hp=3b3351cb2a3bafe1e067548e59d3d60e10a138b6;hpb=28e5dec85047e189010079efa89eed07bc9eddc8;p=p5sagit%2Fp5-mst-13.2.git diff --git a/sv.c b/sv.c index 3b3351c..3417924 100644 --- a/sv.c +++ b/sv.c @@ -1,6 +1,6 @@ /* sv.c * - * Copyright (c) 1991-2000, Larry Wall + * Copyright (c) 1991-2001, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -1320,6 +1320,14 @@ See C. void Perl_sv_setuv(pTHX_ register SV *sv, UV u) { + /* With these two if statements: + u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 + + without + u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 + + If you wish to remove them, please benchmark to see what the effect is + */ if (u <= (UV)IV_MAX) { sv_setiv(sv, (IV)u); return; @@ -1340,6 +1348,14 @@ Like C, but also handles 'set' magic. void Perl_sv_setuv_mg(pTHX_ register SV *sv, UV u) { + /* With these two if statements: + u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 + + without + u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 + + If you wish to remove them, please benchmark to see what the effect is + */ if (u <= (UV)IV_MAX) { sv_setiv(sv, (IV)u); } else { @@ -1511,7 +1527,7 @@ S_not_a_number(pTHX_ SV *sv) Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flags meaning changes - now IV and NV together means that the two are interchangeable SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX; - + The benefit of this is operations such as pp_add know that if SvIOK is true for both left and right operands, then integer addition can be used instead of floating point. (for cases where the result won't @@ -1551,7 +1567,7 @@ STATIC int S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { NV nv = SvNVX(sv); /* Code simpler and had compiler problems if */ UV nv_as_uv = U_V(nv); /* these are not in simple variables. */ - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2inuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%X\n", SvPVX(sv), SvIVX(sv), nv, numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2inuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), nv, (UV)numtype)); if (nv_as_uv <= (UV)IV_MAX) { (void)SvIOKp_on(sv); (void)SvNOKp_on(sv); @@ -1609,7 +1625,7 @@ S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { #else /* We've just lost integer precision, nothing we could do. */ SvUVX(sv) = nv_as_uv; - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2niuv_non UV? '%s', UV=0x%"UVxf" NV=%g U_V(NV)=0x%"UVxf" inttype=%X\n", SvPVX(sv), SvIVX(sv), nv, nv_as_uv, numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2niuv_non UV? '%s', UV=0x%"UVxf" NV=%g U_V(NV)=0x%"UVxf" inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), nv, nv_as_uv, (UV)numtype)); /* UV and NV slots equally valid only if we have casting symmetry. */ if (numtype & IS_NUMBER_NOT_INT) { SvIsUV_on(sv); @@ -1632,7 +1648,7 @@ S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { STATIC int S_sv_2iuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%X\n", SvPVX(sv), SvIVX(sv), SvNVX(sv), numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); if (SvNVX(sv) < (NV)IV_MIN) { (void)SvIOKp_on(sv); (void)SvNOK_on(sv); @@ -1662,17 +1678,22 @@ S_sv_2iuv_non_preserve (pTHX_ register SV *sv, I32 numtype) SvIsUV_on(sv); SvUVX(sv) = U_V(SvNVX(sv)); if ((NV)(SvUVX(sv)) == SvNVX(sv)) { + if (SvUVX(sv) == UV_MAX) { + /* As we know that NVs don't preserve UVs, UV_MAX cannot + possibly be preserved by NV. Hence, it must be overflow. + NOK, IOKp */ + return IS_NUMBER_OVERFLOW_UV; + } SvIOK_on(sv); /* Integer is precise. NOK, UOK */ } else { /* Integer is imprecise. NOK, IOKp */ } return IS_NUMBER_OVERFLOW_IV; } - return S_sv_2inuv_non_preserve (sv, numtype); + return S_sv_2inuv_non_preserve(aTHX_ sv, numtype); } #endif /* NV_PRESERVES_UV*/ - IV Perl_sv_2iv(pTHX_ register SV *sv) { @@ -1771,7 +1792,7 @@ Perl_sv_2iv(pTHX_ register SV *sv) (NV)UVX == NVX are both true, but the values differ. :-( Hopefully for 2s complement IV_MIN is something like 0x8000000000000000 which will be exact. NWC */ - } + } else { SvUVX(sv) = U_V(SvNVX(sv)); if ( @@ -2022,7 +2043,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) (NV)UVX == NVX are both true, but the values differ. :-( Hopefully for 2s complement IV_MIN is something like 0x8000000000000000 which will be exact. NWC */ - } + } else { SvUVX(sv) = U_V(SvNVX(sv)); if ( @@ -2067,17 +2088,32 @@ Perl_sv_2uv(pTHX_ register SV *sv) } else { #ifdef HAS_STRTOUL UV u; + char *num_begin = SvPVX(sv); int save_errno = errno; + + /* seems that strtoul taking numbers that start with - is + implementation dependant, and can't be relied upon. */ + if (numtype & IS_NUMBER_NEG) { + /* Not totally defensive. assumine that looks_like_num + didn't lie about a - sign */ + while (isSPACE(*num_begin)) + num_begin++; + if (*num_begin == '-') + num_begin++; + } + /* Is it an integer that we could convert with strtoul? So try it, and if it doesn't set errno then it's pukka. This should be faster than going atof and then thinking. */ if (((numtype & (IS_NUMBER_TO_INT_BY_STRTOL | IS_NUMBER_NOT_INT)) == IS_NUMBER_TO_INT_BY_STRTOL) && ((errno = 0), 1) /* always true */ - && ((u = Strtoul(SvPVX(sv), Null(char**), 10)), 1) /* ditto */ + && ((u = Strtoul(num_begin, Null(char**), 10)), 1) /* ditto */ && (errno == 0) - /* If known to be negative, check it didn't undeflow IV */ - && ((numtype & IS_NUMBER_NEG) ? ((IV)u <= 0) : 1)) { + /* If known to be negative, check it didn't undeflow IV + XXX possibly we should put more negative values as NVs + direct rather than go via atof below */ + && ((numtype & IS_NUMBER_NEG) ? (u <= (UV)IV_MIN) : 1)) { errno = save_errno; if (SvTYPE(sv) < SVt_PVIV) @@ -2086,14 +2122,9 @@ Perl_sv_2uv(pTHX_ register SV *sv) /* If it's negative must use IV. IV-over-UV optimisation */ - if (numtype & IS_NUMBER_NEG || u <= (UV) IV_MAX) { - /* strtoul is defined to return negated value if the - number starts with a minus sign. Assuming 2s - complement, this value will be in range for a negative - IV if casting the bit pattern to IV doesn't produce - a positive value. Allow -0 by checking it's <= 0 - hence (numtype & IS_NUMBER_NEG) test above - */ + if (numtype & IS_NUMBER_NEG) { + SvIVX(sv) = -(IV)u; + } else if (u <= (UV) IV_MAX) { SvIVX(sv) = (IV)u; } else { /* it didn't overflow, and it was positive. */ @@ -2386,7 +2417,7 @@ S_asUV(pTHX_ SV *sv) * LONG_MAX and LONG_MIN when given out of range values. ANSI says they should * do this, and vendors have had 11 years to get it right. * However, will try to make it still work with only atol - * + * * IS_NUMBER_TO_INT_BY_ATOL 123456789 or 123456789.3 definitely < IV_MAX * IS_NUMBER_TO_INT_BY_STRTOL 123456789 or 123456789.3 if digits = IV_MAX * IS_NUMBER_TO_INT_BY_ATOF 123456789e0 or >> IV_MAX @@ -2440,7 +2471,7 @@ Perl_looks_like_number(pTHX_ SV *sv) nbegin = s; /* - * we return IS_NUMBER_TO_INT_BY_ATOL if the number can converted to + * we return IS_NUMBER_TO_INT_BY_ATOL if the number can converted to * integer with atol() without overflow, IS_NUMBER_TO_INT_BY_STRTOL if * possibly slightly larger than max int, IS_NUMBER_TO_INT_BY_ATOF if you * will need (int)atof(). @@ -2920,7 +2951,7 @@ Perl_sv_utf8_upgrade(pTHX_ register SV *sv) e = SvEND(sv); t = s; while (t < e) { - if ((hibit = *t++ & 0x80)) + if ((hibit = UTF8_IS_CONTINUED(*t++))) break; } @@ -3012,7 +3043,7 @@ Perl_sv_utf8_decode(pTHX_ register SV *sv) return FALSE; e = SvEND(sv); while (c < e) { - if (*c++ & 0x80) { + if (UTF8_IS_CONTINUED(*c++)) { SvUTF8_on(sv); break; } @@ -3347,14 +3378,19 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) SvRV(dstr) = SvREFCNT_inc(SvRV(sstr)); SvROK_on(dstr); if (sflags & SVp_NOK) { - SvNOK_on(dstr); + SvNOKp_on(dstr); + /* Only set the public OK flag if the source has public OK. */ + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; SvNVX(dstr) = SvNVX(sstr); } if (sflags & SVp_IOK) { - (void)SvIOK_on(dstr); - SvIVX(dstr) = SvIVX(sstr); + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; if (sflags & SVf_IVisUV) SvIsUV_on(dstr); + SvIVX(dstr) = SvIVX(sstr); } if (SvAMAGIC(sstr)) { SvAMAGIC_on(dstr); @@ -3404,36 +3440,51 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) *SvEND(dstr) = '\0'; (void)SvPOK_only(dstr); } - if ((sflags & SVf_UTF8) && !IN_BYTE) + if (sflags & SVf_UTF8) SvUTF8_on(dstr); /*SUPPRESS 560*/ if (sflags & SVp_NOK) { - SvNOK_on(dstr); + SvNOKp_on(dstr); + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; SvNVX(dstr) = SvNVX(sstr); } if (sflags & SVp_IOK) { - (void)SvIOK_on(dstr); - SvIVX(dstr) = SvIVX(sstr); + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; if (sflags & SVf_IVisUV) SvIsUV_on(dstr); - } - } - else if (sflags & SVp_NOK) { - SvNVX(dstr) = SvNVX(sstr); - (void)SvNOK_only(dstr); - if (sflags & SVf_IOK) { - (void)SvIOK_on(dstr); SvIVX(dstr) = SvIVX(sstr); - /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ - if (sflags & SVf_IVisUV) - SvIsUV_on(dstr); } } else if (sflags & SVp_IOK) { - (void)SvIOK_only(dstr); - SvIVX(dstr) = SvIVX(sstr); + if (sflags & SVf_IOK) + (void)SvIOK_only(dstr); + else { + SvOK_off(dstr); + SvIOKp_on(dstr); + } + /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ if (sflags & SVf_IVisUV) SvIsUV_on(dstr); + SvIVX(dstr) = SvIVX(sstr); + if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_on(dstr); + else + (void)SvNOKp_on(dstr); + SvNVX(dstr) = SvNVX(sstr); + } + } + else if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_only(dstr); + else { + SvOK_off(dstr); + SvNOKp_on(dstr); + } + SvNVX(dstr) = SvNVX(sstr); } else { if (dtype == SVt_PVGV) { @@ -3492,7 +3543,7 @@ Perl_sv_setpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN Move(ptr,dptr,len,char); dptr[len] = '\0'; SvCUR_set(sv, len); - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3536,7 +3587,7 @@ Perl_sv_setpv(pTHX_ register SV *sv, register const char *ptr) SvGROW(sv, len + 1); Move(ptr,SvPVX(sv),len+1,char); SvCUR_set(sv, len); - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3586,7 +3637,7 @@ Perl_sv_usepvn(pTHX_ register SV *sv, register char *ptr, register STRLEN len) SvCUR_set(sv, len); SvLEN_set(sv, len+1); *SvEND(sv) = '\0'; - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3723,27 +3774,42 @@ Perl_sv_catpvn_mg(pTHX_ register SV *sv, register const char *ptr, register STRL /* =for apidoc sv_catsv -Concatenates the string from SV C onto the end of the string in SV -C. Handles 'get' magic, but not 'set' magic. See C. +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. Handles 'get' magic, but +not 'set' magic. See C. -=cut -*/ +=cut */ void -Perl_sv_catsv(pTHX_ SV *dstr, register SV *sstr) +Perl_sv_catsv(pTHX_ SV *dsv, register SV *ssv) { - char *s; - STRLEN len; - if (!sstr) + char *spv; + STRLEN slen; + if (!ssv) return; - if ((s = SvPV(sstr, len))) { - if (DO_UTF8(sstr)) { - sv_utf8_upgrade(dstr); - sv_catpvn(dstr,s,len); - SvUTF8_on(dstr); + if ((spv = SvPV(ssv, slen))) { + bool dutf8 = DO_UTF8(dsv); + bool sutf8 = DO_UTF8(ssv); + + if (dutf8 == sutf8) + sv_catpvn(dsv,spv,slen); + else { + if (dutf8) { + /* Not modifying source SV, so taking a temporary copy. */ + SV* csv = sv_2mortal(newSVsv(ssv)); + char *cpv; + STRLEN clen; + + sv_utf8_upgrade(csv); + cpv = SvPV(csv,clen); + sv_catpvn(dsv,cpv,clen); + } + else { + sv_utf8_upgrade(dsv); + sv_catpvn(dsv,spv,slen); + SvUTF8_on(dsv); /* If dsv has no wide characters. */ + } } - else - sv_catpvn(dstr,s,len); } } @@ -3756,10 +3822,10 @@ Like C, but also handles 'set' magic. */ void -Perl_sv_catsv_mg(pTHX_ SV *dstr, register SV *sstr) +Perl_sv_catsv_mg(pTHX_ SV *dsv, register SV *ssv) { - sv_catsv(dstr,sstr); - SvSETMAGIC(dstr); + sv_catsv(dsv,ssv); + SvSETMAGIC(dsv); } /* @@ -3877,7 +3943,7 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam mg->mg_virtual = &PL_vtbl_amagicelem; break; case 'c': - mg->mg_virtual = 0; + mg->mg_virtual = &PL_vtbl_ovrld; break; case 'B': mg->mg_virtual = &PL_vtbl_bm; @@ -4246,7 +4312,7 @@ Perl_sv_clear(pTHX_ register SV *sv) SvREADONLY_on(&tmpref); /* DESTROY() could be naughty */ SvREFCNT(&tmpref) = 1; - do { + do { stash = SvSTASH(sv); destructor = StashHANDLER(stash,DESTROY); if (destructor) { @@ -4512,11 +4578,9 @@ Perl_sv_len_utf8(pTHX_ register SV *sv) if (!sv) return 0; -#ifdef NOTYET if (SvGMAGICAL(sv)) return mg_length(sv); else -#endif { STRLEN len; U8 *s = (U8*)SvPV(sv, len); @@ -4568,17 +4632,18 @@ Perl_sv_pos_b2u(pTHX_ register SV *sv, I32* offsetp) s = (U8*)SvPV(sv, len); if (len < *offsetp) - Perl_croak(aTHX_ "panic: bad byte offset"); + Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset"); send = s + *offsetp; len = 0; while (s < send) { - s += UTF8SKIP(s); - ++len; - } - if (s != send) { - if (ckWARN_d(WARN_UTF8)) - Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character"); - --len; + STRLEN n; + + if (utf8_to_uv(s, UTF8SKIP(s), &n, 0)) { + s += n; + len++; + } + else + break; } *offsetp = len; return; @@ -4620,13 +4685,30 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) /* do not utf8ize the comparands as a side-effect */ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + if (PL_hints & HINT_UTF8_DISTINCT) + return FALSE; + if (SvUTF8(sv1)) { - pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); - pv2tmp = TRUE; + (void)utf8_to_bytes((U8*)(pv1 = savepvn(pv1, cur1)), &cur1); + { + IV scur1 = cur1; + if (scur1 < 0) { + Safefree(pv1); + return 0; + } + } + pv1tmp = TRUE; } else { - pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1); - pv1tmp = TRUE; + (void)utf8_to_bytes((U8*)(pv2 = savepvn(pv2, cur2)), &cur2); + { + IV scur2 = cur2; + if (scur2 < 0) { + Safefree(pv2); + return 0; + } + } + pv2tmp = TRUE; } } @@ -4676,6 +4758,9 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) /* do not utf8ize the comparands as a side-effect */ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + if (PL_hints & HINT_UTF8_DISTINCT) + return SvUTF8(sv1) ? 1 : -1; + if (SvUTF8(sv1)) { pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); pv2tmp = TRUE; @@ -5155,7 +5240,7 @@ Perl_sv_inc(pTHX_ register SV *sv) if ((flags & (SVp_NOK|SVp_IOK)) == SVp_NOK) { /* It's (privately or publicly) a float, but not tested as an integer, so test it to see. */ - (void) SvIV(sv); + (void) SvIV(sv); flags = SvFLAGS(sv); } if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { @@ -5206,7 +5291,7 @@ Perl_sv_inc(pTHX_ register SV *sv) so $a="9.22337203685478e+18"; $a+0; $a++ needs to be the same as $a="9.22337203685478e+18"; $a++ or we go insane. */ - + (void) sv_2iv(sv); if (SvIOK(sv)) goto oops_its_int; @@ -5349,7 +5434,7 @@ Perl_sv_dec(pTHX_ register SV *sv) so $a="9.22337203685478e+18"; $a+0; $a-- needs to be the same as $a="9.22337203685478e+18"; $a-- or we go insane. */ - + (void) sv_2iv(sv); if (SvIOK(sv)) goto oops_its_int;