X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=sv.c;h=341792412b0e0ca51efc553005ef7ddb5d56f04c;hb=9aa983d27b0af31badfcbbb76567f6e557076b41;hp=208cc10573b0ab9295cab8b31884a3322c8b1d45;hpb=f0e40504961b24be8b96e8007fefc833f9e69328;p=p5sagit%2Fp5-mst-13.2.git diff --git a/sv.c b/sv.c index 208cc10..3417924 100644 --- a/sv.c +++ b/sv.c @@ -1,6 +1,6 @@ /* sv.c * - * Copyright (c) 1991-2000, Larry Wall + * Copyright (c) 1991-2001, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -1322,10 +1322,10 @@ Perl_sv_setuv(pTHX_ register SV *sv, UV u) { /* With these two if statements: u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 - + without u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 - + If you wish to remove them, please benchmark to see what the effect is */ if (u <= (UV)IV_MAX) { @@ -1350,10 +1350,10 @@ Perl_sv_setuv_mg(pTHX_ register SV *sv, UV u) { /* With these two if statements: u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 - + without u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 - + If you wish to remove them, please benchmark to see what the effect is */ if (u <= (UV)IV_MAX) { @@ -1527,7 +1527,7 @@ S_not_a_number(pTHX_ SV *sv) Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flags meaning changes - now IV and NV together means that the two are interchangeable SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX; - + The benefit of this is operations such as pp_add know that if SvIOK is true for both left and right operands, then integer addition can be used instead of floating point. (for cases where the result won't @@ -1567,7 +1567,7 @@ STATIC int S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { NV nv = SvNVX(sv); /* Code simpler and had compiler problems if */ UV nv_as_uv = U_V(nv); /* these are not in simple variables. */ - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2inuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%X\n", SvPVX(sv), SvIVX(sv), nv, numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2inuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), nv, (UV)numtype)); if (nv_as_uv <= (UV)IV_MAX) { (void)SvIOKp_on(sv); (void)SvNOKp_on(sv); @@ -1625,7 +1625,7 @@ S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { #else /* We've just lost integer precision, nothing we could do. */ SvUVX(sv) = nv_as_uv; - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2niuv_non UV? '%s', UV=0x%"UVxf" NV=%g U_V(NV)=0x%"UVxf" inttype=%X\n", SvPVX(sv), SvIVX(sv), nv, nv_as_uv, numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2niuv_non UV? '%s', UV=0x%"UVxf" NV=%g U_V(NV)=0x%"UVxf" inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), nv, nv_as_uv, (UV)numtype)); /* UV and NV slots equally valid only if we have casting symmetry. */ if (numtype & IS_NUMBER_NOT_INT) { SvIsUV_on(sv); @@ -1648,7 +1648,7 @@ S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { STATIC int S_sv_2iuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%X\n", SvPVX(sv), SvIVX(sv), SvNVX(sv), numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); if (SvNVX(sv) < (NV)IV_MIN) { (void)SvIOKp_on(sv); (void)SvNOK_on(sv); @@ -1678,6 +1678,12 @@ S_sv_2iuv_non_preserve (pTHX_ register SV *sv, I32 numtype) SvIsUV_on(sv); SvUVX(sv) = U_V(SvNVX(sv)); if ((NV)(SvUVX(sv)) == SvNVX(sv)) { + if (SvUVX(sv) == UV_MAX) { + /* As we know that NVs don't preserve UVs, UV_MAX cannot + possibly be preserved by NV. Hence, it must be overflow. + NOK, IOKp */ + return IS_NUMBER_OVERFLOW_UV; + } SvIOK_on(sv); /* Integer is precise. NOK, UOK */ } else { /* Integer is imprecise. NOK, IOKp */ @@ -1786,7 +1792,7 @@ Perl_sv_2iv(pTHX_ register SV *sv) (NV)UVX == NVX are both true, but the values differ. :-( Hopefully for 2s complement IV_MIN is something like 0x8000000000000000 which will be exact. NWC */ - } + } else { SvUVX(sv) = U_V(SvNVX(sv)); if ( @@ -2037,7 +2043,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) (NV)UVX == NVX are both true, but the values differ. :-( Hopefully for 2s complement IV_MIN is something like 0x8000000000000000 which will be exact. NWC */ - } + } else { SvUVX(sv) = U_V(SvNVX(sv)); if ( @@ -2084,7 +2090,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) UV u; char *num_begin = SvPVX(sv); int save_errno = errno; - + /* seems that strtoul taking numbers that start with - is implementation dependant, and can't be relied upon. */ if (numtype & IS_NUMBER_NEG) { @@ -2095,7 +2101,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) if (*num_begin == '-') num_begin++; } - + /* Is it an integer that we could convert with strtoul? So try it, and if it doesn't set errno then it's pukka. This should be faster than going atof and then thinking. */ @@ -2104,7 +2110,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) && ((errno = 0), 1) /* always true */ && ((u = Strtoul(num_begin, Null(char**), 10)), 1) /* ditto */ && (errno == 0) - /* If known to be negative, check it didn't undeflow IV + /* If known to be negative, check it didn't undeflow IV XXX possibly we should put more negative values as NVs direct rather than go via atof below */ && ((numtype & IS_NUMBER_NEG) ? (u <= (UV)IV_MIN) : 1)) { @@ -2411,7 +2417,7 @@ S_asUV(pTHX_ SV *sv) * LONG_MAX and LONG_MIN when given out of range values. ANSI says they should * do this, and vendors have had 11 years to get it right. * However, will try to make it still work with only atol - * + * * IS_NUMBER_TO_INT_BY_ATOL 123456789 or 123456789.3 definitely < IV_MAX * IS_NUMBER_TO_INT_BY_STRTOL 123456789 or 123456789.3 if digits = IV_MAX * IS_NUMBER_TO_INT_BY_ATOF 123456789e0 or >> IV_MAX @@ -2465,7 +2471,7 @@ Perl_looks_like_number(pTHX_ SV *sv) nbegin = s; /* - * we return IS_NUMBER_TO_INT_BY_ATOL if the number can converted to + * we return IS_NUMBER_TO_INT_BY_ATOL if the number can converted to * integer with atol() without overflow, IS_NUMBER_TO_INT_BY_STRTOL if * possibly slightly larger than max int, IS_NUMBER_TO_INT_BY_ATOF if you * will need (int)atof(). @@ -2934,7 +2940,7 @@ Perl_sv_utf8_upgrade(pTHX_ register SV *sv) char *s, *t, *e; int hibit = 0; - if (!sv || !SvPOK(sv) || !SvCUR(sv) || SvUTF8(sv)) + if (!sv || !SvPOK(sv) || SvUTF8(sv)) return; /* This function could be much more efficient if we had a FLAG in SVs @@ -2945,7 +2951,7 @@ Perl_sv_utf8_upgrade(pTHX_ register SV *sv) e = SvEND(sv); t = s; while (t < e) { - if ((hibit = *t++ & 0x80)) + if ((hibit = UTF8_IS_CONTINUED(*t++))) break; } @@ -3037,7 +3043,7 @@ Perl_sv_utf8_decode(pTHX_ register SV *sv) return FALSE; e = SvEND(sv); while (c < e) { - if (*c++ & 0x80) { + if (UTF8_IS_CONTINUED(*c++)) { SvUTF8_on(sv); break; } @@ -3372,14 +3378,19 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) SvRV(dstr) = SvREFCNT_inc(SvRV(sstr)); SvROK_on(dstr); if (sflags & SVp_NOK) { - SvNOK_on(dstr); + SvNOKp_on(dstr); + /* Only set the public OK flag if the source has public OK. */ + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; SvNVX(dstr) = SvNVX(sstr); } if (sflags & SVp_IOK) { - (void)SvIOK_on(dstr); - SvIVX(dstr) = SvIVX(sstr); + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; if (sflags & SVf_IVisUV) SvIsUV_on(dstr); + SvIVX(dstr) = SvIVX(sstr); } if (SvAMAGIC(sstr)) { SvAMAGIC_on(dstr); @@ -3429,36 +3440,51 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) *SvEND(dstr) = '\0'; (void)SvPOK_only(dstr); } - if ((sflags & SVf_UTF8) && !IN_BYTE) + if (sflags & SVf_UTF8) SvUTF8_on(dstr); /*SUPPRESS 560*/ if (sflags & SVp_NOK) { - SvNOK_on(dstr); + SvNOKp_on(dstr); + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; SvNVX(dstr) = SvNVX(sstr); } if (sflags & SVp_IOK) { - (void)SvIOK_on(dstr); - SvIVX(dstr) = SvIVX(sstr); + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; if (sflags & SVf_IVisUV) SvIsUV_on(dstr); - } - } - else if (sflags & SVp_NOK) { - SvNVX(dstr) = SvNVX(sstr); - (void)SvNOK_only(dstr); - if (sflags & SVf_IOK) { - (void)SvIOK_on(dstr); SvIVX(dstr) = SvIVX(sstr); - /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ - if (sflags & SVf_IVisUV) - SvIsUV_on(dstr); } } else if (sflags & SVp_IOK) { - (void)SvIOK_only(dstr); - SvIVX(dstr) = SvIVX(sstr); + if (sflags & SVf_IOK) + (void)SvIOK_only(dstr); + else { + SvOK_off(dstr); + SvIOKp_on(dstr); + } + /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ if (sflags & SVf_IVisUV) SvIsUV_on(dstr); + SvIVX(dstr) = SvIVX(sstr); + if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_on(dstr); + else + (void)SvNOKp_on(dstr); + SvNVX(dstr) = SvNVX(sstr); + } + } + else if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_only(dstr); + else { + SvOK_off(dstr); + SvNOKp_on(dstr); + } + SvNVX(dstr) = SvNVX(sstr); } else { if (dtype == SVt_PVGV) { @@ -3517,7 +3543,7 @@ Perl_sv_setpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN Move(ptr,dptr,len,char); dptr[len] = '\0'; SvCUR_set(sv, len); - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3561,7 +3587,7 @@ Perl_sv_setpv(pTHX_ register SV *sv, register const char *ptr) SvGROW(sv, len + 1); Move(ptr,SvPVX(sv),len+1,char); SvCUR_set(sv, len); - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3611,7 +3637,7 @@ Perl_sv_usepvn(pTHX_ register SV *sv, register char *ptr, register STRLEN len) SvCUR_set(sv, len); SvLEN_set(sv, len+1); *SvEND(sv) = '\0'; - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3748,61 +3774,41 @@ Perl_sv_catpvn_mg(pTHX_ register SV *sv, register const char *ptr, register STRL /* =for apidoc sv_catsv -Concatenates the string from SV C onto the end of the string in SV -C. Handles 'get' magic, but not 'set' magic. See C. +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. Handles 'get' magic, but +not 'set' magic. See C. -=cut -*/ +=cut */ void Perl_sv_catsv(pTHX_ SV *dsv, register SV *ssv) { + char *spv; + STRLEN slen; if (!ssv) return; - else { - STRLEN slen; - char *spv; - - if ((spv = SvPV(ssv, slen))) { - bool dutf8 = DO_UTF8(dsv); - bool sutf8 = DO_UTF8(ssv); - - if (dutf8 != sutf8) { - STRLEN dlen; - char *dpv; - - /* We may modify dsv but not ssv. */ - - if (!dutf8) - sv_utf8_upgrade(dsv); - dpv = SvPV(dsv, dlen); - SvGROW(dsv, dlen + 2 * slen + 1); - if (dutf8) /* && !sutf8 */ { - char *s = spv; - char *e = s + slen; - char *d = dpv + dlen; - char *dorig = d; - - while (s < e) { - U8 c = *s++; - - if (UTF8_IS_ASCII(c)) - *d++ = c; - else { - *d++ = UTF8_EIGHT_BIT_HI(c); - *d++ = UTF8_EIGHT_BIT_LO(c); - } - } - SvCUR(dsv) += d - dorig; - *d = 0; - } - else /* !dutf8 (was) && sutf8 */ { - sv_catpvn(dsv, spv, slen); - SvUTF8_on(dsv); - } + if ((spv = SvPV(ssv, slen))) { + bool dutf8 = DO_UTF8(dsv); + bool sutf8 = DO_UTF8(ssv); + + if (dutf8 == sutf8) + sv_catpvn(dsv,spv,slen); + else { + if (dutf8) { + /* Not modifying source SV, so taking a temporary copy. */ + SV* csv = sv_2mortal(newSVsv(ssv)); + char *cpv; + STRLEN clen; + + sv_utf8_upgrade(csv); + cpv = SvPV(csv,clen); + sv_catpvn(dsv,cpv,clen); + } + else { + sv_utf8_upgrade(dsv); + sv_catpvn(dsv,spv,slen); + SvUTF8_on(dsv); /* If dsv has no wide characters. */ } - else - sv_catpvn(dsv, spv, slen); } } } @@ -3832,20 +3838,20 @@ Handles 'get' magic, but not 'set' magic. See C. */ void -Perl_sv_catpv(pTHX_ register SV *sv, register const char *pv) +Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr) { register STRLEN len; STRLEN tlen; char *junk; - if (!pv) + if (!ptr) return; junk = SvPV_force(sv, tlen); - len = strlen(pv); + len = strlen(ptr); SvGROW(sv, tlen + len + 1); - if (pv == junk) - pv = SvPVX(sv); - Move(pv,SvPVX(sv)+tlen,len+1,char); + if (ptr == junk) + ptr = SvPVX(sv); + Move(ptr,SvPVX(sv)+tlen,len+1,char); SvCUR(sv) += len; (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); @@ -3860,9 +3866,9 @@ Like C, but also handles 'set' magic. */ void -Perl_sv_catpv_mg(pTHX_ register SV *sv, register const char *pv) +Perl_sv_catpv_mg(pTHX_ register SV *sv, register const char *ptr) { - sv_catpv(sv,pv); + sv_catpv(sv,ptr); SvSETMAGIC(sv); } @@ -3937,7 +3943,7 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam mg->mg_virtual = &PL_vtbl_amagicelem; break; case 'c': - mg->mg_virtual = 0; + mg->mg_virtual = &PL_vtbl_ovrld; break; case 'B': mg->mg_virtual = &PL_vtbl_bm; @@ -4306,7 +4312,7 @@ Perl_sv_clear(pTHX_ register SV *sv) SvREADONLY_on(&tmpref); /* DESTROY() could be naughty */ SvREFCNT(&tmpref) = 1; - do { + do { stash = SvSTASH(sv); destructor = StashHANDLER(stash,DESTROY); if (destructor) { @@ -4626,17 +4632,18 @@ Perl_sv_pos_b2u(pTHX_ register SV *sv, I32* offsetp) s = (U8*)SvPV(sv, len); if (len < *offsetp) - Perl_croak(aTHX_ "panic: bad byte offset"); + Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset"); send = s + *offsetp; len = 0; while (s < send) { - s += UTF8SKIP(s); - ++len; - } - if (s != send) { - if (ckWARN_d(WARN_UTF8)) - Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character"); - --len; + STRLEN n; + + if (utf8_to_uv(s, UTF8SKIP(s), &n, 0)) { + s += n; + len++; + } + else + break; } *offsetp = len; return; @@ -4678,13 +4685,30 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) /* do not utf8ize the comparands as a side-effect */ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + if (PL_hints & HINT_UTF8_DISTINCT) + return FALSE; + if (SvUTF8(sv1)) { - pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); - pv2tmp = TRUE; + (void)utf8_to_bytes((U8*)(pv1 = savepvn(pv1, cur1)), &cur1); + { + IV scur1 = cur1; + if (scur1 < 0) { + Safefree(pv1); + return 0; + } + } + pv1tmp = TRUE; } else { - pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1); - pv1tmp = TRUE; + (void)utf8_to_bytes((U8*)(pv2 = savepvn(pv2, cur2)), &cur2); + { + IV scur2 = cur2; + if (scur2 < 0) { + Safefree(pv2); + return 0; + } + } + pv2tmp = TRUE; } } @@ -4734,6 +4758,9 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) /* do not utf8ize the comparands as a side-effect */ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + if (PL_hints & HINT_UTF8_DISTINCT) + return SvUTF8(sv1) ? 1 : -1; + if (SvUTF8(sv1)) { pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); pv2tmp = TRUE; @@ -5213,7 +5240,7 @@ Perl_sv_inc(pTHX_ register SV *sv) if ((flags & (SVp_NOK|SVp_IOK)) == SVp_NOK) { /* It's (privately or publicly) a float, but not tested as an integer, so test it to see. */ - (void) SvIV(sv); + (void) SvIV(sv); flags = SvFLAGS(sv); } if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { @@ -5264,7 +5291,7 @@ Perl_sv_inc(pTHX_ register SV *sv) so $a="9.22337203685478e+18"; $a+0; $a++ needs to be the same as $a="9.22337203685478e+18"; $a++ or we go insane. */ - + (void) sv_2iv(sv); if (SvIOK(sv)) goto oops_its_int; @@ -5407,7 +5434,7 @@ Perl_sv_dec(pTHX_ register SV *sv) so $a="9.22337203685478e+18"; $a+0; $a-- needs to be the same as $a="9.22337203685478e+18"; $a-- or we go insane. */ - + (void) sv_2iv(sv); if (SvIOK(sv)) goto oops_its_int;