X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=sv.c;h=581c788318551c43504957e97908a9c291ff12ce;hb=3f939f220e50adb6f28f2dd14f06461c7cebfe14;hp=4794596d381f44b630217b3c76cd11e34d43b796;hpb=55ada37460e1f7d48d5322c1594b5cc7ccbf3306;p=p5sagit%2Fp5-mst-13.2.git diff --git a/sv.c b/sv.c index 4794596..581c788 100644 --- a/sv.c +++ b/sv.c @@ -1,6 +1,6 @@ /* sv.c * - * Copyright (c) 1991-2000, Larry Wall + * Copyright (c) 1991-2001, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -63,7 +63,7 @@ static void do_clean_all(pTHXo_ SV *sv); #define del_SV(p) \ STMT_START { \ LOCK_SV_MUTEX; \ - if (PL_debug & 32768) \ + if (DEBUG_D_TEST) \ del_sv(p); \ else \ plant_SV(p); \ @@ -73,7 +73,7 @@ static void do_clean_all(pTHXo_ SV *sv); STATIC void S_del_sv(pTHX_ SV *p) { - if (PL_debug & 32768) { + if (DEBUG_D_TEST) { SV* sva; SV* sv; SV* svend; @@ -137,6 +137,7 @@ S_more_sv(pTHX) if (PL_nice_chunk) { sv_add_arena(PL_nice_chunk, PL_nice_chunk_size, 0); PL_nice_chunk = Nullch; + PL_nice_chunk_size = 0; } else { char *chunk; /* must use New here to match call to */ @@ -147,20 +148,24 @@ S_more_sv(pTHX) return sv; } -STATIC void +STATIC I32 S_visit(pTHX_ SVFUNC_t f) { SV* sva; SV* sv; register SV* svend; + I32 visited = 0; for (sva = PL_sv_arenaroot; sva; sva = (SV*)SvANY(sva)) { svend = &sva[SvREFCNT(sva)]; for (sv = sva + 1; sv < svend; ++sv) { - if (SvTYPE(sv) != SVTYPEMASK) + if (SvTYPE(sv) != SVTYPEMASK && SvREFCNT(sv)) { (FCALL)(aTHXo_ sv); + ++visited; + } } } + return visited; } void @@ -181,12 +186,14 @@ Perl_sv_clean_objs(pTHX) PL_in_clean_objs = FALSE; } -void +I32 Perl_sv_clean_all(pTHX) { + I32 cleaned; PL_in_clean_all = TRUE; - visit(do_clean_all); + cleaned = visit(do_clean_all); PL_in_clean_all = FALSE; + return cleaned; } void @@ -1322,10 +1329,10 @@ Perl_sv_setuv(pTHX_ register SV *sv, UV u) { /* With these two if statements: u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 - + without u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 - + If you wish to remove them, please benchmark to see what the effect is */ if (u <= (UV)IV_MAX) { @@ -1350,10 +1357,10 @@ Perl_sv_setuv_mg(pTHX_ register SV *sv, UV u) { /* With these two if statements: u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 - + without u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 - + If you wish to remove them, please benchmark to see what the effect is */ if (u <= (UV)IV_MAX) { @@ -1424,12 +1431,12 @@ S_not_a_number(pTHX_ SV *sv) { char tmpbuf[64]; char *d = tmpbuf; - char *s; char *limit = tmpbuf + sizeof(tmpbuf) - 8; /* each *s can expand to 4 chars + "...\0", i.e. need room for 8 chars */ - for (s = SvPVX(sv); *s && d < limit; s++) { + char *s, *end; + for (s = SvPVX(sv), end = s + SvCUR(sv); s < end && d < limit; s++) { int ch = *s & 0xFF; if (ch & 128 && !isPRINT_LC(ch)) { *d++ = 'M'; @@ -1452,6 +1459,10 @@ S_not_a_number(pTHX_ SV *sv) *d++ = '\\'; *d++ = '\\'; } + else if (ch == '\0') { + *d++ = '\\'; + *d++ = '0'; + } else if (isPRINT_LC(ch)) *d++ = ch; else { @@ -1459,7 +1470,7 @@ S_not_a_number(pTHX_ SV *sv) *d++ = toCTRL(ch); } } - if (*s) { + if (s < end) { *d++ = '.'; *d++ = '.'; *d++ = '.'; @@ -1527,7 +1538,7 @@ S_not_a_number(pTHX_ SV *sv) Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flags meaning changes - now IV and NV together means that the two are interchangeable SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX; - + The benefit of this is operations such as pp_add know that if SvIOK is true for both left and right operands, then integer addition can be used instead of floating point. (for cases where the result won't @@ -1567,7 +1578,7 @@ STATIC int S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { NV nv = SvNVX(sv); /* Code simpler and had compiler problems if */ UV nv_as_uv = U_V(nv); /* these are not in simple variables. */ - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2inuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%X\n", SvPVX(sv), SvIVX(sv), nv, numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2inuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), nv, (UV)numtype)); if (nv_as_uv <= (UV)IV_MAX) { (void)SvIOKp_on(sv); (void)SvNOKp_on(sv); @@ -1625,7 +1636,7 @@ S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { #else /* We've just lost integer precision, nothing we could do. */ SvUVX(sv) = nv_as_uv; - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2niuv_non UV? '%s', UV=0x%"UVxf" NV=%g U_V(NV)=0x%"UVxf" inttype=%X\n", SvPVX(sv), SvIVX(sv), nv, nv_as_uv, numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2niuv_non UV? '%s', UV=0x%"UVxf" NV=%g U_V(NV)=0x%"UVxf" inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), nv, nv_as_uv, (UV)numtype)); /* UV and NV slots equally valid only if we have casting symmetry. */ if (numtype & IS_NUMBER_NOT_INT) { SvIsUV_on(sv); @@ -1648,7 +1659,7 @@ S_sv_2inuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { STATIC int S_sv_2iuv_non_preserve (pTHX_ register SV *sv, I32 numtype) { - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%X\n", SvPVX(sv), SvIVX(sv), SvNVX(sv), numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%g inttype=%"UVXf"\n", SvPVX(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); if (SvNVX(sv) < (NV)IV_MIN) { (void)SvIOKp_on(sv); (void)SvNOK_on(sv); @@ -1678,6 +1689,12 @@ S_sv_2iuv_non_preserve (pTHX_ register SV *sv, I32 numtype) SvIsUV_on(sv); SvUVX(sv) = U_V(SvNVX(sv)); if ((NV)(SvUVX(sv)) == SvNVX(sv)) { + if (SvUVX(sv) == UV_MAX) { + /* As we know that NVs don't preserve UVs, UV_MAX cannot + possibly be preserved by NV. Hence, it must be overflow. + NOK, IOKp */ + return IS_NUMBER_OVERFLOW_UV; + } SvIOK_on(sv); /* Integer is precise. NOK, UOK */ } else { /* Integer is imprecise. NOK, IOKp */ @@ -1714,7 +1731,7 @@ Perl_sv_2iv(pTHX_ register SV *sv) if (SvROK(sv)) { SV* tmpstr; if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,numer)) && - (SvRV(tmpstr) != SvRV(sv))) + (SvTYPE(tmpstr) != SVt_RV || (SvRV(tmpstr) != SvRV(sv)))) return SvIV(tmpstr); return PTR2IV(SvRV(sv)); } @@ -1786,7 +1803,7 @@ Perl_sv_2iv(pTHX_ register SV *sv) (NV)UVX == NVX are both true, but the values differ. :-( Hopefully for 2s complement IV_MIN is something like 0x8000000000000000 which will be exact. NWC */ - } + } else { SvUVX(sv) = U_V(SvNVX(sv)); if ( @@ -1968,7 +1985,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) if (SvROK(sv)) { SV* tmpstr; if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,numer)) && - (SvRV(tmpstr) != SvRV(sv))) + (SvTYPE(tmpstr) != SVt_RV || (SvRV(tmpstr) != SvRV(sv)))) return SvUV(tmpstr); return PTR2UV(SvRV(sv)); } @@ -2037,7 +2054,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) (NV)UVX == NVX are both true, but the values differ. :-( Hopefully for 2s complement IV_MIN is something like 0x8000000000000000 which will be exact. NWC */ - } + } else { SvUVX(sv) = U_V(SvNVX(sv)); if ( @@ -2084,7 +2101,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) UV u; char *num_begin = SvPVX(sv); int save_errno = errno; - + /* seems that strtoul taking numbers that start with - is implementation dependant, and can't be relied upon. */ if (numtype & IS_NUMBER_NEG) { @@ -2095,7 +2112,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) if (*num_begin == '-') num_begin++; } - + /* Is it an integer that we could convert with strtoul? So try it, and if it doesn't set errno then it's pukka. This should be faster than going atof and then thinking. */ @@ -2104,7 +2121,7 @@ Perl_sv_2uv(pTHX_ register SV *sv) && ((errno = 0), 1) /* always true */ && ((u = Strtoul(num_begin, Null(char**), 10)), 1) /* ditto */ && (errno == 0) - /* If known to be negative, check it didn't undeflow IV + /* If known to be negative, check it didn't undeflow IV XXX possibly we should put more negative values as NVs direct rather than go via atof below */ && ((numtype & IS_NUMBER_NEG) ? (u <= (UV)IV_MIN) : 1)) { @@ -2252,7 +2269,7 @@ Perl_sv_2nv(pTHX_ register SV *sv) if (SvROK(sv)) { SV* tmpstr; if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,numer)) && - (SvRV(tmpstr) != SvRV(sv))) + (SvTYPE(tmpstr) != SVt_RV || (SvRV(tmpstr) != SvRV(sv)))) return SvNV(tmpstr); return PTR2NV(SvRV(sv)); } @@ -2289,7 +2306,10 @@ Perl_sv_2nv(pTHX_ register SV *sv) } else if (SvTYPE(sv) < SVt_PVNV) sv_upgrade(sv, SVt_PVNV); - if (SvIOKp(sv) && + if (SvNOKp(sv) && !(SvIOK(sv) || SvPOK(sv))) { + SvNOK_on(sv); + } + else if (SvIOKp(sv) && (!SvPOKp(sv) || !strchr(SvPVX(sv),'.') || !looks_like_number(sv))) { SvNVX(sv) = SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv); @@ -2411,7 +2431,7 @@ S_asUV(pTHX_ SV *sv) * LONG_MAX and LONG_MIN when given out of range values. ANSI says they should * do this, and vendors have had 11 years to get it right. * However, will try to make it still work with only atol - * + * * IS_NUMBER_TO_INT_BY_ATOL 123456789 or 123456789.3 definitely < IV_MAX * IS_NUMBER_TO_INT_BY_STRTOL 123456789 or 123456789.3 if digits = IV_MAX * IS_NUMBER_TO_INT_BY_ATOF 123456789e0 or >> IV_MAX @@ -2442,6 +2462,9 @@ Perl_looks_like_number(pTHX_ SV *sv) I32 numtype = 0; I32 sawinf = 0; STRLEN len; +#ifdef USE_LOCALE_NUMERIC + bool specialradix = FALSE; +#endif if (SvPOK(sv)) { sbegin = SvPVX(sv); @@ -2465,7 +2488,7 @@ Perl_looks_like_number(pTHX_ SV *sv) nbegin = s; /* - * we return IS_NUMBER_TO_INT_BY_ATOL if the number can converted to + * we return IS_NUMBER_TO_INT_BY_ATOL if the number can converted to * integer with atol() without overflow, IS_NUMBER_TO_INT_BY_STRTOL if * possibly slightly larger than max int, IS_NUMBER_TO_INT_BY_ATOF if you * will need (int)atof(). @@ -2506,23 +2529,34 @@ Perl_looks_like_number(pTHX_ SV *sv) UV_MAX= 18446744073709551615) so be cautious */ numtype |= IS_NUMBER_TO_INT_BY_STRTOL | IS_NUMBER_AS_LONG_AS_IV_MAX; - if (*s == '.' + if ( #ifdef USE_LOCALE_NUMERIC - || IS_NUMERIC_RADIX(*s) + (specialradix = IS_NUMERIC_RADIX(s, send)) || #endif - ) { - s++; + *s == '.') { +#ifdef USE_LOCALE_NUMERIC + if (specialradix) + s += SvCUR(PL_numeric_radix_sv); + else +#endif + s++; numtype |= IS_NUMBER_NOT_INT; while (isDIGIT(*s)) /* optional digits after the radix */ s++; } } - else if (*s == '.' + else if ( #ifdef USE_LOCALE_NUMERIC - || IS_NUMERIC_RADIX(*s) + (specialradix = IS_NUMERIC_RADIX(s, send)) || #endif + *s == '.' ) { - s++; +#ifdef USE_LOCALE_NUMERIC + if (specialradix) + s += SvCUR(PL_numeric_radix_sv); + else +#endif + s++; numtype |= IS_NUMBER_TO_INT_BY_ATOL | IS_NUMBER_NOT_INT; /* no digits before the radix means we need digits after it */ if (isDIGIT(*s)) { @@ -2613,6 +2647,12 @@ uiv_2buf(char *buf, IV iv, UV uv, int is_uv, char **peob) char * Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) { + return sv_2pv_flags(sv, lp, SV_GMAGIC); +} + +char * +Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) +{ register char *s; int olderrno; SV *tsv; @@ -2624,7 +2664,8 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) return ""; } if (SvGMAGICAL(sv)) { - mg_get(sv); + if (flags & SV_GMAGIC) + mg_get(sv); if (SvPOKp(sv)) { *lp = SvCUR(sv); return SvPVX(sv); @@ -2655,7 +2696,7 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) if (SvROK(sv)) { SV* tmpstr; if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,string)) && - (SvRV(tmpstr) != SvRV(sv))) + (SvTYPE(tmpstr) != SVt_RV || (SvRV(tmpstr) != SvRV(sv)))) return SvPV(tmpstr,*lp); sv = (SV*)SvRV(sv); if (!sv) @@ -2669,7 +2710,7 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG)) == (SVs_OBJECT|SVs_RMG)) && strEQ(s=HvNAME(SvSTASH(sv)), "Regexp") - && (mg = mg_find(sv, 'r'))) { + && (mg = mg_find(sv, PERL_MAGIC_qr))) { regexp *re = (regexp *)mg->mg_obj; if (!mg->mg_ptr) { @@ -2865,7 +2906,8 @@ Perl_sv_2pvbyte_nolen(pTHX_ register SV *sv) char * Perl_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp) { - return sv_2pv(sv,lp); + sv_utf8_downgrade(sv,0); + return SvPV(sv,*lp); } char * @@ -2894,7 +2936,7 @@ Perl_sv_2bool(pTHX_ register SV *sv) if (SvROK(sv)) { SV* tmpsv; if (SvAMAGIC(sv) && (tmpsv=AMG_CALLun(sv,bool_)) && - (SvRV(tmpsv) != SvRV(sv))) + (SvTYPE(tmpsv) != SVt_RV || (SvRV(tmpsv) != SvRV(sv)))) return SvTRUE(tmpsv); return SvRV(sv) != 0; } @@ -2924,45 +2966,80 @@ Perl_sv_2bool(pTHX_ register SV *sv) =for apidoc sv_utf8_upgrade Convert the PV of an SV to its UTF8-encoded form. +Forces the SV to string form it it is not already. +Always sets the SvUTF8 flag to avoid future validity checks even +if all the bytes have hibit clear. =cut */ -void +STRLEN Perl_sv_utf8_upgrade(pTHX_ register SV *sv) { - char *s, *t, *e; + return sv_utf8_upgrade_flags(sv, SV_GMAGIC); +} + +/* +=for apidoc sv_utf8_upgrade_flags + +Convert the PV of an SV to its UTF8-encoded form. +Forces the SV to string form it it is not already. +Always sets the SvUTF8 flag to avoid future validity checks even +if all the bytes have hibit clear. If C has C bit set, +will C on C if appropriate, else not. C and +C are implemented in terms of this function. + +=cut +*/ + +STRLEN +Perl_sv_utf8_upgrade_flags(pTHX_ register SV *sv, I32 flags) +{ + U8 *s, *t, *e; int hibit = 0; - if (!sv || !SvPOK(sv) || SvUTF8(sv)) - return; + if (!sv) + return 0; + + if (!SvPOK(sv)) { + STRLEN len = 0; + (void) sv_2pv_flags(sv,&len, flags); + if (!SvPOK(sv)) + return len; + } + + if (SvUTF8(sv)) + return SvCUR(sv); + + if (SvREADONLY(sv) && SvFAKE(sv)) { + sv_force_normal(sv); + } /* This function could be much more efficient if we had a FLAG in SVs * to signal if there are any hibit chars in the PV. * Given that there isn't make loop fast as possible */ - s = SvPVX(sv); - e = SvEND(sv); + s = (U8 *) SvPVX(sv); + e = (U8 *) SvEND(sv); t = s; while (t < e) { - if ((hibit = *t++ & 0x80)) + U8 ch = *t++; + if ((hibit = !NATIVE_IS_INVARIANT(ch))) break; } - if (hibit) { STRLEN len; - if (SvREADONLY(sv) && SvFAKE(sv)) { - sv_force_normal(sv); - s = SvPVX(sv); - } + len = SvCUR(sv) + 1; /* Plus the \0 */ SvPVX(sv) = (char*)bytes_to_utf8((U8*)s, &len); SvCUR(sv) = len - 1; if (SvLEN(sv) != 0) Safefree(s); /* No longer using what was there before. */ SvLEN(sv) = len; /* No longer know the real size. */ - SvUTF8_on(sv); } + /* Mark as UTF-8 even if no hibit - saves scanning loop */ + SvUTF8_on(sv); + return SvCUR(sv); } /* @@ -2981,12 +3058,37 @@ Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok) { if (SvPOK(sv) && SvUTF8(sv)) { if (SvCUR(sv)) { - char *c = SvPVX(sv); - STRLEN len = SvCUR(sv); + U8 *s; + STRLEN len; - if (!utf8_to_bytes((U8*)c, &len)) { + if (SvREADONLY(sv) && SvFAKE(sv)) + sv_force_normal(sv); + s = (U8 *) SvPV(sv, len); + if (!utf8_to_bytes(s, &len)) { if (fail_ok) return FALSE; +#ifdef USE_BYTES_DOWNGRADES + else if (IN_BYTES) { + U8 *d = s; + U8 *e = (U8 *) SvEND(sv); + int first = 1; + while (s < e) { + UV ch = utf8n_to_uvchr(s,(e-s),&len,0); + if (first && ch > 255) { + if (PL_op) + Perl_warner(aTHX_ WARN_UTF8, "Wide character in byte %s", + PL_op_desc[PL_op->op_type]); + else + Perl_warner(aTHX_ WARN_UTF8, "Wide character in byte"); + first = 0; + } + *d++ = ch; + s += len; + } + *d = '\0'; + len = (d - (U8 *) SvPVX(sv)); + } +#endif else { if (PL_op) Perl_croak(aTHX_ "Wide character in %s", @@ -2997,9 +3099,8 @@ Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok) } SvCUR(sv) = len; } - SvUTF8_off(sv); } - + SvUTF8_off(sv); return TRUE; } @@ -3007,7 +3108,8 @@ Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok) =for apidoc sv_utf8_encode Convert the PV of an SV to UTF8-encoded, but then turn off the C -flag so that it looks like bytes again. Nothing calls this. +flag so that it looks like octets again. Used as a building block +for encode_utf8 in Encode.xs =cut */ @@ -3015,29 +3117,43 @@ flag so that it looks like bytes again. Nothing calls this. void Perl_sv_utf8_encode(pTHX_ register SV *sv) { - sv_utf8_upgrade(sv); + (void) sv_utf8_upgrade(sv); SvUTF8_off(sv); } +/* +=for apidoc sv_utf8_decode + +Convert the octets in the PV from UTF-8 to chars. Scan for validity and then +turn of SvUTF8 if needed so that we see characters. Used as a building block +for decode_utf8 in Encode.xs + +=cut +*/ + + + bool Perl_sv_utf8_decode(pTHX_ register SV *sv) { if (SvPOK(sv)) { - char *c; - char *e; - bool has_utf = FALSE; + U8 *c; + U8 *e; + + /* The octets may have got themselves encoded - get them back as bytes */ if (!sv_utf8_downgrade(sv, TRUE)) return FALSE; /* it is actually just a matter of turning the utf8 flag on, but * we want to make sure everything inside is valid utf8 first. */ - c = SvPVX(sv); - if (!is_utf8_string((U8*)c, SvCUR(sv)+1)) + c = (U8 *) SvPVX(sv); + if (!is_utf8_string(c, SvCUR(sv)+1)) return FALSE; - e = SvEND(sv); + e = (U8 *) SvEND(sv); while (c < e) { - if (*c++ & 0x80) { + U8 ch = *c++; + if (!UTF8_IS_INVARIANT(ch)) { SvUTF8_on(sv); break; } @@ -3063,9 +3179,30 @@ C. =cut */ +/* sv_setsv() is aliased to Perl_sv_setsv_macro; this function provided + for binary compatibility only +*/ void Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) { + sv_setsv_flags(dstr, sstr, SV_GMAGIC); +} + +/* +=for apidoc sv_setsv_flags + +Copies the contents of the source SV C into the destination SV C. +The source SV may be destroyed if it is mortal. Does not handle 'set' +magic. If C has C bit set, will C on C if +appropriate, else not. C and C are implemented +in terms of this function. + +=cut +*/ + +void +Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags) +{ register U32 sflags; register int dtype; register int stype; @@ -3184,7 +3321,7 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) char *name = GvNAME(sstr); STRLEN len = GvNAMELEN(sstr); sv_upgrade(dstr, SVt_PVGV); - sv_magic(dstr, dstr, '*', Nullch, 0); + sv_magic(dstr, dstr, PERL_MAGIC_glob, Nullch, 0); GvSTASH(dstr) = (HV*)SvREFCNT_inc(GvSTASH(sstr)); GvNAME(dstr) = savepvn(name, len); GvNAMELEN(dstr) = len; @@ -3195,6 +3332,13 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) && GvCV(dstr) && PL_sortcop == CvSTART(GvCV(dstr))) Perl_croak(aTHX_ "Can't redefine active sort subroutine %s", GvNAME(dstr)); + +#ifdef GV_SHARED_CHECK + if (GvSHARED((GV*)dstr)) { + Perl_croak(aTHX_ PL_no_modify); + } +#endif + (void)SvOK_off(dstr); GvINTRO_off(dstr); /* one-shot flag */ gp_free((GV*)dstr); @@ -3212,7 +3356,7 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) /* FALL THROUGH */ default: - if (SvGMAGICAL(sstr)) { + if (SvGMAGICAL(sstr) && (flags & SV_GMAGIC)) { mg_get(sstr); if (SvTYPE(sstr) != stype) { stype = SvTYPE(sstr); @@ -3235,6 +3379,12 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) SV *dref = 0; int intro = GvINTRO(dstr); +#ifdef GV_SHARED_CHECK + if (GvSHARED((GV*)dstr)) { + Perl_croak(aTHX_ PL_no_modify); + } +#endif + if (intro) { GP *gp; gp_free((GV*)dstr); @@ -3289,7 +3439,6 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) if (!GvCVGEN((GV*)dstr) && (CvROOT(cv) || CvXSUB(cv))) { - SV *const_sv; /* ahem, death to those who redefine * active sort subs */ if (PL_curstackinfo->si_type == PERLSI_SORT && @@ -3372,14 +3521,19 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) SvRV(dstr) = SvREFCNT_inc(SvRV(sstr)); SvROK_on(dstr); if (sflags & SVp_NOK) { - SvNOK_on(dstr); + SvNOKp_on(dstr); + /* Only set the public OK flag if the source has public OK. */ + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; SvNVX(dstr) = SvNVX(sstr); } if (sflags & SVp_IOK) { - (void)SvIOK_on(dstr); - SvIVX(dstr) = SvIVX(sstr); + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; if (sflags & SVf_IVisUV) SvIsUV_on(dstr); + SvIVX(dstr) = SvIVX(sstr); } if (SvAMAGIC(sstr)) { SvAMAGIC_on(dstr); @@ -3429,36 +3583,51 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) *SvEND(dstr) = '\0'; (void)SvPOK_only(dstr); } - if ((sflags & SVf_UTF8) && !IN_BYTE) + if (sflags & SVf_UTF8) SvUTF8_on(dstr); /*SUPPRESS 560*/ if (sflags & SVp_NOK) { - SvNOK_on(dstr); + SvNOKp_on(dstr); + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; SvNVX(dstr) = SvNVX(sstr); } if (sflags & SVp_IOK) { - (void)SvIOK_on(dstr); - SvIVX(dstr) = SvIVX(sstr); + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; if (sflags & SVf_IVisUV) SvIsUV_on(dstr); - } - } - else if (sflags & SVp_NOK) { - SvNVX(dstr) = SvNVX(sstr); - (void)SvNOK_only(dstr); - if (sflags & SVf_IOK) { - (void)SvIOK_on(dstr); SvIVX(dstr) = SvIVX(sstr); - /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ - if (sflags & SVf_IVisUV) - SvIsUV_on(dstr); } } else if (sflags & SVp_IOK) { - (void)SvIOK_only(dstr); - SvIVX(dstr) = SvIVX(sstr); + if (sflags & SVf_IOK) + (void)SvIOK_only(dstr); + else { + (void)SvOK_off(dstr); + (void)SvIOKp_on(dstr); + } + /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ if (sflags & SVf_IVisUV) SvIsUV_on(dstr); + SvIVX(dstr) = SvIVX(sstr); + if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_on(dstr); + else + (void)SvNOKp_on(dstr); + SvNVX(dstr) = SvNVX(sstr); + } + } + else if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_only(dstr); + else { + (void)SvOK_off(dstr); + SvNOKp_on(dstr); + } + SvNVX(dstr) = SvNVX(sstr); } else { if (dtype == SVt_PVGV) { @@ -3500,16 +3669,18 @@ void Perl_sv_setpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN len) { register char *dptr; - { - /* len is STRLEN which is unsigned, need to copy to signed */ - IV iv = len; - assert(iv >= 0); - } + SV_CHECK_THINKFIRST(sv); if (!ptr) { (void)SvOK_off(sv); return; } + else { + /* len is STRLEN which is unsigned, need to copy to signed */ + IV iv = len; + if (iv < 0) + Perl_croak(aTHX_ "panic: sv_setpvn called with negative strlen"); + } (void)SvUPGRADE(sv, SVt_PV); SvGROW(sv, len + 1); @@ -3517,7 +3688,7 @@ Perl_sv_setpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN Move(ptr,dptr,len,char); dptr[len] = '\0'; SvCUR_set(sv, len); - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3561,7 +3732,7 @@ Perl_sv_setpv(pTHX_ register SV *sv, register const char *ptr) SvGROW(sv, len + 1); Move(ptr,SvPVX(sv),len+1,char); SvCUR_set(sv, len); - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3611,7 +3782,7 @@ Perl_sv_usepvn(pTHX_ register SV *sv, register char *ptr, register STRLEN len) SvCUR_set(sv, len); SvLEN_set(sv, len+1); *SvEND(sv) = '\0'; - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3707,27 +3878,50 @@ Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) /* like set but assuming =for apidoc sv_catpvn Concatenates the string onto the end of the string which is in the SV. The -C indicates number of bytes to copy. Handles 'get' magic, but not -'set' magic. See C. +C indicates number of bytes to copy. If the SV has the UTF8 +status set, then the bytes appended should be valid UTF8. +Handles 'get' magic, but not 'set' magic. See C. =cut */ +/* sv_catpvn() is aliased to Perl_sv_catpvn_macro; this function provided + for binary compatibility only +*/ void -Perl_sv_catpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN len) +Perl_sv_catpvn(pTHX_ SV *dsv, const char* sstr, STRLEN slen) { - STRLEN tlen; - char *junk; + sv_catpvn_flags(dsv, sstr, slen, SV_GMAGIC); +} - junk = SvPV_force(sv, tlen); - SvGROW(sv, tlen + len + 1); - if (ptr == junk) - ptr = SvPVX(sv); - Move(ptr,SvPVX(sv)+tlen,len,char); - SvCUR(sv) += len; - *SvEND(sv) = '\0'; - (void)SvPOK_only_UTF8(sv); /* validate pointer */ - SvTAINT(sv); +/* +=for apidoc sv_catpvn_flags + +Concatenates the string onto the end of the string which is in the SV. The +C indicates number of bytes to copy. If the SV has the UTF8 +status set, then the bytes appended should be valid UTF8. +If C has C bit set, will C on C if +appropriate, else not. C and C are implemented +in terms of this function. + +=cut +*/ + +void +Perl_sv_catpvn_flags(pTHX_ register SV *dsv, register const char *sstr, register STRLEN slen, I32 flags) +{ + STRLEN dlen; + char *dstr; + + dstr = SvPV_force_flags(dsv, dlen, flags); + SvGROW(dsv, dlen + slen + 1); + if (sstr == dstr) + sstr = SvPVX(dsv); + Move(sstr, SvPVX(dsv) + dlen, slen, char); + SvCUR(dsv) += slen; + *SvEND(dsv) = '\0'; + (void)SvPOK_only_UTF8(dsv); /* validate pointer */ + SvTAINT(dsv); } /* @@ -3748,27 +3942,58 @@ Perl_sv_catpvn_mg(pTHX_ register SV *sv, register const char *ptr, register STRL /* =for apidoc sv_catsv -Concatenates the string from SV C onto the end of the string in SV -C. Handles 'get' magic, but not 'set' magic. See C. +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. Handles 'get' magic, but +not 'set' magic. See C. -=cut -*/ +=cut */ +/* sv_catsv() is aliased to Perl_sv_catsv_macro; this function provided + for binary compatibility only +*/ void Perl_sv_catsv(pTHX_ SV *dstr, register SV *sstr) { - char *s; - STRLEN len; - if (!sstr) + sv_catsv_flags(dstr, sstr, SV_GMAGIC); +} + +/* +=for apidoc sv_catsv_flags + +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. If C has C +bit set, will C on the SVs if appropriate, else not. C +and C are implemented in terms of this function. + +=cut */ + +void +Perl_sv_catsv_flags(pTHX_ SV *dsv, register SV *ssv, I32 flags) +{ + char *spv; + STRLEN slen; + if (!ssv) return; - if ((s = SvPV(sstr, len))) { - if (DO_UTF8(sstr)) { - sv_utf8_upgrade(dstr); - sv_catpvn(dstr,s,len); - SvUTF8_on(dstr); + if ((spv = SvPV(ssv, slen))) { + bool sutf8 = DO_UTF8(ssv); + bool dutf8; + + if (SvGMAGICAL(dsv) && (flags & SV_GMAGIC)) + mg_get(dsv); + dutf8 = DO_UTF8(dsv); + + if (dutf8 != sutf8) { + if (dutf8) { + /* Not modifying source SV, so taking a temporary copy. */ + SV* csv = sv_2mortal(newSVpvn(spv, slen)); + + sv_utf8_upgrade(csv); + spv = SvPV(csv, slen); + } + else + sv_utf8_upgrade_nomg(dsv); } - else - sv_catpvn(dstr,s,len); + sv_catpvn_nomg(dsv, spv, slen); } } @@ -3781,20 +4006,20 @@ Like C, but also handles 'set' magic. */ void -Perl_sv_catsv_mg(pTHX_ SV *dstr, register SV *sstr) +Perl_sv_catsv_mg(pTHX_ SV *dsv, register SV *ssv) { - sv_catsv(dstr,sstr); - SvSETMAGIC(dstr); + sv_catsv(dsv,ssv); + SvSETMAGIC(dsv); } /* =for apidoc sv_catpv Concatenates the string onto the end of the string which is in the SV. -Handles 'get' magic, but not 'set' magic. See C. +If the SV has the UTF8 status set, then the bytes appended should be +valid UTF8. Handles 'get' magic, but not 'set' magic. See C. -=cut -*/ +=cut */ void Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr) @@ -3860,12 +4085,23 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam MAGIC* mg; if (SvREADONLY(sv)) { - if (PL_curcop != &PL_compiling && !strchr("gBf", how)) + if (PL_curcop != &PL_compiling + /* XXX this used to be !strchr("gBf", how), which seems to + * implicity be equal to !strchr("gBf\0", how), ie \0 matches + * too. I find this suprising, but have hadded PERL_MAGIC_sv + * to the list of things to check - DAPM 19-May-01 */ + && how != PERL_MAGIC_regex_global + && how != PERL_MAGIC_bm + && how != PERL_MAGIC_fm + && how != PERL_MAGIC_sv + ) + { Perl_croak(aTHX_ PL_no_modify); + } } - if (SvMAGICAL(sv) || (how == 't' && SvTYPE(sv) >= SVt_PVMG)) { + if (SvMAGICAL(sv) || (how == PERL_MAGIC_taint && SvTYPE(sv) >= SVt_PVMG)) { if (SvMAGIC(sv) && (mg = mg_find(sv, how))) { - if (how == 't') + if (how == PERL_MAGIC_taint) mg->mg_len |= 1; return; } @@ -3875,134 +4111,148 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam } Newz(702,mg, 1, MAGIC); mg->mg_moremagic = SvMAGIC(sv); - SvMAGIC(sv) = mg; - if (!obj || obj == sv || how == '#' || how == 'r') + + /* Some magic sontains a reference loop, where the sv and object refer to + each other. To prevent a avoid a reference loop that would prevent such + objects being freed, we look for such loops and if we find one we avoid + incrementing the object refcount. */ + if (!obj || obj == sv || + how == PERL_MAGIC_arylen || + how == PERL_MAGIC_qr || + (SvTYPE(obj) == SVt_PVGV && + (GvSV(obj) == sv || GvHV(obj) == (HV*)sv || GvAV(obj) == (AV*)sv || + GvCV(obj) == (CV*)sv || GvIOp(obj) == (IO*)sv || + GvFORM(obj) == (CV*)sv))) + { mg->mg_obj = obj; + } else { mg->mg_obj = SvREFCNT_inc(obj); mg->mg_flags |= MGf_REFCOUNTED; } mg->mg_type = how; mg->mg_len = namlen; - if (name) + if (name) { if (namlen >= 0) mg->mg_ptr = savepvn(name, namlen); else if (namlen == HEf_SVKEY) mg->mg_ptr = (char*)SvREFCNT_inc((SV*)name); + } switch (how) { - case 0: + case PERL_MAGIC_sv: mg->mg_virtual = &PL_vtbl_sv; break; - case 'A': + case PERL_MAGIC_overload: mg->mg_virtual = &PL_vtbl_amagic; break; - case 'a': + case PERL_MAGIC_overload_elem: mg->mg_virtual = &PL_vtbl_amagicelem; break; - case 'c': - mg->mg_virtual = 0; + case PERL_MAGIC_overload_table: + mg->mg_virtual = &PL_vtbl_ovrld; break; - case 'B': + case PERL_MAGIC_bm: mg->mg_virtual = &PL_vtbl_bm; break; - case 'D': + case PERL_MAGIC_regdata: mg->mg_virtual = &PL_vtbl_regdata; break; - case 'd': + case PERL_MAGIC_regdatum: mg->mg_virtual = &PL_vtbl_regdatum; break; - case 'E': + case PERL_MAGIC_env: mg->mg_virtual = &PL_vtbl_env; break; - case 'f': + case PERL_MAGIC_fm: mg->mg_virtual = &PL_vtbl_fm; break; - case 'e': + case PERL_MAGIC_envelem: mg->mg_virtual = &PL_vtbl_envelem; break; - case 'g': + case PERL_MAGIC_regex_global: mg->mg_virtual = &PL_vtbl_mglob; break; - case 'I': + case PERL_MAGIC_isa: mg->mg_virtual = &PL_vtbl_isa; break; - case 'i': + case PERL_MAGIC_isaelem: mg->mg_virtual = &PL_vtbl_isaelem; break; - case 'k': + case PERL_MAGIC_nkeys: mg->mg_virtual = &PL_vtbl_nkeys; break; - case 'L': + case PERL_MAGIC_dbfile: SvRMAGICAL_on(sv); mg->mg_virtual = 0; break; - case 'l': + case PERL_MAGIC_dbline: mg->mg_virtual = &PL_vtbl_dbline; break; #ifdef USE_THREADS - case 'm': + case PERL_MAGIC_mutex: mg->mg_virtual = &PL_vtbl_mutex; break; #endif /* USE_THREADS */ #ifdef USE_LOCALE_COLLATE - case 'o': + case PERL_MAGIC_collxfrm: mg->mg_virtual = &PL_vtbl_collxfrm; break; #endif /* USE_LOCALE_COLLATE */ - case 'P': + case PERL_MAGIC_tied: mg->mg_virtual = &PL_vtbl_pack; break; - case 'p': - case 'q': + case PERL_MAGIC_tiedelem: + case PERL_MAGIC_tiedscalar: mg->mg_virtual = &PL_vtbl_packelem; break; - case 'r': + case PERL_MAGIC_qr: mg->mg_virtual = &PL_vtbl_regexp; break; - case 'S': + case PERL_MAGIC_sig: mg->mg_virtual = &PL_vtbl_sig; break; - case 's': + case PERL_MAGIC_sigelem: mg->mg_virtual = &PL_vtbl_sigelem; break; - case 't': + case PERL_MAGIC_taint: mg->mg_virtual = &PL_vtbl_taint; mg->mg_len = 1; break; - case 'U': + case PERL_MAGIC_uvar: mg->mg_virtual = &PL_vtbl_uvar; break; - case 'v': + case PERL_MAGIC_vec: mg->mg_virtual = &PL_vtbl_vec; break; - case 'x': + case PERL_MAGIC_substr: mg->mg_virtual = &PL_vtbl_substr; break; - case 'y': + case PERL_MAGIC_defelem: mg->mg_virtual = &PL_vtbl_defelem; break; - case '*': + case PERL_MAGIC_glob: mg->mg_virtual = &PL_vtbl_glob; break; - case '#': + case PERL_MAGIC_arylen: mg->mg_virtual = &PL_vtbl_arylen; break; - case '.': + case PERL_MAGIC_pos: mg->mg_virtual = &PL_vtbl_pos; break; - case '<': + case PERL_MAGIC_backref: mg->mg_virtual = &PL_vtbl_backref; break; - case '~': /* Reserved for use by extensions not perl internals. */ + case PERL_MAGIC_ext: + /* Reserved for use by extensions not perl internals. */ /* Useful for attaching extension internal data to perl vars. */ /* Note that multiple extensions may clash if magical scalars */ /* etc holding private data from one are passed to another. */ SvRMAGICAL_on(sv); break; default: - Perl_croak(aTHX_ "Don't know how to handle magic of type '%c'", how); + Perl_croak(aTHX_ "Don't know how to handle magic of type \\%o", how); } mg_magical(sv); if (SvGMAGICAL(sv)) @@ -4031,11 +4281,12 @@ Perl_sv_unmagic(pTHX_ SV *sv, int type) *mgp = mg->mg_moremagic; if (vtbl && vtbl->svt_free) CALL_FPTR(vtbl->svt_free)(aTHX_ sv, mg); - if (mg->mg_ptr && mg->mg_type != 'g') + if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) { if (mg->mg_len >= 0) Safefree(mg->mg_ptr); else if (mg->mg_len == HEf_SVKEY) SvREFCNT_dec((SV*)mg->mg_ptr); + } if (mg->mg_flags & MGf_REFCOUNTED) SvREFCNT_dec(mg->mg_obj); Safefree(mg); @@ -4045,7 +4296,7 @@ Perl_sv_unmagic(pTHX_ SV *sv, int type) } if (!SvMAGIC(sv)) { SvMAGICAL_off(sv); - SvFLAGS(sv) |= (SvFLAGS(sv) & (SVp_IOK|SVp_NOK|SVp_POK)) >> PRIVSHIFT; + SvFLAGS(sv) |= (SvFLAGS(sv) & (SVp_NOK|SVp_POK)) >> PRIVSHIFT; } return 0; @@ -4084,11 +4335,11 @@ S_sv_add_backref(pTHX_ SV *tsv, SV *sv) { AV *av; MAGIC *mg; - if (SvMAGICAL(tsv) && (mg = mg_find(tsv, '<'))) + if (SvMAGICAL(tsv) && (mg = mg_find(tsv, PERL_MAGIC_backref))) av = (AV*)mg->mg_obj; else { av = newAV(); - sv_magic(tsv, (SV*)av, '<', NULL, 0); + sv_magic(tsv, (SV*)av, PERL_MAGIC_backref, NULL, 0); SvREFCNT_dec(av); /* for sv_magic */ } av_push(av,sv); @@ -4102,7 +4353,7 @@ S_sv_del_backref(pTHX_ SV *sv) I32 i; SV *tsv = SvRV(sv); MAGIC *mg; - if (!SvMAGICAL(tsv) || !(mg = mg_find(tsv, '<'))) + if (!SvMAGICAL(tsv) || !(mg = mg_find(tsv, PERL_MAGIC_backref))) Perl_croak(aTHX_ "panic: del_backref"); av = (AV *)mg->mg_obj; svp = AvARRAY(av); @@ -4261,7 +4512,7 @@ Perl_sv_clear(pTHX_ register SV *sv) if (SvOBJECT(sv)) { if (PL_defstash) { /* Still have a symbol table? */ - djSP; + dSP; CV* destructor; SV tmpref; @@ -4271,7 +4522,7 @@ Perl_sv_clear(pTHX_ register SV *sv) SvREADONLY_on(&tmpref); /* DESTROY() could be naughty */ SvREFCNT(&tmpref) = 1; - do { + do { stash = SvSTASH(sv); destructor = StashHANDLER(stash,DESTROY); if (destructor) { @@ -4591,17 +4842,19 @@ Perl_sv_pos_b2u(pTHX_ register SV *sv, I32* offsetp) s = (U8*)SvPV(sv, len); if (len < *offsetp) - Perl_croak(aTHX_ "panic: bad byte offset"); + Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset"); send = s + *offsetp; len = 0; while (s < send) { - s += UTF8SKIP(s); - ++len; - } - if (s != send) { - if (ckWARN_d(WARN_UTF8)) - Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character"); - --len; + STRLEN n; + /* Call utf8n_to_uvchr() to validate the sequence */ + utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0); + if (n > 0) { + s += n; + len++; + } + else + break; } *offsetp = len; return; @@ -4624,8 +4877,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) char *pv2; STRLEN cur2; I32 eq = 0; - bool pv1tmp = FALSE; - bool pv2tmp = FALSE; + char *tpv = Nullch; if (!sv1) { pv1 = ""; @@ -4642,24 +4894,35 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) pv2 = SvPV(sv2, cur2); /* do not utf8ize the comparands as a side-effect */ - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { + bool is_utf8 = TRUE; + /* UTF-8ness differs */ + if (PL_hints & HINT_UTF8_DISTINCT) + return FALSE; + if (SvUTF8(sv1)) { - pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); - pv2tmp = TRUE; + /* sv1 is the UTF-8 one , If is equal it must be downgrade-able */ + char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8); + if (pv != pv1) + pv1 = tpv = pv; } else { - pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1); - pv1tmp = TRUE; + /* sv2 is the UTF-8 one , If is equal it must be downgrade-able */ + char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8); + if (pv != pv2) + pv2 = tpv = pv; + } + if (is_utf8) { + /* Downgrade not possible - cannot be eq */ + return FALSE; } } if (cur1 == cur2) eq = memEQ(pv1, pv2, cur1); - if (pv1tmp) - Safefree(pv1); - if (pv2tmp) - Safefree(pv2); + if (tpv != Nullch) + Safefree(tpv); return eq; } @@ -4698,7 +4961,10 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) pv2 = SvPV(sv2, cur2); /* do not utf8ize the comparands as a side-effect */ - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { + if (PL_hints & HINT_UTF8_DISTINCT) + return SvUTF8(sv1) ? 1 : -1; + if (SvUTF8(sv1)) { pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); pv2tmp = TRUE; @@ -4792,7 +5058,7 @@ Perl_sv_cmp_locale(pTHX_ register SV *sv1, register SV *sv2) #ifdef USE_LOCALE_COLLATE /* - * Any scalar variable may carry an 'o' magic that contains the + * Any scalar variable may carry PERL_MAGIC_collxfrm magic that contains the * scalar data of the variable transformed to such a format that * a normal memory comparison can be used to compare the data * according to the locale settings. @@ -4802,7 +5068,7 @@ Perl_sv_collxfrm(pTHX_ SV *sv, STRLEN *nxp) { MAGIC *mg; - mg = SvMAGICAL(sv) ? mg_find(sv, 'o') : (MAGIC *) NULL; + mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_collxfrm) : (MAGIC *) NULL; if (!mg || !mg->mg_ptr || *(U32*)mg->mg_ptr != PL_collation_ix) { char *s, *xf; STRLEN len, xlen; @@ -4817,8 +5083,8 @@ Perl_sv_collxfrm(pTHX_ SV *sv, STRLEN *nxp) return xf + sizeof(PL_collation_ix); } if (! mg) { - sv_magic(sv, 0, 'o', 0, 0); - mg = mg_find(sv, 'o'); + sv_magic(sv, 0, PERL_MAGIC_collxfrm, 0, 0); + mg = mg_find(sv, PERL_MAGIC_collxfrm); assert(mg); } mg->mg_ptr = xf; @@ -4860,7 +5126,7 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) register STDCHAR rslast; register STDCHAR *bp; register I32 cnt; - I32 i; + I32 i = 0; SV_CHECK_THINKFIRST(sv); (void)SvUPGRADE(sv, SVt_PV); @@ -5178,7 +5444,7 @@ Perl_sv_inc(pTHX_ register SV *sv) if ((flags & (SVp_NOK|SVp_IOK)) == SVp_NOK) { /* It's (privately or publicly) a float, but not tested as an integer, so test it to see. */ - (void) SvIV(sv); + (void) SvIV(sv); flags = SvFLAGS(sv); } if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { @@ -5229,7 +5495,7 @@ Perl_sv_inc(pTHX_ register SV *sv) so $a="9.22337203685478e+18"; $a+0; $a++ needs to be the same as $a="9.22337203685478e+18"; $a++ or we go insane. */ - + (void) sv_2iv(sv); if (SvIOK(sv)) goto oops_its_int; @@ -5372,7 +5638,7 @@ Perl_sv_dec(pTHX_ register SV *sv) so $a="9.22337203685478e+18"; $a+0; $a-- needs to be the same as $a="9.22337203685478e+18"; $a-- or we go insane. */ - + (void) sv_2iv(sv); if (SvIOK(sv)) goto oops_its_int; @@ -5534,6 +5800,12 @@ Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash) len = -len; is_utf8 = TRUE; } + if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT)) { + STRLEN tmplen = len; + /* See the note in hv.c:hv_fetch() --jhi */ + src = (char*)bytes_from_utf8((U8*)src, &tmplen, &is_utf8); + len = tmplen; + } if (!hash) PERL_HASH(hash, src, len); new_SV(sv); @@ -5986,6 +6258,23 @@ Get a sensible string out of the SV somehow. char * Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) { + return sv_pvn_force_flags(sv, lp, SV_GMAGIC); +} + +/* +=for apidoc sv_pvn_force_flags + +Get a sensible string out of the SV somehow. +If C has C bit set, will C on C if +appropriate, else not. C and C are +implemented in terms of this function. + +=cut +*/ + +char * +Perl_sv_pvn_force_flags(pTHX_ SV *sv, STRLEN *lp, I32 flags) +{ char *s; if (SvTHINKFIRST(sv) && !SvROK(sv)) @@ -6000,7 +6289,7 @@ Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) PL_op_name[PL_op->op_type]); } else - s = sv_2pv(sv, lp); + s = sv_2pv_flags(sv, lp, flags); if (s != SvPVX(sv)) { /* Almost, but not quite, sv_setpvn() */ STRLEN len = *lp; @@ -6025,18 +6314,21 @@ Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) char * Perl_sv_pvbyte(pTHX_ SV *sv) { + sv_utf8_downgrade(sv,0); return sv_pv(sv); } char * Perl_sv_pvbyten(pTHX_ SV *sv, STRLEN *lp) { + sv_utf8_downgrade(sv,0); return sv_pvn(sv,lp); } char * Perl_sv_pvbyten_force(pTHX_ SV *sv, STRLEN *lp) { + sv_utf8_downgrade(sv,0); return sv_pvn_force(sv,lp); } @@ -6261,6 +6553,25 @@ Perl_sv_setref_iv(pTHX_ SV *rv, const char *classname, IV iv) } /* +=for apidoc sv_setref_uv + +Copies an unsigned integer into a new SV, optionally blessing the SV. The C +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C argument indicates the package for the +blessing. Set C to C to avoid the blessing. The new SV +will be returned and will have a reference count of 1. + +=cut +*/ + +SV* +Perl_sv_setref_uv(pTHX_ SV *rv, const char *classname, UV uv) +{ + sv_setuv(newSVrv(rv,classname), uv); + return rv; +} + +/* =for apidoc sv_setref_nv Copies a double into a new SV, optionally blessing the SV. The C @@ -6354,7 +6665,7 @@ S_sv_unglob(pTHX_ SV *sv) SvREFCNT_dec(GvSTASH(sv)); GvSTASH(sv) = Nullhv; } - sv_unmagic(sv, '*'); + sv_unmagic(sv, PERL_MAGIC_glob); Safefree(GvNAME(sv)); GvMULTI_off(sv); @@ -6421,14 +6732,14 @@ Perl_sv_unref(pTHX_ SV *sv) void Perl_sv_taint(pTHX_ SV *sv) { - sv_magic((sv), Nullsv, 't', Nullch, 0); + sv_magic((sv), Nullsv, PERL_MAGIC_taint, Nullch, 0); } void Perl_sv_untaint(pTHX_ SV *sv) { if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { - MAGIC *mg = mg_find(sv, 't'); + MAGIC *mg = mg_find(sv, PERL_MAGIC_taint); if (mg) mg->mg_len &= ~1; } @@ -6438,7 +6749,7 @@ bool Perl_sv_tainted(pTHX_ SV *sv) { if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { - MAGIC *mg = mg_find(sv, 't'); + MAGIC *mg = mg_find(sv, PERL_MAGIC_taint); if (mg && ((mg->mg_len & 1) || ((mg->mg_len & 2) && mg->mg_obj == sv))) return TRUE; } @@ -6580,12 +6891,15 @@ Perl_sv_catpvf_mg_nocontext(SV *sv, const char* pat, ...) /* =for apidoc sv_catpvf -Processes its arguments like C and appends the formatted output -to an SV. Handles 'get' magic, but not 'set' magic. C must -typically be called after calling this function to handle 'set' magic. +Processes its arguments like C and appends the formatted +output to an SV. If the appended data contains "wide" characters +(including, but not limited to, SVs with a UTF-8 PV formatted with %s, +and characters >255 formatted with %c), the original SV might get +upgraded to UTF-8. Handles 'get' magic, but not 'set' magic. +C must typically be called after calling this function +to handle 'set' magic. -=cut -*/ +=cut */ void Perl_sv_catpvf(pTHX_ SV *sv, const char* pat, ...) @@ -6642,6 +6956,21 @@ Perl_sv_vsetpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV sv_vcatpvfn(sv, pat, patlen, args, svargs, svmax, maybe_tainted); } +STATIC I32 +S_expect_number(pTHX_ char** pattern) +{ + I32 var = 0; + switch (**pattern) { + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + while (isDIGIT(**pattern)) + var = var * 10 + (*(*pattern)++ - '0'); + } + return var; +} +#define EXPECT_NUMBER(pattern, var) (var = S_expect_number(aTHX_ &pattern)) + /* =for apidoc sv_vcatpvfn @@ -6663,7 +6992,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN origlen; I32 svix = 0; static char nullstr[] = "(null)"; - SV *argsv; + SV *argsv = Nullsv; /* no matter what, this is a string now */ (void)SvPV_force(sv, origlen); @@ -6702,7 +7031,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV bool alt = FALSE; bool left = FALSE; bool vectorize = FALSE; - bool utf = FALSE; + bool vectorarg = FALSE; + bool vec_utf = FALSE; char fill = ' '; char plus = 0; char intsize = 0; @@ -6730,7 +7060,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN veclen = 0; char c; int i; - unsigned base; + unsigned base = 0; IV iv; UV uv; NV nv; @@ -6739,10 +7069,13 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN gap; char *dotstr = "."; STRLEN dotstrlen = 1; - I32 epix = 0; /* explicit parameter index */ + I32 efix = 0; /* explicit format parameter index */ I32 ewix = 0; /* explicit width index */ + I32 epix = 0; /* explicit precision index */ + I32 evix = 0; /* explicit vector index */ bool asterisk = FALSE; + /* echo everything up to the next format specification */ for (q = p; q < patend && *q != '%'; ++q) ; if (q > p) { sv_catpvn(sv, p, q - p); @@ -6751,6 +7084,25 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV if (q++ >= patend) break; +/* + We allow format specification elements in this order: + \d+\$ explicit format parameter index + [-+ 0#]+ flags + \*?(\d+\$)?v vector with optional (optionally specified) arg + \d+|\*(\d+\$)? width using optional (optionally specified) arg + \.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg + [hlqLV] size + [%bcdefginopsux_DFOUX] format (mandatory) +*/ + if (EXPECT_NUMBER(q, width)) { + if (*q == '$') { + ++q; + efix = width; + } else { + goto gotwidth; + } + } + /* FLAGS */ while (*q) { @@ -6774,63 +7126,60 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV q++; continue; - case '*': /* printf("%*vX",":",$ipv6addr) */ - if (q[1] != 'v') - break; - q++; - if (args) - vecsv = va_arg(*args, SV*); - else if (svix < svmax) - vecsv = svargs[svix++]; - else - continue; - dotstr = SvPVx(vecsv,dotstrlen); - if (DO_UTF8(vecsv)) - is_utf = TRUE; - /* FALL THROUGH */ - - case 'v': - vectorize = TRUE; - q++; - continue; - default: break; } break; } - /* WIDTH */ - - scanwidth: - + tryasterisk: if (*q == '*') { - if (asterisk) - goto unknown; + q++; + if (EXPECT_NUMBER(q, ewix)) + if (*q++ != '$') + goto unknown; asterisk = TRUE; + } + if (*q == 'v') { q++; + if (vectorize) + goto unknown; + if ((vectorarg = asterisk)) { + evix = ewix; + ewix = 0; + asterisk = FALSE; + } + vectorize = TRUE; + goto tryasterisk; } - switch (*q) { - case '1': case '2': case '3': - case '4': case '5': case '6': - case '7': case '8': case '9': - width = 0; - while (isDIGIT(*q)) - width = width * 10 + (*q++ - '0'); - if (*q == '$') { - if (asterisk && ewix == 0) { - ewix = width; - width = 0; - q++; - goto scanwidth; - } else if (epix == 0) { - epix = width; - width = 0; - q++; - goto scanwidth; - } else - goto unknown; + if (!asterisk) + EXPECT_NUMBER(q, width); + + if (vectorize) { + if (vectorarg) { + if (args) + vecsv = va_arg(*args, SV*); + else + vecsv = (evix ? evix <= svmax : svix < svmax) ? + svargs[ewix ? ewix-1 : svix++] : &PL_sv_undef; + dotstr = SvPVx(vecsv, dotstrlen); + if (DO_UTF8(vecsv)) + is_utf = TRUE; + } + if (args) { + vecsv = va_arg(*args, SV*); + vecstr = (U8*)SvPVx(vecsv,veclen); + vec_utf = DO_UTF8(vecsv); + } + else if (efix ? efix <= svmax : svix < svmax) { + vecsv = svargs[efix ? efix-1 : svix++]; + vecstr = (U8*)SvPVx(vecsv,veclen); + vec_utf = DO_UTF8(vecsv); + } + else { + vecstr = (U8*)""; + veclen = 0; } } @@ -6843,19 +7192,22 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV left |= (i < 0); width = (i < 0) ? -i : i; } + gotwidth: /* PRECISION */ if (*q == '.') { q++; if (*q == '*') { + q++; + if (EXPECT_NUMBER(q, epix) && *q++ != '$') + goto unknown; if (args) i = va_arg(*args, int); else i = (ewix ? ewix <= svmax : svix < svmax) ? SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; precis = (i < 0) ? 0 : i; - q++; } else { precis = 0; @@ -6865,23 +7217,6 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV has_precis = TRUE; } - if (vectorize) { - if (args) { - vecsv = va_arg(*args, SV*); - vecstr = (U8*)SvPVx(vecsv,veclen); - utf = DO_UTF8(vecsv); - } - else if (epix ? epix <= svmax : svix < svmax) { - vecsv = svargs[epix ? epix-1 : svix++]; - vecstr = (U8*)SvPVx(vecsv,veclen); - utf = DO_UTF8(vecsv); - } - else { - vecstr = (U8*)""; - veclen = 0; - } - } - /* SIZE */ switch (*q) { @@ -6913,24 +7248,27 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* CONVERSION */ + if (*q == '%') { + eptr = q++; + elen = 1; + goto string; + } + + if (!args) + argsv = (efix ? efix <= svmax : svix < svmax) ? + svargs[efix ? efix-1 : svix++] : &PL_sv_undef; + switch (c = *q++) { /* STRINGS */ - case '%': - eptr = q - 1; - elen = 1; - goto string; - case 'c': - if (args) - uv = va_arg(*args, int); - else - uv = (epix ? epix <= svmax : svix < svmax) ? - SvIVx(svargs[epix ? epix-1 : svix++]) : 0; - if ((uv > 255 || (uv > 127 && SvUTF8(sv))) && !IN_BYTE) { + uv = args ? va_arg(*args, int) : SvIVx(argsv); + if ((uv > 255 || + (!UNI_IS_INVARIANT(uv) && SvUTF8(sv))) + && !IN_BYTES) { eptr = (char*)utf8buf; - elen = uv_to_utf8((U8*)eptr, uv) - utf8buf; + elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf; is_utf = TRUE; } else { @@ -6956,8 +7294,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV elen = sizeof nullstr - 1; } } - else if (epix ? epix <= svmax : svix < svmax) { - argsv = svargs[epix ? epix-1 : svix++]; + else { eptr = SvPVx(argsv, elen); if (DO_UTF8(argsv)) { if (has_precis && precis < elen) { @@ -6981,7 +7318,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV */ if (!args) goto unknown; - argsv = va_arg(*args,SV*); + argsv = va_arg(*args, SV*); eptr = SvPVx(argsv, elen); if (DO_UTF8(argsv)) is_utf = TRUE; @@ -6997,11 +7334,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV case 'p': if (alt) goto unknown; - if (args) - uv = PTR2UV(va_arg(*args, void*)); - else - uv = (epix ? epix <= svmax : svix < svmax) ? - PTR2UV(svargs[epix ? epix-1 : svix++]) : 0; + uv = PTR2UV(args ? va_arg(*args, void*) : argsv); base = 16; goto integer; @@ -7016,12 +7349,10 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV case 'i': if (vectorize) { STRLEN ulen; - if (!veclen) { - vectorize = FALSE; - break; - } - if (utf) - iv = (IV)utf8_to_uv(vecstr, veclen, &ulen, 0); + if (!veclen) + continue; + if (vec_utf) + iv = (IV)utf8n_to_uvchr(vecstr, veclen, &ulen, 0); else { iv = *vecstr; ulen = 1; @@ -7041,8 +7372,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } } else { - iv = (epix ? epix <= svmax : svix < svmax) ? - SvIVx(svargs[epix ? epix-1 : svix++]) : 0; + iv = SvIVx(argsv); switch (intsize) { case 'h': iv = (short)iv; break; default: break; @@ -7099,12 +7429,10 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV if (vectorize) { STRLEN ulen; vector: - if (!veclen) { - vectorize = FALSE; - break; - } - if (utf) - uv = utf8_to_uv(vecstr, veclen, &ulen, 0); + if (!veclen) + continue; + if (vec_utf) + uv = utf8n_to_uvchr(vecstr, veclen, &ulen, 0); else { uv = *vecstr; ulen = 1; @@ -7124,8 +7452,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } } else { - uv = (epix ? epix <= svmax : svix < svmax) ? - SvUVx(svargs[epix ? epix-1 : svix++]) : 0; + uv = SvUVx(argsv); switch (intsize) { case 'h': uv = (unsigned short)uv; break; default: break; @@ -7214,11 +7541,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* This is evil, but floating point is even more evil */ vectorize = FALSE; - if (args) - nv = va_arg(*args, NV); - else - nv = (epix ? epix <= svmax : svix < svmax) ? - SvNVx(svargs[epix ? epix-1 : svix++]) : 0.0; + nv = args ? va_arg(*args, NV) : SvNVx(argsv); need = 0; if (c != 'e' && c != 'E') { @@ -7298,8 +7621,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV #endif } } - else if (epix ? epix <= svmax : svix < svmax) - sv_setuv_mg(svargs[epix ? epix-1 : svix++], (UV)i); + else + sv_setuv_mg(argsv, (UV)i); continue; /* not "break" */ /* UNKNOWN */ @@ -7334,7 +7657,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* ... right here, because formatting flags should not apply */ SvGROW(sv, SvCUR(sv) + elen + 1); p = SvEND(sv); - memcpy(p, eptr, elen); + Copy(eptr, p, elen, char); p += elen; *p = '\0'; SvCUR(sv) = p - SvPVX(sv); @@ -7364,7 +7687,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV *p++ = '0'; } if (elen) { - memcpy(p, eptr, elen); + Copy(eptr, p, elen, char); p += elen; } if (gap && left) { @@ -7373,7 +7696,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } if (vectorize) { if (veclen) { - memcpy(p, dotstr, dotstrlen); + Copy(dotstr, p, dotstrlen, char); p += dotstrlen; } else @@ -7483,8 +7806,8 @@ Perl_gp_dup(pTHX_ GP *gp) MAGIC * Perl_mg_dup(pTHX_ MAGIC *mg) { - MAGIC *mgret = (MAGIC*)NULL; - MAGIC *mgprev; + MAGIC *mgprev = (MAGIC*)NULL; + MAGIC *mgret; if (!mg) return (MAGIC*)NULL; /* look for it in the table first */ @@ -7495,15 +7818,15 @@ Perl_mg_dup(pTHX_ MAGIC *mg) for (; mg; mg = mg->mg_moremagic) { MAGIC *nmg; Newz(0, nmg, 1, MAGIC); - if (!mgret) - mgret = nmg; - else + if (mgprev) mgprev->mg_moremagic = nmg; + else + mgret = nmg; nmg->mg_virtual = mg->mg_virtual; /* XXX copy dynamic vtable? */ nmg->mg_private = mg->mg_private; nmg->mg_type = mg->mg_type; nmg->mg_flags = mg->mg_flags; - if (mg->mg_type == 'r') { + if (mg->mg_type == PERL_MAGIC_qr) { nmg->mg_obj = (SV*)re_dup((REGEXP*)mg->mg_obj); } else { @@ -7513,10 +7836,12 @@ Perl_mg_dup(pTHX_ MAGIC *mg) } nmg->mg_len = mg->mg_len; nmg->mg_ptr = mg->mg_ptr; /* XXX random ptr? */ - if (mg->mg_ptr && mg->mg_type != 'g') { + if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) { if (mg->mg_len >= 0) { nmg->mg_ptr = SAVEPVN(mg->mg_ptr, mg->mg_len); - if (mg->mg_type == 'c' && AMT_AMAGIC((AMT*)mg->mg_ptr)) { + if (mg->mg_type == PERL_MAGIC_overload_table && + AMT_AMAGIC((AMT*)mg->mg_ptr)) + { AMT *amtp = (AMT*)mg->mg_ptr; AMT *namtp = (AMT*)nmg->mg_ptr; I32 i; @@ -7617,10 +7942,110 @@ Perl_ptr_table_split(pTHX_ PTR_TBL_t *tbl) } } +void +Perl_ptr_table_clear(pTHX_ PTR_TBL_t *tbl) +{ + register PTR_TBL_ENT_t **array; + register PTR_TBL_ENT_t *entry; + register PTR_TBL_ENT_t *oentry = Null(PTR_TBL_ENT_t*); + UV riter = 0; + UV max; + + if (!tbl || !tbl->tbl_items) { + return; + } + + array = tbl->tbl_ary; + entry = array[0]; + max = tbl->tbl_max; + + for (;;) { + if (entry) { + oentry = entry; + entry = entry->next; + Safefree(oentry); + } + if (!entry) { + if (++riter > max) { + break; + } + entry = array[riter]; + } + } + + tbl->tbl_items = 0; +} + +void +Perl_ptr_table_free(pTHX_ PTR_TBL_t *tbl) +{ + if (!tbl) { + return; + } + ptr_table_clear(tbl); + Safefree(tbl->tbl_ary); + Safefree(tbl); +} + #ifdef DEBUGGING char *PL_watch_pvx; #endif +STATIC SV * +S_gv_share(pTHX_ SV *sstr) +{ + GV *gv = (GV*)sstr; + SV *sv = &PL_sv_no; /* just need SvREADONLY-ness */ + + if (GvIO(gv) || GvFORM(gv)) { + GvSHARED_off(gv); /* GvIOs cannot be shared. nor can GvFORMs */ + } + else if (!GvCV(gv)) { + GvCV(gv) = (CV*)sv; + } + else { + /* CvPADLISTs cannot be shared */ + if (!CvXSUB(GvCV(gv))) { + GvSHARED_off(gv); + } + } + + if (!GvSHARED(gv)) { +#if 0 + PerlIO_printf(Perl_debug_log, "gv_share: unable to share %s::%s\n", + HvNAME(GvSTASH(gv)), GvNAME(gv)); +#endif + return Nullsv; + } + + /* + * write attempts will die with + * "Modification of a read-only value attempted" + */ + if (!GvSV(gv)) { + GvSV(gv) = sv; + } + else { + SvREADONLY_on(GvSV(gv)); + } + + if (!GvAV(gv)) { + GvAV(gv) = (AV*)sv; + } + else { + SvREADONLY_on(GvAV(gv)); + } + + if (!GvHV(gv)) { + GvHV(gv) = (HV*)sv; + } + else { + SvREADONLY_on(GvAV(gv)); + } + + return sstr; /* he_dup() will SvREFCNT_inc() */ +} + SV * Perl_sv_dup(pTHX_ SV *sstr) { @@ -7662,14 +8087,18 @@ Perl_sv_dup(pTHX_ SV *sstr) break; case SVt_RV: SvANY(dstr) = new_XRV(); - SvRV(dstr) = sv_dup_inc(SvRV(sstr)); + SvRV(dstr) = SvRV(sstr) && SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); break; case SVt_PV: SvANY(dstr) = new_XPV(); SvCUR(dstr) = SvCUR(sstr); SvLEN(dstr) = SvLEN(sstr); if (SvROK(sstr)) - SvRV(dstr) = sv_dup_inc(SvRV(sstr)); + SvRV(dstr) = SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -7681,7 +8110,9 @@ Perl_sv_dup(pTHX_ SV *sstr) SvLEN(dstr) = SvLEN(sstr); SvIVX(dstr) = SvIVX(sstr); if (SvROK(sstr)) - SvRV(dstr) = sv_dup_inc(SvRV(sstr)); + SvRV(dstr) = SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -7694,7 +8125,9 @@ Perl_sv_dup(pTHX_ SV *sstr) SvIVX(dstr) = SvIVX(sstr); SvNVX(dstr) = SvNVX(sstr); if (SvROK(sstr)) - SvRV(dstr) = sv_dup_inc(SvRV(sstr)); + SvRV(dstr) = SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -7709,7 +8142,9 @@ Perl_sv_dup(pTHX_ SV *sstr) SvMAGIC(dstr) = mg_dup(SvMAGIC(sstr)); SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) - SvRV(dstr) = sv_dup_inc(SvRV(sstr)); + SvRV(dstr) = SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -7724,7 +8159,9 @@ Perl_sv_dup(pTHX_ SV *sstr) SvMAGIC(dstr) = mg_dup(SvMAGIC(sstr)); SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) - SvRV(dstr) = sv_dup_inc(SvRV(sstr)); + SvRV(dstr) = SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -7742,7 +8179,9 @@ Perl_sv_dup(pTHX_ SV *sstr) SvMAGIC(dstr) = mg_dup(SvMAGIC(sstr)); SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) - SvRV(dstr) = sv_dup_inc(SvRV(sstr)); + SvRV(dstr) = SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -7753,6 +8192,18 @@ Perl_sv_dup(pTHX_ SV *sstr) LvTYPE(dstr) = LvTYPE(sstr); break; case SVt_PVGV: + if (GvSHARED((GV*)sstr)) { + SV *share; + if ((share = gv_share(sstr))) { + del_SV(dstr); + dstr = share; +#if 0 + PerlIO_printf(Perl_debug_log, "sv_dup: sharing %s::%s\n", + HvNAME(GvSTASH(share)), GvNAME(share)); +#endif + break; + } + } SvANY(dstr) = new_XPVGV(); SvCUR(dstr) = SvCUR(sstr); SvLEN(dstr) = SvLEN(sstr); @@ -7761,7 +8212,9 @@ Perl_sv_dup(pTHX_ SV *sstr) SvMAGIC(dstr) = mg_dup(SvMAGIC(sstr)); SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) - SvRV(dstr) = sv_dup_inc(SvRV(sstr)); + SvRV(dstr) = SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -7782,7 +8235,9 @@ Perl_sv_dup(pTHX_ SV *sstr) SvMAGIC(dstr) = mg_dup(SvMAGIC(sstr)); SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) - SvRV(dstr) = sv_dup_inc(SvRV(sstr)); + SvRV(dstr) = SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -7900,7 +8355,7 @@ dup_pvcv: CvROOT(dstr) = OpREFCNT_inc(CvROOT(sstr)); CvXSUB(dstr) = CvXSUB(sstr); CvXSUBANY(dstr) = CvXSUBANY(sstr); - CvGV(dstr) = gv_dup_inc(CvGV(sstr)); + CvGV(dstr) = gv_dup(CvGV(sstr)); CvDEPTH(dstr) = CvDEPTH(sstr); if (CvPADLIST(sstr) && !AvREAL(CvPADLIST(sstr))) { /* XXX padlists are real, but pretend to be not */ @@ -7911,7 +8366,10 @@ dup_pvcv: } else CvPADLIST(dstr) = av_dup_inc(CvPADLIST(sstr)); - CvOUTSIDE(dstr) = cv_dup_inc(CvOUTSIDE(sstr)); + if (!CvANON(sstr) || CvCLONED(sstr)) + CvOUTSIDE(dstr) = cv_dup_inc(CvOUTSIDE(sstr)); + else + CvOUTSIDE(dstr) = cv_dup(CvOUTSIDE(sstr)); CvFLAGS(dstr) = CvFLAGS(sstr); break; default: @@ -7965,7 +8423,7 @@ Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max) ncx->blk_sub.argarray = (cx->blk_sub.hasargs ? av_dup_inc(cx->blk_sub.argarray) : Nullav); - ncx->blk_sub.savearray = av_dup(cx->blk_sub.savearray); + ncx->blk_sub.savearray = av_dup_inc(cx->blk_sub.savearray); ncx->blk_sub.olddepth = cx->blk_sub.olddepth; ncx->blk_sub.hasargs = cx->blk_sub.hasargs; ncx->blk_sub.lval = cx->blk_sub.lval; @@ -8219,6 +8677,7 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl) TOPIV(nss,ix) = iv; break; case SAVEt_FREESV: + case SAVEt_MORTALIZESV: sv = (SV*)POPPTR(ss,ix); TOPPTR(nss,ix) = sv_dup_inc(sv); break; @@ -8378,6 +8837,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_scopestack = 0; PL_savestack = 0; PL_retstack = 0; + PL_sig_pending = 0; # else /* !DEBUGGING */ Zero(my_perl, 1, PerlInterpreter); # endif /* DEBUGGING */ @@ -8404,6 +8864,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_scopestack = 0; PL_savestack = 0; PL_retstack = 0; + PL_sig_pending = 0; # else /* !DEBUGGING */ Zero(my_perl, 1, PerlInterpreter); # endif /* DEBUGGING */ @@ -8550,7 +9011,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_defgv = gv_dup(proto_perl->Idefgv); PL_argvgv = gv_dup(proto_perl->Iargvgv); PL_argvoutgv = gv_dup(proto_perl->Iargvoutgv); - PL_argvout_stack = av_dup(proto_perl->Iargvout_stack); + PL_argvout_stack = av_dup_inc(proto_perl->Iargvout_stack); /* shortcuts to regexp stuff */ PL_replgv = gv_dup(proto_perl->Ireplgv); @@ -8761,7 +9222,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_numeric_name = SAVEPV(proto_perl->Inumeric_name); PL_numeric_standard = proto_perl->Inumeric_standard; PL_numeric_local = proto_perl->Inumeric_local; - PL_numeric_radix = proto_perl->Inumeric_radix; + PL_numeric_radix_sv = sv_dup_inc(proto_perl->Inumeric_radix_sv); #endif /* !USE_LOCALE_NUMERIC */ /* utf8 character classes */ @@ -8803,12 +9264,18 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_uudmap['M'] = 0; /* reinits on demand */ PL_bitcount = Nullch; /* reinits on demand */ + if (proto_perl->Ipsig_pend) { + Newz(0, PL_psig_pend, SIG_SIZE, int); + } + else { + PL_psig_pend = (int*)NULL; + } + if (proto_perl->Ipsig_ptr) { - int sig_num[] = { SIG_NUM }; - Newz(0, PL_psig_ptr, sizeof(sig_num)/sizeof(*sig_num), SV*); - Newz(0, PL_psig_name, sizeof(sig_num)/sizeof(*sig_num), SV*); - for (i = 1; PL_sig_name[i]; i++) { - PL_psig_ptr[i] = sv_dup_inc(proto_perl->Ipsig_ptr[i]); + Newz(0, PL_psig_ptr, SIG_SIZE, SV*); + Newz(0, PL_psig_name, SIG_SIZE, SV*); + for (i = 1; i < SIG_SIZE; i++) { + PL_psig_ptr[i] = sv_dup_inc(proto_perl->Ipsig_ptr[i]); PL_psig_name[i] = sv_dup_inc(proto_perl->Ipsig_name[i]); } } @@ -8819,7 +9286,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* thrdvar.h stuff */ - if (flags & 1) { + if (flags & CLONEf_COPY_STACKS) { /* next allocation will be PL_tmps_stack[PL_tmps_ix+1] */ PL_tmps_ix = proto_perl->Ttmps_ix; PL_tmps_max = proto_perl->Ttmps_max; @@ -8969,7 +9436,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_regendp = (I32*)NULL; PL_reglastparen = (U32*)NULL; PL_regtill = Nullch; - PL_regprev = '\n'; PL_reg_start_tmp = (char**)NULL; PL_reg_start_tmpl = 0; PL_regdata = (struct reg_data*)NULL; @@ -9005,6 +9471,11 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_reginterp_cnt = 0; PL_reg_starttry = 0; + if (!(flags & CLONEf_KEEP_PTR_TABLE)) { + ptr_table_free(PL_ptr_table); + PL_ptr_table = NULL; + } + #ifdef PERL_OBJECT return (PerlInterpreter*)pPerl; #else @@ -9035,7 +9506,7 @@ do_clean_objs(pTHXo_ SV *sv) SV* rv; if (SvROK(sv) && SvOBJECT(rv = SvRV(sv))) { - DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(sv));) + DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(sv))); if (SvWEAKREF(sv)) { sv_del_backref(sv); SvWEAKREF_off(sv); @@ -9061,7 +9532,7 @@ do_clean_named_objs(pTHXo_ SV *sv) (GvIO(sv) && SvOBJECT(GvIO(sv))) || (GvCV(sv) && SvOBJECT(GvCV(sv))) ) { - DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning named glob object:\n "), sv_dump(sv));) + DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning named glob object:\n "), sv_dump(sv))); SvREFCNT_dec(sv); } } @@ -9071,7 +9542,7 @@ do_clean_named_objs(pTHXo_ SV *sv) static void do_clean_all(pTHXo_ SV *sv) { - DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%"UVxf"\n", PTR2UV(sv)) );) + DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%"UVxf"\n", PTR2UV(sv)) )); SvFLAGS(sv) |= SVf_BREAK; SvREFCNT_dec(sv); }