X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pp.c;h=d55c4a8eb62e92f7d08e14131770090ed6c8d620;hb=594c10dca58a5fa69624af729798b94360003867;hp=01dac5cf1d58747b27c9214dd315ee908ff1a9a3;hpb=2723d2164bbb7a5d9949447f06dde05bf516db63;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pp.c b/pp.c index 01dac5c..d55c4a8 100644 --- a/pp.c +++ b/pp.c @@ -1,7 +1,7 @@ /* pp.c * * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, - * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others + * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -61,7 +61,8 @@ PP(pp_padav) dVAR; dSP; dTARGET; I32 gimme; if (PL_op->op_private & OPpLVAL_INTRO) - SAVECLEARSV(PAD_SVl(PL_op->op_targ)); + if (!(PL_op->op_private & OPpPAD_STATE)) + SAVECLEARSV(PAD_SVl(PL_op->op_targ)); EXTEND(SP, 1); if (PL_op->op_flags & OPf_REF) { PUSHs(TARG); @@ -104,7 +105,8 @@ PP(pp_padhv) XPUSHs(TARG); if (PL_op->op_private & OPpLVAL_INTRO) - SAVECLEARSV(PAD_SVl(PL_op->op_targ)); + if (!(PL_op->op_private & OPpPAD_STATE)) + SAVECLEARSV(PAD_SVl(PL_op->op_targ)); if (PL_op->op_flags & OPf_REF) RETURN; else if (LVRET) { @@ -138,7 +140,7 @@ PP(pp_rv2gv) GV * const gv = (GV*) sv_newmortal(); gv_init(gv, 0, "", 0, 0); GvIOp(gv) = (IO *)sv; - (void)SvREFCNT_inc(sv); + SvREFCNT_inc_void_NN(sv); sv = (SV*) gv; } else if (SvTYPE(sv) != SVt_PVGV) @@ -172,7 +174,7 @@ PP(pp_rv2gv) } if (SvTYPE(sv) < SVt_RV) sv_upgrade(sv, SVt_RV); - if (SvPVX_const(sv)) { + else if (SvPVX_const(sv)) { SvPV_free(sv); SvLEN_set(sv, 0); SvCUR_set(sv, 0); @@ -220,6 +222,50 @@ PP(pp_rv2gv) RETURN; } +/* Helper function for pp_rv2sv and pp_rv2av */ +GV * +Perl_softref2xv(pTHX_ SV *const sv, const char *const what, const U32 type, + SV ***spp) +{ + dVAR; + GV *gv; + + if (PL_op->op_private & HINT_STRICT_REFS) { + if (SvOK(sv)) + Perl_die(aTHX_ PL_no_symref_sv, sv, what); + else + Perl_die(aTHX_ PL_no_usym, what); + } + if (!SvOK(sv)) { + if (PL_op->op_flags & OPf_REF) + Perl_die(aTHX_ PL_no_usym, what); + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); + if (type != SVt_PV && GIMME_V == G_ARRAY) { + (*spp)--; + return NULL; + } + **spp = &PL_sv_undef; + return NULL; + } + if ((PL_op->op_flags & OPf_SPECIAL) && + !(PL_op->op_flags & OPf_MOD)) + { + gv = gv_fetchsv(sv, 0, type); + if (!gv + && (!is_gv_magical_sv(sv,0) + || !(gv = gv_fetchsv(sv, GV_ADD, type)))) + { + **spp = &PL_sv_undef; + return NULL; + } + } + else { + gv = gv_fetchsv(sv, GV_ADD, type); + } + return gv; +} + PP(pp_rv2sv) { dVAR; dSP; dTOPss; @@ -234,7 +280,10 @@ PP(pp_rv2sv) case SVt_PVAV: case SVt_PVHV: case SVt_PVCV: + case SVt_PVFM: + case SVt_PVIO: DIE(aTHX_ "Not a SCALAR reference"); + default: NOOP; } } else { @@ -246,33 +295,9 @@ PP(pp_rv2sv) if (SvROK(sv)) goto wasref; } - if (PL_op->op_private & HINT_STRICT_REFS) { - if (SvOK(sv)) - DIE(aTHX_ PL_no_symref_sv, sv, "a SCALAR"); - else - DIE(aTHX_ PL_no_usym, "a SCALAR"); - } - if (!SvOK(sv)) { - if (PL_op->op_flags & OPf_REF) - DIE(aTHX_ PL_no_usym, "a SCALAR"); - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - RETSETUNDEF; - } - if ((PL_op->op_flags & OPf_SPECIAL) && - !(PL_op->op_flags & OPf_MOD)) - { - gv = (GV*)gv_fetchsv(sv, 0, SVt_PV); - if (!gv - && (!is_gv_magical_sv(sv, 0) - || !(gv = (GV*)gv_fetchsv(sv, GV_ADD, SVt_PV)))) - { - RETSETUNDEF; - } - } - else { - gv = (GV*)gv_fetchsv(sv, GV_ADD, SVt_PV); - } + gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp); + if (!gv) + RETURN; } sv = GvSVn(gv); } @@ -298,8 +323,7 @@ PP(pp_av2arylen) AV * const av = (AV*)TOPs; SV ** const sv = Perl_av_arylen_p(aTHX_ (AV*)av); if (!*sv) { - *sv = newSV(0); - sv_upgrade(*sv, SVt_PVMG); + *sv = newSV_type(SVt_PVMG); sv_magic(*sv, (SV*)av, PERL_MAGIC_arylen, NULL, 0); } SETs(*sv); @@ -320,7 +344,7 @@ PP(pp_pos) if (LvTARG(TARG) != sv) { if (LvTARG(TARG)) SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc(sv); + LvTARG(TARG) = SvREFCNT_inc_simple(sv); } PUSHs(TARG); /* no SvSETMAGIC */ RETURN; @@ -332,7 +356,7 @@ PP(pp_pos) I32 i = mg->mg_len; if (DO_UTF8(sv)) sv_pos_b2u(sv, &i); - PUSHi(i + PL_curcop->cop_arybase); + PUSHi(i + CopARYBASE_get(PL_curcop)); RETURN; } } @@ -344,7 +368,7 @@ PP(pp_rv2cv) { dVAR; dSP; GV *gv; - HV *stash; + HV *stash_unused; const I32 flags = (PL_op->op_flags & OPf_SPECIAL) ? 0 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT) @@ -353,7 +377,7 @@ PP(pp_rv2cv) /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */ /* (But not in defined().) */ - CV *cv = sv_2cv(TOPs, &stash, &gv, flags); + CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags); if (cv) { if (CvCLONE(cv)) cv = (CV*)sv_2mortal((SV*)cv_clone(cv)); @@ -382,18 +406,25 @@ PP(pp_prototype) SV *ret = &PL_sv_undef; if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) { - const char * const s = SvPVX_const(TOPs); + const char * s = SvPVX_const(TOPs); if (strnEQ(s, "CORE::", 6)) { - const int code = keyword(s + 6, SvCUR(TOPs) - 6); + const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1); if (code < 0) { /* Overridable. */ #define MAX_ARGS_OP ((sizeof(I32) - 1) * 2) - int i = 0, n = 0, seen_question = 0; + int i = 0, n = 0, seen_question = 0, defgv = 0; I32 oa; char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */ if (code == -KEY_chop || code == -KEY_chomp || code == -KEY_exec || code == -KEY_system) goto set; + if (code == -KEY_mkdir) { + ret = sv_2mortal(newSVpvs("_;$")); + goto set; + } + if (code == -KEY_readpipe) { + s = "CORE::backtick"; + } while (i < MAXO) { /* The slow way. */ if (strEQ(s + 6, PL_op_name[i]) || strEQ(s + 6, PL_op_desc[i])) @@ -404,9 +435,10 @@ PP(pp_prototype) } goto nonesuch; /* Should not happen... */ found: + defgv = PL_opargs[i] & OA_DEFGV; oa = PL_opargs[i] >> OASHIFT; while (oa) { - if (oa & OA_OPTIONAL && !seen_question) { + if (oa & OA_OPTIONAL && !seen_question && !defgv) { seen_question = 1; str[n++] = ';'; } @@ -420,6 +452,8 @@ PP(pp_prototype) str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)]; oa = oa >> 4; } + if (defgv && str[n - 1] == '$') + str[n - 1] = '_'; str[n++] = '\0'; ret = sv_2mortal(newSVpvn(str, n - 1)); } @@ -487,19 +521,19 @@ S_refto(pTHX_ SV *sv) if (!(sv = LvTARG(sv))) sv = &PL_sv_undef; else - (void)SvREFCNT_inc(sv); + SvREFCNT_inc_void_NN(sv); } else if (SvTYPE(sv) == SVt_PVAV) { if (!AvREAL((AV*)sv) && AvREIFY((AV*)sv)) av_reify((AV*)sv); SvTEMP_off(sv); - (void)SvREFCNT_inc(sv); + SvREFCNT_inc_void_NN(sv); } else if (SvPADTMP(sv) && !IS_PADGV(sv)) sv = newSVsv(sv); else { SvTEMP_off(sv); - (void)SvREFCNT_inc(sv); + SvREFCNT_inc_void_NN(sv); } rv = sv_newmortal(); sv_upgrade(rv, SVt_RV); @@ -543,7 +577,7 @@ PP(pp_bless) if (len == 0 && ckWARN(WARN_MISC)) Perl_warner(aTHX_ packWARN(WARN_MISC), "Explicit blessing to '' (assuming package main)"); - stash = gv_stashpvn(ptr, len, TRUE); + stash = gv_stashpvn(ptr, len, GV_ADD); } (void)sv_bless(TOPs, stash); @@ -637,13 +671,22 @@ PP(pp_study) if (SvSCREAM(sv)) RETPUSHYES; } - else { - if (PL_lastscream) { - SvSCREAM_off(PL_lastscream); - SvREFCNT_dec(PL_lastscream); - } - PL_lastscream = SvREFCNT_inc(sv); + s = (unsigned char*)(SvPV(sv, len)); + pos = len; + if (pos <= 0 || !SvPOK(sv) || SvUTF8(sv)) { + /* No point in studying a zero length string, and not safe to study + anything that doesn't appear to be a simple scalar (and hence might + change between now and when the regexp engine runs without our set + magic ever running) such as a reference to an object with overloaded + stringification. */ + RETPUSHNO; + } + + if (PL_lastscream) { + SvSCREAM_off(PL_lastscream); + SvREFCNT_dec(PL_lastscream); } + PL_lastscream = SvREFCNT_inc_simple(sv); s = (unsigned char*)(SvPV(sv, len)); pos = len; @@ -771,7 +814,7 @@ PP(pp_undef) if (cv_const_sv((CV*)sv) && ckWARN(WARN_MISC)) Perl_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined", CvANON((CV*)sv) ? "(anonymous)" : GvENAME(CvGV((CV*)sv))); - /* FALL THROUGH */ + /* FALLTHROUGH */ case SVt_PVFM: { /* let user-undef'd sub keep its identity */ @@ -785,6 +828,15 @@ PP(pp_undef) SvSetMagicSV(sv, &PL_sv_undef); else { GP *gp; + HV *stash; + + /* undef *Foo:: */ + if((stash = GvHV((GV*)sv)) && HvNAME_get(stash)) + mro_isa_changed_in(stash); + /* undef *Pkg::meth_name ... */ + else if(GvCVu((GV*)sv) && (stash = GvSTASH((GV*)sv)) && HvNAME_get(stash)) + mro_method_changed_in(stash); + gp_free((GV*)sv); Newxz(gp, 1, GP); GvGP(sv) = gp_ref(gp); @@ -987,7 +1039,47 @@ PP(pp_pow) #endif { dPOPTOPnnrl; + +#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG) + /* + We are building perl with long double support and are on an AIX OS + afflicted with a powl() function that wrongly returns NaNQ for any + negative base. This was reported to IBM as PMR #23047-379 on + 03/06/2006. The problem exists in at least the following versions + of AIX and the libm fileset, and no doubt others as well: + + AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50 + AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29 + AIX 5.2.0 bos.adt.libm 5.2.0.85 + + So, until IBM fixes powl(), we provide the following workaround to + handle the problem ourselves. Our logic is as follows: for + negative bases (left), we use fmod(right, 2) to check if the + exponent is an odd or even integer: + + - if odd, powl(left, right) == -powl(-left, right) + - if even, powl(left, right) == powl(-left, right) + + If the exponent is not an integer, the result is rightly NaNQ, so + we just return that (as NV_NAN). + */ + + if (left < 0.0) { + NV mod2 = Perl_fmod( right, 2.0 ); + if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */ + SETn( -Perl_pow( -left, right) ); + } else if (mod2 == 0.0) { /* even integer */ + SETn( Perl_pow( -left, right) ); + } else { /* fractional power */ + SETn( NV_NAN ); + } + } else { + SETn( Perl_pow( left, right) ); + } +#else SETn( Perl_pow( left, right) ); +#endif /* HAS_AIX_POWL_NEG_BASE_BUG */ + #ifdef PERL_PRESERVE_IVUV if (is_int) SvIV_please(TOPs); @@ -1046,6 +1138,7 @@ PP(pp_multiply) bhigh = blow >> (4 * sizeof (UV)); blow &= botmask; if (ahigh && bhigh) { + NOOP; /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000 which is overflow. Drop to NVs below. */ } else if (!ahigh && !bhigh) { @@ -1222,7 +1315,11 @@ PP(pp_divide) #endif /* PERL_TRY_UV_DIVIDE */ { dPOPPOPnnrl; +#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) + if (! Perl_isnan(right) && right == 0.0) +#else if (right == 0.0) +#endif DIE(aTHX_ "Illegal division by zero"); PUSHn( left / right ); RETURN; @@ -1387,7 +1484,7 @@ PP(pp_repeat) count = (IV)nv; } else - count = SvIVx(sv); + count = SvIV(sv); if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) { dMARK; static const char oom_list_extend[] = "Out of memory during list extend"; @@ -1453,7 +1550,7 @@ PP(pp_repeat) SvCUR_set(TARG, 0); else { const STRLEN max = (UV)count * len; - if (len > ((MEM_SIZE)~0)/count) + if (len > MEM_SIZE_MAX / count) Perl_croak(aTHX_ oom_string_extend); MEM_WRAP_CHECK_1(max, char, oom_string_extend); SvGROW(TARG, max + 1); @@ -1702,8 +1799,15 @@ PP(pp_lt) } #endif { +#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) + dPOPTOPnnrl; + if (Perl_isnan(left) || Perl_isnan(right)) + RETSETNO; + SETs(boolSV(left < right)); +#else dPOPnv; SETs(boolSV(TOPn < value)); +#endif RETURN; } } @@ -1778,8 +1882,15 @@ PP(pp_gt) } #endif { +#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) + dPOPTOPnnrl; + if (Perl_isnan(left) || Perl_isnan(right)) + RETSETNO; + SETs(boolSV(left > right)); +#else dPOPnv; SETs(boolSV(TOPn > value)); +#endif RETURN; } } @@ -1854,8 +1965,15 @@ PP(pp_le) } #endif { +#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) + dPOPTOPnnrl; + if (Perl_isnan(left) || Perl_isnan(right)) + RETSETNO; + SETs(boolSV(left <= right)); +#else dPOPnv; SETs(boolSV(TOPn <= value)); +#endif RETURN; } } @@ -1930,8 +2048,15 @@ PP(pp_ge) } #endif { +#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) + dPOPTOPnnrl; + if (Perl_isnan(left) || Perl_isnan(right)) + RETSETNO; + SETs(boolSV(left >= right)); +#else dPOPnv; SETs(boolSV(TOPn >= value)); +#endif RETURN; } } @@ -1999,8 +2124,15 @@ PP(pp_ne) } #endif { +#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) + dPOPTOPnnrl; + if (Perl_isnan(left) || Perl_isnan(right)) + RETSETYES; + SETs(boolSV(left != right)); +#else dPOPnv; SETs(boolSV(TOPn != value)); +#endif RETURN; } } @@ -2205,50 +2337,32 @@ PP(pp_bit_and) } } -PP(pp_bit_xor) -{ - dVAR; dSP; dATARGET; tryAMAGICbin(bxor,opASSIGN); - { - dPOPTOPssrl; - SvGETMAGIC(left); - SvGETMAGIC(right); - if (SvNIOKp(left) || SvNIOKp(right)) { - if (PL_op->op_private & HINT_INTEGER) { - const IV i = (USE_LEFT(left) ? SvIV_nomg(left) : 0) ^ SvIV_nomg(right); - SETi(i); - } - else { - const UV u = (USE_LEFT(left) ? SvUV_nomg(left) : 0) ^ SvUV_nomg(right); - SETu(u); - } - } - else { - do_vop(PL_op->op_type, TARG, left, right); - SETTARG; - } - RETURN; - } -} - PP(pp_bit_or) { - dVAR; dSP; dATARGET; tryAMAGICbin(bor,opASSIGN); + dVAR; dSP; dATARGET; + const int op_type = PL_op->op_type; + + tryAMAGICbin_var((op_type == OP_BIT_OR ? bor_amg : bxor_amg), opASSIGN); { dPOPTOPssrl; SvGETMAGIC(left); SvGETMAGIC(right); if (SvNIOKp(left) || SvNIOKp(right)) { if (PL_op->op_private & HINT_INTEGER) { - const IV i = (USE_LEFT(left) ? SvIV_nomg(left) : 0) | SvIV_nomg(right); - SETi(i); + const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0); + const IV r = SvIV_nomg(right); + const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); + SETi(result); } else { - const UV u = (USE_LEFT(left) ? SvUV_nomg(left) : 0) | SvUV_nomg(right); - SETu(u); + const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0); + const UV r = SvUV_nomg(right); + const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); + SETu(result); } } else { - do_vop(PL_op->op_type, TARG, left, right); + do_vop(op_type, TARG, left, right); SETTARG; } RETURN; @@ -2360,16 +2474,16 @@ PP(pp_complement) if (SvUTF8(TARG)) { /* Calculate exact length, let's not estimate. */ STRLEN targlen = 0; - U8 *result; - U8 *send; STRLEN l; UV nchar = 0; UV nwide = 0; + U8 * const send = tmps + len; + U8 * const origtmps = tmps; + const UV utf8flags = UTF8_ALLOW_ANYUV; - send = tmps + len; while (tmps < send) { - const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); - tmps += UTF8SKIP(tmps); + const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags); + tmps += l; targlen += UNISKIP(~c); nchar++; if (c > 0xff) @@ -2377,33 +2491,39 @@ PP(pp_complement) } /* Now rewind strings and write them. */ - tmps -= len; + tmps = origtmps; if (nwide) { - Newxz(result, targlen + 1, U8); + U8 *result; + U8 *p; + + Newx(result, targlen + 1, U8); + p = result; while (tmps < send) { - const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); - tmps += UTF8SKIP(tmps); - result = uvchr_to_utf8_flags(result, ~c, UNICODE_ALLOW_ANY); + const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags); + tmps += l; + p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY); } - *result = '\0'; - result -= targlen; - sv_setpvn(TARG, (char*)result, targlen); + *p = '\0'; + sv_usepvn_flags(TARG, (char*)result, targlen, + SV_HAS_TRAILING_NUL); SvUTF8_on(TARG); } else { - Newxz(result, nchar + 1, U8); + U8 *result; + U8 *p; + + Newx(result, nchar + 1, U8); + p = result; while (tmps < send) { - const U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY); - tmps += UTF8SKIP(tmps); - *result++ = ~c; + const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags); + tmps += l; + *p++ = ~c; } - *result = '\0'; - result -= nchar; - sv_setpvn(TARG, (char*)result, nchar); + *p = '\0'; + sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL); SvUTF8_off(TARG); } - Safefree(result); SETs(TARG); RETURN; } @@ -2413,7 +2533,7 @@ PP(pp_complement) for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++) *tmps = ~*tmps; tmpl = (long*)tmps; - for ( ; anum >= sizeof(long); anum -= sizeof(long), tmpl++) + for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++) *tmpl = ~*tmpl; tmps = (U8*)tmpl; } @@ -2441,19 +2561,30 @@ PP(pp_i_multiply) PP(pp_i_divide) { + IV num; dVAR; dSP; dATARGET; tryAMAGICbin(div,opASSIGN); { dPOPiv; if (value == 0) - DIE(aTHX_ "Illegal division by zero"); - value = POPi / value; + DIE(aTHX_ "Illegal division by zero"); + num = POPi; + + /* avoid FPE_INTOVF on some platforms when num is IV_MIN */ + if (value == -1) + value = - num; + else + value = num / value; PUSHi( value ); RETURN; } } +#if defined(__GLIBC__) && IVSIZE == 8 STATIC PP(pp_i_modulo_0) +#else +PP(pp_i_modulo) +#endif { /* This is the vanilla old i_modulo. */ dVAR; dSP; dATARGET; tryAMAGICbin(modulo,opASSIGN); @@ -2461,7 +2592,11 @@ PP(pp_i_modulo_0) dPOPTOPiirl; if (!right) DIE(aTHX_ "Illegal modulus zero"); - SETi( left % right ); + /* avoid FPE_INTOVF on some platforms when left is IV_MIN */ + if (right == -1) + SETi( 0 ); + else + SETi( left % right ); RETURN; } } @@ -2469,6 +2604,7 @@ PP(pp_i_modulo_0) #if defined(__GLIBC__) && IVSIZE == 8 STATIC PP(pp_i_modulo_1) + { /* This is the i_modulo with the workaround for the _moddi3 bug * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround). @@ -2478,11 +2614,14 @@ PP(pp_i_modulo_1) dPOPTOPiirl; if (!right) DIE(aTHX_ "Illegal modulus zero"); - SETi( left % PERL_ABS(right) ); + /* avoid FPE_INTOVF on some platforms when left is IV_MIN */ + if (right == -1) + SETi( 0 ); + else + SETi( left % PERL_ABS(right) ); RETURN; } } -#endif PP(pp_i_modulo) { @@ -2502,7 +2641,6 @@ PP(pp_i_modulo) * opcode dispatch table if that is the case, remembering to * also apply the workaround so that this first round works * right, too. See [perl #9402] for more information. */ -#if defined(__GLIBC__) && IVSIZE == 8 { IV l = 3; IV r = -10; @@ -2518,11 +2656,15 @@ PP(pp_i_modulo) right = PERL_ABS(right); } } -#endif - SETi( left % right ); + /* avoid FPE_INTOVF on some platforms when left is IV_MIN */ + if (right == -1) + SETi( 0 ); + else + SETi( left % right ); RETURN; } } +#endif PP(pp_i_add) { @@ -2643,20 +2785,43 @@ PP(pp_atan2) PP(pp_sin) { - dVAR; dSP; dTARGET; tryAMAGICun(sin); - { - const NV value = POPn; - XPUSHn(Perl_sin(value)); - RETURN; + dVAR; dSP; dTARGET; + int amg_type = sin_amg; + const char *neg_report = NULL; + NV (*func)(NV) = Perl_sin; + const int op_type = PL_op->op_type; + + switch (op_type) { + case OP_COS: + amg_type = cos_amg; + func = Perl_cos; + break; + case OP_EXP: + amg_type = exp_amg; + func = Perl_exp; + break; + case OP_LOG: + amg_type = log_amg; + func = Perl_log; + neg_report = "log"; + break; + case OP_SQRT: + amg_type = sqrt_amg; + func = Perl_sqrt; + neg_report = "sqrt"; + break; } -} -PP(pp_cos) -{ - dVAR; dSP; dTARGET; tryAMAGICun(cos); + tryAMAGICun_var(amg_type); { const NV value = POPn; - XPUSHn(Perl_cos(value)); + if (neg_report) { + if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) { + SET_NUMERIC_STANDARD(); + DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value); + } + } + XPUSHn(func(value)); RETURN; } } @@ -2705,46 +2870,6 @@ PP(pp_srand) RETPUSHYES; } -PP(pp_exp) -{ - dVAR; dSP; dTARGET; tryAMAGICun(exp); - { - NV value; - value = POPn; - value = Perl_exp(value); - XPUSHn(value); - RETURN; - } -} - -PP(pp_log) -{ - dVAR; dSP; dTARGET; tryAMAGICun(log); - { - const NV value = POPn; - if (value <= 0.0) { - SET_NUMERIC_STANDARD(); - DIE(aTHX_ "Can't take log of %"NVgf, value); - } - XPUSHn(Perl_log(value)); - RETURN; - } -} - -PP(pp_sqrt) -{ - dVAR; dSP; dTARGET; tryAMAGICun(sqrt); - { - const NV value = POPn; - if (value < 0.0) { - SET_NUMERIC_STANDARD(); - DIE(aTHX_ "Can't take sqrt of %"NVgf, value); - } - XPUSHn(Perl_sqrt(value)); - RETURN; - } -} - PP(pp_int) { dVAR; dSP; dTARGET; tryAMAGICun(int); @@ -2821,37 +2946,6 @@ PP(pp_abs) RETURN; } - -PP(pp_hex) -{ - dVAR; dSP; dTARGET; - const char *tmps; - I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; - STRLEN len; - NV result_nv; - UV result_uv; - SV* const sv = POPs; - - tmps = (SvPV_const(sv, len)); - if (DO_UTF8(sv)) { - /* If Unicode, try to downgrade - * If not possible, croak. */ - SV* const tsv = sv_2mortal(newSVsv(sv)); - - SvUTF8_on(tsv); - sv_utf8_downgrade(tsv, FALSE); - tmps = SvPV_const(tsv, len); - } - result_uv = grok_hex (tmps, &len, &flags, &result_nv); - if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) { - XPUSHn(result_nv); - } - else { - XPUSHu(result_uv); - } - RETURN; -} - PP(pp_oct) { dVAR; dSP; dTARGET; @@ -2872,12 +2966,17 @@ PP(pp_oct) sv_utf8_downgrade(tsv, FALSE); tmps = SvPV_const(tsv, len); } + if (PL_op->op_type == OP_HEX) + goto hex; + while (*tmps && len && isSPACE(*tmps)) tmps++, len--; if (*tmps == '0') tmps++, len--; - if (*tmps == 'x') + if (*tmps == 'x') { + hex: result_uv = grok_hex (tmps, &len, &flags, &result_nv); + } else if (*tmps == 'b') result_uv = grok_bin (tmps, &len, &flags, &result_nv); else @@ -2899,7 +2998,22 @@ PP(pp_length) dVAR; dSP; dTARGET; SV * const sv = TOPs; - if (DO_UTF8(sv)) + if (SvAMAGIC(sv)) { + /* For an overloaded scalar, we can't know in advance if it's going to + be UTF-8 or not. Also, we can't call sv_len_utf8 as it likes to + cache the length. Maybe that should be a documented feature of it. + */ + STRLEN len; + const char *const p = SvPV_const(sv, len); + + if (DO_UTF8(sv)) { + SETi(utf8_length((U8*)p, (U8*)p + len)); + } + else + SETi(len); + + } + else if (DO_UTF8(sv)) SETi(sv_len_utf8(sv)); else SETi(sv_len(sv)); @@ -2918,7 +3032,7 @@ PP(pp_substr) I32 fail; const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; const char *tmps; - const I32 arybase = PL_curcop->cop_arybase; + const I32 arybase = CopARYBASE_get(PL_curcop); SV *repl_sv = NULL; const char *repl = NULL; STRLEN repl_len; @@ -3046,7 +3160,9 @@ PP(pp_substr) Perl_warner(aTHX_ packWARN(WARN_SUBSTR), "Attempt to use reference as lvalue in substr"); } - if (SvOK(sv)) /* is it defined ? */ + if (isGV_with_GP(sv)) + SvPV_force_nolen(sv); + else if (SvOK(sv)) /* is it defined ? */ (void)SvPOK_only_UTF8(sv); else sv_setpvn(sv,"",0); /* avoid lexical reincarnation */ @@ -3056,14 +3172,12 @@ PP(pp_substr) sv_upgrade(TARG, SVt_PVLV); sv_magic(TARG, NULL, PERL_MAGIC_substr, NULL, 0); } - else - SvOK_off(TARG); LvTYPE(TARG) = 'x'; if (LvTARG(TARG) != sv) { if (LvTARG(TARG)) SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc(sv); + LvTARG(TARG) = SvREFCNT_inc_simple(sv); } LvTARGOFF(TARG) = upos; LvTARGLEN(TARG) = urem; @@ -3094,7 +3208,7 @@ PP(pp_vec) if (LvTARG(TARG) != src) { if (LvTARG(TARG)) SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc(src); + LvTARG(TARG) = SvREFCNT_inc_simple(src); } LvTARGOFF(TARG) = offset; LvTARGLEN(TARG) = size; @@ -3115,9 +3229,9 @@ PP(pp_index) STRLEN llen = 0; I32 offset; I32 retval; - const char *tmps; - const char *tmps2; - const I32 arybase = PL_curcop->cop_arybase; + const char *big_p; + const char *little_p; + const I32 arybase = CopARYBASE_get(PL_curcop); bool big_utf8; bool little_utf8; const bool is_index = PL_op->op_type == OP_INDEX; @@ -3129,6 +3243,9 @@ PP(pp_index) } little = POPs; big = POPs; + big_p = SvPV_const(big, biglen); + little_p = SvPV_const(little, llen); + big_utf8 = DO_UTF8(big); little_utf8 = DO_UTF8(little); if (big_utf8 ^ little_utf8) { @@ -3136,9 +3253,7 @@ PP(pp_index) if (little_utf8 && !PL_encoding) { /* Well, maybe instead we might be able to downgrade the small string? */ - STRLEN little_len; - const U8 * const little_pv = (U8*) SvPV_const(little, little_len); - char * const pv = (char*)bytes_from_utf8(little_pv, &little_len, + char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen, &little_utf8); if (little_utf8) { /* If the large string is ISO-8859-1, and it's not possible to @@ -3151,13 +3266,11 @@ PP(pp_index) /* At this point, pv is a malloc()ed string. So donate it to temp to ensure it will get free()d */ little = temp = newSV(0); - sv_usepvn(temp, pv, little_len); + sv_usepvn(temp, pv, llen); + little_p = SvPVX(little); } else { - SV * const bytes = little_utf8 ? big : little; - STRLEN len; - const char * const p = SvPV_const(bytes, len); - - temp = newSVpvn(p, len); + temp = little_utf8 + ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen); if (PL_encoding) { sv_recode_to_utf8(temp, PL_encoding); @@ -3167,35 +3280,58 @@ PP(pp_index) if (little_utf8) { big = temp; big_utf8 = TRUE; + big_p = SvPV_const(big, biglen); } else { little = temp; + little_p = SvPV_const(little, llen); } } } - if (!is_index) { - tmps2 = SvPV_const(little, llen); + if (SvGAMAGIC(big)) { + /* Life just becomes a lot easier if I use a temporary here. + Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously) + will trigger magic and overloading again, as will fbm_instr() + */ + big = sv_2mortal(newSVpvn(big_p, biglen)); + if (big_utf8) + SvUTF8_on(big); + big_p = SvPVX(big); + } + if (SvGAMAGIC(little) || (is_index && !SvOK(little))) { + /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will + warn on undef, and we've already triggered a warning with the + SvPV_const some lines above. We can't remove that, as we need to + call some SvPV to trigger overloading early and find out if the + string is UTF-8. + This is all getting to messy. The API isn't quite clean enough, + because data access has side effects. + */ + little = sv_2mortal(newSVpvn(little_p, llen)); + if (little_utf8) + SvUTF8_on(little); + little_p = SvPVX(little); } - tmps = SvPV_const(big, biglen); if (MAXARG < 3) offset = is_index ? 0 : biglen; else { if (big_utf8 && offset > 0) sv_pos_u2b(big, &offset, 0); - offset += llen; + if (!is_index) + offset += llen; } if (offset < 0) offset = 0; else if (offset > (I32)biglen) offset = biglen; - if (!(tmps2 = is_index - ? fbm_instr((unsigned char*)tmps + offset, - (unsigned char*)tmps + biglen, little, 0) - : rninstr(tmps, tmps + offset, - tmps2, tmps2 + llen))) + if (!(little_p = is_index + ? fbm_instr((unsigned char*)big_p + offset, + (unsigned char*)big_p + biglen, little, 0) + : rninstr(big_p, big_p + offset, + little_p, little_p + llen))) retval = -1; else { - retval = tmps2 - tmps; + retval = little_p - big_p; if (retval > 0 && big_utf8) sv_pos_b2u(big, &retval); } @@ -3209,6 +3345,8 @@ PP(pp_index) PP(pp_sprintf) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; + if (SvTAINTED(MARK[1])) + TAINT_PROPER("sprintf"); do_sprintf(TARG, SP-MARK, MARK+1); TAINT_IF(SvTAINTED(TARG)); SP = ORIGMARK; @@ -3219,20 +3357,20 @@ PP(pp_sprintf) PP(pp_ord) { dVAR; dSP; dTARGET; + SV *argsv = POPs; STRLEN len; const U8 *s = (U8*)SvPV_const(argsv, len); - SV *tmpsv; if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) { - tmpsv = sv_2mortal(newSVsv(argsv)); + SV * const tmpsv = sv_2mortal(newSVsv(argsv)); s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding); argsv = tmpsv; } XPUSHu(DO_UTF8(argsv) ? utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) : - (*s & 0xff)); + (UV)(*s & 0xff)); RETURN; } @@ -3275,20 +3413,21 @@ PP(pp_chr) *tmps++ = (char)value; *tmps = '\0'; (void)SvPOK_only(TARG); + if (PL_encoding && !IN_BYTES) { sv_recode_to_utf8(TARG, PL_encoding); tmps = SvPVX(TARG); if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) || - memEQ(tmps, "\xef\xbf\xbd\0", 4)) { - SvGROW(TARG, 3); + UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) { + SvGROW(TARG, 2); tmps = SvPVX(TARG); - SvCUR_set(TARG, 2); - *tmps++ = (U8)UTF8_EIGHT_BIT_HI(value); - *tmps++ = (U8)UTF8_EIGHT_BIT_LO(value); + SvCUR_set(TARG, 1); + *tmps++ = (char)value; *tmps = '\0'; - SvUTF8_on(TARG); + SvUTF8_off(TARG); } } + XPUSHs(TARG); RETURN; } @@ -3346,28 +3485,64 @@ PP(pp_ucfirst) { dVAR; dSP; - SV *sv = TOPs; - const U8 *s; + SV *source = TOPs; STRLEN slen; + STRLEN need; + SV *dest; + bool inplace = TRUE; + bool doing_utf8; const int op_type = PL_op->op_type; + const U8 *s; + U8 *d; + U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; + STRLEN ulen; + STRLEN tculen; + + SvGETMAGIC(source); + if (SvOK(source)) { + s = (const U8*)SvPV_nomg_const(source, slen); + } else { + s = (const U8*)""; + slen = 0; + } - SvGETMAGIC(sv); - if (DO_UTF8(sv) && - (s = (const U8*)SvPV_nomg_const(sv, slen)) && slen && - UTF8_IS_START(*s)) { - U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; - STRLEN ulen; - STRLEN tculen; - + if (slen && DO_UTF8(source) && UTF8_IS_START(*s)) { + doing_utf8 = TRUE; utf8_to_uvchr(s, &ulen); if (op_type == OP_UCFIRST) { toTITLE_utf8(s, tmpbuf, &tculen); } else { toLOWER_utf8(s, tmpbuf, &tculen); } + /* If the two differ, we definately cannot do inplace. */ + inplace = (ulen == tculen); + need = slen + 1 - ulen + tculen; + } else { + doing_utf8 = FALSE; + need = slen + 1; + } + + if (SvPADTMP(source) && !SvREADONLY(source) && inplace && SvTEMP(source)) { + /* We can convert in place. */ - if (!SvPADTMP(sv) || SvREADONLY(sv) || ulen != tculen) { - dTARGET; + dest = source; + s = d = (U8*)SvPV_force_nomg(source, slen); + } else { + dTARGET; + + dest = TARG; + + SvUPGRADE(dest, SVt_PV); + d = (U8*)SvGROW(dest, need); + (void)SvPOK_only(dest); + + SETs(dest); + + inplace = FALSE; + } + + if (doing_utf8) { + if(!inplace) { /* slen is the byte length of the whole SV. * ulen is the byte length of the original Unicode character * stored as UTF-8 at s. @@ -3375,124 +3550,144 @@ PP(pp_ucfirst) * lowercased) Unicode character stored as UTF-8 at tmpbuf. * We first set the result to be the titlecased (/lowercased) * character, and then append the rest of the SV data. */ - sv_setpvn(TARG, (char*)tmpbuf, tculen); + sv_setpvn(dest, (char*)tmpbuf, tculen); if (slen > ulen) - sv_catpvn(TARG, (char*)(s + ulen), slen - ulen); - SvUTF8_on(TARG); - SETs(TARG); + sv_catpvn(dest, (char*)(s + ulen), slen - ulen); + SvUTF8_on(dest); } else { - s = (U8*)SvPV_force_nomg(sv, slen); - Copy(tmpbuf, s, tculen, U8); + Copy(tmpbuf, d, tculen, U8); + SvCUR_set(dest, need - 1); } } else { - U8 *s1; - if (!SvPADTMP(sv) || SvREADONLY(sv)) { - dTARGET; - SvUTF8_off(TARG); /* decontaminate */ - sv_setsv_nomg(TARG, sv); - sv = TARG; - SETs(sv); - } - s1 = (U8*)SvPV_force_nomg(sv, slen); - if (*s1) { + if (*s) { if (IN_LOCALE_RUNTIME) { TAINT; - SvTAINTED_on(sv); - *s1 = (op_type == OP_UCFIRST) - ? toUPPER_LC(*s1) : toLOWER_LC(*s1); + SvTAINTED_on(dest); + *d = (op_type == OP_UCFIRST) + ? toUPPER_LC(*s) : toLOWER_LC(*s); } else - *s1 = (op_type == OP_UCFIRST) ? toUPPER(*s1) : toLOWER(*s1); + *d = (op_type == OP_UCFIRST) ? toUPPER(*s) : toLOWER(*s); + } else { + /* See bug #39028 */ + *d = *s; + } + + if (SvUTF8(source)) + SvUTF8_on(dest); + + if (!inplace) { + /* This will copy the trailing NUL */ + Copy(s + 1, d + 1, slen, U8); + SvCUR_set(dest, need - 1); } } - SvSETMAGIC(sv); + SvSETMAGIC(dest); RETURN; } +/* There's so much setup/teardown code common between uc and lc, I wonder if + it would be worth merging the two, and just having a switch outside each + of the three tight loops. */ PP(pp_uc) { dVAR; dSP; - SV *sv = TOPs; + SV *source = TOPs; STRLEN len; + STRLEN min; + SV *dest; + const U8 *s; + U8 *d; - SvGETMAGIC(sv); - if (DO_UTF8(sv)) { + SvGETMAGIC(source); + + if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source) + && SvTEMP(source) && !DO_UTF8(source)) { + /* We can convert in place. */ + + dest = source; + s = d = (U8*)SvPV_force_nomg(source, len); + min = len + 1; + } else { dTARGET; - STRLEN ulen; - register U8 *d; - const U8 *s; - const U8 *send; - U8 tmpbuf[UTF8_MAXBYTES+1]; - s = (const U8*)SvPV_nomg_const(sv,len); - if (!len) { - SvUTF8_off(TARG); /* decontaminate */ - sv_setpvn(TARG, "", 0); - SETs(TARG); - } - else { - STRLEN min = len + 1; + dest = TARG; - SvUPGRADE(TARG, SVt_PV); - SvGROW(TARG, min); - (void)SvPOK_only(TARG); - d = (U8*)SvPVX(TARG); - send = s + len; - while (s < send) { - STRLEN u = UTF8SKIP(s); - - toUPPER_utf8(s, tmpbuf, &ulen); - if (ulen > u && (SvLEN(TARG) < (min += ulen - u))) { - /* If the eventually required minimum size outgrows - * the available space, we need to grow. */ - const UV o = d - (U8*)SvPVX_const(TARG); - - /* If someone uppercases one million U+03B0s we - * SvGROW() one million times. Or we could try - * guessing how much to allocate without allocating - * too much. Such is life. */ - SvGROW(TARG, min); - d = (U8*)SvPVX(TARG) + o; - } - Copy(tmpbuf, d, ulen, U8); - d += ulen; - s += u; - } - *d = '\0'; - SvUTF8_on(TARG); - SvCUR_set(TARG, d - (U8*)SvPVX_const(TARG)); - SETs(TARG); + /* The old implementation would copy source into TARG at this point. + This had the side effect that if source was undef, TARG was now + an undefined SV with PADTMP set, and they don't warn inside + sv_2pv_flags(). However, we're now getting the PV direct from + source, which doesn't have PADTMP set, so it would warn. Hence the + little games. */ + + if (SvOK(source)) { + s = (const U8*)SvPV_nomg_const(source, len); + } else { + s = (const U8*)""; + len = 0; } + min = len + 1; + + SvUPGRADE(dest, SVt_PV); + d = (U8*)SvGROW(dest, min); + (void)SvPOK_only(dest); + + SETs(dest); } - else { - U8 *s; - if (!SvPADTMP(sv) || SvREADONLY(sv)) { - dTARGET; - SvUTF8_off(TARG); /* decontaminate */ - sv_setsv_nomg(TARG, sv); - sv = TARG; - SETs(sv); + + /* Overloaded values may have toggled the UTF-8 flag on source, so we need + to check DO_UTF8 again here. */ + + if (DO_UTF8(source)) { + const U8 *const send = s + len; + U8 tmpbuf[UTF8_MAXBYTES+1]; + + while (s < send) { + const STRLEN u = UTF8SKIP(s); + STRLEN ulen; + + toUPPER_utf8(s, tmpbuf, &ulen); + if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { + /* If the eventually required minimum size outgrows + * the available space, we need to grow. */ + const UV o = d - (U8*)SvPVX_const(dest); + + /* If someone uppercases one million U+03B0s we SvGROW() one + * million times. Or we could try guessing how much to + allocate without allocating too much. Such is life. */ + SvGROW(dest, min); + d = (U8*)SvPVX(dest) + o; + } + Copy(tmpbuf, d, ulen, U8); + d += ulen; + s += u; } - s = (U8*)SvPV_force_nomg(sv, len); + SvUTF8_on(dest); + *d = '\0'; + SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); + } else { if (len) { - register const U8 *send = s + len; - + const U8 *const send = s + len; if (IN_LOCALE_RUNTIME) { TAINT; - SvTAINTED_on(sv); - for (; s < send; s++) - *s = toUPPER_LC(*s); + SvTAINTED_on(dest); + for (; s < send; d++, s++) + *d = toUPPER_LC(*s); } else { - for (; s < send; s++) - *s = toUPPER(*s); + for (; s < send; d++, s++) + *d = toUPPER(*s); } } + if (source != dest) { + *d = '\0'; + SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); + } } - SvSETMAGIC(sv); + SvSETMAGIC(dest); RETURN; } @@ -3500,104 +3695,115 @@ PP(pp_lc) { dVAR; dSP; - SV *sv = TOPs; + SV *source = TOPs; STRLEN len; + STRLEN min; + SV *dest; + const U8 *s; + U8 *d; - SvGETMAGIC(sv); - if (DO_UTF8(sv)) { + SvGETMAGIC(source); + + if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source) + && SvTEMP(source) && !DO_UTF8(source)) { + /* We can convert in place. */ + + dest = source; + s = d = (U8*)SvPV_force_nomg(source, len); + min = len + 1; + } else { dTARGET; - const U8 *s; - STRLEN ulen; - register U8 *d; - const U8 *send; - U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; - s = (const U8*)SvPV_nomg_const(sv,len); - if (!len) { - SvUTF8_off(TARG); /* decontaminate */ - sv_setpvn(TARG, "", 0); - SETs(TARG); + dest = TARG; + + /* The old implementation would copy source into TARG at this point. + This had the side effect that if source was undef, TARG was now + an undefined SV with PADTMP set, and they don't warn inside + sv_2pv_flags(). However, we're now getting the PV direct from + source, which doesn't have PADTMP set, so it would warn. Hence the + little games. */ + + if (SvOK(source)) { + s = (const U8*)SvPV_nomg_const(source, len); + } else { + s = (const U8*)""; + len = 0; } - else { - STRLEN min = len + 1; + min = len + 1; - SvUPGRADE(TARG, SVt_PV); - SvGROW(TARG, min); - (void)SvPOK_only(TARG); - d = (U8*)SvPVX(TARG); - send = s + len; - while (s < send) { - const STRLEN u = UTF8SKIP(s); - const UV uv = toLOWER_utf8(s, tmpbuf, &ulen); + SvUPGRADE(dest, SVt_PV); + d = (U8*)SvGROW(dest, min); + (void)SvPOK_only(dest); + + SETs(dest); + } + + /* Overloaded values may have toggled the UTF-8 flag on source, so we need + to check DO_UTF8 again here. */ + + if (DO_UTF8(source)) { + const U8 *const send = s + len; + U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; + + while (s < send) { + const STRLEN u = UTF8SKIP(s); + STRLEN ulen; + const UV uv = toLOWER_utf8(s, tmpbuf, &ulen); #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3 /* Unicode U+03A3 */ - if (uv == GREEK_CAPITAL_LETTER_SIGMA) { - /* - * Now if the sigma is NOT followed by - * /$ignorable_sequence$cased_letter/; - * and it IS preceded by - * /$cased_letter$ignorable_sequence/; - * where $ignorable_sequence is - * [\x{2010}\x{AD}\p{Mn}]* - * and $cased_letter is - * [\p{Ll}\p{Lo}\p{Lt}] - * then it should be mapped to 0x03C2, - * (GREEK SMALL LETTER FINAL SIGMA), - * instead of staying 0x03A3. - * "should be": in other words, - * this is not implemented yet. - * See lib/unicore/SpecialCasing.txt. - */ - } - if (ulen > u && (SvLEN(TARG) < (min += ulen - u))) { - /* If the eventually required minimum size outgrows - * the available space, we need to grow. */ - const UV o = d - (U8*)SvPVX_const(TARG); - - /* If someone lowercases one million U+0130s we - * SvGROW() one million times. Or we could try - * guessing how much to allocate without allocating. - * too much. Such is life. */ - SvGROW(TARG, min); - d = (U8*)SvPVX(TARG) + o; - } - Copy(tmpbuf, d, ulen, U8); - d += ulen; - s += u; + if (uv == GREEK_CAPITAL_LETTER_SIGMA) { + NOOP; + /* + * Now if the sigma is NOT followed by + * /$ignorable_sequence$cased_letter/; + * and it IS preceded by /$cased_letter$ignorable_sequence/; + * where $ignorable_sequence is [\x{2010}\x{AD}\p{Mn}]* + * and $cased_letter is [\p{Ll}\p{Lo}\p{Lt}] + * then it should be mapped to 0x03C2, + * (GREEK SMALL LETTER FINAL SIGMA), + * instead of staying 0x03A3. + * "should be": in other words, this is not implemented yet. + * See lib/unicore/SpecialCasing.txt. + */ } - *d = '\0'; - SvUTF8_on(TARG); - SvCUR_set(TARG, d - (U8*)SvPVX_const(TARG)); - SETs(TARG); - } - } - else { - U8 *s; - if (!SvPADTMP(sv) || SvREADONLY(sv)) { - dTARGET; - SvUTF8_off(TARG); /* decontaminate */ - sv_setsv_nomg(TARG, sv); - sv = TARG; - SETs(sv); + if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { + /* If the eventually required minimum size outgrows + * the available space, we need to grow. */ + const UV o = d - (U8*)SvPVX_const(dest); + + /* If someone lowercases one million U+0130s we SvGROW() one + * million times. Or we could try guessing how much to + allocate without allocating too much. Such is life. */ + SvGROW(dest, min); + d = (U8*)SvPVX(dest) + o; + } + Copy(tmpbuf, d, ulen, U8); + d += ulen; + s += u; } - - s = (U8*)SvPV_force_nomg(sv, len); + SvUTF8_on(dest); + *d = '\0'; + SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); + } else { if (len) { - register const U8 * const send = s + len; - + const U8 *const send = s + len; if (IN_LOCALE_RUNTIME) { TAINT; - SvTAINTED_on(sv); - for (; s < send; s++) - *s = toLOWER_LC(*s); + SvTAINTED_on(dest); + for (; s < send; d++, s++) + *d = toLOWER_LC(*s); } else { - for (; s < send; s++) - *s = toLOWER(*s); + for (; s < send; d++, s++) + *d = toLOWER(*s); } } + if (source != dest) { + *d = '\0'; + SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); + } } - SvSETMAGIC(sv); + SvSETMAGIC(dest); RETURN; } @@ -3661,12 +3867,12 @@ PP(pp_aslice) register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET); if (SvTYPE(av) == SVt_PVAV) { - const I32 arybase = PL_curcop->cop_arybase; + const I32 arybase = CopARYBASE_get(PL_curcop); if (lval && PL_op->op_private & OPpLVAL_INTRO) { register SV **svp; I32 max = -1; for (svp = MARK + 1; svp <= SP; svp++) { - const I32 elem = SvIVx(*svp); + const I32 elem = SvIV(*svp); if (elem > max) max = elem; } @@ -3675,7 +3881,7 @@ PP(pp_aslice) } while (++MARK <= SP) { register SV **svp; - I32 elem = SvIVx(*MARK); + I32 elem = SvIV(*MARK); if (elem > 0) elem -= arybase; @@ -3703,7 +3909,7 @@ PP(pp_each) { dVAR; dSP; - HV * const hash = (HV*)POPs; + HV * hash = (HV*)POPs; HE *entry; const I32 gimme = GIMME_V; @@ -3860,20 +4066,25 @@ PP(pp_hslice) } he = hv_fetch_ent(hv, keysv, lval, 0); - svp = he ? &HeVAL(he) : 0; + svp = he ? &HeVAL(he) : NULL; if (lval) { if (!svp || *svp == &PL_sv_undef) { - DIE(aTHX_ PL_no_helem_sv, keysv); + DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); } if (localizing) { - if (preeminent) - save_helem(hv, keysv, svp); - else { - STRLEN keylen; - const char *key = SvPV_const(keysv, keylen); - SAVEDELETE(hv, savepvn(key,keylen), keylen); - } + if (HvNAME_get(hv) && isGV(*svp)) + save_gp((GV*)*svp, !(PL_op->op_flags & OPf_SPECIAL)); + else { + if (preeminent) + save_helem(hv, keysv, svp); + else { + STRLEN keylen; + const char * const key = SvPV_const(keysv, keylen); + SAVEDELETE(hv, savepvn(key,keylen), + SvUTF8(keysv) ? -(I32)keylen : (I32)keylen); + } + } } } *MARK = svp ? *svp : &PL_sv_undef; @@ -3909,14 +4120,14 @@ PP(pp_lslice) SV ** const lastlelem = PL_stack_base + POPMARK; SV ** const firstlelem = PL_stack_base + POPMARK + 1; register SV ** const firstrelem = lastlelem + 1; - const I32 arybase = PL_curcop->cop_arybase; - I32 is_something_there = PL_op->op_flags & OPf_MOD; + const I32 arybase = CopARYBASE_get(PL_curcop); + I32 is_something_there = FALSE; register const I32 max = lastrelem - lastlelem; register SV **lelem; if (GIMME != G_ARRAY) { - I32 ix = SvIVx(*lastlelem); + I32 ix = SvIV(*lastlelem); if (ix < 0) ix += max; else @@ -3935,7 +4146,7 @@ PP(pp_lslice) } for (lelem = firstlelem; lelem <= lastlelem; lelem++) { - I32 ix = SvIVx(*lelem); + I32 ix = SvIV(*lelem); if (ix < 0) ix += max; else @@ -3959,16 +4170,17 @@ PP(pp_anonlist) { dVAR; dSP; dMARK; dORIGMARK; const I32 items = SP - MARK; - SV * const av = sv_2mortal((SV*)av_make(items, MARK+1)); + SV * const av = (SV *) av_make(items, MARK+1); SP = ORIGMARK; /* av_make() might realloc stack_sp */ - XPUSHs(av); + XPUSHs(sv_2mortal((PL_op->op_flags & OPf_SPECIAL) + ? newRV_noinc(av) : av)); RETURN; } PP(pp_anonhash) { dVAR; dSP; dMARK; dORIGMARK; - HV* const hv = (HV*)sv_2mortal((SV*)newHV()); + HV* const hv = newHV(); while (MARK < SP) { SV * const key = *++MARK; @@ -3980,7 +4192,8 @@ PP(pp_anonhash) (void)hv_store_ent(hv,key,val,0); } SP = ORIGMARK; - XPUSHs((SV*)hv); + XPUSHs(sv_2mortal((PL_op->op_flags & OPf_SPECIAL) + ? newRV_noinc((SV*) hv) : (SV*)hv)); RETURN; } @@ -3996,7 +4209,6 @@ PP(pp_splice) I32 newlen; I32 after; I32 diff; - SV **tmparyval = NULL; const MAGIC * const mg = SvTIED_mg((SV*)ary, PERL_MAGIC_tied); if (mg) { @@ -4013,11 +4225,11 @@ PP(pp_splice) SP++; if (++MARK < SP) { - offset = i = SvIVx(*MARK); + offset = i = SvIV(*MARK); if (offset < 0) offset += AvFILLp(ary) + 1; else - offset -= PL_curcop->cop_arybase; + offset -= CopARYBASE_get(PL_curcop); if (offset < 0) DIE(aTHX_ PL_no_aelem, i); if (++MARK < SP) { @@ -4062,6 +4274,7 @@ PP(pp_splice) } if (diff < 0) { /* shrinking the area */ + SV **tmparyval = NULL; if (newlen) { Newx(tmparyval, newlen, SV*); /* so remember insertion */ Copy(MARK, tmparyval, newlen, SV*); @@ -4100,7 +4313,7 @@ PP(pp_splice) *dst-- = *src--; } dst = AvARRAY(ary); - SvPV_set(ary, (char*)(AvARRAY(ary) - diff)); /* diff is negative */ + AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */ AvMAX(ary) += diff; } else { @@ -4122,22 +4335,21 @@ PP(pp_splice) } } else { /* no, expanding (or same) */ + SV** tmparyval = NULL; if (length) { Newx(tmparyval, length, SV*); /* so remember deletion */ Copy(AvARRAY(ary)+offset, tmparyval, length, SV*); } if (diff > 0) { /* expanding */ - /* push up or down? */ - if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) { if (offset) { src = AvARRAY(ary); dst = src - diff; Move(src, dst, offset, SV*); } - SvPV_set(ary, (char*)(AvARRAY(ary) - diff));/* diff is positive */ + AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */ AvMAX(ary) += diff; AvFILLp(ary) += diff; } @@ -4171,7 +4383,6 @@ PP(pp_splice) dst++; } } - Safefree(tmparyval); } MARK += length - 1; } @@ -4182,10 +4393,10 @@ PP(pp_splice) while (length-- > 0) SvREFCNT_dec(tmparyval[length]); } - Safefree(tmparyval); } else *MARK = &PL_sv_undef; + Safefree(tmparyval); } SP = MARK; RETURN; @@ -4194,7 +4405,7 @@ PP(pp_splice) PP(pp_push) { dVAR; dSP; dMARK; dORIGMARK; dTARGET; - register AV *ary = (AV*)*++MARK; + register AV * const ary = (AV*)*++MARK; const MAGIC * const mg = SvTIED_mg((SV*)ary, PERL_MAGIC_tied); if (mg) { @@ -4209,39 +4420,31 @@ PP(pp_push) PUSHi( AvFILL(ary) + 1 ); } else { + PL_delaymagic = DM_DELAY; for (++MARK; MARK <= SP; MARK++) { SV * const sv = newSV(0); if (*MARK) sv_setsv(sv, *MARK); av_store(ary, AvFILLp(ary)+1, sv); } + if (PL_delaymagic & DM_ARRAY) + mg_set((SV*)ary); + + PL_delaymagic = 0; SP = ORIGMARK; PUSHi( AvFILLp(ary) + 1 ); } RETURN; } -PP(pp_pop) -{ - dVAR; - dSP; - AV * const av = (AV*)POPs; - SV * const sv = av_pop(av); - if (AvREAL(av)) - (void)sv_2mortal(sv); - PUSHs(sv); - RETURN; -} - PP(pp_shift) { dVAR; dSP; AV * const av = (AV*)POPs; - SV * const sv = av_shift(av); + SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av); EXTEND(SP, 1); - if (!sv) - RETPUSHUNDEF; + assert (sv); if (AvREAL(av)) (void)sv_2mortal(sv); PUSHs(sv); @@ -4297,7 +4500,7 @@ PP(pp_reverse) register I32 tmp; dTARGET; STRLEN len; - I32 padoff_du; + PADOFFSET padoff_du; SvUTF8_off(TARG); /* decontaminate */ if (SP - MARK > 1) @@ -4306,7 +4509,8 @@ PP(pp_reverse) sv_setsv(TARG, (SP > MARK) ? *SP : (padoff_du = find_rundefsvoffset(), - (padoff_du == NOT_IN_PAD || PAD_COMPNAME_FLAGS(padoff_du) & SVpad_OUR) + (padoff_du == NOT_IN_PAD + || PAD_COMPNAME_FLAGS_isOUR(padoff_du)) ? DEFSV : PAD_SVl(padoff_du))); up = SvPV_force(TARG, len); if (len > 1) { @@ -4363,7 +4567,7 @@ PP(pp_split) register SV *dstr; register const char *m; I32 iters = 0; - const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (strend - s); + const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s); I32 maxiters = slen + 10; const char *orig; const I32 origlimit = limit; @@ -4373,7 +4577,7 @@ PP(pp_split) const I32 oldsave = PL_savestack_ix; I32 make_mortal = 1; bool multiline = 0; - MAGIC *mg = (MAGIC *) NULL; + MAGIC *mg = NULL; #ifdef DEBUGGING Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*); @@ -4384,18 +4588,20 @@ PP(pp_split) DIE(aTHX_ "panic: pp_split"); rx = PM_GETRE(pm); - TAINT_IF((pm->op_pmflags & PMf_LOCALE) && - (pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE))); + TAINT_IF((rx->extflags & RXf_PMf_LOCALE) && + (rx->extflags & (RXf_WHITE | RXf_SKIPWHITE))); RX_MATCH_UTF8_set(rx, do_utf8); - if (pm->op_pmreplroot) { #ifdef USE_ITHREADS - ary = GvAVn((GV*)PAD_SVl(INT2PTR(PADOFFSET, pm->op_pmreplroot))); + if (pm->op_pmreplrootu.op_pmtargetoff) { + ary = GvAVn((GV*)PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)); + } #else - ary = GvAVn((GV*)pm->op_pmreplroot); -#endif + if (pm->op_pmreplrootu.op_pmtargetgv) { + ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv); } +#endif else if (gimme != G_ARRAY) ary = GvAVn(PL_defgv); else @@ -4425,8 +4631,12 @@ PP(pp_split) } base = SP - PL_stack_base; orig = s; - if (pm->op_pmflags & PMf_SKIPWHITE) { - if (pm->op_pmflags & PMf_LOCALE) { + if (rx->extflags & RXf_SKIPWHITE) { + if (do_utf8) { + while (*s == ' ' || is_utf8_space((U8*)s)) + s += UTF8SKIP(s); + } + else if (rx->extflags & RXf_PMf_LOCALE) { while (isSPACE_LC(*s)) s++; } @@ -4435,19 +4645,32 @@ PP(pp_split) s++; } } - if (pm->op_pmflags & PMf_MULTILINE) { + if (rx->extflags & PMf_MULTILINE) { multiline = 1; } if (!limit) limit = maxiters + 2; - if (pm->op_pmflags & PMf_WHITE) { + if (rx->extflags & RXf_WHITE) { while (--limit) { m = s; - while (m < strend && - !((pm->op_pmflags & PMf_LOCALE) - ? isSPACE_LC(*m) : isSPACE(*m))) - ++m; + /* this one uses 'm' and is a negative test */ + if (do_utf8) { + while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) { + const int t = UTF8SKIP(m); + /* is_utf8_space returns FALSE for malform utf8 */ + if (strend - m < t) + m = strend; + else + m += t; + } + } else if (rx->extflags & RXf_PMf_LOCALE) { + while (m < strend && !isSPACE_LC(*m)) + ++m; + } else { + while (m < strend && !isSPACE(*m)) + ++m; + } if (m >= strend) break; @@ -4458,14 +4681,26 @@ PP(pp_split) (void)SvUTF8_on(dstr); XPUSHs(dstr); - s = m + 1; - while (s < strend && - ((pm->op_pmflags & PMf_LOCALE) - ? isSPACE_LC(*s) : isSPACE(*s))) - ++s; + /* skip the whitespace found last */ + if (do_utf8) + s = m + UTF8SKIP(m); + else + s = m + 1; + + /* this one uses 's' and is a positive test */ + if (do_utf8) { + while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) )) + s += UTF8SKIP(s); + } else if (rx->extflags & RXf_PMf_LOCALE) { + while (s < strend && isSPACE_LC(*s)) + ++s; + } else { + while (s < strend && isSPACE(*s)) + ++s; + } } } - else if (rx->precomp[0] == '^' && rx->precomp[1] == '\0') { + else if (rx->extflags & RXf_START_ONLY) { while (--limit) { for (m = s; m < strend && *m != '\n'; m++) ; @@ -4481,15 +4716,62 @@ PP(pp_split) s = m; } } - else if (do_utf8 == ((rx->reganch & ROPT_UTF8) != 0) && - (rx->reganch & RE_USE_INTUIT) && !rx->nparens - && (rx->reganch & ROPT_CHECK_ALL) - && !(rx->reganch & ROPT_ANCH)) { - const int tail = (rx->reganch & RE_INTUIT_TAIL); - SV * const csv = CALLREG_INTUIT_STRING(aTHX_ rx); + else if (rx->extflags & RXf_NULL && !(s >= strend)) { + /* + Pre-extend the stack, either the number of bytes or + characters in the string or a limited amount, triggered by: + + my ($x, $y) = split //, $str; + or + split //, $str, $i; + */ + const U32 items = limit - 1; + if (items < slen) + EXTEND(SP, items); + else + EXTEND(SP, slen); + + if (do_utf8) { + while (--limit) { + /* keep track of how many bytes we skip over */ + m = s; + s += UTF8SKIP(s); + dstr = newSVpvn(m, s-m); + + if (make_mortal) + sv_2mortal(dstr); + + (void)SvUTF8_on(dstr); + PUSHs(dstr); + + if (s >= strend) + break; + } + } else { + while (--limit) { + dstr = newSVpvn(s, 1); + + s++; + + if (make_mortal) + sv_2mortal(dstr); + + PUSHs(dstr); + + if (s >= strend) + break; + } + } + } + else if (do_utf8 == ((rx->extflags & RXf_UTF8) != 0) && + (rx->extflags & RXf_USE_INTUIT) && !rx->nparens + && (rx->extflags & RXf_CHECK_ALL) + && !(rx->extflags & RXf_ANCH)) { + const int tail = (rx->extflags & RXf_INTUIT_TAIL); + SV * const csv = CALLREG_INTUIT_STRING(rx); - len = rx->minlen; - if (len == 1 && !(rx->reganch & ROPT_UTF8) && !tail) { + len = rx->minlenret; + if (len == 1 && !(rx->extflags & RXf_UTF8) && !tail) { const char c = *SvPV_nolen_const(csv); while (--limit) { for (m = s; m < strend && *m != c; m++) @@ -4536,7 +4818,7 @@ PP(pp_split) { I32 rex_return; PUTBACK; - rex_return = CALLREGEXEC(aTHX_ rx, (char*)s, (char*)strend, (char*)orig, 1 , + rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 , sv, NULL, 0); SPAGAIN; if (rex_return == 0) @@ -4549,7 +4831,7 @@ PP(pp_split) s = orig + (m - s); strend = s + (strend - m); } - m = rx->startp[0] + orig; + m = rx->offs[0].start + orig; dstr = newSVpvn(s, m-s); if (make_mortal) sv_2mortal(dstr); @@ -4559,8 +4841,8 @@ PP(pp_split) if (rx->nparens) { I32 i; for (i = 1; i <= (I32)rx->nparens; i++) { - s = rx->startp[i] + orig; - m = rx->endp[i] + orig; + s = rx->offs[i].start + orig; + m = rx->offs[i].end + orig; /* japhy (07/27/01) -- the (m && s) test doesn't catch parens that didn't match -- they should be set to @@ -4577,7 +4859,7 @@ PP(pp_split) XPUSHs(dstr); } } - s = rx->endp[0] + orig; + s = rx->offs[0].end + orig; } } @@ -4650,6 +4932,19 @@ PP(pp_split) RETURN; } +PP(pp_once) +{ + dSP; + SV *const sv = PAD_SVl(PL_op->op_targ); + + if (SvPADSTALE(sv)) { + /* First time. */ + SvPADSTALE_off(sv); + RETURNOP(cLOGOP->op_other); + } + RETURNOP(cLOGOP->op_next); +} + PP(pp_lock) { dVAR;