X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pp.c;h=d5e4dd7fb03d14271d85a4a0b1d29d1d822de3d7;hb=78ad9108a21eee2b26e0d459b81a566d11b0f4e5;hp=d28a8c21d6001419915da9c3d924a70d4204340d;hpb=f722798beaa437498e1f879b01996be44f2fdc1a;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pp.c b/pp.c index d28a8c2..d5e4dd7 100644 --- a/pp.c +++ b/pp.c @@ -1,6 +1,6 @@ /* pp.c * - * Copyright (c) 1991-1999, Larry Wall + * Copyright (c) 1991-2001, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -28,37 +28,6 @@ static double UV_MAX_cxux = ((double)UV_MAX); #endif /* - * Types used in bitwise operations. - * - * Normally we'd just use IV and UV. However, some hardware and - * software combinations (e.g. Alpha and current OSF/1) don't have a - * floating-point type to use for NV that has adequate bits to fully - * hold an IV/UV. (In other words, sizeof(long) == sizeof(double).) - * - * It just so happens that "int" is the right size almost everywhere. - */ -typedef int IBW; -typedef unsigned UBW; - -/* - * Mask used after bitwise operations. - * - * There is at least one realm (Cray word machines) that doesn't - * have an integral type (except char) small enough to be represented - * in a double without loss; that is, it has no 32-bit type. - */ -#if LONGSIZE > 4 && defined(_CRAY) && !defined(_CRAYMPP) -# define BW_BITS 32 -# define BW_MASK ((1 << BW_BITS) - 1) -# define BW_SIGN (1 << (BW_BITS - 1)) -# define BWi(i) (((i) & BW_SIGN) ? ((i) | ~BW_MASK) : ((i) & BW_MASK)) -# define BWu(u) ((u) & BW_MASK) -#else -# define BWi(i) (i) -# define BWu(u) (u) -#endif - -/* * Offset for integer pack/unpack. * * On architectures where I16 and I32 aren't really 16 and 32 bits, @@ -86,7 +55,7 @@ typedef unsigned UBW; # define PERL_NATINT_PACK #endif -#if BYTEORDER > 0xFFFF && defined(_CRAY) && !defined(_CRAYMPP) +#if LONGSIZE > 4 && defined(_CRAY) # if BYTEORDER == 0x12345678 # define OFF16(p) (char*)(p) # define OFF32(p) (char*)(p) @@ -113,10 +82,6 @@ typedef unsigned UBW; /* variations on pp_null */ -#ifdef I_UNISTD -#include -#endif - /* XXX I can't imagine anyone who doesn't have this actually _needs_ it, since pid_t is an integral type. --AD 2/20/1998 @@ -127,7 +92,7 @@ extern Pid_t getpid (void); PP(pp_stub) { - djSP; + dSP; if (GIMME_V == G_SCALAR) XPUSHs(&PL_sv_undef); RETURN; @@ -142,13 +107,18 @@ PP(pp_scalar) PP(pp_padav) { - djSP; dTARGET; + dSP; dTARGET; if (PL_op->op_private & OPpLVAL_INTRO) SAVECLEARSV(PL_curpad[PL_op->op_targ]); EXTEND(SP, 1); if (PL_op->op_flags & OPf_REF) { PUSHs(TARG); RETURN; + } else if (LVRET) { + if (GIMME == G_SCALAR) + Perl_croak(aTHX_ "Can't return array to lvalue scalar context"); + PUSHs(TARG); + RETURN; } if (GIMME == G_ARRAY) { I32 maxarg = AvFILL((AV*)TARG) + 1; @@ -176,7 +146,7 @@ PP(pp_padav) PP(pp_padhv) { - djSP; dTARGET; + dSP; dTARGET; I32 gimme; XPUSHs(TARG); @@ -184,6 +154,11 @@ PP(pp_padhv) SAVECLEARSV(PL_curpad[PL_op->op_targ]); if (PL_op->op_flags & OPf_REF) RETURN; + else if (LVRET) { + if (GIMME == G_SCALAR) + Perl_croak(aTHX_ "Can't return hash to lvalue scalar context"); + RETURN; + } gimme = GIMME_V; if (gimme == G_ARRAY) { RETURNOP(do_kv()); @@ -209,7 +184,7 @@ PP(pp_padany) PP(pp_rv2gv) { - djSP; dTOPss; + dSP; dTOPss; if (SvROK(sv)) { wasref: @@ -229,46 +204,56 @@ PP(pp_rv2gv) else { if (SvTYPE(sv) != SVt_PVGV) { char *sym; - STRLEN n_a; + STRLEN len; if (SvGMAGICAL(sv)) { mg_get(sv); if (SvROK(sv)) goto wasref; } - if (!SvOK(sv)) { - /* If this is a 'my' scalar and flag is set then vivify + if (!SvOK(sv) && sv != &PL_sv_undef) { + /* If this is a 'my' scalar and flag is set then vivify * NI-S 1999/05/07 - */ + */ if (PL_op->op_private & OPpDEREF) { - GV *gv = (GV *) newSV(0); - STRLEN len = 0; - char *name = ""; - if (cUNOP->op_first->op_type == OP_PADSV) { - SV *padname = *av_fetch(PL_comppad_name, cUNOP->op_first->op_targ, 4); - name = SvPV(padname,len); + char *name; + GV *gv; + if (cUNOP->op_targ) { + STRLEN len; + SV *namesv = PL_curpad[cUNOP->op_targ]; + name = SvPV(namesv, len); + gv = (GV*)NEWSV(0,0); + gv_init(gv, CopSTASH(PL_curcop), name, len, 0); + } + else { + name = CopSTASHPV(PL_curcop); + gv = newGVgen(name); } - gv_init(gv, PL_curcop->cop_stash, name, len, 0); - sv_upgrade(sv, SVt_RV); - SvRV(sv) = (SV *) gv; + if (SvTYPE(sv) < SVt_RV) + sv_upgrade(sv, SVt_RV); + SvRV(sv) = (SV*)gv; SvROK_on(sv); SvSETMAGIC(sv); goto wasref; - } + } if (PL_op->op_flags & OPf_REF || PL_op->op_private & HINT_STRICT_REFS) DIE(aTHX_ PL_no_usym, "a symbol"); if (ckWARN(WARN_UNINITIALIZED)) - Perl_warner(aTHX_ WARN_UNINITIALIZED, PL_warn_uninit); + report_uninit(); RETSETUNDEF; } - sym = SvPV(sv, n_a); + sym = SvPV(sv,len); if ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD)) { sv = (SV*)gv_fetchpv(sym, FALSE, SVt_PVGV); - if (!sv) + if (!sv + && (!is_gv_magical(sym,len,0) + || !(sv = (SV*)gv_fetchpv(sym, TRUE, SVt_PVGV)))) + { RETSETUNDEF; + } } else { if (PL_op->op_private & HINT_STRICT_REFS) @@ -285,7 +270,7 @@ PP(pp_rv2gv) PP(pp_rv2sv) { - djSP; dTOPss; + dSP; dTOPss; if (SvROK(sv)) { wasref: @@ -302,7 +287,7 @@ PP(pp_rv2sv) else { GV *gv = (GV*)sv; char *sym; - STRLEN n_a; + STRLEN len; if (SvTYPE(gv) != SVt_PVGV) { if (SvGMAGICAL(sv)) { @@ -315,16 +300,20 @@ PP(pp_rv2sv) PL_op->op_private & HINT_STRICT_REFS) DIE(aTHX_ PL_no_usym, "a SCALAR"); if (ckWARN(WARN_UNINITIALIZED)) - Perl_warner(aTHX_ WARN_UNINITIALIZED, PL_warn_uninit); + report_uninit(); RETSETUNDEF; } - sym = SvPV(sv, n_a); + sym = SvPV(sv, len); if ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD)) { gv = (GV*)gv_fetchpv(sym, FALSE, SVt_PV); - if (!gv) + if (!gv + && (!is_gv_magical(sym,len,0) + || !(gv = (GV*)gv_fetchpv(sym, TRUE, SVt_PV)))) + { RETSETUNDEF; + } } else { if (PL_op->op_private & HINT_STRICT_REFS) @@ -346,7 +335,7 @@ PP(pp_rv2sv) PP(pp_av2arylen) { - djSP; + dSP; AV *av = (AV*)TOPs; SV *sv = AvARYLEN(av); if (!sv) { @@ -360,9 +349,9 @@ PP(pp_av2arylen) PP(pp_pos) { - djSP; dTARGET; dPOPss; + dSP; dTARGET; dPOPss; - if (PL_op->op_flags & OPf_MOD) { + if (PL_op->op_flags & OPf_MOD || LVRET) { if (SvTYPE(TARG) < SVt_PVLV) { sv_upgrade(TARG, SVt_PVLV); sv_magic(TARG, Nullsv, '.', Nullch, 0); @@ -384,7 +373,7 @@ PP(pp_pos) mg = mg_find(sv, 'g'); if (mg && mg->mg_len >= 0) { I32 i = mg->mg_len; - if (IN_UTF8) + if (DO_UTF8(sv)) sv_pos_b2u(sv, &i); PUSHi(i + PL_curcop->cop_arybase); RETURN; @@ -396,7 +385,7 @@ PP(pp_pos) PP(pp_rv2cv) { - djSP; + dSP; GV *gv; HV *stash; @@ -406,6 +395,12 @@ PP(pp_rv2cv) if (cv) { if (CvCLONE(cv)) cv = (CV*)sv_2mortal((SV*)cv_clone(cv)); + if ((PL_op->op_private & OPpLVAL_INTRO)) { + if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE))) + cv = GvCV(gv); + if (!CvLVALUE(cv)) + DIE(aTHX_ "Can't modify non-lvalue subroutine call"); + } } else cv = (CV*)&PL_sv_undef; @@ -415,7 +410,7 @@ PP(pp_rv2cv) PP(pp_prototype) { - djSP; + dSP; CV *cv; HV *stash; GV *gv; @@ -426,7 +421,7 @@ PP(pp_prototype) char *s = SvPVX(TOPs); if (strnEQ(s, "CORE::", 6)) { int code; - + code = keyword(s + 6, SvCUR(TOPs) - 6); if (code < 0) { /* Overridable. */ #define MAX_ARGS_OP ((sizeof(I32) - 1) * 2) @@ -446,17 +441,19 @@ PP(pp_prototype) found: oa = PL_opargs[i] >> OASHIFT; while (oa) { - if (oa & OA_OPTIONAL) { + if (oa & OA_OPTIONAL && !seen_question) { seen_question = 1; str[n++] = ';'; } - else if (seen_question) + else if (n && str[0] == ';' && seen_question) goto set; /* XXXX system, exec */ - if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF - && (oa & (OA_OPTIONAL - 1)) <= OA_HVREF) { + if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF + && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF + /* But globs are already references (kinda) */ + && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF + ) { str[n++] = '\\'; } - /* What to do with R ((un)tie, tied, (sys)read, recv)? */ str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)]; oa = oa >> 4; } @@ -467,7 +464,7 @@ PP(pp_prototype) goto set; else { /* None such */ nonesuch: - Perl_croak(aTHX_ "Can't find an opnumber for \"%s\"", s+6); + DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6); } } } @@ -481,7 +478,7 @@ PP(pp_prototype) PP(pp_anoncode) { - djSP; + dSP; CV* cv = (CV*)PL_curpad[PL_op->op_targ]; if (CvCLONE(cv)) cv = (CV*)sv_2mortal((SV*)cv_clone(cv)); @@ -492,14 +489,14 @@ PP(pp_anoncode) PP(pp_srefgen) { - djSP; + dSP; *SP = refto(*SP); RETURN; } PP(pp_refgen) { - djSP; dMARK; + dSP; dMARK; if (GIMME != G_ARRAY) { if (++MARK <= SP) *MARK = *SP; @@ -528,6 +525,12 @@ S_refto(pTHX_ SV *sv) else (void)SvREFCNT_inc(sv); } + else if (SvTYPE(sv) == SVt_PVAV) { + if (!AvREAL((AV*)sv) && AvREIFY((AV*)sv)) + av_reify((AV*)sv); + SvTEMP_off(sv); + (void)SvREFCNT_inc(sv); + } else if (SvPADTMP(sv)) sv = newSVsv(sv); else { @@ -543,7 +546,7 @@ S_refto(pTHX_ SV *sv) PP(pp_ref) { - djSP; dTARGET; + dSP; dTARGET; SV *sv; char *pv; @@ -563,17 +566,21 @@ PP(pp_ref) PP(pp_bless) { - djSP; + dSP; HV *stash; if (MAXARG == 1) - stash = PL_curcop->cop_stash; + stash = CopSTASH(PL_curcop); else { SV *ssv = POPs; STRLEN len; - char *ptr = SvPV(ssv,len); - if (ckWARN(WARN_UNSAFE) && len == 0) - Perl_warner(aTHX_ WARN_UNSAFE, + char *ptr; + + if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv)) + Perl_croak(aTHX_ "Attempt to bless into a reference"); + ptr = SvPV(ssv,len); + if (ckWARN(WARN_MISC) && len == 0) + Perl_warner(aTHX_ WARN_MISC, "Explicit blessing to '' (assuming package main)"); stash = gv_stashpvn(ptr, len, TRUE); } @@ -588,9 +595,9 @@ PP(pp_gelem) SV *sv; SV *tmpRef; char *elem; - djSP; + dSP; STRLEN n_a; - + sv = POPs; elem = SvPV(sv, n_a); gv = (GV*)POPs; @@ -609,6 +616,9 @@ PP(pp_gelem) case 'F': if (strEQ(elem, "FILEHANDLE")) /* XXX deprecate in 5.005 */ tmpRef = (SV*)GvIOp(gv); + else + if (strEQ(elem, "FORMAT")) + tmpRef = (SV*)GvFORM(gv); break; case 'G': if (strEQ(elem, "GLOB")) @@ -649,7 +659,7 @@ PP(pp_gelem) PP(pp_study) { - djSP; dPOPss; + dSP; dPOPss; register unsigned char *s; register I32 pos; register I32 ch; @@ -711,7 +721,7 @@ PP(pp_study) PP(pp_trans) { - djSP; dTARG; + dSP; dTARG; SV *sv; if (PL_op->op_flags & OPf_STACKED) @@ -729,7 +739,7 @@ PP(pp_trans) PP(pp_schop) { - djSP; dTARGET; + dSP; dTARGET; do_chop(TARG, TOPs); SETTARG; RETURN; @@ -737,23 +747,24 @@ PP(pp_schop) PP(pp_chop) { - djSP; dMARK; dTARGET; - while (SP > MARK) - do_chop(TARG, POPs); + dSP; dMARK; dTARGET; dORIGMARK; + while (MARK < SP) + do_chop(TARG, *++MARK); + SP = ORIGMARK; PUSHTARG; RETURN; } PP(pp_schomp) { - djSP; dTARGET; + dSP; dTARGET; SETi(do_chomp(TOPs)); RETURN; } PP(pp_chomp) { - djSP; dMARK; dTARGET; + dSP; dMARK; dTARGET; register I32 count = 0; while (SP > MARK) @@ -764,7 +775,7 @@ PP(pp_chomp) PP(pp_defined) { - djSP; + dSP; register SV* sv; sv = POPs; @@ -794,7 +805,7 @@ PP(pp_defined) PP(pp_undef) { - djSP; + dSP; SV *sv; if (!PL_op->op_private) { @@ -819,14 +830,14 @@ PP(pp_undef) hv_undef((HV*)sv); break; case SVt_PVCV: - if (ckWARN(WARN_UNSAFE) && cv_const_sv((CV*)sv)) - Perl_warner(aTHX_ WARN_UNSAFE, "Constant subroutine %s undefined", + if (ckWARN(WARN_MISC) && cv_const_sv((CV*)sv)) + Perl_warner(aTHX_ WARN_MISC, "Constant subroutine %s undefined", CvANON((CV*)sv) ? "(anonymous)" : GvENAME(CvGV((CV*)sv))); /* FALL THROUGH */ case SVt_PVFM: { /* let user-undef'd sub keep its identity */ - GV* gv = (GV*)SvREFCNT_inc(CvGV((CV*)sv)); + GV* gv = CvGV((CV*)sv); cv_undef((CV*)sv); CvGV((CV*)sv) = gv; } @@ -840,7 +851,7 @@ PP(pp_undef) Newz(602, gp, 1, GP); GvGP(sv) = gp_ref(gp); GvSV(sv) = NEWSV(72,0); - GvLINE(sv) = PL_curcop->cop_line; + GvLINE(sv) = CopLINE(PL_curcop); GvEGV(sv) = (GV*)sv; GvMULTI_on(sv); } @@ -861,9 +872,9 @@ PP(pp_undef) PP(pp_predec) { - djSP; + dSP; if (SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) - Perl_croak(aTHX_ PL_no_modify); + DIE(aTHX_ PL_no_modify); if (SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MIN) { @@ -878,9 +889,9 @@ PP(pp_predec) PP(pp_postinc) { - djSP; dTARGET; + dSP; dTARGET; if (SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) - Perl_croak(aTHX_ PL_no_modify); + DIE(aTHX_ PL_no_modify); sv_setsv(TARG, TOPs); if (SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MAX) @@ -899,9 +910,9 @@ PP(pp_postinc) PP(pp_postdec) { - djSP; dTARGET; - if(SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) - Perl_croak(aTHX_ PL_no_modify); + dSP; dTARGET; + if (SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) + DIE(aTHX_ PL_no_modify); sv_setsv(TARG, TOPs); if (SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MIN) @@ -920,17 +931,125 @@ PP(pp_postdec) PP(pp_pow) { - djSP; dATARGET; tryAMAGICbin(pow,opASSIGN); + dSP; dATARGET; tryAMAGICbin(pow,opASSIGN); { dPOPTOPnnrl; - SETn( pow( left, right) ); + SETn( Perl_pow( left, right) ); RETURN; } } PP(pp_multiply) { - djSP; dATARGET; tryAMAGICbin(mult,opASSIGN); + dSP; dATARGET; tryAMAGICbin(mult,opASSIGN); +#ifdef PERL_PRESERVE_IVUV + SvIV_please(TOPs); + if (SvIOK(TOPs)) { + /* Unless the left argument is integer in range we are going to have to + use NV maths. Hence only attempt to coerce the right argument if + we know the left is integer. */ + /* Left operand is defined, so is it IV? */ + SvIV_please(TOPm1s); + if (SvIOK(TOPm1s)) { + bool auvok = SvUOK(TOPm1s); + bool buvok = SvUOK(TOPs); + const UV topmask = (~ (UV)0) << (4 * sizeof (UV)); + const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV))); + UV alow; + UV ahigh; + UV blow; + UV bhigh; + + if (auvok) { + alow = SvUVX(TOPm1s); + } else { + IV aiv = SvIVX(TOPm1s); + if (aiv >= 0) { + alow = aiv; + auvok = TRUE; /* effectively it's a UV now */ + } else { + alow = -aiv; /* abs, auvok == false records sign */ + } + } + if (buvok) { + blow = SvUVX(TOPs); + } else { + IV biv = SvIVX(TOPs); + if (biv >= 0) { + blow = biv; + buvok = TRUE; /* effectively it's a UV now */ + } else { + blow = -biv; /* abs, buvok == false records sign */ + } + } + + /* If this does sign extension on unsigned it's time for plan B */ + ahigh = alow >> (4 * sizeof (UV)); + alow &= botmask; + bhigh = blow >> (4 * sizeof (UV)); + blow &= botmask; + if (ahigh && bhigh) { + /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000 + which is overflow. Drop to NVs below. */ + } else if (!ahigh && !bhigh) { + /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001 + so the unsigned multiply cannot overflow. */ + UV product = alow * blow; + if (auvok == buvok) { + /* -ve * -ve or +ve * +ve gives a +ve result. */ + SP--; + SETu( product ); + RETURN; + } else if (product <= (UV)IV_MIN) { + /* 2s complement assumption that (UV)-IV_MIN is correct. */ + /* -ve result, which could overflow an IV */ + SP--; + SETi( -product ); + RETURN; + } /* else drop to NVs below. */ + } else { + /* One operand is large, 1 small */ + UV product_middle; + if (bhigh) { + /* swap the operands */ + ahigh = bhigh; + bhigh = blow; /* bhigh now the temp var for the swap */ + blow = alow; + alow = bhigh; + } + /* now, ((ahigh * blow) << half_UV_len) + (alow * blow) + multiplies can't overflow. shift can, add can, -ve can. */ + product_middle = ahigh * blow; + if (!(product_middle & topmask)) { + /* OK, (ahigh * blow) won't lose bits when we shift it. */ + UV product_low; + product_middle <<= (4 * sizeof (UV)); + product_low = alow * blow; + + /* as for pp_add, UV + something mustn't get smaller. + IIRC ANSI mandates this wrapping *behaviour* for + unsigned whatever the actual representation*/ + product_low += product_middle; + if (product_low >= product_middle) { + /* didn't overflow */ + if (auvok == buvok) { + /* -ve * -ve or +ve * +ve gives a +ve result. */ + SP--; + SETu( product_low ); + RETURN; + } else if (product_low <= (UV)IV_MIN) { + /* 2s complement assumption again */ + /* -ve result, which could overflow an IV */ + SP--; + SETi( -product_low ); + RETURN; + } /* else drop to NVs below. */ + } + } /* product_middle too large */ + } /* ahigh && bhigh */ + } /* SvIOK(TOPm1s) */ + } /* SvIOK(TOPs) */ +#endif { dPOPTOPnnrl; SETn( left * right ); @@ -940,7 +1059,7 @@ PP(pp_multiply) PP(pp_divide) { - djSP; dATARGET; tryAMAGICbin(div,opASSIGN); + dSP; dATARGET; tryAMAGICbin(div,opASSIGN); { dPOPPOPnnrl; NV value; @@ -969,7 +1088,7 @@ PP(pp_divide) PP(pp_modulo) { - djSP; dATARGET; tryAMAGICbin(modulo,opASSIGN); + dSP; dATARGET; tryAMAGICbin(modulo,opASSIGN); { UV left; UV right; @@ -979,7 +1098,7 @@ PP(pp_modulo) NV dright; NV dleft; - if (SvIOK(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)) { + if (SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)) { IV i = SvIVX(POPs); right = (right_neg = (i < 0)) ? -i : i; } @@ -991,7 +1110,7 @@ PP(pp_modulo) dright = -dright; } - if (!use_double && SvIOK(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)) { + if (!use_double && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)) { IV i = SvIVX(POPs); left = (left_neg = (i < 0)) ? -i : i; } @@ -1028,8 +1147,8 @@ PP(pp_modulo) #endif /* Backward-compatibility clause: */ - dright = floor(dright + 0.5); - dleft = floor(dleft + 0.5); + dright = Perl_floor(dright + 0.5); + dleft = Perl_floor(dleft + 0.5); if (!dright) DIE(aTHX_ "Illegal modulus zero"); @@ -1069,9 +1188,9 @@ PP(pp_modulo) PP(pp_repeat) { - djSP; dATARGET; tryAMAGICbin(repeat,opASSIGN); + dSP; dATARGET; tryAMAGICbin(repeat,opASSIGN); { - register I32 count = POPi; + register IV count = POPi; if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) { dMARK; I32 items = SP - MARK; @@ -1094,12 +1213,13 @@ PP(pp_repeat) SP -= items; } else { /* Note: mark already snarfed by pp_list */ - SV *tmpstr; + SV *tmpstr = POPs; STRLEN len; + bool isutf; - tmpstr = POPs; SvSetSV(TARG, tmpstr); SvPV_force(TARG, len); + isutf = DO_UTF8(TARG); if (count != 1) { if (count < 1) SvCUR_set(TARG, 0); @@ -1110,7 +1230,20 @@ PP(pp_repeat) } *SvEND(TARG) = '\0'; } - (void)SvPOK_only(TARG); + if (isutf) + (void)SvPOK_only_UTF8(TARG); + else + (void)SvPOK_only(TARG); + + if (PL_op->op_private & OPpREPEAT_DOLIST) { + /* The parser saw this as a list repeat, and there + are probably several items on the stack. But we're + in scalar context, and there's no pp_list to save us + now. So drop the rest of the items -- robin@kitsite.com + */ + dMARK; + SP = MARK; + } PUSHTARG; } RETURN; @@ -1119,28 +1252,133 @@ PP(pp_repeat) PP(pp_subtract) { - djSP; dATARGET; tryAMAGICbin(subtr,opASSIGN); + dSP; dATARGET; bool useleft; tryAMAGICbin(subtr,opASSIGN); + useleft = USE_LEFT(TOPm1s); +#ifdef PERL_PRESERVE_IVUV + /* See comments in pp_add (in pp_hot.c) about Overflow, and how + "bad things" happen if you rely on signed integers wrapping. */ + SvIV_please(TOPs); + if (SvIOK(TOPs)) { + /* Unless the left argument is integer in range we are going to have to + use NV maths. Hence only attempt to coerce the right argument if + we know the left is integer. */ + register UV auv; + bool auvok; + bool a_valid = 0; + + if (!useleft) { + auv = 0; + a_valid = auvok = 1; + /* left operand is undef, treat as zero. */ + } else { + /* Left operand is defined, so is it IV? */ + SvIV_please(TOPm1s); + if (SvIOK(TOPm1s)) { + if ((auvok = SvUOK(TOPm1s))) + auv = SvUVX(TOPm1s); + else { + register IV aiv = SvIVX(TOPm1s); + if (aiv >= 0) { + auv = aiv; + auvok = 1; /* Now acting as a sign flag. */ + } else { /* 2s complement assumption for IV_MIN */ + auv = (UV)-aiv; + } + } + a_valid = 1; + } + } + if (a_valid) { + bool result_good = 0; + UV result; + register UV buv; + bool buvok = SvUOK(TOPs); + + if (buvok) + buv = SvUVX(TOPs); + else { + register IV biv = SvIVX(TOPs); + if (biv >= 0) { + buv = biv; + buvok = 1; + } else + buv = (UV)-biv; + } + /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve, + else "IV" now, independant of how it came in. + if a, b represents positive, A, B negative, a maps to -A etc + a - b => (a - b) + A - b => -(a + b) + a - B => (a + b) + A - B => -(a - b) + all UV maths. negate result if A negative. + subtract if signs same, add if signs differ. */ + + if (auvok ^ buvok) { + /* Signs differ. */ + result = auv + buv; + if (result >= auv) + result_good = 1; + } else { + /* Signs same */ + if (auv >= buv) { + result = auv - buv; + /* Must get smaller */ + if (result <= auv) + result_good = 1; + } else { + result = buv - auv; + if (result <= buv) { + /* result really should be -(auv-buv). as its negation + of true value, need to swap our result flag */ + auvok = !auvok; + result_good = 1; + } + } + } + if (result_good) { + SP--; + if (auvok) + SETu( result ); + else { + /* Negate result */ + if (result <= (UV)IV_MIN) + SETi( -(IV)result ); + else { + /* result valid, but out of range for IV. */ + SETn( -(NV)result ); + } + } + RETURN; + } /* Overflow, drop through to NVs. */ + } + } +#endif + useleft = USE_LEFT(TOPm1s); { - dPOPTOPnnrl_ul; - SETn( left - right ); - RETURN; + dPOPnv; + if (!useleft) { + /* left operand is undef, treat as zero - value */ + SETn(-value); + RETURN; + } + SETn( TOPn - value ); + RETURN; } } PP(pp_left_shift) { - djSP; dATARGET; tryAMAGICbin(lshift,opASSIGN); + dSP; dATARGET; tryAMAGICbin(lshift,opASSIGN); { - IBW shift = POPi; + IV shift = POPi; if (PL_op->op_private & HINT_INTEGER) { - IBW i = TOPi; - i = BWi(i) << shift; - SETi(BWi(i)); + IV i = TOPi; + SETi(i << shift); } else { - UBW u = TOPu; - u <<= shift; - SETu(BWu(u)); + UV u = TOPu; + SETu(u << shift); } RETURN; } @@ -1148,18 +1386,16 @@ PP(pp_left_shift) PP(pp_right_shift) { - djSP; dATARGET; tryAMAGICbin(rshift,opASSIGN); + dSP; dATARGET; tryAMAGICbin(rshift,opASSIGN); { - IBW shift = POPi; + IV shift = POPi; if (PL_op->op_private & HINT_INTEGER) { - IBW i = TOPi; - i = BWi(i) >> shift; - SETi(BWi(i)); + IV i = TOPi; + SETi(i >> shift); } else { - UBW u = TOPu; - u >>= shift; - SETu(BWu(u)); + UV u = TOPu; + SETu(u >> shift); } RETURN; } @@ -1167,7 +1403,75 @@ PP(pp_right_shift) PP(pp_lt) { - djSP; tryAMAGICbinSET(lt,0); + dSP; tryAMAGICbinSET(lt,0); +#ifdef PERL_PRESERVE_IVUV + SvIV_please(TOPs); + if (SvIOK(TOPs)) { + SvIV_please(TOPm1s); + if (SvIOK(TOPm1s)) { + bool auvok = SvUOK(TOPm1s); + bool buvok = SvUOK(TOPs); + + if (!auvok && !buvok) { /* ## IV < IV ## */ + IV aiv = SvIVX(TOPm1s); + IV biv = SvIVX(TOPs); + + SP--; + SETs(boolSV(aiv < biv)); + RETURN; + } + if (auvok && buvok) { /* ## UV < UV ## */ + UV auv = SvUVX(TOPm1s); + UV buv = SvUVX(TOPs); + + SP--; + SETs(boolSV(auv < buv)); + RETURN; + } + if (auvok) { /* ## UV < IV ## */ + UV auv; + IV biv; + + biv = SvIVX(TOPs); + SP--; + if (biv < 0) { + /* As (a) is a UV, it's >=0, so it cannot be < */ + SETs(&PL_sv_no); + RETURN; + } + auv = SvUVX(TOPs); + if (auv >= (UV) IV_MAX) { + /* As (b) is an IV, it cannot be > IV_MAX */ + SETs(&PL_sv_no); + RETURN; + } + SETs(boolSV(auv < (UV)biv)); + RETURN; + } + { /* ## IV < UV ## */ + IV aiv; + UV buv; + + aiv = SvIVX(TOPm1s); + if (aiv < 0) { + /* As (b) is a UV, it's >=0, so it must be < */ + SP--; + SETs(&PL_sv_yes); + RETURN; + } + buv = SvUVX(TOPs); + SP--; + if (buv > (UV) IV_MAX) { + /* As (a) is an IV, it cannot be > IV_MAX */ + SETs(&PL_sv_yes); + RETURN; + } + SETs(boolSV((UV)aiv < buv)); + RETURN; + } + } + } +#endif { dPOPnv; SETs(boolSV(TOPn < value)); @@ -1177,7 +1481,75 @@ PP(pp_lt) PP(pp_gt) { - djSP; tryAMAGICbinSET(gt,0); + dSP; tryAMAGICbinSET(gt,0); +#ifdef PERL_PRESERVE_IVUV + SvIV_please(TOPs); + if (SvIOK(TOPs)) { + SvIV_please(TOPm1s); + if (SvIOK(TOPm1s)) { + bool auvok = SvUOK(TOPm1s); + bool buvok = SvUOK(TOPs); + + if (!auvok && !buvok) { /* ## IV > IV ## */ + IV aiv = SvIVX(TOPm1s); + IV biv = SvIVX(TOPs); + + SP--; + SETs(boolSV(aiv > biv)); + RETURN; + } + if (auvok && buvok) { /* ## UV > UV ## */ + UV auv = SvUVX(TOPm1s); + UV buv = SvUVX(TOPs); + + SP--; + SETs(boolSV(auv > buv)); + RETURN; + } + if (auvok) { /* ## UV > IV ## */ + UV auv; + IV biv; + + biv = SvIVX(TOPs); + SP--; + if (biv < 0) { + /* As (a) is a UV, it's >=0, so it must be > */ + SETs(&PL_sv_yes); + RETURN; + } + auv = SvUVX(TOPs); + if (auv > (UV) IV_MAX) { + /* As (b) is an IV, it cannot be > IV_MAX */ + SETs(&PL_sv_yes); + RETURN; + } + SETs(boolSV(auv > (UV)biv)); + RETURN; + } + { /* ## IV > UV ## */ + IV aiv; + UV buv; + + aiv = SvIVX(TOPm1s); + if (aiv < 0) { + /* As (b) is a UV, it's >=0, so it cannot be > */ + SP--; + SETs(&PL_sv_no); + RETURN; + } + buv = SvUVX(TOPs); + SP--; + if (buv >= (UV) IV_MAX) { + /* As (a) is an IV, it cannot be > IV_MAX */ + SETs(&PL_sv_no); + RETURN; + } + SETs(boolSV((UV)aiv > buv)); + RETURN; + } + } + } +#endif { dPOPnv; SETs(boolSV(TOPn > value)); @@ -1187,7 +1559,75 @@ PP(pp_gt) PP(pp_le) { - djSP; tryAMAGICbinSET(le,0); + dSP; tryAMAGICbinSET(le,0); +#ifdef PERL_PRESERVE_IVUV + SvIV_please(TOPs); + if (SvIOK(TOPs)) { + SvIV_please(TOPm1s); + if (SvIOK(TOPm1s)) { + bool auvok = SvUOK(TOPm1s); + bool buvok = SvUOK(TOPs); + + if (!auvok && !buvok) { /* ## IV <= IV ## */ + IV aiv = SvIVX(TOPm1s); + IV biv = SvIVX(TOPs); + + SP--; + SETs(boolSV(aiv <= biv)); + RETURN; + } + if (auvok && buvok) { /* ## UV <= UV ## */ + UV auv = SvUVX(TOPm1s); + UV buv = SvUVX(TOPs); + + SP--; + SETs(boolSV(auv <= buv)); + RETURN; + } + if (auvok) { /* ## UV <= IV ## */ + UV auv; + IV biv; + + biv = SvIVX(TOPs); + SP--; + if (biv < 0) { + /* As (a) is a UV, it's >=0, so a cannot be <= */ + SETs(&PL_sv_no); + RETURN; + } + auv = SvUVX(TOPs); + if (auv > (UV) IV_MAX) { + /* As (b) is an IV, it cannot be > IV_MAX */ + SETs(&PL_sv_no); + RETURN; + } + SETs(boolSV(auv <= (UV)biv)); + RETURN; + } + { /* ## IV <= UV ## */ + IV aiv; + UV buv; + + aiv = SvIVX(TOPm1s); + if (aiv < 0) { + /* As (b) is a UV, it's >=0, so a must be <= */ + SP--; + SETs(&PL_sv_yes); + RETURN; + } + buv = SvUVX(TOPs); + SP--; + if (buv >= (UV) IV_MAX) { + /* As (a) is an IV, it cannot be > IV_MAX */ + SETs(&PL_sv_yes); + RETURN; + } + SETs(boolSV((UV)aiv <= buv)); + RETURN; + } + } + } +#endif { dPOPnv; SETs(boolSV(TOPn <= value)); @@ -1197,7 +1637,75 @@ PP(pp_le) PP(pp_ge) { - djSP; tryAMAGICbinSET(ge,0); + dSP; tryAMAGICbinSET(ge,0); +#ifdef PERL_PRESERVE_IVUV + SvIV_please(TOPs); + if (SvIOK(TOPs)) { + SvIV_please(TOPm1s); + if (SvIOK(TOPm1s)) { + bool auvok = SvUOK(TOPm1s); + bool buvok = SvUOK(TOPs); + + if (!auvok && !buvok) { /* ## IV >= IV ## */ + IV aiv = SvIVX(TOPm1s); + IV biv = SvIVX(TOPs); + + SP--; + SETs(boolSV(aiv >= biv)); + RETURN; + } + if (auvok && buvok) { /* ## UV >= UV ## */ + UV auv = SvUVX(TOPm1s); + UV buv = SvUVX(TOPs); + + SP--; + SETs(boolSV(auv >= buv)); + RETURN; + } + if (auvok) { /* ## UV >= IV ## */ + UV auv; + IV biv; + + biv = SvIVX(TOPs); + SP--; + if (biv < 0) { + /* As (a) is a UV, it's >=0, so it must be >= */ + SETs(&PL_sv_yes); + RETURN; + } + auv = SvUVX(TOPs); + if (auv >= (UV) IV_MAX) { + /* As (b) is an IV, it cannot be > IV_MAX */ + SETs(&PL_sv_yes); + RETURN; + } + SETs(boolSV(auv >= (UV)biv)); + RETURN; + } + { /* ## IV >= UV ## */ + IV aiv; + UV buv; + + aiv = SvIVX(TOPm1s); + if (aiv < 0) { + /* As (b) is a UV, it's >=0, so a cannot be >= */ + SP--; + SETs(&PL_sv_no); + RETURN; + } + buv = SvUVX(TOPs); + SP--; + if (buv > (UV) IV_MAX) { + /* As (a) is an IV, it cannot be > IV_MAX */ + SETs(&PL_sv_no); + RETURN; + } + SETs(boolSV((UV)aiv >= buv)); + RETURN; + } + } + } +#endif { dPOPnv; SETs(boolSV(TOPn >= value)); @@ -1207,7 +1715,73 @@ PP(pp_ge) PP(pp_ne) { - djSP; tryAMAGICbinSET(ne,0); + dSP; tryAMAGICbinSET(ne,0); +#ifndef NV_PRESERVES_UV + if (SvROK(TOPs) && SvROK(TOPm1s)) { + SETs(boolSV(SvRV(TOPs) != SvRV(TOPm1s))); + RETURN; + } +#endif +#ifdef PERL_PRESERVE_IVUV + SvIV_please(TOPs); + if (SvIOK(TOPs)) { + SvIV_please(TOPm1s); + if (SvIOK(TOPm1s)) { + bool auvok = SvUOK(TOPm1s); + bool buvok = SvUOK(TOPs); + + if (!auvok && !buvok) { /* ## IV <=> IV ## */ + IV aiv = SvIVX(TOPm1s); + IV biv = SvIVX(TOPs); + + SP--; + SETs(boolSV(aiv != biv)); + RETURN; + } + if (auvok && buvok) { /* ## UV != UV ## */ + UV auv = SvUVX(TOPm1s); + UV buv = SvUVX(TOPs); + + SP--; + SETs(boolSV(auv != buv)); + RETURN; + } + { /* ## Mixed IV,UV ## */ + IV iv; + UV uv; + + /* != is commutative so swap if needed (save code) */ + if (auvok) { + /* swap. top of stack (b) is the iv */ + iv = SvIVX(TOPs); + SP--; + if (iv < 0) { + /* As (a) is a UV, it's >0, so it cannot be == */ + SETs(&PL_sv_yes); + RETURN; + } + uv = SvUVX(TOPs); + } else { + iv = SvIVX(TOPm1s); + SP--; + if (iv < 0) { + /* As (b) is a UV, it's >0, so it cannot be == */ + SETs(&PL_sv_yes); + RETURN; + } + uv = SvUVX(*(SP+1)); /* Do I want TOPp1s() ? */ + } + /* we know iv is >= 0 */ + if (uv > (UV) IV_MAX) { + SETs(&PL_sv_yes); + RETURN; + } + SETs(boolSV((UV)iv != uv)); + RETURN; + } + } + } +#endif { dPOPnv; SETs(boolSV(TOPn != value)); @@ -1217,11 +1791,102 @@ PP(pp_ne) PP(pp_ncmp) { - djSP; dTARGET; tryAMAGICbin(ncmp,0); + dSP; dTARGET; tryAMAGICbin(ncmp,0); +#ifndef NV_PRESERVES_UV + if (SvROK(TOPs) && SvROK(TOPm1s)) { + SETi(PTR2UV(SvRV(TOPs)) - PTR2UV(SvRV(TOPm1s))); + RETURN; + } +#endif +#ifdef PERL_PRESERVE_IVUV + /* Fortunately it seems NaN isn't IOK */ + SvIV_please(TOPs); + if (SvIOK(TOPs)) { + SvIV_please(TOPm1s); + if (SvIOK(TOPm1s)) { + bool leftuvok = SvUOK(TOPm1s); + bool rightuvok = SvUOK(TOPs); + I32 value; + if (!leftuvok && !rightuvok) { /* ## IV <=> IV ## */ + IV leftiv = SvIVX(TOPm1s); + IV rightiv = SvIVX(TOPs); + + if (leftiv > rightiv) + value = 1; + else if (leftiv < rightiv) + value = -1; + else + value = 0; + } else if (leftuvok && rightuvok) { /* ## UV <=> UV ## */ + UV leftuv = SvUVX(TOPm1s); + UV rightuv = SvUVX(TOPs); + + if (leftuv > rightuv) + value = 1; + else if (leftuv < rightuv) + value = -1; + else + value = 0; + } else if (leftuvok) { /* ## UV <=> IV ## */ + UV leftuv; + IV rightiv; + + rightiv = SvIVX(TOPs); + if (rightiv < 0) { + /* As (a) is a UV, it's >=0, so it cannot be < */ + value = 1; + } else { + leftuv = SvUVX(TOPm1s); + if (leftuv > (UV) IV_MAX) { + /* As (b) is an IV, it cannot be > IV_MAX */ + value = 1; + } else if (leftuv > (UV)rightiv) { + value = 1; + } else if (leftuv < (UV)rightiv) { + value = -1; + } else { + value = 0; + } + } + } else { /* ## IV <=> UV ## */ + IV leftiv; + UV rightuv; + + leftiv = SvIVX(TOPm1s); + if (leftiv < 0) { + /* As (b) is a UV, it's >=0, so it must be < */ + value = -1; + } else { + rightuv = SvUVX(TOPs); + if (rightuv > (UV) IV_MAX) { + /* As (a) is an IV, it cannot be > IV_MAX */ + value = -1; + } else if (leftiv > (UV)rightuv) { + value = 1; + } else if (leftiv < (UV)rightuv) { + value = -1; + } else { + value = 0; + } + } + } + SP--; + SETi(value); + RETURN; + } + } +#endif { dPOPTOPnnrl; I32 value; +#ifdef Perl_isnan + if (Perl_isnan(left) || Perl_isnan(right)) { + SETs(&PL_sv_undef); + RETURN; + } + value = (left > right) - (left < right); +#else if (left == right) value = 0; else if (left < right) @@ -1232,6 +1897,7 @@ PP(pp_ncmp) SETs(&PL_sv_undef); RETURN; } +#endif SETi(value); RETURN; } @@ -1239,7 +1905,7 @@ PP(pp_ncmp) PP(pp_slt) { - djSP; tryAMAGICbinSET(slt,0); + dSP; tryAMAGICbinSET(slt,0); { dPOPTOPssrl; int cmp = ((PL_op->op_private & OPpLOCALE) @@ -1252,7 +1918,7 @@ PP(pp_slt) PP(pp_sgt) { - djSP; tryAMAGICbinSET(sgt,0); + dSP; tryAMAGICbinSET(sgt,0); { dPOPTOPssrl; int cmp = ((PL_op->op_private & OPpLOCALE) @@ -1265,7 +1931,7 @@ PP(pp_sgt) PP(pp_sle) { - djSP; tryAMAGICbinSET(sle,0); + dSP; tryAMAGICbinSET(sle,0); { dPOPTOPssrl; int cmp = ((PL_op->op_private & OPpLOCALE) @@ -1278,7 +1944,7 @@ PP(pp_sle) PP(pp_sge) { - djSP; tryAMAGICbinSET(sge,0); + dSP; tryAMAGICbinSET(sge,0); { dPOPTOPssrl; int cmp = ((PL_op->op_private & OPpLOCALE) @@ -1291,7 +1957,7 @@ PP(pp_sge) PP(pp_seq) { - djSP; tryAMAGICbinSET(seq,0); + dSP; tryAMAGICbinSET(seq,0); { dPOPTOPssrl; SETs(boolSV(sv_eq(left, right))); @@ -1301,7 +1967,7 @@ PP(pp_seq) PP(pp_sne) { - djSP; tryAMAGICbinSET(sne,0); + dSP; tryAMAGICbinSET(sne,0); { dPOPTOPssrl; SETs(boolSV(!sv_eq(left, right))); @@ -1311,7 +1977,13 @@ PP(pp_sne) PP(pp_scmp) { - djSP; dTARGET; tryAMAGICbin(scmp,0); + dSP; dTARGET; tryAMAGICbin(scmp,0); +#ifndef NV_PRESERVES_UV + if (SvROK(TOPs) && SvROK(TOPm1s)) { + SETi(PTR2UV(SvRV(TOPs)) - PTR2UV(SvRV(TOPm1s))); + RETURN; + } +#endif { dPOPTOPssrl; int cmp = ((PL_op->op_private & OPpLOCALE) @@ -1324,17 +1996,17 @@ PP(pp_scmp) PP(pp_bit_and) { - djSP; dATARGET; tryAMAGICbin(band,opASSIGN); + dSP; dATARGET; tryAMAGICbin(band,opASSIGN); { dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { if (PL_op->op_private & HINT_INTEGER) { - IBW value = SvIV(left) & SvIV(right); - SETi(BWi(value)); + IV i = SvIV(left) & SvIV(right); + SETi(i); } else { - UBW value = SvUV(left) & SvUV(right); - SETu(BWu(value)); + UV u = SvUV(left) & SvUV(right); + SETu(u); } } else { @@ -1347,17 +2019,17 @@ PP(pp_bit_and) PP(pp_bit_xor) { - djSP; dATARGET; tryAMAGICbin(bxor,opASSIGN); + dSP; dATARGET; tryAMAGICbin(bxor,opASSIGN); { dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { if (PL_op->op_private & HINT_INTEGER) { - IBW value = (USE_LEFT(left) ? SvIV(left) : 0) ^ SvIV(right); - SETi(BWi(value)); + IV i = (USE_LEFT(left) ? SvIV(left) : 0) ^ SvIV(right); + SETi(i); } else { - UBW value = (USE_LEFT(left) ? SvUV(left) : 0) ^ SvUV(right); - SETu(BWu(value)); + UV u = (USE_LEFT(left) ? SvUV(left) : 0) ^ SvUV(right); + SETu(u); } } else { @@ -1370,17 +2042,17 @@ PP(pp_bit_xor) PP(pp_bit_or) { - djSP; dATARGET; tryAMAGICbin(bor,opASSIGN); + dSP; dATARGET; tryAMAGICbin(bor,opASSIGN); { dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { if (PL_op->op_private & HINT_INTEGER) { - IBW value = (USE_LEFT(left) ? SvIV(left) : 0) | SvIV(right); - SETi(BWi(value)); + IV i = (USE_LEFT(left) ? SvIV(left) : 0) | SvIV(right); + SETi(i); } else { - UBW value = (USE_LEFT(left) ? SvUV(left) : 0) | SvUV(right); - SETu(BWu(value)); + UV u = (USE_LEFT(left) ? SvUV(left) : 0) | SvUV(right); + SETu(u); } } else { @@ -1393,14 +2065,38 @@ PP(pp_bit_or) PP(pp_negate) { - djSP; dTARGET; tryAMAGICun(neg); + dSP; dTARGET; tryAMAGICun(neg); { dTOPss; + int flags = SvFLAGS(sv); if (SvGMAGICAL(sv)) mg_get(sv); - if (SvIOKp(sv) && !SvNOKp(sv) && !SvPOKp(sv) && SvIVX(sv) != IV_MIN) - SETi(-SvIVX(sv)); - else if (SvNIOKp(sv)) + if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { + /* It's publicly an integer, or privately an integer-not-float */ + oops_its_an_int: + if (SvIsUV(sv)) { + if (SvIVX(sv) == IV_MIN) { + /* 2s complement assumption. */ + SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */ + RETURN; + } + else if (SvUVX(sv) <= IV_MAX) { + SETi(-SvIVX(sv)); + RETURN; + } + } + else if (SvIVX(sv) != IV_MIN) { + SETi(-SvIVX(sv)); + RETURN; + } +#ifdef PERL_PRESERVE_IVUV + else { + SETu((UV)IV_MIN); + RETURN; + } +#endif + } + if (SvNIOKp(sv)) SETn(-SvNV(sv)); else if (SvPOKp(sv)) { STRLEN len; @@ -1413,12 +2109,16 @@ PP(pp_negate) sv_setsv(TARG, sv); *SvPV_force(TARG, len) = *s == '-' ? '+' : '-'; } - else if (IN_UTF8 && *(U8*)s >= 0xc0 && isIDFIRST_utf8((U8*)s)) { + else if (DO_UTF8(sv) && UTF8_IS_START(*s) && isIDFIRST_utf8((U8*)s)) { sv_setpvn(TARG, "-", 1); sv_catsv(TARG, sv); } - else - sv_setnv(TARG, -SvNV(sv)); + else { + SvIV_please(sv); + if (SvIOK(sv)) + goto oops_its_an_int; + sv_setnv(TARG, -SvNV(sv)); + } SETTARG; } else @@ -1429,42 +2129,93 @@ PP(pp_negate) PP(pp_not) { - djSP; tryAMAGICunSET(not); + dSP; tryAMAGICunSET(not); *PL_stack_sp = boolSV(!SvTRUE(*PL_stack_sp)); return NORMAL; } PP(pp_complement) { - djSP; dTARGET; tryAMAGICun(compl); + dSP; dTARGET; tryAMAGICun(compl); { dTOPss; if (SvNIOKp(sv)) { if (PL_op->op_private & HINT_INTEGER) { - IBW value = ~SvIV(sv); - SETi(BWi(value)); + IV i = ~SvIV(sv); + SETi(i); } else { - UBW value = ~SvUV(sv); - SETu(BWu(value)); + UV u = ~SvUV(sv); + SETu(u); } } else { - register char *tmps; - register long *tmpl; + register U8 *tmps; register I32 anum; STRLEN len; SvSetSV(TARG, sv); - tmps = SvPV_force(TARG, len); + tmps = (U8*)SvPV_force(TARG, len); anum = len; + if (SvUTF8(TARG)) { + /* Calculate exact length, let's not estimate. */ + STRLEN targlen = 0; + U8 *result; + U8 *send; + STRLEN l; + UV nchar = 0; + UV nwide = 0; + + send = tmps + len; + while (tmps < send) { + UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); + tmps += UTF8SKIP(tmps); + targlen += UNISKIP(~c); + nchar++; + if (c > 0xff) + nwide++; + } + + /* Now rewind strings and write them. */ + tmps -= len; + + if (nwide) { + Newz(0, result, targlen + 1, U8); + while (tmps < send) { + UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); + tmps += UTF8SKIP(tmps); + result = uvchr_to_utf8(result, ~c); + } + *result = '\0'; + result -= targlen; + sv_setpvn(TARG, (char*)result, targlen); + SvUTF8_on(TARG); + } + else { + Newz(0, result, nchar + 1, U8); + while (tmps < send) { + U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY); + tmps += UTF8SKIP(tmps); + *result++ = ~c; + } + *result = '\0'; + result -= nchar; + sv_setpvn(TARG, (char*)result, nchar); + } + Safefree(result); + SETs(TARG); + RETURN; + } #ifdef LIBERAL - for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++) - *tmps = ~*tmps; - tmpl = (long*)tmps; - for ( ; anum >= sizeof(long); anum -= sizeof(long), tmpl++) - *tmpl = ~*tmpl; - tmps = (char*)tmpl; + { + register long *tmpl; + for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++) + *tmps = ~*tmps; + tmpl = (long*)tmps; + for ( ; anum >= sizeof(long); anum -= sizeof(long), tmpl++) + *tmpl = ~*tmpl; + tmps = (U8*)tmpl; + } #endif for ( ; anum > 0; anum--, tmps++) *tmps = ~*tmps; @@ -1479,7 +2230,7 @@ PP(pp_complement) PP(pp_i_multiply) { - djSP; dATARGET; tryAMAGICbin(mult,opASSIGN); + dSP; dATARGET; tryAMAGICbin(mult,opASSIGN); { dPOPTOPiirl; SETi( left * right ); @@ -1489,7 +2240,7 @@ PP(pp_i_multiply) PP(pp_i_divide) { - djSP; dATARGET; tryAMAGICbin(div,opASSIGN); + dSP; dATARGET; tryAMAGICbin(div,opASSIGN); { dPOPiv; if (value == 0) @@ -1502,7 +2253,7 @@ PP(pp_i_divide) PP(pp_i_modulo) { - djSP; dATARGET; tryAMAGICbin(modulo,opASSIGN); + dSP; dATARGET; tryAMAGICbin(modulo,opASSIGN); { dPOPTOPiirl; if (!right) @@ -1514,9 +2265,9 @@ PP(pp_i_modulo) PP(pp_i_add) { - djSP; dATARGET; tryAMAGICbin(add,opASSIGN); + dSP; dATARGET; tryAMAGICbin(add,opASSIGN); { - dPOPTOPiirl; + dPOPTOPiirl_ul; SETi( left + right ); RETURN; } @@ -1524,9 +2275,9 @@ PP(pp_i_add) PP(pp_i_subtract) { - djSP; dATARGET; tryAMAGICbin(subtr,opASSIGN); + dSP; dATARGET; tryAMAGICbin(subtr,opASSIGN); { - dPOPTOPiirl; + dPOPTOPiirl_ul; SETi( left - right ); RETURN; } @@ -1534,7 +2285,7 @@ PP(pp_i_subtract) PP(pp_i_lt) { - djSP; tryAMAGICbinSET(lt,0); + dSP; tryAMAGICbinSET(lt,0); { dPOPTOPiirl; SETs(boolSV(left < right)); @@ -1544,7 +2295,7 @@ PP(pp_i_lt) PP(pp_i_gt) { - djSP; tryAMAGICbinSET(gt,0); + dSP; tryAMAGICbinSET(gt,0); { dPOPTOPiirl; SETs(boolSV(left > right)); @@ -1554,7 +2305,7 @@ PP(pp_i_gt) PP(pp_i_le) { - djSP; tryAMAGICbinSET(le,0); + dSP; tryAMAGICbinSET(le,0); { dPOPTOPiirl; SETs(boolSV(left <= right)); @@ -1564,7 +2315,7 @@ PP(pp_i_le) PP(pp_i_ge) { - djSP; tryAMAGICbinSET(ge,0); + dSP; tryAMAGICbinSET(ge,0); { dPOPTOPiirl; SETs(boolSV(left >= right)); @@ -1574,7 +2325,7 @@ PP(pp_i_ge) PP(pp_i_eq) { - djSP; tryAMAGICbinSET(eq,0); + dSP; tryAMAGICbinSET(eq,0); { dPOPTOPiirl; SETs(boolSV(left == right)); @@ -1584,7 +2335,7 @@ PP(pp_i_eq) PP(pp_i_ne) { - djSP; tryAMAGICbinSET(ne,0); + dSP; tryAMAGICbinSET(ne,0); { dPOPTOPiirl; SETs(boolSV(left != right)); @@ -1594,7 +2345,7 @@ PP(pp_i_ne) PP(pp_i_ncmp) { - djSP; dTARGET; tryAMAGICbin(ncmp,0); + dSP; dTARGET; tryAMAGICbin(ncmp,0); { dPOPTOPiirl; I32 value; @@ -1612,7 +2363,7 @@ PP(pp_i_ncmp) PP(pp_i_negate) { - djSP; dTARGET; tryAMAGICun(neg); + dSP; dTARGET; tryAMAGICun(neg); SETi(-TOPi); RETURN; } @@ -1621,7 +2372,7 @@ PP(pp_i_negate) PP(pp_atan2) { - djSP; dTARGET; tryAMAGICbin(atan2,0); + dSP; dTARGET; tryAMAGICbin(atan2,0); { dPOPTOPnnrl; SETn(Perl_atan2(left, right)); @@ -1631,7 +2382,7 @@ PP(pp_atan2) PP(pp_sin) { - djSP; dTARGET; tryAMAGICun(sin); + dSP; dTARGET; tryAMAGICun(sin); { NV value; value = POPn; @@ -1643,7 +2394,7 @@ PP(pp_sin) PP(pp_cos) { - djSP; dTARGET; tryAMAGICun(cos); + dSP; dTARGET; tryAMAGICun(cos); { NV value; value = POPn; @@ -1670,7 +2421,7 @@ extern double drand48 (void); PP(pp_rand) { - djSP; dTARGET; + dSP; dTARGET; NV value; if (MAXARG < 1) value = 1.0; @@ -1689,7 +2440,7 @@ PP(pp_rand) PP(pp_srand) { - djSP; + dSP; UV anum; if (MAXARG < 1) anum = seed(); @@ -1726,7 +2477,6 @@ S_seed(pTHX) #define SEED_C3 269 #define SEED_C5 26107 - dTHR; #ifndef PERL_NO_DEV_RANDOM int fd; #endif @@ -1775,17 +2525,17 @@ S_seed(pTHX) u = (U32)SEED_C1 * when; # endif #endif - u += SEED_C3 * (U32)getpid(); - u += SEED_C4 * (U32)(UV)PL_stack_sp; + u += SEED_C3 * (U32)PerlProc_getpid(); + u += SEED_C4 * (U32)PTR2UV(PL_stack_sp); #ifndef PLAN9 /* XXX Plan9 assembler chokes on this; fix needed */ - u += SEED_C5 * (U32)(UV)&when; + u += SEED_C5 * (U32)PTR2UV(&when); #endif return u; } PP(pp_exp) { - djSP; dTARGET; tryAMAGICun(exp); + dSP; dTARGET; tryAMAGICun(exp); { NV value; value = POPn; @@ -1797,12 +2547,12 @@ PP(pp_exp) PP(pp_log) { - djSP; dTARGET; tryAMAGICun(log); + dSP; dTARGET; tryAMAGICun(log); { NV value; value = POPn; if (value <= 0.0) { - RESTORE_NUMERIC_STANDARD(); + SET_NUMERIC_STANDARD(); DIE(aTHX_ "Can't take log of %g", value); } value = Perl_log(value); @@ -1813,12 +2563,12 @@ PP(pp_log) PP(pp_sqrt) { - djSP; dTARGET; tryAMAGICun(sqrt); + dSP; dTARGET; tryAMAGICun(sqrt); { NV value; value = POPn; if (value < 0.0) { - RESTORE_NUMERIC_STANDARD(); + SET_NUMERIC_STANDARD(); DIE(aTHX_ "Can't take sqrt of %g", value); } value = Perl_sqrt(value); @@ -1829,27 +2579,52 @@ PP(pp_sqrt) PP(pp_int) { - djSP; dTARGET; + dSP; dTARGET; tryAMAGICun(int); { - NV value = TOPn; - IV iv; - - if (SvIOKp(TOPs) && !SvNOKp(TOPs) && !SvPOKp(TOPs)) { - iv = SvIVX(TOPs); - SETi(iv); - } - else { - if (value >= 0.0) - (void)Perl_modf(value, &value); - else { - (void)Perl_modf(-value, &value); - value = -value; - } - iv = I_V(value); - if (iv == value) - SETi(iv); - else - SETn(value); + NV value; + IV iv = TOPi; /* attempt to convert to IV if possible. */ + /* XXX it's arguable that compiler casting to IV might be subtly + different from modf (for numbers inside (IV_MIN,UV_MAX)) in which + else preferring IV has introduced a subtle behaviour change bug. OTOH + relying on floating point to be accurate is a bug. */ + + if (SvIOK(TOPs)) { + if (SvIsUV(TOPs)) { + UV uv = TOPu; + SETu(uv); + } else + SETi(iv); + } else { + value = TOPn; + if (value >= 0.0) { + if (value < (NV)UV_MAX + 0.5) { + SETu(U_V(value)); + } else { +#if defined(HAS_MODFL) || defined(LONG_DOUBLE_EQUALS_DOUBLE) + (void)Perl_modf(value, &value); +#else + double tmp = (double)value; + (void)Perl_modf(tmp, &tmp); + value = (NV)tmp; +#endif + SETn(value); + } + } + else { + if (value > (NV)IV_MIN - 0.5) { + SETi(I_V(value)); + } else { +#if defined(HAS_MODFL) || defined(LONG_DOUBLE_EQUALS_DOUBLE) + (void)Perl_modf(-value, &value); + value = -value; +#else + double tmp = (double)value; + (void)Perl_modf(-tmp, &tmp); + value = -(NV)tmp; +#endif + SETn(value); + } + } } } RETURN; @@ -1857,20 +2632,32 @@ PP(pp_int) PP(pp_abs) { - djSP; dTARGET; tryAMAGICun(abs); + dSP; dTARGET; tryAMAGICun(abs); { - NV value = TOPn; - IV iv; - - if (SvIOKp(TOPs) && !SvNOKp(TOPs) && !SvPOKp(TOPs) && - (iv = SvIVX(TOPs)) != IV_MIN) { - if (iv < 0) - iv = -iv; - SETi(iv); - } - else { + /* This will cache the NV value if string isn't actually integer */ + IV iv = TOPi; + + if (SvIOK(TOPs)) { + /* IVX is precise */ + if (SvIsUV(TOPs)) { + SETu(TOPu); /* force it to be numeric only */ + } else { + if (iv >= 0) { + SETi(iv); + } else { + if (iv != IV_MIN) { + SETi(-iv); + } else { + /* 2s complement assumption. Also, not really needed as + IV_MIN and -IV_MIN should both be %100...00 and NV-able */ + SETu(IV_MIN); + } + } + } + } else{ + NV value = TOPn; if (value < 0.0) - value = -value; + value = -value; SETn(value); } } @@ -1879,36 +2666,38 @@ PP(pp_abs) PP(pp_hex) { - djSP; dTARGET; + dSP; dTARGET; char *tmps; - I32 argtype; - STRLEN n_a; + STRLEN argtype; + STRLEN len; - tmps = POPpx; - XPUSHu(scan_hex(tmps, 99, &argtype)); + tmps = (SvPVx(POPs, len)); + argtype = 1; /* allow underscores */ + XPUSHn(scan_hex(tmps, len, &argtype)); RETURN; } PP(pp_oct) { - djSP; dTARGET; - UV value; - I32 argtype; + dSP; dTARGET; + NV value; + STRLEN argtype; char *tmps; - STRLEN n_a; + STRLEN len; - tmps = POPpx; - while (*tmps && isSPACE(*tmps)) - tmps++; + tmps = (SvPVx(POPs, len)); + while (*tmps && len && isSPACE(*tmps)) + tmps++, len--; if (*tmps == '0') - tmps++; + tmps++, len--; + argtype = 1; /* allow underscores */ if (*tmps == 'x') - value = scan_hex(++tmps, 99, &argtype); + value = scan_hex(++tmps, --len, &argtype); else if (*tmps == 'b') - value = scan_bin(++tmps, 99, &argtype); + value = scan_bin(++tmps, --len, &argtype); else - value = scan_oct(tmps, 99, &argtype); - XPUSHu(value); + value = scan_oct(tmps, len, &argtype); + XPUSHn(value); RETURN; } @@ -1916,60 +2705,73 @@ PP(pp_oct) PP(pp_length) { - djSP; dTARGET; - - if (IN_UTF8) { - SETi( sv_len_utf8(TOPs) ); - RETURN; - } + dSP; dTARGET; + SV *sv = TOPs; - SETi( sv_len(TOPs) ); + if (DO_UTF8(sv)) + SETi(sv_len_utf8(sv)); + else + SETi(sv_len(sv)); RETURN; } PP(pp_substr) { - djSP; dTARGET; + dSP; dTARGET; SV *sv; I32 len; STRLEN curlen; - STRLEN utfcurlen; + STRLEN utf8_curlen; I32 pos; I32 rem; I32 fail; - I32 lvalue = PL_op->op_flags & OPf_MOD; + I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; char *tmps; I32 arybase = PL_curcop->cop_arybase; + SV *repl_sv = NULL; char *repl = 0; STRLEN repl_len; + int num_args = PL_op->op_private & 7; + bool repl_need_utf8_upgrade = FALSE; + bool repl_is_utf8 = FALSE; SvTAINTED_off(TARG); /* decontaminate */ - if (MAXARG > 2) { - if (MAXARG > 3) { - sv = POPs; - repl = SvPV(sv, repl_len); + SvUTF8_off(TARG); /* decontaminate */ + if (num_args > 2) { + if (num_args > 3) { + repl_sv = POPs; + repl = SvPV(repl_sv, repl_len); + repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv); } len = POPi; } pos = POPi; sv = POPs; PUTBACK; + if (repl_sv) { + if (repl_is_utf8) { + if (!DO_UTF8(sv)) + sv_utf8_upgrade(sv); + } + else if (DO_UTF8(sv)) + repl_need_utf8_upgrade = TRUE; + } tmps = SvPV(sv, curlen); - if (IN_UTF8) { - utfcurlen = sv_len_utf8(sv); - if (utfcurlen == curlen) - utfcurlen = 0; + if (DO_UTF8(sv)) { + utf8_curlen = sv_len_utf8(sv); + if (utf8_curlen == curlen) + utf8_curlen = 0; else - curlen = utfcurlen; + curlen = utf8_curlen; } else - utfcurlen = 0; + utf8_curlen = 0; if (pos >= arybase) { pos -= arybase; rem = curlen-pos; fail = rem; - if (MAXARG > 2) { + if (num_args > 2) { if (len < 0) { rem += len; if (rem < 0) @@ -1981,7 +2783,7 @@ PP(pp_substr) } else { pos += curlen; - if (MAXARG < 3) + if (num_args < 3) rem = curlen; else if (len >= 0) { rem = pos+len; @@ -1999,16 +2801,37 @@ PP(pp_substr) rem -= pos; } if (fail < 0) { - if (ckWARN(WARN_SUBSTR) || lvalue || repl) + if (lvalue || repl) + Perl_croak(aTHX_ "substr outside of string"); + if (ckWARN(WARN_SUBSTR)) Perl_warner(aTHX_ WARN_SUBSTR, "substr outside of string"); RETPUSHUNDEF; } else { - if (utfcurlen) + I32 upos = pos; + I32 urem = rem; + if (utf8_curlen) sv_pos_u2b(sv, &pos, &rem); tmps += pos; sv_setpvn(TARG, tmps, rem); - if (lvalue) { /* it's an lvalue! */ + if (utf8_curlen) + SvUTF8_on(TARG); + if (repl) { + SV* repl_sv_copy = NULL; + + if (repl_need_utf8_upgrade) { + repl_sv_copy = newSVsv(repl_sv); + sv_utf8_upgrade(repl_sv_copy); + repl = SvPV(repl_sv_copy, repl_len); + repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv); + } + sv_insert(sv, pos, rem, repl, repl_len); + if (repl_is_utf8) + SvUTF8_on(sv); + if (repl_sv_copy) + SvREFCNT_dec(repl_sv_copy); + } + else if (lvalue) { /* it's an lvalue! */ if (!SvGMAGICAL(sv)) { if (SvROK(sv)) { STRLEN n_a; @@ -2018,7 +2841,7 @@ PP(pp_substr) "Attempt to use reference as lvalue in substr"); } if (SvOK(sv)) /* is it defined ? */ - (void)SvPOK_only(sv); + (void)SvPOK_only_UTF8(sv); else sv_setpvn(sv,"",0); /* avoid lexical reincarnation */ } @@ -2034,11 +2857,9 @@ PP(pp_substr) SvREFCNT_dec(LvTARG(TARG)); LvTARG(TARG) = SvREFCNT_inc(sv); } - LvTARGOFF(TARG) = pos; - LvTARGLEN(TARG) = rem; + LvTARGOFF(TARG) = upos; + LvTARGLEN(TARG) = urem; } - else if (repl) - sv_insert(sv, pos, rem, repl, repl_len); } SPAGAIN; PUSHs(TARG); /* avoid SvSETMAGIC here */ @@ -2047,86 +2868,36 @@ PP(pp_substr) PP(pp_vec) { - djSP; dTARGET; - register I32 size = POPi; - register I32 offset = POPi; + dSP; dTARGET; + register IV size = POPi; + register IV offset = POPi; register SV *src = POPs; - I32 lvalue = PL_op->op_flags & OPf_MOD; - STRLEN srclen; - unsigned char *s = (unsigned char*)SvPV(src, srclen); - unsigned long retnum; - I32 len; - - SvTAINTED_off(TARG); /* decontaminate */ - offset *= size; /* turn into bit offset */ - len = (offset + size + 7) / 8; - if (offset < 0 || size < 1) - retnum = 0; - else { - if (lvalue) { /* it's an lvalue! */ - if (SvTYPE(TARG) < SVt_PVLV) { - sv_upgrade(TARG, SVt_PVLV); - sv_magic(TARG, Nullsv, 'v', Nullch, 0); - } + I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; - LvTYPE(TARG) = 'v'; - if (LvTARG(TARG) != src) { - if (LvTARG(TARG)) - SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc(src); - } - LvTARGOFF(TARG) = offset; - LvTARGLEN(TARG) = size; - } - if (len > srclen) { - if (size <= 8) - retnum = 0; - else { - offset >>= 3; - if (size == 16) { - if (offset >= srclen) - retnum = 0; - else - retnum = (unsigned long) s[offset] << 8; - } - else if (size == 32) { - if (offset >= srclen) - retnum = 0; - else if (offset + 1 >= srclen) - retnum = (unsigned long) s[offset] << 24; - else if (offset + 2 >= srclen) - retnum = ((unsigned long) s[offset] << 24) + - ((unsigned long) s[offset + 1] << 16); - else - retnum = ((unsigned long) s[offset] << 24) + - ((unsigned long) s[offset + 1] << 16) + - (s[offset + 2] << 8); - } - } + SvTAINTED_off(TARG); /* decontaminate */ + if (lvalue) { /* it's an lvalue! */ + if (SvTYPE(TARG) < SVt_PVLV) { + sv_upgrade(TARG, SVt_PVLV); + sv_magic(TARG, Nullsv, 'v', Nullch, 0); } - else if (size < 8) - retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1); - else { - offset >>= 3; - if (size == 8) - retnum = s[offset]; - else if (size == 16) - retnum = ((unsigned long) s[offset] << 8) + s[offset+1]; - else if (size == 32) - retnum = ((unsigned long) s[offset] << 24) + - ((unsigned long) s[offset + 1] << 16) + - (s[offset + 2] << 8) + s[offset+3]; + LvTYPE(TARG) = 'v'; + if (LvTARG(TARG) != src) { + if (LvTARG(TARG)) + SvREFCNT_dec(LvTARG(TARG)); + LvTARG(TARG) = SvREFCNT_inc(src); } + LvTARGOFF(TARG) = offset; + LvTARGLEN(TARG) = size; } - sv_setuv(TARG, (UV)retnum); + sv_setuv(TARG, do_vecget(src, offset, size)); PUSHs(TARG); RETURN; } PP(pp_index) { - djSP; dTARGET; + dSP; dTARGET; SV *big; SV *little; I32 offset; @@ -2143,7 +2914,7 @@ PP(pp_index) little = POPs; big = POPs; tmps = SvPV(big, biglen); - if (IN_UTF8 && offset > 0) + if (offset > 0 && DO_UTF8(big)) sv_pos_u2b(big, &offset, 0); if (offset < 0) offset = 0; @@ -2154,7 +2925,7 @@ PP(pp_index) retval = -1; else retval = tmps2 - tmps; - if (IN_UTF8 && retval > 0) + if (retval > 0 && DO_UTF8(big)) sv_pos_b2u(big, &retval); PUSHi(retval + arybase); RETURN; @@ -2162,7 +2933,7 @@ PP(pp_index) PP(pp_rindex) { - djSP; dTARGET; + dSP; dTARGET; SV *big; SV *little; STRLEN blen; @@ -2182,7 +2953,7 @@ PP(pp_rindex) if (MAXARG < 3) offset = blen; else { - if (IN_UTF8 && offset > 0) + if (offset > 0 && DO_UTF8(big)) sv_pos_u2b(big, &offset, 0); offset = offset - arybase + llen; } @@ -2195,7 +2966,7 @@ PP(pp_rindex) retval = -1; else retval = tmps2 - tmps; - if (IN_UTF8 && retval > 0) + if (retval > 0 && DO_UTF8(big)) sv_pos_b2u(big, &retval); PUSHi(retval + arybase); RETURN; @@ -2203,7 +2974,7 @@ PP(pp_rindex) PP(pp_sprintf) { - djSP; dMARK; dORIGMARK; dTARGET; + dSP; dMARK; dORIGMARK; dTARGET; do_sprintf(TARG, SP-MARK, MARK+1); TAINT_IF(SvTAINTED(TARG)); SP = ORIGMARK; @@ -2213,35 +2984,30 @@ PP(pp_sprintf) PP(pp_ord) { - djSP; dTARGET; - UV value; - STRLEN n_a; - U8 *tmps = (U8*)POPpx; - I32 retlen; + dSP; dTARGET; + SV *argsv = POPs; + STRLEN len; + U8 *s = (U8*)SvPVx(argsv, len); - if (IN_UTF8 && (*tmps & 0x80)) - value = utf8_to_uv(tmps, &retlen); - else - value = (UV)(*tmps & 255); - XPUSHu(value); + XPUSHu(DO_UTF8(argsv) ? utf8_to_uvchr(s, 0) : (*s & 0xff)); RETURN; } PP(pp_chr) { - djSP; dTARGET; + dSP; dTARGET; char *tmps; - U32 value = POPu; + UV value = POPu; (void)SvUPGRADE(TARG,SVt_PV); - if (IN_UTF8 && value >= 128) { - SvGROW(TARG,8); - tmps = SvPVX(TARG); - tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value); + if (value > 255 && !IN_BYTE) { + SvGROW(TARG, UNISKIP(value)+1); + tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value); SvCUR_set(TARG, tmps - SvPVX(TARG)); *tmps = '\0'; (void)SvPOK_only(TARG); + SvUTF8_on(TARG); XPUSHs(TARG); RETURN; } @@ -2258,7 +3024,7 @@ PP(pp_chr) PP(pp_crypt) { - djSP; dTARGET; dPOPTOPssrl; + dSP; dTARGET; dPOPTOPssrl; STRLEN n_a; #ifdef HAS_CRYPT char *tmps = SvPV(left, n_a); @@ -2268,7 +3034,7 @@ PP(pp_crypt) sv_setpv(TARG, PerlProc_crypt(tmps, SvPV(right, n_a))); #endif #else - DIE(aTHX_ + DIE(aTHX_ "The crypt() function is unimplemented due to excessive paranoia."); #endif SETs(TARG); @@ -2277,31 +3043,32 @@ PP(pp_crypt) PP(pp_ucfirst) { - djSP; + dSP; SV *sv = TOPs; register U8 *s; STRLEN slen; - if (IN_UTF8 && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { - I32 ulen; - U8 tmpbuf[10]; + if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && UTF8_IS_START(*s)) { + STRLEN ulen; + U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; - UV uv = utf8_to_uv(s, &ulen); + UV uv; if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - uv = toTITLE_LC_uni(uv); + uv = toTITLE_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0)); } else uv = toTITLE_utf8(s); - tend = uv_to_utf8(tmpbuf, uv); + tend = uvchr_to_utf8(tmpbuf, uv); - if (!SvPADTMP(sv) || tend - tmpbuf != ulen) { + if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { dTARGET; sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf); sv_catpvn(TARG, (char*)(s + ulen), slen - ulen); + SvUTF8_on(TARG); SETs(TARG); } else { @@ -2310,8 +3077,9 @@ PP(pp_ucfirst) } } else { - if (!SvPADTMP(sv)) { + if (!SvPADTMP(sv) || SvREADONLY(sv)) { dTARGET; + SvUTF8_off(TARG); /* decontaminate */ sv_setsv(TARG, sv); sv = TARG; SETs(sv); @@ -2334,31 +3102,32 @@ PP(pp_ucfirst) PP(pp_lcfirst) { - djSP; + dSP; SV *sv = TOPs; register U8 *s; STRLEN slen; - if (IN_UTF8 && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { - I32 ulen; - U8 tmpbuf[10]; + if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && UTF8_IS_START(*s)) { + STRLEN ulen; + U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; - UV uv = utf8_to_uv(s, &ulen); + UV uv; if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - uv = toLOWER_LC_uni(uv); + uv = toLOWER_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0)); } else uv = toLOWER_utf8(s); - tend = uv_to_utf8(tmpbuf, uv); + tend = uvchr_to_utf8(tmpbuf, uv); - if (!SvPADTMP(sv) || tend - tmpbuf != ulen) { + if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { dTARGET; sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf); sv_catpvn(TARG, (char*)(s + ulen), slen - ulen); + SvUTF8_on(TARG); SETs(TARG); } else { @@ -2367,8 +3136,9 @@ PP(pp_lcfirst) } } else { - if (!SvPADTMP(sv)) { + if (!SvPADTMP(sv) || SvREADONLY(sv)) { dTARGET; + SvUTF8_off(TARG); /* decontaminate */ sv_setsv(TARG, sv); sv = TARG; SETs(sv); @@ -2383,7 +3153,6 @@ PP(pp_lcfirst) else *s = toLOWER(*s); } - SETs(sv); } if (SvSMAGICAL(sv)) mg_set(sv); @@ -2392,19 +3161,20 @@ PP(pp_lcfirst) PP(pp_uc) { - djSP; + dSP; SV *sv = TOPs; register U8 *s; STRLEN len; - if (IN_UTF8) { + if (DO_UTF8(sv)) { dTARGET; - I32 ulen; + STRLEN ulen; register U8 *d; U8 *send; s = (U8*)SvPV(sv,len); if (!len) { + SvUTF8_off(TARG); /* decontaminate */ sv_setpvn(TARG, "", 0); SETs(TARG); } @@ -2418,24 +3188,26 @@ PP(pp_uc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, &ulen))); + d = uvchr_to_utf8(d, toUPPER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0))); s += ulen; } } else { while (s < send) { - d = uv_to_utf8(d, toUPPER_utf8( s )); + d = uvchr_to_utf8(d, toUPPER_utf8( s )); s += UTF8SKIP(s); } } *d = '\0'; + SvUTF8_on(TARG); SvCUR_set(TARG, d - (U8*)SvPVX(TARG)); SETs(TARG); } } else { - if (!SvPADTMP(sv)) { + if (!SvPADTMP(sv) || SvREADONLY(sv)) { dTARGET; + SvUTF8_off(TARG); /* decontaminate */ sv_setsv(TARG, sv); sv = TARG; SETs(sv); @@ -2463,19 +3235,20 @@ PP(pp_uc) PP(pp_lc) { - djSP; + dSP; SV *sv = TOPs; register U8 *s; STRLEN len; - if (IN_UTF8) { + if (DO_UTF8(sv)) { dTARGET; - I32 ulen; + STRLEN ulen; register U8 *d; U8 *send; s = (U8*)SvPV(sv,len); if (!len) { + SvUTF8_off(TARG); /* decontaminate */ sv_setpvn(TARG, "", 0); SETs(TARG); } @@ -2489,24 +3262,26 @@ PP(pp_lc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, &ulen))); + d = uvchr_to_utf8(d, toLOWER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0))); s += ulen; } } else { while (s < send) { - d = uv_to_utf8(d, toLOWER_utf8(s)); + d = uvchr_to_utf8(d, toLOWER_utf8(s)); s += UTF8SKIP(s); } } *d = '\0'; + SvUTF8_on(TARG); SvCUR_set(TARG, d - (U8*)SvPVX(TARG)); SETs(TARG); } } else { - if (!SvPADTMP(sv)) { + if (!SvPADTMP(sv) || SvREADONLY(sv)) { dTARGET; + SvUTF8_off(TARG); /* decontaminate */ sv_setsv(TARG, sv); sv = TARG; SETs(sv); @@ -2535,19 +3310,20 @@ PP(pp_lc) PP(pp_quotemeta) { - djSP; dTARGET; + dSP; dTARGET; SV *sv = TOPs; STRLEN len; register char *s = SvPV(sv,len); register char *d; + SvUTF8_off(TARG); /* decontaminate */ if (len) { (void)SvUPGRADE(TARG, SVt_PV); SvGROW(TARG, (len * 2) + 1); d = SvPVX(TARG); - if (IN_UTF8) { + if (DO_UTF8(sv)) { while (len) { - if (*s & 0x80) { + if (UTF8_IS_CONTINUED(*s)) { STRLEN ulen = UTF8SKIP(s); if (ulen > len) ulen = len; @@ -2562,6 +3338,7 @@ PP(pp_quotemeta) len--; } } + SvUTF8_on(TARG); } else { while (len--) { @@ -2572,7 +3349,7 @@ PP(pp_quotemeta) } *d = '\0'; SvCUR_set(TARG, d - SvPVX(TARG)); - (void)SvPOK_only(TARG); + (void)SvPOK_only_UTF8(TARG); } else sv_setpvn(TARG, s, len); @@ -2586,10 +3363,10 @@ PP(pp_quotemeta) PP(pp_aslice) { - djSP; dMARK; dORIGMARK; + dSP; dMARK; dORIGMARK; register SV** svp; register AV* av = (AV*)POPs; - register I32 lval = PL_op->op_flags & OPf_MOD; + register I32 lval = (PL_op->op_flags & OPf_MOD || LVRET); I32 arybase = PL_curcop->cop_arybase; I32 elem; @@ -2631,7 +3408,7 @@ PP(pp_aslice) PP(pp_each) { - djSP; dTARGET; + dSP; HV *hash = (HV*)POPs; HE *entry; I32 gimme = GIMME_V; @@ -2646,12 +3423,13 @@ PP(pp_each) if (entry) { PUSHs(hv_iterkeysv(entry)); /* won't clobber stack_sp */ if (gimme == G_ARRAY) { + SV *val; PUTBACK; /* might clobber stack_sp */ - sv_setsv(TARG, realhv ? - hv_iterval(hash, entry) : avhv_iterval((AV*)hash, entry)); + val = realhv ? + hv_iterval(hash, entry) : avhv_iterval((AV*)hash, entry); SPAGAIN; - PUSHs(TARG); + PUSHs(val); } } else if (gimme == G_SCALAR) @@ -2672,7 +3450,7 @@ PP(pp_keys) PP(pp_delete) { - djSP; + dSP; I32 gimme = GIMME_V; I32 discard = (gimme == G_VOID) ? G_DISCARD : 0; SV *sv; @@ -2683,13 +3461,28 @@ PP(pp_delete) U32 hvtype; hv = (HV*)POPs; hvtype = SvTYPE(hv); - while (++MARK <= SP) { - if (hvtype == SVt_PVHV) + if (hvtype == SVt_PVHV) { /* hash element */ + while (++MARK <= SP) { sv = hv_delete_ent(hv, *MARK, discard, 0); - else - DIE(aTHX_ "Not a HASH reference"); - *MARK = sv ? sv : &PL_sv_undef; + *MARK = sv ? sv : &PL_sv_undef; + } + } + else if (hvtype == SVt_PVAV) { + if (PL_op->op_flags & OPf_SPECIAL) { /* array element */ + while (++MARK <= SP) { + sv = av_delete((AV*)hv, SvIV(*MARK), discard); + *MARK = sv ? sv : &PL_sv_undef; + } + } + else { /* pseudo-hash element */ + while (++MARK <= SP) { + sv = avhv_delete_ent((AV*)hv, *MARK, discard, 0); + *MARK = sv ? sv : &PL_sv_undef; + } + } } + else + DIE(aTHX_ "Not a HASH reference"); if (discard) SP = ORIGMARK; else if (gimme == G_SCALAR) { @@ -2703,6 +3496,12 @@ PP(pp_delete) hv = (HV*)POPs; if (SvTYPE(hv) == SVt_PVHV) sv = hv_delete_ent(hv, keysv, discard, 0); + else if (SvTYPE(hv) == SVt_PVAV) { + if (PL_op->op_flags & OPf_SPECIAL) + sv = av_delete((AV*)hv, SvIV(keysv), discard); + else + sv = avhv_delete_ent((AV*)hv, keysv, discard, 0); + } else DIE(aTHX_ "Not a HASH reference"); if (!sv) @@ -2715,15 +3514,33 @@ PP(pp_delete) PP(pp_exists) { - djSP; - SV *tmpsv = POPs; - HV *hv = (HV*)POPs; + dSP; + SV *tmpsv; + HV *hv; + + if (PL_op->op_private & OPpEXISTS_SUB) { + GV *gv; + CV *cv; + SV *sv = POPs; + cv = sv_2cv(sv, &hv, &gv, FALSE); + if (cv) + RETPUSHYES; + if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv)) + RETPUSHYES; + RETPUSHNO; + } + tmpsv = POPs; + hv = (HV*)POPs; if (SvTYPE(hv) == SVt_PVHV) { if (hv_exists_ent(hv, tmpsv, 0)) RETPUSHYES; } else if (SvTYPE(hv) == SVt_PVAV) { - if (avhv_exists_ent((AV*)hv, tmpsv, 0)) + if (PL_op->op_flags & OPf_SPECIAL) { /* array element */ + if (av_exists((AV*)hv, SvIV(tmpsv))) + RETPUSHYES; + } + else if (avhv_exists_ent((AV*)hv, tmpsv, 0)) /* pseudo-hash element */ RETPUSHYES; } else { @@ -2734,9 +3551,9 @@ PP(pp_exists) PP(pp_hslice) { - djSP; dMARK; dORIGMARK; + dSP; dMARK; dORIGMARK; register HV *hv = (HV*)POPs; - register I32 lval = PL_op->op_flags & OPf_MOD; + register I32 lval = (PL_op->op_flags & OPf_MOD || LVRET); I32 realhv = (SvTYPE(hv) == SVt_PVHV); if (!realhv && PL_op->op_private & OPpLVAL_INTRO) @@ -2746,6 +3563,9 @@ PP(pp_hslice) while (++MARK <= SP) { SV *keysv = *MARK; SV **svp; + I32 preeminent = SvRMAGICAL(hv) ? 1 : + realhv ? hv_exists_ent(hv, keysv, 0) + : avhv_exists_ent((AV*)hv, keysv, 0); if (realhv) { HE *he = hv_fetch_ent(hv, keysv, lval, 0); svp = he ? &HeVAL(he) : 0; @@ -2758,8 +3578,15 @@ PP(pp_hslice) STRLEN n_a; DIE(aTHX_ PL_no_helem, SvPV(keysv, n_a)); } - if (PL_op->op_private & OPpLVAL_INTRO) - save_helem(hv, keysv, svp); + if (PL_op->op_private & OPpLVAL_INTRO) { + if (preeminent) + save_helem(hv, keysv, svp); + else { + STRLEN keylen; + char *key = SvPV(keysv, keylen); + SAVEDELETE(hv, savepvn(key,keylen), keylen); + } + } } *MARK = svp ? *svp : &PL_sv_undef; } @@ -2776,7 +3603,7 @@ PP(pp_hslice) PP(pp_list) { - djSP; dMARK; + dSP; dMARK; if (GIMME != G_ARRAY) { if (++MARK <= SP) *MARK = *SP; /* unwanted list, return last item */ @@ -2789,7 +3616,7 @@ PP(pp_list) PP(pp_lslice) { - djSP; + dSP; SV **lastrelem = PL_stack_sp; SV **lastlelem = PL_stack_base + POPMARK; SV **firstlelem = PL_stack_base + POPMARK + 1; @@ -2825,7 +3652,7 @@ PP(pp_lslice) ix = SvIVx(*lelem); if (ix < 0) ix += max; - else + else ix -= arybase; if (ix < 0 || ix >= max) *lelem = &PL_sv_undef; @@ -2844,7 +3671,7 @@ PP(pp_lslice) PP(pp_anonlist) { - djSP; dMARK; dORIGMARK; + dSP; dMARK; dORIGMARK; I32 items = SP - MARK; SV *av = sv_2mortal((SV*)av_make(items, MARK+1)); SP = ORIGMARK; /* av_make() might realloc stack_sp */ @@ -2854,7 +3681,7 @@ PP(pp_anonlist) PP(pp_anonhash) { - djSP; dMARK; dORIGMARK; + dSP; dMARK; dORIGMARK; HV* hv = (HV*)sv_2mortal((SV*)newHV()); while (MARK < SP) { @@ -2862,8 +3689,8 @@ PP(pp_anonhash) SV *val = NEWSV(46, 0); if (MARK < SP) sv_setsv(val, *++MARK); - else if (ckWARN(WARN_UNSAFE)) - Perl_warner(aTHX_ WARN_UNSAFE, "Odd number of elements in hash assignment"); + else if (ckWARN(WARN_MISC)) + Perl_warner(aTHX_ WARN_MISC, "Odd number of elements in hash assignment"); (void)hv_store_ent(hv,key,val,0); } SP = ORIGMARK; @@ -2873,7 +3700,7 @@ PP(pp_anonhash) PP(pp_splice) { - djSP; dMARK; dORIGMARK; + dSP; dMARK; dORIGMARK; register AV *ary = (AV*)*++MARK; register SV **src; register SV **dst; @@ -2886,7 +3713,7 @@ PP(pp_splice) SV **tmparyval = 0; MAGIC *mg; - if (mg = SvTIED_mg((SV*)ary, 'P')) { + if ((mg = SvTIED_mg((SV*)ary, 'P'))) { *MARK-- = SvTIED_obj((SV*)ary, mg); PUSHMARK(MARK); PUTBACK; @@ -3075,12 +3902,12 @@ PP(pp_splice) PP(pp_push) { - djSP; dMARK; dORIGMARK; dTARGET; + dSP; dMARK; dORIGMARK; dTARGET; register AV *ary = (AV*)*++MARK; register SV *sv = &PL_sv_undef; MAGIC *mg; - if (mg = SvTIED_mg((SV*)ary, 'P')) { + if ((mg = SvTIED_mg((SV*)ary, 'P'))) { *MARK-- = SvTIED_obj((SV*)ary, mg); PUSHMARK(MARK); PUTBACK; @@ -3105,7 +3932,7 @@ PP(pp_push) PP(pp_pop) { - djSP; + dSP; AV *av = (AV*)POPs; SV *sv = av_pop(av); if (AvREAL(av)) @@ -3116,7 +3943,7 @@ PP(pp_pop) PP(pp_shift) { - djSP; + dSP; AV *av = (AV*)POPs; SV *sv = av_shift(av); EXTEND(SP, 1); @@ -3130,13 +3957,13 @@ PP(pp_shift) PP(pp_unshift) { - djSP; dMARK; dORIGMARK; dTARGET; + dSP; dMARK; dORIGMARK; dTARGET; register AV *ary = (AV*)*++MARK; register SV *sv; register I32 i = 0; MAGIC *mg; - if (mg = SvTIED_mg((SV*)ary, 'P')) { + if ((mg = SvTIED_mg((SV*)ary, 'P'))) { *MARK-- = SvTIED_obj((SV*)ary, mg); PUSHMARK(MARK); PUTBACK; @@ -3160,7 +3987,7 @@ PP(pp_unshift) PP(pp_reverse) { - djSP; dMARK; + dSP; dMARK; register SV *tmp; SV **oldsp = SP; @@ -3171,6 +3998,7 @@ PP(pp_reverse) *MARK++ = *SP; *SP-- = tmp; } + /* safe as long as stack cannot get extended in the above */ SP = oldsp; } else { @@ -3180,28 +4008,28 @@ PP(pp_reverse) dTARGET; STRLEN len; + SvUTF8_off(TARG); /* decontaminate */ if (SP - MARK > 1) do_join(TARG, &PL_sv_no, MARK, SP); else sv_setsv(TARG, (SP > MARK) ? *SP : DEFSV); up = SvPV_force(TARG, len); if (len > 1) { - if (IN_UTF8) { /* first reverse each character */ + if (DO_UTF8(TARG)) { /* first reverse each character */ U8* s = (U8*)SvPVX(TARG); U8* send = (U8*)(s + len); while (s < send) { - if (*s < 0x80) { + if (UTF8_IS_INVARIANT(*s)) { s++; continue; } else { + if (!utf8_to_uvchr(s, 0)) + break; up = (char*)s; s += UTF8SKIP(s); down = (char*)(s - 1); - if (s > send || !((*down & 0xc0) == 0x80)) { - Perl_warn(aTHX_ "Malformed UTF-8 character"); - break; - } + /* reverse this character */ while (down > up) { tmp = *up; *up++ = *down; @@ -3217,7 +4045,7 @@ PP(pp_reverse) *up++ = *down; *down-- = tmp; } - (void)SvPOK_only(TARG); + (void)SvPOK_only_UTF8(TARG); } SP = MARK + 1; SETTARG; @@ -3265,28 +4093,37 @@ S_mul128(pTHX_ SV *sv, U8 m) #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') #endif + PP(pp_unpack) { - djSP; + dSP; dPOPPOPssrl; - SV **oldsp = SP; + I32 start_sp_offset = SP - PL_stack_base; I32 gimme = GIMME_V; SV *sv; STRLEN llen; STRLEN rlen; register char *pat = SvPV(left, llen); +#ifdef PACKED_IS_OCTETS + /* Packed side is assumed to be octets - so force downgrade if it + has been UTF-8 encoded by accident + */ + register char *s = SvPVbyte(right, rlen); +#else register char *s = SvPV(right, rlen); +#endif char *strend = s + rlen; char *strbeg = s; register char *patend = pat + llen; I32 datumtype; register I32 len; register I32 bits; + register char *str; /* These must not be in registers: */ - I16 ashort; + short ashort; int aint; - I32 along; + long along; #ifdef HAS_QUAD Quad_t aquad; #endif @@ -3303,6 +4140,7 @@ PP(pp_unpack) register U32 culong; NV cdouble; int commas = 0; + int star; #ifdef PERL_NATINT_PACK int natint; /* native integer */ int unatint; /* unsigned native integer */ @@ -3327,6 +4165,11 @@ PP(pp_unpack) #endif if (isSPACE(datumtype)) continue; + if (datumtype == '#') { + while (pat < patend && *pat != '\n') + pat++; + continue; + } if (*pat == '!') { char *natstr = "sSiIlL"; @@ -3337,27 +4180,34 @@ PP(pp_unpack) pat++; } else - Perl_croak(aTHX_ "'!' allowed only after types %s", natstr); + DIE(aTHX_ "'!' allowed only after types %s", natstr); } + star = 0; if (pat >= patend) len = 1; else if (*pat == '*') { len = strend - strbeg; /* long enough */ pat++; + star = 1; } else if (isDIGIT(*pat)) { len = *pat++ - '0'; - while (isDIGIT(*pat)) + while (isDIGIT(*pat)) { len = (len * 10) + (*pat++ - '0'); + if (len < 0) + DIE(aTHX_ "Repeat count in unpack overflows"); + } } else len = (datumtype != '@'); + redo_switch: switch(datumtype) { default: - Perl_croak(aTHX_ "Invalid type in unpack: '%c'", (int)datumtype); + DIE(aTHX_ "Invalid type in unpack: '%c'", (int)datumtype); case ',': /* grandfather in commas but with a warning */ - if (commas++ == 0 && ckWARN(WARN_UNSAFE)) - Perl_warner(aTHX_ WARN_UNSAFE, "Invalid type in unpack: '%c'", (int)datumtype); + if (commas++ == 0 && ckWARN(WARN_UNPACK)) + Perl_warner(aTHX_ WARN_UNPACK, + "Invalid type in unpack: '%c'", (int)datumtype); break; case '%': if (len == 1 && pat[-1] != '1') @@ -3383,6 +4233,17 @@ PP(pp_unpack) DIE(aTHX_ "x outside of string"); s += len; break; + case '/': + if (start_sp_offset >= SP - PL_stack_base) + DIE(aTHX_ "/ must follow a numeric type"); + datumtype = *pat++; + if (*pat == '*') + pat++; /* ignore '*' for compatibility with pack */ + if (isDIGIT(*pat)) + DIE(aTHX_ "/ cannot take a count" ); + len = POPi; + star = 0; + goto redo_switch; case 'A': case 'Z': case 'a': @@ -3413,7 +4274,7 @@ PP(pp_unpack) break; case 'B': case 'b': - if (pat[-1] == '*' || len > (strend - s) * 8) + if (star || len > (strend - s) * 8) len = (strend - s) * 8; if (checksum) { if (!PL_bitcount) { @@ -3453,8 +4314,7 @@ PP(pp_unpack) sv = NEWSV(35, len + 1); SvCUR_set(sv, len); SvPOK_on(sv); - aptr = pat; /* borrow register */ - pat = SvPVX(sv); + str = SvPVX(sv); if (datumtype == 'b') { aint = len; for (len = 0; len < aint; len++) { @@ -3462,7 +4322,7 @@ PP(pp_unpack) bits >>= 1; else bits = *s++; - *pat++ = '0' + (bits & 1); + *str++ = '0' + (bits & 1); } } else { @@ -3472,22 +4332,20 @@ PP(pp_unpack) bits <<= 1; else bits = *s++; - *pat++ = '0' + ((bits & 128) != 0); + *str++ = '0' + ((bits & 128) != 0); } } - *pat = '\0'; - pat = aptr; /* unborrow register */ + *str = '\0'; XPUSHs(sv_2mortal(sv)); break; case 'H': case 'h': - if (pat[-1] == '*' || len > (strend - s) * 2) + if (star || len > (strend - s) * 2) len = (strend - s) * 2; sv = NEWSV(35, len + 1); SvCUR_set(sv, len); SvPOK_on(sv); - aptr = pat; /* borrow register */ - pat = SvPVX(sv); + str = SvPVX(sv); if (datumtype == 'h') { aint = len; for (len = 0; len < aint; len++) { @@ -3495,7 +4353,7 @@ PP(pp_unpack) bits >>= 4; else bits = *s++; - *pat++ = PL_hexdigit[bits & 15]; + *str++ = PL_hexdigit[bits & 15]; } } else { @@ -3505,11 +4363,10 @@ PP(pp_unpack) bits <<= 4; else bits = *s++; - *pat++ = PL_hexdigit[(bits >> 4) & 15]; + *str++ = PL_hexdigit[(bits >> 4) & 15]; } } - *pat = '\0'; - pat = aptr; /* unborrow register */ + *str = '\0'; XPUSHs(sv_2mortal(sv)); break; case 'c': @@ -3562,7 +4419,9 @@ PP(pp_unpack) len = strend - s; if (checksum) { while (len-- > 0 && s < strend) { - auint = utf8_to_uv((U8*)s, &along); + STRLEN alen; + auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); + along = alen; s += along; if (checksum > 32) cdouble += (NV)auint; @@ -3574,7 +4433,9 @@ PP(pp_unpack) EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0 && s < strend) { - auint = utf8_to_uv((U8*)s, &along); + STRLEN alen; + auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); + along = alen; s += along; sv = NEWSV(37, 0); sv_setuv(sv, (UV)auint); @@ -3593,6 +4454,7 @@ PP(pp_unpack) if (checksum) { #if SHORTSIZE != SIZE16 if (natint) { + short ashort; while (len-- > 0) { COPYNN(s, &ashort, sizeof(short)); s += sizeof(short); @@ -3619,6 +4481,7 @@ PP(pp_unpack) EXTEND_MORTAL(len); #if SHORTSIZE != SIZE16 if (natint) { + short ashort; while (len-- > 0) { COPYNN(s, &ashort, sizeof(short)); s += sizeof(short); @@ -3658,6 +4521,7 @@ PP(pp_unpack) if (checksum) { #if SHORTSIZE != SIZE16 if (unatint) { + unsigned short aushort; while (len-- > 0) { COPYNN(s, &aushort, sizeof(unsigned short)); s += sizeof(unsigned short); @@ -3687,6 +4551,7 @@ PP(pp_unpack) EXTEND_MORTAL(len); #if SHORTSIZE != SIZE16 if (unatint) { + unsigned short aushort; while (len-- > 0) { COPYNN(s, &aushort, sizeof(unsigned short)); s += sizeof(unsigned short); @@ -3824,6 +4689,9 @@ PP(pp_unpack) #endif { while (len-- > 0) { +#if LONGSIZE > SIZE32 && INTSIZE == SIZE32 + I32 along; +#endif COPY32(s, &along); #if LONGSIZE > SIZE32 if (along > 2147483647) @@ -3854,6 +4722,9 @@ PP(pp_unpack) #endif { while (len-- > 0) { +#if LONGSIZE > SIZE32 && INTSIZE == SIZE32 + I32 along; +#endif COPY32(s, &along); #if LONGSIZE > SIZE32 if (along > 2147483647) @@ -3881,6 +4752,7 @@ PP(pp_unpack) if (checksum) { #if LONGSIZE != SIZE32 if (unatint) { + unsigned long aulong; while (len-- > 0) { COPYNN(s, &aulong, sizeof(unsigned long)); s += sizeof(unsigned long); @@ -3916,6 +4788,7 @@ PP(pp_unpack) EXTEND_MORTAL(len); #if LONGSIZE != SIZE32 if (unatint) { + unsigned long aulong; while (len-- > 0) { COPYNN(s, &aulong, sizeof(unsigned long)); s += sizeof(unsigned long); @@ -3973,7 +4846,8 @@ PP(pp_unpack) while ((len > 0) && (s < strend)) { auv = (auv << 7) | (*s & 0x7f); - if (!(*s++ & 0x80)) { + /* UTF8_IS_XXXXX not right here - using constant 0x80 */ + if ((U8)(*s++) < 0x80) { bytes = 0; sv = NEWSV(40, 0); sv_setuv(sv, auv); @@ -3985,7 +4859,7 @@ PP(pp_unpack) char *t; STRLEN n_a; - sv = Perl_newSVpvf(aTHX_ "%.*Vu", (int)TYPE_DIGITS(UV), auv); + sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv); while (s < strend) { sv = mul128(sv, *s & 0x7f); if (!(*s++ & 0x80)) { @@ -4003,7 +4877,7 @@ PP(pp_unpack) } } if ((s >= strend) && bytes) - Perl_croak(aTHX_ "Unterminated compressed integer"); + DIE(aTHX_ "Unterminated compressed integer"); } break; case 'P': @@ -4120,9 +4994,9 @@ PP(pp_unpack) */ if (PL_uudmap['M'] == 0) { int i; - + for (i = 0; i < sizeof(PL_uuemap); i += 1) - PL_uudmap[PL_uuemap[i]] = i; + PL_uudmap[(U8)PL_uuemap[i]] = i; /* * Because ' ' and '`' map to the same value, * we need to decode them both the same. @@ -4139,22 +5013,22 @@ PP(pp_unpack) char hunk[4]; hunk[3] = '\0'; - len = PL_uudmap[*s++] & 077; + len = PL_uudmap[*(U8*)s++] & 077; while (len > 0) { if (s < strend && ISUUCHAR(*s)) - a = PL_uudmap[*s++] & 077; + a = PL_uudmap[*(U8*)s++] & 077; else a = 0; if (s < strend && ISUUCHAR(*s)) - b = PL_uudmap[*s++] & 077; + b = PL_uudmap[*(U8*)s++] & 077; else b = 0; if (s < strend && ISUUCHAR(*s)) - c = PL_uudmap[*s++] & 077; + c = PL_uudmap[*(U8*)s++] & 077; else c = 0; if (s < strend && ISUUCHAR(*s)) - d = PL_uudmap[*s++] & 077; + d = PL_uudmap[*(U8*)s++] & 077; else d = 0; hunk[0] = (a << 2) | (b >> 4); @@ -4205,7 +5079,7 @@ PP(pp_unpack) checksum = 0; } } - if (SP == oldsp && gimme == G_SCALAR) + if (SP - PL_stack_base == start_sp_offset && gimme == G_SCALAR) PUSHs(&PL_sv_undef); RETURN; } @@ -4317,11 +5191,12 @@ S_div128(pTHX_ SV *pnum, bool *done) PP(pp_pack) { - djSP; dMARK; dORIGMARK; dTARGET; + dSP; dMARK; dORIGMARK; dTARGET; register SV *cat = TARG; register I32 items; STRLEN fromlen; register char *pat = SvPVx(*++MARK, fromlen); + char *patcopy; register char *patend = pat + fromlen; register I32 len; I32 datumtype; @@ -4352,14 +5227,27 @@ PP(pp_pack) items = SP - MARK; MARK++; sv_setpvn(cat, "", 0); + patcopy = pat; while (pat < patend) { -#define NEXTFROM (items-- > 0 ? *MARK++ : &PL_sv_no) + SV *lengthcode = Nullsv; +#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *MARK++ : &PL_sv_no) datumtype = *pat++ & 0xFF; #ifdef PERL_NATINT_PACK natint = 0; #endif - if (isSPACE(datumtype)) + if (isSPACE(datumtype)) { + patcopy++; continue; + } +#ifndef PACKED_IS_OCTETS + if (datumtype == 'U' && pat == patcopy+1) + SvUTF8_on(cat); +#endif + if (datumtype == '#') { + while (pat < patend && *pat != '\n') + pat++; + continue; + } if (*pat == '!') { char *natstr = "sSiIlL"; @@ -4370,7 +5258,7 @@ PP(pp_pack) pat++; } else - Perl_croak(aTHX_ "'!' allowed only after types %s", natstr); + DIE(aTHX_ "'!' allowed only after types %s", natstr); } if (*pat == '*') { len = strchr("@Xxu", datumtype) ? 0 : items; @@ -4378,17 +5266,29 @@ PP(pp_pack) } else if (isDIGIT(*pat)) { len = *pat++ - '0'; - while (isDIGIT(*pat)) + while (isDIGIT(*pat)) { len = (len * 10) + (*pat++ - '0'); + if (len < 0) + DIE(aTHX_ "Repeat count in pack overflows"); + } } else len = 1; + if (*pat == '/') { + ++pat; + if ((*pat != 'a' && *pat != 'A' && *pat != 'Z') || pat[1] != '*') + DIE(aTHX_ "/ must be followed by a*, A* or Z*"); + lengthcode = sv_2mortal(newSViv(sv_len(items > 0 + ? *MARK : &PL_sv_no) + + (*pat == 'Z' ? 1 : 0))); + } switch(datumtype) { default: - Perl_croak(aTHX_ "Invalid type in pack: '%c'", (int)datumtype); + DIE(aTHX_ "Invalid type in pack: '%c'", (int)datumtype); case ',': /* grandfather in commas but with a warning */ - if (commas++ == 0 && ckWARN(WARN_UNSAFE)) - Perl_warner(aTHX_ WARN_UNSAFE, "Invalid type in pack: '%c'", (int)datumtype); + if (commas++ == 0 && ckWARN(WARN_PACK)) + Perl_warner(aTHX_ WARN_PACK, + "Invalid type in pack: '%c'", (int)datumtype); break; case '%': DIE(aTHX_ "%% may only be used in unpack"); @@ -4420,10 +5320,16 @@ PP(pp_pack) case 'a': fromstr = NEXTFROM; aptr = SvPV(fromstr, fromlen); - if (pat[-1] == '*') + if (pat[-1] == '*') { len = fromlen; - if (fromlen > len) + if (datumtype == 'Z') + ++len; + } + if (fromlen >= len) { sv_catpvn(cat, aptr, len); + if (datumtype == 'Z') + *(SvEND(cat)-1) = '\0'; + } else { sv_catpvn(cat, aptr, fromlen); len -= fromlen; @@ -4446,15 +5352,14 @@ PP(pp_pack) case 'B': case 'b': { - char *savepat = pat; + register char *str; I32 saveitems; fromstr = NEXTFROM; saveitems = items; - aptr = SvPV(fromstr, fromlen); + str = SvPV(fromstr, fromlen); if (pat[-1] == '*') len = fromlen; - pat = aptr; aint = SvCUR(cat); SvCUR(cat) += (len+7)/8; SvGROW(cat, SvCUR(cat) + 1); @@ -4465,7 +5370,7 @@ PP(pp_pack) items = 0; if (datumtype == 'B') { for (len = 0; len++ < aint;) { - items |= *pat++ & 1; + items |= *str++ & 1; if (len & 7) items <<= 1; else { @@ -4476,7 +5381,7 @@ PP(pp_pack) } else { for (len = 0; len++ < aint;) { - if (*pat++ & 1) + if (*str++ & 1) items |= 128; if (len & 7) items >>= 1; @@ -4493,26 +5398,24 @@ PP(pp_pack) items >>= 7 - (aint & 7); *aptr++ = items & 0xff; } - pat = SvPVX(cat) + SvCUR(cat); - while (aptr <= pat) + str = SvPVX(cat) + SvCUR(cat); + while (aptr <= str) *aptr++ = '\0'; - pat = savepat; items = saveitems; } break; case 'H': case 'h': { - char *savepat = pat; + register char *str; I32 saveitems; fromstr = NEXTFROM; saveitems = items; - aptr = SvPV(fromstr, fromlen); + str = SvPV(fromstr, fromlen); if (pat[-1] == '*') len = fromlen; - pat = aptr; aint = SvCUR(cat); SvCUR(cat) += (len+1)/2; SvGROW(cat, SvCUR(cat) + 1); @@ -4523,10 +5426,10 @@ PP(pp_pack) items = 0; if (datumtype == 'H') { for (len = 0; len++ < aint;) { - if (isALPHA(*pat)) - items |= ((*pat++ & 15) + 9) & 15; + if (isALPHA(*str)) + items |= ((*str++ & 15) + 9) & 15; else - items |= *pat++ & 15; + items |= *str++ & 15; if (len & 1) items <<= 4; else { @@ -4537,10 +5440,10 @@ PP(pp_pack) } else { for (len = 0; len++ < aint;) { - if (isALPHA(*pat)) - items |= (((*pat++ & 15) + 9) & 15) << 4; + if (isALPHA(*str)) + items |= (((*str++ & 15) + 9) & 15) << 4; else - items |= (*pat++ & 15) << 4; + items |= (*str++ & 15) << 4; if (len & 1) items >>= 4; else { @@ -4551,11 +5454,10 @@ PP(pp_pack) } if (aint & 1) *aptr++ = items & 0xff; - pat = SvPVX(cat) + SvCUR(cat); - while (aptr <= pat) + str = SvPVX(cat) + SvCUR(cat); + while (aptr <= str) *aptr++ = '\0'; - pat = savepat; items = saveitems; } break; @@ -4572,8 +5474,8 @@ PP(pp_pack) while (len-- > 0) { fromstr = NEXTFROM; auint = SvUV(fromstr); - SvGROW(cat, SvCUR(cat) + 10); - SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint) + SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1); + SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint) - SvPVX(cat)); } *SvEND(cat) = '\0'; @@ -4642,6 +5544,8 @@ PP(pp_pack) case 's': #if SHORTSIZE != SIZE16 if (natint) { + short ashort; + while (len-- > 0) { fromstr = NEXTFROM; ashort = SvIV(fromstr); @@ -4671,17 +5575,17 @@ PP(pp_pack) adouble = Perl_floor(SvNV(fromstr)); if (adouble < 0) - Perl_croak(aTHX_ "Cannot compress negative numbers"); + DIE(aTHX_ "Cannot compress negative numbers"); if ( -#ifdef BW_BITS - adouble <= BW_MASK +#if UVSIZE > 4 && UVSIZE >= NVSIZE + adouble <= 0xffffffff #else -#ifdef CXUX_BROKEN_CONSTANT_CONVERT +# ifdef CXUX_BROKEN_CONSTANT_CONVERT adouble <= UV_MAX_cxux -#else +# else adouble <= UV_MAX -#endif +# endif #endif ) { @@ -4705,7 +5609,7 @@ PP(pp_pack) /* Copy string and check for compliance */ from = SvPV(fromstr, len); if ((norm = is_an_int(from, len)) == NULL) - Perl_croak(aTHX_ "can compress only unsigned integer"); + DIE(aTHX_ "can compress only unsigned integer"); New('w', result, len, char); in = result + len; @@ -4724,15 +5628,16 @@ PP(pp_pack) do { double next = floor(adouble / 128); *--in = (unsigned char)(adouble - (next * 128)) | 0x80; - if (--in < buf) /* this cannot happen ;-) */ - Perl_croak(aTHX_ "Cannot compress integer"); + if (in <= buf) /* this cannot happen ;-) */ + DIE(aTHX_ "Cannot compress integer"); + in--; adouble = next; } while (adouble > 0); buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ sv_catpvn(cat, in, (buf + sizeof(buf)) - in); } else - Perl_croak(aTHX_ "Cannot compress non integer"); + DIE(aTHX_ "Cannot compress non integer"); } break; case 'i': @@ -4765,6 +5670,8 @@ PP(pp_pack) case 'L': #if LONGSIZE != SIZE32 if (natint) { + unsigned long aulong; + while (len-- > 0) { fromstr = NEXTFROM; aulong = SvUV(fromstr); @@ -4784,6 +5691,8 @@ PP(pp_pack) case 'l': #if LONGSIZE != SIZE32 if (natint) { + long along; + while (len-- > 0) { fromstr = NEXTFROM; along = SvIV(fromstr); @@ -4804,7 +5713,7 @@ PP(pp_pack) case 'Q': while (len-- > 0) { fromstr = NEXTFROM; - auquad = (Uquad_t)SvIV(fromstr); + auquad = (Uquad_t)SvUV(fromstr); sv_catpvn(cat, (char*)&auquad, sizeof(Uquad_t)); } break; @@ -4815,7 +5724,7 @@ PP(pp_pack) sv_catpvn(cat, (char*)&aquad, sizeof(Quad_t)); } break; -#endif /* HAS_QUAD */ +#endif case 'P': len = 1; /* assume SV is correct length */ /* FALL THROUGH */ @@ -4831,9 +5740,13 @@ PP(pp_pack) * of pack() (and all copies of the result) are * gone. */ - if (ckWARN(WARN_UNSAFE) && (SvTEMP(fromstr) || SvPADTMP(fromstr))) - Perl_warner(aTHX_ WARN_UNSAFE, + if (ckWARN(WARN_PACK) && (SvTEMP(fromstr) + || (SvPADTMP(fromstr) + && !SvREADONLY(fromstr)))) + { + Perl_warner(aTHX_ WARN_PACK, "Attempt to pack pointer to temporary value"); + } if (SvPOK(fromstr) || SvNIOK(fromstr)) aptr = SvPV(fromstr,n_a); else @@ -4874,19 +5787,21 @@ PP(pp_pack) PP(pp_split) { - djSP; dTARG; + dSP; dTARG; AV *ary; - register I32 limit = POPi; /* note, negative is forever */ + register IV limit = POPi; /* note, negative is forever */ SV *sv = POPs; STRLEN len; register char *s = SvPV(sv, len); + bool do_utf8 = DO_UTF8(sv); char *strend = s + len; register PMOP *pm; register REGEXP *rx; register SV *dstr; register char *m; I32 iters = 0; - I32 maxiters = (strend - s) + 10; + STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (strend - s); + I32 maxiters = slen + 10; I32 i; char *orig; I32 origlimit = limit; @@ -4904,14 +5819,19 @@ PP(pp_split) pm = (PMOP*)POPs; #endif if (!pm || !s) - DIE(aTHX_ "panic: do_split"); + DIE(aTHX_ "panic: pp_split"); rx = pm->op_pmregexp; TAINT_IF((pm->op_pmflags & PMf_LOCALE) && (pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE))); - if (pm->op_pmreplroot) + if (pm->op_pmreplroot) { +#ifdef USE_ITHREADS + ary = GvAVn((GV*)PL_curpad[(PADOFFSET)pm->op_pmreplroot]); +#else ary = GvAVn((GV*)pm->op_pmreplroot); +#endif + } else if (gimme != G_ARRAY) #ifdef USE_THREADS ary = (AV*)PL_curpad[0]; @@ -4926,13 +5846,14 @@ PP(pp_split) av_extend(ary,0); av_clear(ary); SPAGAIN; - if (mg = SvTIED_mg((SV*)ary, 'P')) { + if ((mg = SvTIED_mg((SV*)ary, 'P'))) { PUSHMARK(SP); XPUSHs(SvTIED_obj((SV*)ary, mg)); } else { if (!AvREAL(ary)) { AvREAL_on(ary); + AvREIFY_off(ary); for (i = AvFILLp(ary); i >= 0; i--) AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */ } @@ -4974,6 +5895,8 @@ PP(pp_split) sv_setpvn(dstr, s, m-s); if (make_mortal) sv_2mortal(dstr); + if (do_utf8) + (void)SvUTF8_on(dstr); XPUSHs(dstr); s = m + 1; @@ -4994,20 +5917,23 @@ PP(pp_split) sv_setpvn(dstr, s, m-s); if (make_mortal) sv_2mortal(dstr); + if (do_utf8) + (void)SvUTF8_on(dstr); XPUSHs(dstr); s = m; } } - else if ((rx->reganch & RE_USE_INTUIT) && !rx->nparens + else if (do_utf8 == ((rx->reganch & ROPT_UTF8) != 0) && + (rx->reganch & RE_USE_INTUIT) && !rx->nparens && (rx->reganch & ROPT_CHECK_ALL) && !(rx->reganch & ROPT_ANCH)) { int tail = (rx->reganch & RE_INTUIT_TAIL); SV *csv = CALLREG_INTUIT_STRING(aTHX_ rx); - char c; - i = rx->minlen; - if (i == 1 && !tail) { - c = *SvPV(csv,i); + len = rx->minlen; + if (len == 1 && !(rx->reganch & ROPT_UTF8) && !tail) { + STRLEN n_a; + char c = *SvPV(csv, n_a); while (--limit) { /*SUPPRESS 530*/ for (m = s; m < strend && *m != c; m++) ; @@ -5017,8 +5943,15 @@ PP(pp_split) sv_setpvn(dstr, s, m-s); if (make_mortal) sv_2mortal(dstr); + if (do_utf8) + (void)SvUTF8_on(dstr); XPUSHs(dstr); - s = m + 1; + /* The rx->minlen is in characters but we want to step + * s ahead by bytes. */ + if (do_utf8) + s = (char*)utf8_hop((U8*)m, len); + else + s = m + len; /* Fake \n at the end */ } } else { @@ -5032,15 +5965,22 @@ PP(pp_split) sv_setpvn(dstr, s, m-s); if (make_mortal) sv_2mortal(dstr); + if (do_utf8) + (void)SvUTF8_on(dstr); XPUSHs(dstr); - s = m + i; /* Fake \n at the end */ + /* The rx->minlen is in characters but we want to step + * s ahead by bytes. */ + if (do_utf8) + s = (char*)utf8_hop((U8*)m, len); + else + s = m + len; /* Fake \n at the end */ } } } else { - maxiters += (strend - s) * rx->nparens; + maxiters += slen * rx->nparens; while (s < strend && --limit -/* && (!rx->check_substr +/* && (!rx->check_substr || ((s = CALLREG_INTUIT_START(aTHX_ rx, sv, s, strend, 0, NULL)))) */ && CALLREGEXEC(aTHX_ rx, s, strend, orig, @@ -5059,6 +5999,8 @@ PP(pp_split) sv_setpvn(dstr, s, m-s); if (make_mortal) sv_2mortal(dstr); + if (do_utf8) + (void)SvUTF8_on(dstr); XPUSHs(dstr); if (rx->nparens) { for (i = 1; i <= rx->nparens; i++) { @@ -5072,6 +6014,8 @@ PP(pp_split) dstr = NEWSV(33, 0); if (make_mortal) sv_2mortal(dstr); + if (do_utf8) + (void)SvUTF8_on(dstr); XPUSHs(dstr); } } @@ -5086,10 +6030,13 @@ PP(pp_split) /* keep field after final delim? */ if (s < strend || (iters && origlimit)) { - dstr = NEWSV(34, strend-s); - sv_setpvn(dstr, s, strend-s); + STRLEN l = strend - s; + dstr = NEWSV(34, l); + sv_setpvn(dstr, s, l); if (make_mortal) sv_2mortal(dstr); + if (do_utf8) + (void)SvUTF8_on(dstr); XPUSHs(dstr); iters++; } @@ -5146,7 +6093,6 @@ PP(pp_split) void Perl_unlock_condpair(pTHX_ void *svv) { - dTHR; MAGIC *mg = mg_find((SV*)svv, 'm'); if (!mg) @@ -5156,36 +6102,19 @@ Perl_unlock_condpair(pTHX_ void *svv) Perl_croak(aTHX_ "panic: unlock_condpair unlocking mutex that we don't own"); MgOWNER(mg) = 0; COND_SIGNAL(MgOWNERCONDP(mg)); - DEBUG_S(PerlIO_printf(PerlIO_stderr(), "0x%lx: unlock 0x%lx\n", - (unsigned long)thr, (unsigned long)svv);) + DEBUG_S(PerlIO_printf(Perl_debug_log, "0x%"UVxf": unlock 0x%"UVxf"\n", + PTR2UV(thr), PTR2UV(svv));) MUTEX_UNLOCK(MgMUTEXP(mg)); } #endif /* USE_THREADS */ PP(pp_lock) { - djSP; + dSP; dTOPss; SV *retsv = sv; #ifdef USE_THREADS - MAGIC *mg; - - if (SvROK(sv)) - sv = SvRV(sv); - - mg = condpair_magic(sv); - MUTEX_LOCK(MgMUTEXP(mg)); - if (MgOWNER(mg) == thr) - MUTEX_UNLOCK(MgMUTEXP(mg)); - else { - while (MgOWNER(mg)) - COND_WAIT(MgOWNERCONDP(mg), MgMUTEXP(mg)); - MgOWNER(mg) = thr; - DEBUG_S(PerlIO_printf(PerlIO_stderr(), "0x%lx: pp_lock lock 0x%lx\n", - (unsigned long)thr, (unsigned long)sv);) - MUTEX_UNLOCK(MgMUTEXP(mg)); - save_destructor(Perl_unlock_condpair, sv); - } + sv_lock(sv); #endif /* USE_THREADS */ if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV || SvTYPE(retsv) == SVt_PVCV) { @@ -5197,8 +6126,8 @@ PP(pp_lock) PP(pp_threadsv) { - djSP; #ifdef USE_THREADS + dSP; EXTEND(SP, 1); if (PL_op->op_private & OPpLVAL_INTRO) PUSHs(*save_threadsv(PL_op->op_targ));