X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pp.c;h=988619f34fccd9b670131f1925b474d6f28d3cf3;hb=2aa761807d7eac561ade382b2c66eebffcdf056f;hp=0e11daa9473ec08cfd7dfb567d13e77c267436c5;hpb=a15299417de39f35d2ce17e6891b4f961265fb6a;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pp.c b/pp.c index 0e11daa..988619f 100644 --- a/pp.c +++ b/pp.c @@ -1,6 +1,6 @@ /* pp.c * - * Copyright (c) 1991-1997, Larry Wall + * Copyright (c) 1991-2000, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -13,6 +13,7 @@ */ #include "EXTERN.h" +#define PERL_IN_PP_C #include "perl.h" /* @@ -27,37 +28,6 @@ static double UV_MAX_cxux = ((double)UV_MAX); #endif /* - * Types used in bitwise operations. - * - * Normally we'd just use IV and UV. However, some hardware and - * software combinations (e.g. Alpha and current OSF/1) don't have a - * floating-point type to use for NV that has adequate bits to fully - * hold an IV/UV. (In other words, sizeof(long) == sizeof(double).) - * - * It just so happens that "int" is the right size almost everywhere. - */ -typedef int IBW; -typedef unsigned UBW; - -/* - * Mask used after bitwise operations. - * - * There is at least one realm (Cray word machines) that doesn't - * have an integral type (except char) small enough to be represented - * in a double without loss; that is, it has no 32-bit type. - */ -#if LONGSIZE > 4 && defined(_CRAY) && !defined(_CRAYMPP) -# define BW_BITS 32 -# define BW_MASK ((1 << BW_BITS) - 1) -# define BW_SIGN (1 << (BW_BITS - 1)) -# define BWi(i) (((i) & BW_SIGN) ? ((i) | ~BW_MASK) : ((i) & BW_MASK)) -# define BWu(u) ((u) & BW_MASK) -#else -# define BWi(i) (i) -# define BWu(u) (u) -#endif - -/* * Offset for integer pack/unpack. * * On architectures where I16 and I32 aren't really 16 and 32 bits, @@ -78,7 +48,14 @@ typedef unsigned UBW; #define SIZE16 2 #define SIZE32 4 -#if BYTEORDER > 0xFFFF && defined(_CRAY) && !defined(_CRAYMPP) +/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack(). + --jhi Feb 1999 */ + +#if SHORTSIZE != SIZE16 || LONGSIZE != SIZE32 +# define PERL_NATINT_PACK +#endif + +#if LONGSIZE > 4 && defined(_CRAY) # if BYTEORDER == 0x12345678 # define OFF16(p) (char*)(p) # define OFF32(p) (char*)(p) @@ -92,23 +69,17 @@ typedef unsigned UBW; # endif # define COPY16(s,p) (*(p) = 0, Copy(s, OFF16(p), SIZE16, char)) # define COPY32(s,p) (*(p) = 0, Copy(s, OFF32(p), SIZE32, char)) +# define COPYNN(s,p,n) (*(p) = 0, Copy(s, (char *)(p), n, char)) # define CAT16(sv,p) sv_catpvn(sv, OFF16(p), SIZE16) # define CAT32(sv,p) sv_catpvn(sv, OFF32(p), SIZE32) #else # define COPY16(s,p) Copy(s, p, SIZE16, char) # define COPY32(s,p) Copy(s, p, SIZE32, char) +# define COPYNN(s,p,n) Copy(s, (char *)(p), n, char) # define CAT16(sv,p) sv_catpvn(sv, (char*)(p), SIZE16) # define CAT32(sv,p) sv_catpvn(sv, (char*)(p), SIZE32) #endif -#ifndef PERL_OBJECT -static void doencodes _((SV* sv, char* s, I32 len)); -static SV* refto _((SV* sv)); -static U32 seed _((void)); -#endif - -static bool srand_called = FALSE; - /* variations on pp_null */ #ifdef I_UNISTD @@ -184,12 +155,12 @@ PP(pp_padhv) RETURN; gimme = GIMME_V; if (gimme == G_ARRAY) { - RETURNOP(do_kv(ARGS)); + RETURNOP(do_kv()); } else if (gimme == G_SCALAR) { SV* sv = sv_newmortal(); if (HvFILL((HV*)TARG)) - sv_setpvf(sv, "%ld/%ld", + Perl_sv_setpvf(aTHX_ sv, "%ld/%ld", (long)HvFILL((HV*)TARG), (long)HvMAX((HV*)TARG) + 1); else sv_setiv(sv, 0); @@ -200,17 +171,19 @@ PP(pp_padhv) PP(pp_padany) { - DIE("NOT IMPL LINE %d",__LINE__); + DIE(aTHX_ "NOT IMPL LINE %d",__LINE__); } /* Translations. */ PP(pp_rv2gv) { - djSP; dTOPss; + djSP; dTOPss; if (SvROK(sv)) { wasref: + tryAMAGICunDEREF(to_gv); + sv = SvRV(sv); if (SvTYPE(sv) == SVt_PVIO) { GV *gv = (GV*) sv_newmortal(); @@ -218,30 +191,68 @@ PP(pp_rv2gv) GvIOp(gv) = (IO *)sv; (void)SvREFCNT_inc(sv); sv = (SV*) gv; - } else if (SvTYPE(sv) != SVt_PVGV) - DIE("Not a GLOB reference"); + } + else if (SvTYPE(sv) != SVt_PVGV) + DIE(aTHX_ "Not a GLOB reference"); } else { if (SvTYPE(sv) != SVt_PVGV) { char *sym; + STRLEN len; if (SvGMAGICAL(sv)) { mg_get(sv); if (SvROK(sv)) goto wasref; } - if (!SvOK(sv)) { + if (!SvOK(sv) && sv != &PL_sv_undef) { + /* If this is a 'my' scalar and flag is set then vivify + * NI-S 1999/05/07 + */ + if (PL_op->op_private & OPpDEREF) { + char *name; + GV *gv; + if (cUNOP->op_targ) { + STRLEN len; + SV *namesv = PL_curpad[cUNOP->op_targ]; + name = SvPV(namesv, len); + gv = (GV*)NEWSV(0,0); + gv_init(gv, CopSTASH(PL_curcop), name, len, 0); + } + else { + name = CopSTASHPV(PL_curcop); + gv = newGVgen(name); + } + sv_upgrade(sv, SVt_RV); + SvRV(sv) = (SV*)gv; + SvROK_on(sv); + SvSETMAGIC(sv); + goto wasref; + } if (PL_op->op_flags & OPf_REF || PL_op->op_private & HINT_STRICT_REFS) - DIE(no_usym, "a symbol"); - if (PL_dowarn) - warn(warn_uninit); + DIE(aTHX_ PL_no_usym, "a symbol"); + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(); RETSETUNDEF; } - sym = SvPV(sv, PL_na); - if (PL_op->op_private & HINT_STRICT_REFS) - DIE(no_symref, sym, "a symbol"); - sv = (SV*)gv_fetchpv(sym, TRUE, SVt_PVGV); + sym = SvPV(sv,len); + if ((PL_op->op_flags & OPf_SPECIAL) && + !(PL_op->op_flags & OPf_MOD)) + { + sv = (SV*)gv_fetchpv(sym, FALSE, SVt_PVGV); + if (!sv + && (!is_gv_magical(sym,len,0) + || !(sv = (SV*)gv_fetchpv(sym, TRUE, SVt_PVGV)))) + { + RETSETUNDEF; + } + } + else { + if (PL_op->op_private & HINT_STRICT_REFS) + DIE(aTHX_ PL_no_symref, sym, "a symbol"); + sv = (SV*)gv_fetchpv(sym, TRUE, SVt_PVGV); + } } } if (PL_op->op_private & OPpLVAL_INTRO) @@ -256,17 +267,20 @@ PP(pp_rv2sv) if (SvROK(sv)) { wasref: + tryAMAGICunDEREF(to_sv); + sv = SvRV(sv); switch (SvTYPE(sv)) { case SVt_PVAV: case SVt_PVHV: case SVt_PVCV: - DIE("Not a SCALAR reference"); + DIE(aTHX_ "Not a SCALAR reference"); } } else { GV *gv = (GV*)sv; char *sym; + STRLEN len; if (SvTYPE(gv) != SVt_PVGV) { if (SvGMAGICAL(sv)) { @@ -277,15 +291,28 @@ PP(pp_rv2sv) if (!SvOK(sv)) { if (PL_op->op_flags & OPf_REF || PL_op->op_private & HINT_STRICT_REFS) - DIE(no_usym, "a SCALAR"); - if (PL_dowarn) - warn(warn_uninit); + DIE(aTHX_ PL_no_usym, "a SCALAR"); + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(); RETSETUNDEF; } - sym = SvPV(sv, PL_na); - if (PL_op->op_private & HINT_STRICT_REFS) - DIE(no_symref, sym, "a SCALAR"); - gv = (GV*)gv_fetchpv(sym, TRUE, SVt_PV); + sym = SvPV(sv, len); + if ((PL_op->op_flags & OPf_SPECIAL) && + !(PL_op->op_flags & OPf_MOD)) + { + gv = (GV*)gv_fetchpv(sym, FALSE, SVt_PV); + if (!gv + && (!is_gv_magical(sym,len,0) + || !(gv = (GV*)gv_fetchpv(sym, TRUE, SVt_PV)))) + { + RETSETUNDEF; + } + } + else { + if (PL_op->op_private & HINT_STRICT_REFS) + DIE(aTHX_ PL_no_symref, sym, "a SCALAR"); + gv = (GV*)gv_fetchpv(sym, TRUE, SVt_PV); + } } sv = GvSV(gv); } @@ -338,7 +365,10 @@ PP(pp_pos) if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { mg = mg_find(sv, 'g'); if (mg && mg->mg_len >= 0) { - PUSHi(mg->mg_len + PL_curcop->cop_arybase); + I32 i = mg->mg_len; + if (DO_UTF8(sv)) + sv_pos_b2u(sv, &i); + PUSHi(i + PL_curcop->cop_arybase); RETURN; } } @@ -358,6 +388,8 @@ PP(pp_rv2cv) if (cv) { if (CvCLONE(cv)) cv = (CV*)sv_2mortal((SV*)cv_clone(cv)); + if ((PL_op->op_private & OPpLVAL_INTRO) && !CvLVALUE(cv)) + DIE(aTHX_ "Can't modify non-lvalue subroutine call"); } else cv = (CV*)&PL_sv_undef; @@ -387,18 +419,22 @@ PP(pp_prototype) char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */ while (i < MAXO) { /* The slow way. */ - if (strEQ(s + 6, op_name[i]) || strEQ(s + 6, op_desc[i])) + if (strEQ(s + 6, PL_op_name[i]) + || strEQ(s + 6, PL_op_desc[i])) + { goto found; + } i++; } goto nonesuch; /* Should not happen... */ found: - oa = opargs[i] >> OASHIFT; + oa = PL_opargs[i] >> OASHIFT; while (oa) { if (oa & OA_OPTIONAL) { seen_question = 1; str[n++] = ';'; - } else if (seen_question) + } + else if (n && str[0] == ';' && seen_question) goto set; /* XXXX system, exec */ if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF && (oa & (OA_OPTIONAL - 1)) <= OA_HVREF) { @@ -409,18 +445,19 @@ PP(pp_prototype) oa = oa >> 4; } str[n++] = '\0'; - ret = sv_2mortal(newSVpv(str, n - 1)); - } else if (code) /* Non-Overridable */ + ret = sv_2mortal(newSVpvn(str, n - 1)); + } + else if (code) /* Non-Overridable */ goto set; else { /* None such */ nonesuch: - croak("Cannot find an opnumber for \"%s\"", s+6); + DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6); } } } cv = sv_2cv(TOPs, &stash, &gv, FALSE); if (cv && SvPOK(cv)) - ret = sv_2mortal(newSVpv(SvPVX(cv), SvCUR(cv))); + ret = sv_2mortal(newSVpvn(SvPVX(cv), SvCUR(cv))); set: SETs(ret); RETURN; @@ -463,7 +500,7 @@ PP(pp_refgen) } STATIC SV* -refto(SV *sv) +S_refto(pTHX_ SV *sv) { SV* rv; @@ -472,6 +509,14 @@ refto(SV *sv) vivify_defelem(sv); if (!(sv = LvTARG(sv))) sv = &PL_sv_undef; + else + (void)SvREFCNT_inc(sv); + } + else if (SvTYPE(sv) == SVt_PVAV) { + if (!AvREAL((AV*)sv) && AvREIFY((AV*)sv)) + av_reify((AV*)sv); + SvTEMP_off(sv); + (void)SvREFCNT_inc(sv); } else if (SvPADTMP(sv)) sv = newSVsv(sv); @@ -512,13 +557,14 @@ PP(pp_bless) HV *stash; if (MAXARG == 1) - stash = PL_curcop->cop_stash; + stash = CopSTASH(PL_curcop); else { SV *ssv = POPs; STRLEN len; char *ptr = SvPV(ssv,len); - if (PL_dowarn && len == 0) - warn("Explicit blessing to '' (assuming package main)"); + if (ckWARN(WARN_MISC) && len == 0) + Perl_warner(aTHX_ WARN_MISC, + "Explicit blessing to '' (assuming package main)"); stash = gv_stashpvn(ptr, len, TRUE); } @@ -533,9 +579,10 @@ PP(pp_gelem) SV *tmpRef; char *elem; djSP; - + STRLEN n_a; + sv = POPs; - elem = SvPV(sv, PL_na); + elem = SvPV(sv, n_a); gv = (GV*)POPs; tmpRef = Nullsv; sv = Nullsv; @@ -552,6 +599,9 @@ PP(pp_gelem) case 'F': if (strEQ(elem, "FILEHANDLE")) /* XXX deprecate in 5.005 */ tmpRef = (SV*)GvIOp(gv); + else + if (strEQ(elem, "FORMAT")) + tmpRef = (SV*)GvFORM(gv); break; case 'G': if (strEQ(elem, "GLOB")) @@ -567,7 +617,7 @@ PP(pp_gelem) break; case 'N': if (strEQ(elem, "NAME")) - sv = newSVpv(GvNAME(gv), GvNAMELEN(gv)); + sv = newSVpvn(GvNAME(gv), GvNAMELEN(gv)); break; case 'P': if (strEQ(elem, "PACKAGE")) @@ -593,7 +643,6 @@ PP(pp_gelem) PP(pp_study) { djSP; dPOPss; - register UNOP *unop = cUNOP; register unsigned char *s; register I32 pos; register I32 ch; @@ -633,7 +682,7 @@ PP(pp_study) snext = PL_screamnext; if (!sfirst || !snext) - DIE("do_study: out of memory"); + DIE(aTHX_ "do_study: out of memory"); for (ch = 256; ch; --ch) *sfirst++ = -1; @@ -665,7 +714,7 @@ PP(pp_trans) EXTEND(SP,1); } TARG = sv_newmortal(); - PUSHi(do_trans(sv, PL_op)); + PUSHi(do_trans(sv)); RETURN; } @@ -716,11 +765,11 @@ PP(pp_defined) RETPUSHNO; switch (SvTYPE(sv)) { case SVt_PVAV: - if (AvMAX(sv) >= 0 || SvGMAGICAL(sv)) + if (AvMAX(sv) >= 0 || SvGMAGICAL(sv) || (SvRMAGICAL(sv) && mg_find(sv,'P'))) RETPUSHYES; break; case SVt_PVHV: - if (HvARRAY(sv) || SvGMAGICAL(sv)) + if (HvARRAY(sv) || SvGMAGICAL(sv) || (SvRMAGICAL(sv) && mg_find(sv,'P'))) RETPUSHYES; break; case SVt_PVCV: @@ -750,12 +799,8 @@ PP(pp_undef) if (!sv) RETPUSHUNDEF; - if (SvTHINKFIRST(sv)) { - if (SvREADONLY(sv)) - RETPUSHUNDEF; - if (SvROK(sv)) - sv_unref(sv); - } + if (SvTHINKFIRST(sv)) + sv_force_normal(sv); switch (SvTYPE(sv)) { case SVt_NULL: @@ -767,14 +812,17 @@ PP(pp_undef) hv_undef((HV*)sv); break; case SVt_PVCV: - if (PL_dowarn && cv_const_sv((CV*)sv)) - warn("Constant subroutine %s undefined", + if (ckWARN(WARN_MISC) && cv_const_sv((CV*)sv)) + Perl_warner(aTHX_ WARN_MISC, "Constant subroutine %s undefined", CvANON((CV*)sv) ? "(anonymous)" : GvENAME(CvGV((CV*)sv))); /* FALL THROUGH */ case SVt_PVFM: - { GV* gv = (GV*)SvREFCNT_inc(CvGV((CV*)sv)); - cv_undef((CV*)sv); - CvGV((CV*)sv) = gv; } /* let user-undef'd sub keep its identity */ + { + /* let user-undef'd sub keep its identity */ + GV* gv = (GV*)SvREFCNT_inc(CvGV((CV*)sv)); + cv_undef((CV*)sv); + CvGV((CV*)sv) = gv; + } break; case SVt_PVGV: if (SvFAKE(sv)) @@ -785,7 +833,7 @@ PP(pp_undef) Newz(602, gp, 1, GP); GvGP(sv) = gp_ref(gp); GvSV(sv) = NEWSV(72,0); - GvLINE(sv) = PL_curcop->cop_line; + GvLINE(sv) = CopLINE(PL_curcop); GvEGV(sv) = (GV*)sv; GvMULTI_on(sv); } @@ -808,8 +856,8 @@ PP(pp_predec) { djSP; if (SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) - croak(no_modify); - if (SvIOK(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && + DIE(aTHX_ PL_no_modify); + if (SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MIN) { --SvIVX(TOPs); @@ -825,9 +873,9 @@ PP(pp_postinc) { djSP; dTARGET; if (SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) - croak(no_modify); + DIE(aTHX_ PL_no_modify); sv_setsv(TARG, TOPs); - if (SvIOK(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && + if (SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MAX) { ++SvIVX(TOPs); @@ -845,10 +893,10 @@ PP(pp_postinc) PP(pp_postdec) { djSP; dTARGET; - if(SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) - croak(no_modify); + if (SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) + DIE(aTHX_ PL_no_modify); sv_setsv(TARG, TOPs); - if (SvIOK(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && + if (SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && SvIVX(TOPs) != IV_MIN) { --SvIVX(TOPs); @@ -868,7 +916,7 @@ PP(pp_pow) djSP; dATARGET; tryAMAGICbin(pow,opASSIGN); { dPOPTOPnnrl; - SETn( pow( left, right) ); + SETn( Perl_pow( left, right) ); RETURN; } } @@ -888,18 +936,19 @@ PP(pp_divide) djSP; dATARGET; tryAMAGICbin(div,opASSIGN); { dPOPPOPnnrl; - double value; + NV value; if (right == 0.0) - DIE("Illegal division by zero"); + DIE(aTHX_ "Illegal division by zero"); #ifdef SLOPPYDIVIDE /* insure that 20./5. == 4. */ { IV k; - if ((double)I_V(left) == left && - (double)I_V(right) == right && + if ((NV)I_V(left) == left && + (NV)I_V(right) == right && (k = I_V(left)/I_V(right))*I_V(right) == I_V(left)) { value = k; - } else { + } + else { value = left / right; } } @@ -915,48 +964,99 @@ PP(pp_modulo) { djSP; dATARGET; tryAMAGICbin(modulo,opASSIGN); { - UV left; - UV right; - bool left_neg; - bool right_neg; - UV ans; - - if (SvIOK(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)) { - IV i = SvIVX(POPs); - right = (right_neg = (i < 0)) ? -i : i; - } - else { - double n = POPn; - right = U_V((right_neg = (n < 0)) ? -n : n); - } + UV left; + UV right; + bool left_neg; + bool right_neg; + bool use_double = 0; + NV dright; + NV dleft; + + if (SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)) { + IV i = SvIVX(POPs); + right = (right_neg = (i < 0)) ? -i : i; + } + else { + dright = POPn; + use_double = 1; + right_neg = dright < 0; + if (right_neg) + dright = -dright; + } - if (SvIOK(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)) { - IV i = SvIVX(POPs); - left = (left_neg = (i < 0)) ? -i : i; - } - else { - double n = POPn; - left = U_V((left_neg = (n < 0)) ? -n : n); - } + if (!use_double && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)) { + IV i = SvIVX(POPs); + left = (left_neg = (i < 0)) ? -i : i; + } + else { + dleft = POPn; + if (!use_double) { + use_double = 1; + dright = right; + } + left_neg = dleft < 0; + if (left_neg) + dleft = -dleft; + } - if (!right) - DIE("Illegal modulus zero"); - - ans = left % right; - if ((left_neg != right_neg) && ans) - ans = right - ans; - if (right_neg) { - /* XXX may warn: unary minus operator applied to unsigned type */ - /* could change -foo to be (~foo)+1 instead */ - if (ans <= ~((UV)IV_MAX)+1) - sv_setiv(TARG, ~ans+1); - else - sv_setnv(TARG, -(double)ans); - } - else - sv_setuv(TARG, ans); - PUSHTARG; - RETURN; + if (use_double) { + NV dans; + +#if 1 +/* Somehow U_V is pessimized even if CASTFLAGS is 0 */ +# if CASTFLAGS & 2 +# define CAST_D2UV(d) U_V(d) +# else +# define CAST_D2UV(d) ((UV)(d)) +# endif + /* Tried to do this only in the case DOUBLESIZE <= UV_SIZE, + * or, in other words, precision of UV more than of NV. + * But in fact the approach below turned out to be an + * optimization - floor() may be slow */ + if (dright <= UV_MAX && dleft <= UV_MAX) { + right = CAST_D2UV(dright); + left = CAST_D2UV(dleft); + goto do_uv; + } +#endif + + /* Backward-compatibility clause: */ + dright = Perl_floor(dright + 0.5); + dleft = Perl_floor(dleft + 0.5); + + if (!dright) + DIE(aTHX_ "Illegal modulus zero"); + + dans = Perl_fmod(dleft, dright); + if ((left_neg != right_neg) && dans) + dans = dright - dans; + if (right_neg) + dans = -dans; + sv_setnv(TARG, dans); + } + else { + UV ans; + + do_uv: + if (!right) + DIE(aTHX_ "Illegal modulus zero"); + + ans = left % right; + if ((left_neg != right_neg) && ans) + ans = right - ans; + if (right_neg) { + /* XXX may warn: unary minus operator applied to unsigned type */ + /* could change -foo to be (~foo)+1 instead */ + if (ans <= ~((UV)IV_MAX)+1) + sv_setiv(TARG, ~ans+1); + else + sv_setnv(TARG, -(NV)ans); + } + else + sv_setuv(TARG, ans); + } + PUSHTARG; + RETURN; } } @@ -987,16 +1087,10 @@ PP(pp_repeat) SP -= items; } else { /* Note: mark already snarfed by pp_list */ - SV *tmpstr; + SV *tmpstr = POPs; STRLEN len; + bool isutf = DO_UTF8(tmpstr); - tmpstr = POPs; - if (TARG == tmpstr && SvTHINKFIRST(tmpstr)) { - if (SvREADONLY(tmpstr) && PL_curcop != &PL_compiling) - DIE("Can't x= to readonly value"); - if (SvROK(tmpstr)) - sv_unref(tmpstr); - } SvSetSV(TARG, tmpstr); SvPV_force(TARG, len); if (count != 1) { @@ -1009,7 +1103,10 @@ PP(pp_repeat) } *SvEND(TARG) = '\0'; } - (void)SvPOK_only(TARG); + if (isutf) + (void)SvPOK_only_UTF8(TARG); + else + (void)SvPOK_only(TARG); PUSHTARG; } RETURN; @@ -1030,16 +1127,14 @@ PP(pp_left_shift) { djSP; dATARGET; tryAMAGICbin(lshift,opASSIGN); { - IBW shift = POPi; + IV shift = POPi; if (PL_op->op_private & HINT_INTEGER) { - IBW i = TOPi; - i = BWi(i) << shift; - SETi(BWi(i)); + IV i = TOPi; + SETi(i << shift); } else { - UBW u = TOPu; - u <<= shift; - SETu(BWu(u)); + UV u = TOPu; + SETu(u << shift); } RETURN; } @@ -1049,16 +1144,14 @@ PP(pp_right_shift) { djSP; dATARGET; tryAMAGICbin(rshift,opASSIGN); { - IBW shift = POPi; + IV shift = POPi; if (PL_op->op_private & HINT_INTEGER) { - IBW i = TOPi; - i = BWi(i) >> shift; - SETi(BWi(i)); + IV i = TOPi; + SETi(i >> shift); } else { - UBW u = TOPu; - u >>= shift; - SETu(BWu(u)); + UV u = TOPu; + SETu(u >> shift); } RETURN; } @@ -1121,6 +1214,13 @@ PP(pp_ncmp) dPOPTOPnnrl; I32 value; +#ifdef Perl_isnan + if (Perl_isnan(left) || Perl_isnan(right)) { + SETs(&PL_sv_undef); + RETURN; + } + value = (left > right) - (left < right); +#else if (left == right) value = 0; else if (left < right) @@ -1131,6 +1231,7 @@ PP(pp_ncmp) SETs(&PL_sv_undef); RETURN; } +#endif SETi(value); RETURN; } @@ -1228,12 +1329,12 @@ PP(pp_bit_and) dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { if (PL_op->op_private & HINT_INTEGER) { - IBW value = SvIV(left) & SvIV(right); - SETi(BWi(value)); + IV i = SvIV(left) & SvIV(right); + SETi(i); } else { - UBW value = SvUV(left) & SvUV(right); - SETu(BWu(value)); + UV u = SvUV(left) & SvUV(right); + SETu(u); } } else { @@ -1251,12 +1352,12 @@ PP(pp_bit_xor) dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { if (PL_op->op_private & HINT_INTEGER) { - IBW value = (USE_LEFT(left) ? SvIV(left) : 0) ^ SvIV(right); - SETi(BWi(value)); + IV i = (USE_LEFT(left) ? SvIV(left) : 0) ^ SvIV(right); + SETi(i); } else { - UBW value = (USE_LEFT(left) ? SvUV(left) : 0) ^ SvUV(right); - SETu(BWu(value)); + UV u = (USE_LEFT(left) ? SvUV(left) : 0) ^ SvUV(right); + SETu(u); } } else { @@ -1274,12 +1375,12 @@ PP(pp_bit_or) dPOPTOPssrl; if (SvNIOKp(left) || SvNIOKp(right)) { if (PL_op->op_private & HINT_INTEGER) { - IBW value = (USE_LEFT(left) ? SvIV(left) : 0) | SvIV(right); - SETi(BWi(value)); + IV i = (USE_LEFT(left) ? SvIV(left) : 0) | SvIV(right); + SETi(i); } else { - UBW value = (USE_LEFT(left) ? SvUV(left) : 0) | SvUV(right); - SETu(BWu(value)); + UV u = (USE_LEFT(left) ? SvUV(left) : 0) | SvUV(right); + SETu(u); } } else { @@ -1297,9 +1398,23 @@ PP(pp_negate) dTOPss; if (SvGMAGICAL(sv)) mg_get(sv); - if (SvIOKp(sv) && !SvNOKp(sv) && !SvPOKp(sv) && SvIVX(sv) != IV_MIN) - SETi(-SvIVX(sv)); - else if (SvNIOKp(sv)) + if (SvIOKp(sv) && !SvNOKp(sv) && !SvPOKp(sv)) { + if (SvIsUV(sv)) { + if (SvIVX(sv) == IV_MIN) { + SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */ + RETURN; + } + else if (SvUVX(sv) <= IV_MAX) { + SETi(-SvIVX(sv)); + RETURN; + } + } + else if (SvIVX(sv) != IV_MIN) { + SETi(-SvIVX(sv)); + RETURN; + } + } + if (SvNIOKp(sv)) SETn(-SvNV(sv)); else if (SvPOKp(sv)) { STRLEN len; @@ -1312,6 +1427,10 @@ PP(pp_negate) sv_setsv(TARG, sv); *SvPV_force(TARG, len) = *s == '-' ? '+' : '-'; } + else if (DO_UTF8(sv) && *(U8*)s >= 0xc0 && isIDFIRST_utf8((U8*)s)) { + sv_setpvn(TARG, "-", 1); + sv_catsv(TARG, sv); + } else sv_setnv(TARG, -SvNV(sv)); SETTARG; @@ -1324,9 +1443,7 @@ PP(pp_negate) PP(pp_not) { -#ifdef OVERLOAD djSP; tryAMAGICunSET(not); -#endif /* OVERLOAD */ *PL_stack_sp = boolSV(!SvTRUE(*PL_stack_sp)); return NORMAL; } @@ -1338,12 +1455,12 @@ PP(pp_complement) dTOPss; if (SvNIOKp(sv)) { if (PL_op->op_private & HINT_INTEGER) { - IBW value = ~SvIV(sv); - SETi(BWi(value)); + IV i = ~SvIV(sv); + SETi(i); } else { - UBW value = ~SvUV(sv); - SETu(BWu(value)); + UV u = ~SvUV(sv); + SETu(u); } } else { @@ -1390,7 +1507,7 @@ PP(pp_i_divide) { dPOPiv; if (value == 0) - DIE("Illegal division by zero"); + DIE(aTHX_ "Illegal division by zero"); value = POPi / value; PUSHi( value ); RETURN; @@ -1403,7 +1520,7 @@ PP(pp_i_modulo) { dPOPTOPiirl; if (!right) - DIE("Illegal modulus zero"); + DIE(aTHX_ "Illegal modulus zero"); SETi( left % right ); RETURN; } @@ -1521,7 +1638,7 @@ PP(pp_atan2) djSP; dTARGET; tryAMAGICbin(atan2,0); { dPOPTOPnnrl; - SETn(atan2(left, right)); + SETn(Perl_atan2(left, right)); RETURN; } } @@ -1530,9 +1647,9 @@ PP(pp_sin) { djSP; dTARGET; tryAMAGICun(sin); { - double value; + NV value; value = POPn; - value = sin(value); + value = Perl_sin(value); XPUSHn(value); RETURN; } @@ -1542,9 +1659,9 @@ PP(pp_cos) { djSP; dTARGET; tryAMAGICun(cos); { - double value; + NV value; value = POPn; - value = cos(value); + value = Perl_cos(value); XPUSHn(value); RETURN; } @@ -1556,40 +1673,30 @@ PP(pp_cos) compatibility by calling rand() but allow the user to override it. See INSTALL for details. --Andy Dougherty 15 July 1998 */ -#ifndef my_rand -# define my_rand rand -#endif -#ifndef my_srand -# define my_srand srand +/* Now it's after 5.005, and Configure supports drand48() and random(), + in addition to rand(). So the overrides should not be needed any more. + --Jarkko Hietaniemi 27 September 1998 + */ + +#ifndef HAS_DRAND48_PROTO +extern double drand48 (void); #endif PP(pp_rand) { djSP; dTARGET; - double value; + NV value; if (MAXARG < 1) value = 1.0; else value = POPn; if (value == 0.0) value = 1.0; - if (!srand_called) { - (void)my_srand((unsigned)seed()); - srand_called = TRUE; + if (!PL_srand_called) { + (void)seedDrand01((Rand_seed_t)seed()); + PL_srand_called = TRUE; } -#if RANDBITS == 31 - value = my_rand() * value / 2147483648.0; -#else -#if RANDBITS == 16 - value = my_rand() * value / 65536.0; -#else -#if RANDBITS == 15 - value = my_rand() * value / 32768.0; -#else - value = my_rand() * value / (double)(((unsigned long)1) << RANDBITS); -#endif -#endif -#endif + value *= Drand01(); XPUSHn(value); RETURN; } @@ -1602,22 +1709,22 @@ PP(pp_srand) anum = seed(); else anum = POPu; - (void)my_srand((unsigned)anum); - srand_called = TRUE; + (void)seedDrand01((Rand_seed_t)anum); + PL_srand_called = TRUE; EXTEND(SP, 1); RETPUSHYES; } STATIC U32 -seed(void) +S_seed(pTHX) { /* * This is really just a quick hack which grabs various garbage * values. It really should be a real hash algorithm which * spreads the effect of every input bit onto every output bit, - * if someone who knows about such tings would bother to write it. + * if someone who knows about such things would bother to write it. * Might be a good idea to add that function to CORE as well. - * No numbers below come from careful analysis or anyting here, + * No numbers below come from careful analysis or anything here, * except they are primes and SEED_C1 > 1E6 to get a full-width * value from (tv_sec * SEED_C1 + tv_usec). The multipliers should * probably be bigger too. @@ -1634,29 +1741,58 @@ seed(void) #define SEED_C5 26107 dTHR; +#ifndef PERL_NO_DEV_RANDOM + int fd; +#endif U32 u; #ifdef VMS # include /* when[] = (low 32 bits, high 32 bits) of time since epoch * in 100-ns units, typically incremented ever 10 ms. */ unsigned int when[2]; +#else +# ifdef HAS_GETTIMEOFDAY + struct timeval when; +# else + Time_t when; +# endif +#endif + +/* This test is an escape hatch, this symbol isn't set by Configure. */ +#ifndef PERL_NO_DEV_RANDOM +#ifndef PERL_RANDOM_DEVICE + /* /dev/random isn't used by default because reads from it will block + * if there isn't enough entropy available. You can compile with + * PERL_RANDOM_DEVICE to it if you'd prefer Perl to block until there + * is enough real entropy to fill the seed. */ +# define PERL_RANDOM_DEVICE "/dev/urandom" +#endif + fd = PerlLIO_open(PERL_RANDOM_DEVICE, 0); + if (fd != -1) { + if (PerlLIO_read(fd, &u, sizeof u) != sizeof u) + u = 0; + PerlLIO_close(fd); + if (u) + return u; + } +#endif + +#ifdef VMS _ckvmssts(sys$gettim(when)); u = (U32)SEED_C1 * when[0] + (U32)SEED_C2 * when[1]; #else # ifdef HAS_GETTIMEOFDAY - struct timeval when; gettimeofday(&when,(struct timezone *) 0); u = (U32)SEED_C1 * when.tv_sec + (U32)SEED_C2 * when.tv_usec; # else - Time_t when; (void)time(&when); u = (U32)SEED_C1 * when; # endif #endif - u += SEED_C3 * (U32)getpid(); - u += SEED_C4 * (U32)(UV)PL_stack_sp; + u += SEED_C3 * (U32)PerlProc_getpid(); + u += SEED_C4 * (U32)PTR2UV(PL_stack_sp); #ifndef PLAN9 /* XXX Plan9 assembler chokes on this; fix needed */ - u += SEED_C5 * (U32)(UV)&when; + u += SEED_C5 * (U32)PTR2UV(&when); #endif return u; } @@ -1665,9 +1801,9 @@ PP(pp_exp) { djSP; dTARGET; tryAMAGICun(exp); { - double value; + NV value; value = POPn; - value = exp(value); + value = Perl_exp(value); XPUSHn(value); RETURN; } @@ -1677,13 +1813,13 @@ PP(pp_log) { djSP; dTARGET; tryAMAGICun(log); { - double value; + NV value; value = POPn; if (value <= 0.0) { - SET_NUMERIC_STANDARD(); - DIE("Can't take log of %g", value); + RESTORE_NUMERIC_STANDARD(); + DIE(aTHX_ "Can't take log of %g", value); } - value = log(value); + value = Perl_log(value); XPUSHn(value); RETURN; } @@ -1693,13 +1829,13 @@ PP(pp_sqrt) { djSP; dTARGET; tryAMAGICun(sqrt); { - double value; + NV value; value = POPn; if (value < 0.0) { - SET_NUMERIC_STANDARD(); - DIE("Can't take sqrt of %g", value); + RESTORE_NUMERIC_STANDARD(); + DIE(aTHX_ "Can't take sqrt of %g", value); } - value = sqrt(value); + value = Perl_sqrt(value); XPUSHn(value); RETURN; } @@ -1709,7 +1845,7 @@ PP(pp_int) { djSP; dTARGET; { - double value = TOPn; + NV value = TOPn; IV iv; if (SvIOKp(TOPs) && !SvNOKp(TOPs) && !SvPOKp(TOPs)) { @@ -1718,9 +1854,9 @@ PP(pp_int) } else { if (value >= 0.0) - (void)modf(value, &value); + (void)Perl_modf(value, &value); else { - (void)modf(-value, &value); + (void)Perl_modf(-value, &value); value = -value; } iv = I_V(value); @@ -1737,7 +1873,7 @@ PP(pp_abs) { djSP; dTARGET; tryAMAGICun(abs); { - double value = TOPn; + NV value = TOPn; IV iv; if (SvIOKp(TOPs) && !SvNOKp(TOPs) && !SvPOKp(TOPs) && @@ -1760,29 +1896,35 @@ PP(pp_hex) djSP; dTARGET; char *tmps; I32 argtype; + STRLEN n_a; - tmps = POPp; - XPUSHu(scan_hex(tmps, 99, &argtype)); + tmps = POPpx; + argtype = 1; /* allow underscores */ + XPUSHn(scan_hex(tmps, 99, &argtype)); RETURN; } PP(pp_oct) { djSP; dTARGET; - UV value; + NV value; I32 argtype; char *tmps; + STRLEN n_a; - tmps = POPp; + tmps = POPpx; while (*tmps && isSPACE(*tmps)) tmps++; if (*tmps == '0') tmps++; + argtype = 1; /* allow underscores */ if (*tmps == 'x') value = scan_hex(++tmps, 99, &argtype); + else if (*tmps == 'b') + value = scan_bin(++tmps, 99, &argtype); else value = scan_oct(tmps, 99, &argtype); - XPUSHu(value); + XPUSHn(value); RETURN; } @@ -1791,7 +1933,12 @@ PP(pp_oct) PP(pp_length) { djSP; dTARGET; - SETi( sv_len(TOPs) ); + SV *sv = TOPs; + + if (DO_UTF8(sv)) + SETi(sv_len_utf8(sv)); + else + SETi(sv_len(sv)); RETURN; } @@ -1801,6 +1948,7 @@ PP(pp_substr) SV *sv; I32 len; STRLEN curlen; + STRLEN utfcurlen; I32 pos; I32 rem; I32 fail; @@ -1811,6 +1959,7 @@ PP(pp_substr) STRLEN repl_len; SvTAINTED_off(TARG); /* decontaminate */ + SvUTF8_off(TARG); /* decontaminate */ if (MAXARG > 2) { if (MAXARG > 3) { sv = POPs; @@ -1822,6 +1971,16 @@ PP(pp_substr) sv = POPs; PUTBACK; tmps = SvPV(sv, curlen); + if (DO_UTF8(sv)) { + utfcurlen = sv_len_utf8(sv); + if (utfcurlen == curlen) + utfcurlen = 0; + else + curlen = utfcurlen; + } + else + utfcurlen = 0; + if (pos >= arybase) { pos -= arybase; rem = curlen-pos; @@ -1856,22 +2015,32 @@ PP(pp_substr) rem -= pos; } if (fail < 0) { - if (PL_dowarn || lvalue || repl) - warn("substr outside of string"); + if (lvalue || repl) + Perl_croak(aTHX_ "substr outside of string"); + if (ckWARN(WARN_SUBSTR)) + Perl_warner(aTHX_ WARN_SUBSTR, "substr outside of string"); RETPUSHUNDEF; } else { + if (utfcurlen) + sv_pos_u2b(sv, &pos, &rem); tmps += pos; sv_setpvn(TARG, tmps, rem); - if (lvalue) { /* it's an lvalue! */ + if (utfcurlen) + SvUTF8_on(TARG); + if (repl) + sv_insert(sv, pos, rem, repl, repl_len); + else if (lvalue) { /* it's an lvalue! */ if (!SvGMAGICAL(sv)) { if (SvROK(sv)) { - SvPV_force(sv,PL_na); - if (PL_dowarn) - warn("Attempt to use reference as lvalue in substr"); + STRLEN n_a; + SvPV_force(sv,n_a); + if (ckWARN(WARN_SUBSTR)) + Perl_warner(aTHX_ WARN_SUBSTR, + "Attempt to use reference as lvalue in substr"); } if (SvOK(sv)) /* is it defined ? */ - (void)SvPOK_only(sv); + (void)SvPOK_only_UTF8(sv); else sv_setpvn(sv,"",0); /* avoid lexical reincarnation */ } @@ -1890,8 +2059,6 @@ PP(pp_substr) LvTARGOFF(TARG) = pos; LvTARGLEN(TARG) = rem; } - else if (repl) - sv_insert(sv, pos, rem, repl, repl_len); } SPAGAIN; PUSHs(TARG); /* avoid SvSETMAGIC here */ @@ -1905,74 +2072,24 @@ PP(pp_vec) register I32 offset = POPi; register SV *src = POPs; I32 lvalue = PL_op->op_flags & OPf_MOD; - STRLEN srclen; - unsigned char *s = (unsigned char*)SvPV(src, srclen); - unsigned long retnum; - I32 len; - SvTAINTED_off(TARG); /* decontaminate */ - offset *= size; /* turn into bit offset */ - len = (offset + size + 7) / 8; - if (offset < 0 || size < 1) - retnum = 0; - else { - if (lvalue) { /* it's an lvalue! */ - if (SvTYPE(TARG) < SVt_PVLV) { - sv_upgrade(TARG, SVt_PVLV); - sv_magic(TARG, Nullsv, 'v', Nullch, 0); - } - - LvTYPE(TARG) = 'v'; - if (LvTARG(TARG) != src) { - if (LvTARG(TARG)) - SvREFCNT_dec(LvTARG(TARG)); - LvTARG(TARG) = SvREFCNT_inc(src); - } - LvTARGOFF(TARG) = offset; - LvTARGLEN(TARG) = size; - } - if (len > srclen) { - if (size <= 8) - retnum = 0; - else { - offset >>= 3; - if (size == 16) { - if (offset >= srclen) - retnum = 0; - else - retnum = (unsigned long) s[offset] << 8; - } - else if (size == 32) { - if (offset >= srclen) - retnum = 0; - else if (offset + 1 >= srclen) - retnum = (unsigned long) s[offset] << 24; - else if (offset + 2 >= srclen) - retnum = ((unsigned long) s[offset] << 24) + - ((unsigned long) s[offset + 1] << 16); - else - retnum = ((unsigned long) s[offset] << 24) + - ((unsigned long) s[offset + 1] << 16) + - (s[offset + 2] << 8); - } - } + SvTAINTED_off(TARG); /* decontaminate */ + if (lvalue) { /* it's an lvalue! */ + if (SvTYPE(TARG) < SVt_PVLV) { + sv_upgrade(TARG, SVt_PVLV); + sv_magic(TARG, Nullsv, 'v', Nullch, 0); } - else if (size < 8) - retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1); - else { - offset >>= 3; - if (size == 8) - retnum = s[offset]; - else if (size == 16) - retnum = ((unsigned long) s[offset] << 8) + s[offset+1]; - else if (size == 32) - retnum = ((unsigned long) s[offset] << 24) + - ((unsigned long) s[offset + 1] << 16) + - (s[offset + 2] << 8) + s[offset+3]; + LvTYPE(TARG) = 'v'; + if (LvTARG(TARG) != src) { + if (LvTARG(TARG)) + SvREFCNT_dec(LvTARG(TARG)); + LvTARG(TARG) = SvREFCNT_inc(src); } + LvTARGOFF(TARG) = offset; + LvTARGLEN(TARG) = size; } - sv_setuv(TARG, (UV)retnum); + sv_setuv(TARG, do_vecget(src, offset, size)); PUSHs(TARG); RETURN; } @@ -1996,16 +2113,20 @@ PP(pp_index) little = POPs; big = POPs; tmps = SvPV(big, biglen); + if (offset > 0 && DO_UTF8(big)) + sv_pos_u2b(big, &offset, 0); if (offset < 0) offset = 0; else if (offset > biglen) offset = biglen; if (!(tmps2 = fbm_instr((unsigned char*)tmps + offset, (unsigned char*)tmps + biglen, little, 0))) - retval = -1 + arybase; + retval = -1; else - retval = tmps2 - tmps + arybase; - PUSHi(retval); + retval = tmps2 - tmps; + if (retval > 0 && DO_UTF8(big)) + sv_pos_b2u(big, &retval); + PUSHi(retval + arybase); RETURN; } @@ -2016,7 +2137,6 @@ PP(pp_rindex) SV *little; STRLEN blen; STRLEN llen; - SV *offstr; I32 offset; I32 retval; char *tmps; @@ -2024,37 +2144,36 @@ PP(pp_rindex) I32 arybase = PL_curcop->cop_arybase; if (MAXARG >= 3) - offstr = POPs; + offset = POPi; little = POPs; big = POPs; tmps2 = SvPV(little, llen); tmps = SvPV(big, blen); if (MAXARG < 3) offset = blen; - else - offset = SvIV(offstr) - arybase + llen; + else { + if (offset > 0 && DO_UTF8(big)) + sv_pos_u2b(big, &offset, 0); + offset = offset - arybase + llen; + } if (offset < 0) offset = 0; else if (offset > blen) offset = blen; if (!(tmps2 = rninstr(tmps, tmps + offset, tmps2, tmps2 + llen))) - retval = -1 + arybase; + retval = -1; else - retval = tmps2 - tmps + arybase; - PUSHi(retval); + retval = tmps2 - tmps; + if (retval > 0 && DO_UTF8(big)) + sv_pos_b2u(big, &retval); + PUSHi(retval + arybase); RETURN; } PP(pp_sprintf) { djSP; dMARK; dORIGMARK; dTARGET; -#ifdef USE_LOCALE_NUMERIC - if (PL_op->op_private & OPpLOCALE) - SET_NUMERIC_LOCAL(); - else - SET_NUMERIC_STANDARD(); -#endif do_sprintf(TARG, SP-MARK, MARK+1); TAINT_IF(SvTAINTED(TARG)); SP = ORIGMARK; @@ -2065,19 +2184,17 @@ PP(pp_sprintf) PP(pp_ord) { djSP; dTARGET; - I32 value; - char *tmps; + UV value; + STRLEN n_a; + SV *tmpsv = POPs; + U8 *tmps = (U8*)SvPVx(tmpsv,n_a); + I32 retlen; -#ifndef I286 - tmps = POPp; - value = (I32) (*tmps & 255); -#else - I32 anum; - tmps = POPp; - anum = (I32) *tmps; - value = (I32) (anum & 255); -#endif - XPUSHi(value); + if ((*tmps & 0x80) && DO_UTF8(tmpsv)) + value = utf8_to_uv(tmps, &retlen); + else + value = (UV)(*tmps & 255); + XPUSHu(value); RETURN; } @@ -2085,12 +2202,26 @@ PP(pp_chr) { djSP; dTARGET; char *tmps; + U32 value = POPu; (void)SvUPGRADE(TARG,SVt_PV); + + if (value > 255 && !IN_BYTE) { + SvGROW(TARG, UTF8_MAXLEN+1); + tmps = SvPVX(TARG); + tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value); + SvCUR_set(TARG, tmps - SvPVX(TARG)); + *tmps = '\0'; + (void)SvPOK_only(TARG); + SvUTF8_on(TARG); + XPUSHs(TARG); + RETURN; + } + SvGROW(TARG,2); SvCUR_set(TARG, 1); tmps = SvPVX(TARG); - *tmps++ = POPi; + *tmps++ = value; *tmps = '\0'; (void)SvPOK_only(TARG); XPUSHs(TARG); @@ -2100,15 +2231,16 @@ PP(pp_chr) PP(pp_crypt) { djSP; dTARGET; dPOPTOPssrl; + STRLEN n_a; #ifdef HAS_CRYPT - char *tmps = SvPV(left, PL_na); + char *tmps = SvPV(left, n_a); #ifdef FCRYPT - sv_setpv(TARG, fcrypt(tmps, SvPV(right, na))); + sv_setpv(TARG, fcrypt(tmps, SvPV(right, n_a))); #else - sv_setpv(TARG, crypt(tmps, SvPV(right, PL_na))); + sv_setpv(TARG, PerlProc_crypt(tmps, SvPV(right, n_a))); #endif #else - DIE( + DIE(aTHX_ "The crypt() function is unimplemented due to excessive paranoia."); #endif SETs(TARG); @@ -2119,25 +2251,58 @@ PP(pp_ucfirst) { djSP; SV *sv = TOPs; - register char *s; + register U8 *s; + STRLEN slen; + + if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { + I32 ulen; + U8 tmpbuf[UTF8_MAXLEN]; + U8 *tend; + UV uv = utf8_to_uv(s, &ulen); - if (!SvPADTMP(sv)) { - dTARGET; - sv_setsv(TARG, sv); - sv = TARG; - SETs(sv); - } - s = SvPV_force(sv, PL_na); - if (*s) { if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - *s = toUPPER_LC(*s); + uv = toTITLE_LC_uni(uv); } else - *s = toUPPER(*s); + uv = toTITLE_utf8(s); + + tend = uv_to_utf8(tmpbuf, uv); + + if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { + dTARGET; + sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf); + sv_catpvn(TARG, (char*)(s + ulen), slen - ulen); + SvUTF8_on(TARG); + SETs(TARG); + } + else { + s = (U8*)SvPV_force(sv, slen); + Copy(tmpbuf, s, ulen, U8); + } } - + else { + if (!SvPADTMP(sv) || SvREADONLY(sv)) { + dTARGET; + SvUTF8_off(TARG); /* decontaminate */ + sv_setsv(TARG, sv); + sv = TARG; + SETs(sv); + } + s = (U8*)SvPV_force(sv, slen); + if (*s) { + if (PL_op->op_private & OPpLOCALE) { + TAINT; + SvTAINTED_on(sv); + *s = toUPPER_LC(*s); + } + else + *s = toUPPER(*s); + } + } + if (SvSMAGICAL(sv)) + mg_set(sv); RETURN; } @@ -2145,26 +2310,58 @@ PP(pp_lcfirst) { djSP; SV *sv = TOPs; - register char *s; + register U8 *s; + STRLEN slen; + + if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { + I32 ulen; + U8 tmpbuf[UTF8_MAXLEN]; + U8 *tend; + UV uv = utf8_to_uv(s, &ulen); - if (!SvPADTMP(sv)) { - dTARGET; - sv_setsv(TARG, sv); - sv = TARG; - SETs(sv); - } - s = SvPV_force(sv, PL_na); - if (*s) { if (PL_op->op_private & OPpLOCALE) { TAINT; SvTAINTED_on(sv); - *s = toLOWER_LC(*s); + uv = toLOWER_LC_uni(uv); } else - *s = toLOWER(*s); + uv = toLOWER_utf8(s); + + tend = uv_to_utf8(tmpbuf, uv); + + if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) { + dTARGET; + sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf); + sv_catpvn(TARG, (char*)(s + ulen), slen - ulen); + SvUTF8_on(TARG); + SETs(TARG); + } + else { + s = (U8*)SvPV_force(sv, slen); + Copy(tmpbuf, s, ulen, U8); + } } - - SETs(sv); + else { + if (!SvPADTMP(sv) || SvREADONLY(sv)) { + dTARGET; + SvUTF8_off(TARG); /* decontaminate */ + sv_setsv(TARG, sv); + sv = TARG; + SETs(sv); + } + s = (U8*)SvPV_force(sv, slen); + if (*s) { + if (PL_op->op_private & OPpLOCALE) { + TAINT; + SvTAINTED_on(sv); + *s = toLOWER_LC(*s); + } + else + *s = toLOWER(*s); + } + } + if (SvSMAGICAL(sv)) + mg_set(sv); RETURN; } @@ -2172,31 +2369,73 @@ PP(pp_uc) { djSP; SV *sv = TOPs; - register char *s; + register U8 *s; STRLEN len; - if (!SvPADTMP(sv)) { + if (DO_UTF8(sv)) { dTARGET; - sv_setsv(TARG, sv); - sv = TARG; - SETs(sv); - } - - s = SvPV_force(sv, len); - if (len) { - register char *send = s + len; - - if (PL_op->op_private & OPpLOCALE) { - TAINT; - SvTAINTED_on(sv); - for (; s < send; s++) - *s = toUPPER_LC(*s); + I32 ulen; + register U8 *d; + U8 *send; + + s = (U8*)SvPV(sv,len); + if (!len) { + SvUTF8_off(TARG); /* decontaminate */ + sv_setpvn(TARG, "", 0); + SETs(TARG); } else { - for (; s < send; s++) - *s = toUPPER(*s); + (void)SvUPGRADE(TARG, SVt_PV); + SvGROW(TARG, (len * 2) + 1); + (void)SvPOK_only(TARG); + d = (U8*)SvPVX(TARG); + send = s + len; + if (PL_op->op_private & OPpLOCALE) { + TAINT; + SvTAINTED_on(TARG); + while (s < send) { + d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, &ulen))); + s += ulen; + } + } + else { + while (s < send) { + d = uv_to_utf8(d, toUPPER_utf8( s )); + s += UTF8SKIP(s); + } + } + *d = '\0'; + SvUTF8_on(TARG); + SvCUR_set(TARG, d - (U8*)SvPVX(TARG)); + SETs(TARG); + } + } + else { + if (!SvPADTMP(sv) || SvREADONLY(sv)) { + dTARGET; + SvUTF8_off(TARG); /* decontaminate */ + sv_setsv(TARG, sv); + sv = TARG; + SETs(sv); + } + s = (U8*)SvPV_force(sv, len); + if (len) { + register U8 *send = s + len; + + if (PL_op->op_private & OPpLOCALE) { + TAINT; + SvTAINTED_on(sv); + for (; s < send; s++) + *s = toUPPER_LC(*s); + } + else { + for (; s < send; s++) + *s = toUPPER(*s); + } } } + if (SvSMAGICAL(sv)) + mg_set(sv); RETURN; } @@ -2204,31 +2443,74 @@ PP(pp_lc) { djSP; SV *sv = TOPs; - register char *s; + register U8 *s; STRLEN len; - if (!SvPADTMP(sv)) { + if (DO_UTF8(sv)) { dTARGET; - sv_setsv(TARG, sv); - sv = TARG; - SETs(sv); + I32 ulen; + register U8 *d; + U8 *send; + + s = (U8*)SvPV(sv,len); + if (!len) { + SvUTF8_off(TARG); /* decontaminate */ + sv_setpvn(TARG, "", 0); + SETs(TARG); + } + else { + (void)SvUPGRADE(TARG, SVt_PV); + SvGROW(TARG, (len * 2) + 1); + (void)SvPOK_only(TARG); + d = (U8*)SvPVX(TARG); + send = s + len; + if (PL_op->op_private & OPpLOCALE) { + TAINT; + SvTAINTED_on(TARG); + while (s < send) { + d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, &ulen))); + s += ulen; + } + } + else { + while (s < send) { + d = uv_to_utf8(d, toLOWER_utf8(s)); + s += UTF8SKIP(s); + } + } + *d = '\0'; + SvUTF8_on(TARG); + SvCUR_set(TARG, d - (U8*)SvPVX(TARG)); + SETs(TARG); + } } + else { + if (!SvPADTMP(sv) || SvREADONLY(sv)) { + dTARGET; + SvUTF8_off(TARG); /* decontaminate */ + sv_setsv(TARG, sv); + sv = TARG; + SETs(sv); + } - s = SvPV_force(sv, len); - if (len) { - register char *send = s + len; + s = (U8*)SvPV_force(sv, len); + if (len) { + register U8 *send = s + len; - if (PL_op->op_private & OPpLOCALE) { - TAINT; - SvTAINTED_on(sv); - for (; s < send; s++) - *s = toLOWER_LC(*s); - } - else { - for (; s < send; s++) - *s = toLOWER(*s); + if (PL_op->op_private & OPpLOCALE) { + TAINT; + SvTAINTED_on(sv); + for (; s < send; s++) + *s = toLOWER_LC(*s); + } + else { + for (; s < send; s++) + *s = toLOWER(*s); + } } } + if (SvSMAGICAL(sv)) + mg_set(sv); RETURN; } @@ -2240,22 +2522,46 @@ PP(pp_quotemeta) register char *s = SvPV(sv,len); register char *d; + SvUTF8_off(TARG); /* decontaminate */ if (len) { (void)SvUPGRADE(TARG, SVt_PV); SvGROW(TARG, (len * 2) + 1); d = SvPVX(TARG); - while (len--) { - if (!isALNUM(*s)) - *d++ = '\\'; - *d++ = *s++; + if (DO_UTF8(sv)) { + while (len) { + if (*s & 0x80) { + STRLEN ulen = UTF8SKIP(s); + if (ulen > len) + ulen = len; + len -= ulen; + while (ulen--) + *d++ = *s++; + } + else { + if (!isALNUM(*s)) + *d++ = '\\'; + *d++ = *s++; + len--; + } + } + SvUTF8_on(TARG); + } + else { + while (len--) { + if (!isALNUM(*s)) + *d++ = '\\'; + *d++ = *s++; + } } *d = '\0'; SvCUR_set(TARG, d - SvPVX(TARG)); - (void)SvPOK_only(TARG); + (void)SvPOK_only_UTF8(TARG); } else sv_setpvn(TARG, s, len); SETs(TARG); + if (SvSMAGICAL(TARG)) + mg_set(TARG); RETURN; } @@ -2289,7 +2595,7 @@ PP(pp_aslice) svp = av_fetch(av, elem, lval); if (lval) { if (!svp || *svp == &PL_sv_undef) - DIE(no_aelem, elem); + DIE(aTHX_ PL_no_aelem, elem); if (PL_op->op_private & OPpLVAL_INTRO) save_aelem(av, elem, svp); } @@ -2308,7 +2614,7 @@ PP(pp_aslice) PP(pp_each) { - djSP; dTARGET; + djSP; HV *hash = (HV*)POPs; HE *entry; I32 gimme = GIMME_V; @@ -2323,12 +2629,13 @@ PP(pp_each) if (entry) { PUSHs(hv_iterkeysv(entry)); /* won't clobber stack_sp */ if (gimme == G_ARRAY) { + SV *val; PUTBACK; /* might clobber stack_sp */ - sv_setsv(TARG, realhv ? - hv_iterval(hash, entry) : avhv_iterval((AV*)hash, entry)); + val = realhv ? + hv_iterval(hash, entry) : avhv_iterval((AV*)hash, entry); SPAGAIN; - PUSHs(TARG); + PUSHs(val); } } else if (gimme == G_SCALAR) @@ -2339,12 +2646,12 @@ PP(pp_each) PP(pp_values) { - return do_kv(ARGS); + return do_kv(); } PP(pp_keys) { - return do_kv(ARGS); + return do_kv(); } PP(pp_delete) @@ -2360,13 +2667,28 @@ PP(pp_delete) U32 hvtype; hv = (HV*)POPs; hvtype = SvTYPE(hv); - while (++MARK <= SP) { - if (hvtype == SVt_PVHV) + if (hvtype == SVt_PVHV) { /* hash element */ + while (++MARK <= SP) { sv = hv_delete_ent(hv, *MARK, discard, 0); - else - DIE("Not a HASH reference"); - *MARK = sv ? sv : &PL_sv_undef; + *MARK = sv ? sv : &PL_sv_undef; + } + } + else if (hvtype == SVt_PVAV) { + if (PL_op->op_flags & OPf_SPECIAL) { /* array element */ + while (++MARK <= SP) { + sv = av_delete((AV*)hv, SvIV(*MARK), discard); + *MARK = sv ? sv : &PL_sv_undef; + } + } + else { /* pseudo-hash element */ + while (++MARK <= SP) { + sv = avhv_delete_ent((AV*)hv, *MARK, discard, 0); + *MARK = sv ? sv : &PL_sv_undef; + } + } } + else + DIE(aTHX_ "Not a HASH reference"); if (discard) SP = ORIGMARK; else if (gimme == G_SCALAR) { @@ -2380,8 +2702,14 @@ PP(pp_delete) hv = (HV*)POPs; if (SvTYPE(hv) == SVt_PVHV) sv = hv_delete_ent(hv, keysv, discard, 0); + else if (SvTYPE(hv) == SVt_PVAV) { + if (PL_op->op_flags & OPf_SPECIAL) + sv = av_delete((AV*)hv, SvIV(keysv), discard); + else + sv = avhv_delete_ent((AV*)hv, keysv, discard, 0); + } else - DIE("Not a HASH reference"); + DIE(aTHX_ "Not a HASH reference"); if (!sv) sv = &PL_sv_undef; if (!discard) @@ -2393,16 +2721,36 @@ PP(pp_delete) PP(pp_exists) { djSP; - SV *tmpsv = POPs; - HV *hv = (HV*)POPs; + SV *tmpsv; + HV *hv; + + if (PL_op->op_private & OPpEXISTS_SUB) { + GV *gv; + CV *cv; + SV *sv = POPs; + cv = sv_2cv(sv, &hv, &gv, FALSE); + if (cv) + RETPUSHYES; + if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv)) + RETPUSHYES; + RETPUSHNO; + } + tmpsv = POPs; + hv = (HV*)POPs; if (SvTYPE(hv) == SVt_PVHV) { if (hv_exists_ent(hv, tmpsv, 0)) RETPUSHYES; - } else if (SvTYPE(hv) == SVt_PVAV) { - if (avhv_exists_ent((AV*)hv, tmpsv, 0)) + } + else if (SvTYPE(hv) == SVt_PVAV) { + if (PL_op->op_flags & OPf_SPECIAL) { /* array element */ + if (av_exists((AV*)hv, SvIV(tmpsv))) + RETPUSHYES; + } + else if (avhv_exists_ent((AV*)hv, tmpsv, 0)) /* pseudo-hash element */ RETPUSHYES; - } else { - DIE("Not a HASH reference"); + } + else { + DIE(aTHX_ "Not a HASH reference"); } RETPUSHNO; } @@ -2414,6 +2762,9 @@ PP(pp_hslice) register I32 lval = PL_op->op_flags & OPf_MOD; I32 realhv = (SvTYPE(hv) == SVt_PVHV); + if (!realhv && PL_op->op_private & OPpLVAL_INTRO) + DIE(aTHX_ "Can't localize pseudo-hash element"); + if (realhv || SvTYPE(hv) == SVt_PVAV) { while (++MARK <= SP) { SV *keysv = *MARK; @@ -2421,12 +2772,15 @@ PP(pp_hslice) if (realhv) { HE *he = hv_fetch_ent(hv, keysv, lval, 0); svp = he ? &HeVAL(he) : 0; - } else { + } + else { svp = avhv_fetch_ent((AV*)hv, keysv, lval, 0); } if (lval) { - if (!svp || *svp == &PL_sv_undef) - DIE(no_helem, SvPV(keysv, PL_na)); + if (!svp || *svp == &PL_sv_undef) { + STRLEN n_a; + DIE(aTHX_ PL_no_helem, SvPV(keysv, n_a)); + } if (PL_op->op_private & OPpLVAL_INTRO) save_helem(hv, keysv, svp); } @@ -2492,20 +2846,17 @@ PP(pp_lslice) for (lelem = firstlelem; lelem <= lastlelem; lelem++) { ix = SvIVx(*lelem); - if (ix < 0) { + if (ix < 0) ix += max; - if (ix < 0) - *lelem = &PL_sv_undef; - else if (!(*lelem = firstrelem[ix])) - *lelem = &PL_sv_undef; - } - else { + else ix -= arybase; - if (ix >= max || !(*lelem = firstrelem[ix])) + if (ix < 0 || ix >= max) + *lelem = &PL_sv_undef; + else { + is_something_there = TRUE; + if (!(*lelem = firstrelem[ix])) *lelem = &PL_sv_undef; } - if (!is_something_there && (SvOK(*lelem) || SvGMAGICAL(*lelem))) - is_something_there = TRUE; } if (is_something_there) SP = lastlelem; @@ -2534,8 +2885,8 @@ PP(pp_anonhash) SV *val = NEWSV(46, 0); if (MARK < SP) sv_setsv(val, *++MARK); - else if (PL_dowarn) - warn("Odd number of elements in hash assignment"); + else if (ckWARN(WARN_MISC)) + Perl_warner(aTHX_ WARN_MISC, "Odd number of elements in hash assignment"); (void)hv_store_ent(hv,key,val,0); } SP = ORIGMARK; @@ -2558,12 +2909,12 @@ PP(pp_splice) SV **tmparyval = 0; MAGIC *mg; - if (SvRMAGICAL(ary) && (mg = mg_find((SV*)ary,'P'))) { - *MARK-- = mg->mg_obj; + if ((mg = SvTIED_mg((SV*)ary, 'P'))) { + *MARK-- = SvTIED_obj((SV*)ary, mg); PUSHMARK(MARK); PUTBACK; ENTER; - perl_call_method("SPLICE",GIMME_V); + call_method("SPLICE",GIMME_V); LEAVE; SPAGAIN; RETURN; @@ -2578,7 +2929,7 @@ PP(pp_splice) else offset -= PL_curcop->cop_arybase; if (offset < 0) - DIE(no_aelem, i); + DIE(aTHX_ PL_no_aelem, i); if (++MARK < SP) { length = SvIVx(*MARK++); if (length < 0) { @@ -2608,12 +2959,8 @@ PP(pp_splice) newlen = SP - MARK; diff = newlen - length; - if (newlen && !AvREAL(ary)) { - if (AvREIFY(ary)) - av_reify(ary); - else - assert(AvREAL(ary)); /* would leak, so croak */ - } + if (newlen && !AvREAL(ary) && AvREIFY(ary)) + av_reify(ary); if (diff < 0) { /* shrinking the area */ if (newlen) { @@ -2756,12 +3103,12 @@ PP(pp_push) register SV *sv = &PL_sv_undef; MAGIC *mg; - if (SvRMAGICAL(ary) && (mg = mg_find((SV*)ary,'P'))) { - *MARK-- = mg->mg_obj; + if ((mg = SvTIED_mg((SV*)ary, 'P'))) { + *MARK-- = SvTIED_obj((SV*)ary, mg); PUSHMARK(MARK); PUTBACK; ENTER; - perl_call_method("PUSH",G_SCALAR|G_DISCARD); + call_method("PUSH",G_SCALAR|G_DISCARD); LEAVE; SPAGAIN; } @@ -2812,12 +3159,12 @@ PP(pp_unshift) register I32 i = 0; MAGIC *mg; - if (SvRMAGICAL(ary) && (mg = mg_find((SV*)ary,'P'))) { - *MARK-- = mg->mg_obj; + if ((mg = SvTIED_mg((SV*)ary, 'P'))) { + *MARK-- = SvTIED_obj((SV*)ary, mg); PUSHMARK(MARK); PUTBACK; ENTER; - perl_call_method("UNSHIFT",G_SCALAR|G_DISCARD); + call_method("UNSHIFT",G_SCALAR|G_DISCARD); LEAVE; SPAGAIN; } @@ -2847,6 +3194,7 @@ PP(pp_reverse) *MARK++ = *SP; *SP-- = tmp; } + /* safe as long as stack cannot get extended in the above */ SP = oldsp; } else { @@ -2856,19 +3204,47 @@ PP(pp_reverse) dTARGET; STRLEN len; + SvUTF8_off(TARG); /* decontaminate */ if (SP - MARK > 1) do_join(TARG, &PL_sv_no, MARK, SP); else sv_setsv(TARG, (SP > MARK) ? *SP : DEFSV); up = SvPV_force(TARG, len); if (len > 1) { + if (DO_UTF8(TARG)) { /* first reverse each character */ + U8* s = (U8*)SvPVX(TARG); + U8* send = (U8*)(s + len); + while (s < send) { + if (*s < 0x80) { + s++; + continue; + } + else { + up = (char*)s; + s += UTF8SKIP(s); + down = (char*)(s - 1); + if (s > send || !((*down & 0xc0) == 0x80)) { + if (ckWARN_d(WARN_UTF8)) + Perl_warner(aTHX_ WARN_UTF8, + "Malformed UTF-8 character"); + break; + } + while (down > up) { + tmp = *up; + *up++ = *down; + *down-- = tmp; + } + } + } + up = SvPVX(TARG); + } down = SvPVX(TARG) + len - 1; while (down > up) { tmp = *up; *up++ = *down; *down-- = tmp; } - (void)SvPOK_only(TARG); + (void)SvPOK_only_UTF8(TARG); } SP = MARK + 1; SETTARG; @@ -2876,8 +3252,8 @@ PP(pp_reverse) RETURN; } -STATIC SV * -mul128(SV *sv, U8 m) +STATIC SV * +S_mul128(pTHX_ SV *sv, U8 m) { STRLEN len; char *s = SvPV(sv, len); @@ -2885,7 +3261,7 @@ mul128(SV *sv, U8 m) U32 i = 0; if (!strnEQ(s, "0000", 4)) { /* need to grow sv */ - SV *tmpNew = newSVpv("0000000000", 10); + SV *tmpNew = newSVpvn("0000000000", 10); sv_catsv(tmpNew, sv); SvREFCNT_dec(sv); /* free old sv */ @@ -2905,11 +3281,22 @@ mul128(SV *sv, U8 m) /* Explosives and implosives. */ +#if 'I' == 73 && 'J' == 74 +/* On an ASCII/ISO kind of system */ +#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a') +#else +/* + Some other sort of character set - use memchr() so we don't match + the null byte. + */ +#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') +#endif + PP(pp_unpack) { djSP; dPOPPOPssrl; - SV **oldsp = SP; + I32 start_sp_offset = SP - PL_stack_base; I32 gimme = GIMME_V; SV *sv; STRLEN llen; @@ -2922,6 +3309,7 @@ PP(pp_unpack) I32 datumtype; register I32 len; register I32 bits; + register char *str; /* These must not be in registers: */ I16 ashort; @@ -2934,21 +3322,25 @@ PP(pp_unpack) unsigned int auint; U32 aulong; #ifdef HAS_QUAD - unsigned Quad_t auquad; + Uquad_t auquad; #endif char *aptr; float afloat; double adouble; I32 checksum = 0; register U32 culong; - double cdouble; - static char* bitcount = 0; + NV cdouble; int commas = 0; + int star; +#ifdef PERL_NATINT_PACK + int natint; /* native integer */ + int unatint; /* unsigned native integer */ +#endif if (gimme != G_ARRAY) { /* arrange to do first one only */ /*SUPPRESS 530*/ for (patend = pat; !isALPHA(*patend) || *patend == 'x'; patend++) ; - if (strchr("aAbBhHP", *patend) || *pat == '%') { + if (strchr("aAZbBhHP", *patend) || *pat == '%') { patend++; while (isDIGIT(*patend) || *patend == '*') patend++; @@ -2959,27 +3351,54 @@ PP(pp_unpack) while (pat < patend) { reparse: datumtype = *pat++ & 0xFF; +#ifdef PERL_NATINT_PACK + natint = 0; +#endif if (isSPACE(datumtype)) continue; + if (datumtype == '#') { + while (pat < patend && *pat != '\n') + pat++; + continue; + } + if (*pat == '!') { + char *natstr = "sSiIlL"; + + if (strchr(natstr, datumtype)) { +#ifdef PERL_NATINT_PACK + natint = 1; +#endif + pat++; + } + else + DIE(aTHX_ "'!' allowed only after types %s", natstr); + } + star = 0; if (pat >= patend) len = 1; else if (*pat == '*') { len = strend - strbeg; /* long enough */ pat++; + star = 1; } else if (isDIGIT(*pat)) { len = *pat++ - '0'; - while (isDIGIT(*pat)) + while (isDIGIT(*pat)) { len = (len * 10) + (*pat++ - '0'); + if (len < 0) + DIE(aTHX_ "Repeat count in unpack overflows"); + } } else len = (datumtype != '@'); + redo_switch: switch(datumtype) { default: - croak("Invalid type in unpack: '%c'", (int)datumtype); + DIE(aTHX_ "Invalid type in unpack: '%c'", (int)datumtype); case ',': /* grandfather in commas but with a warning */ - if (commas++ == 0 && PL_dowarn) - warn("Invalid type in unpack: '%c'", (int)datumtype); + if (commas++ == 0 && ckWARN(WARN_UNPACK)) + Perl_warner(aTHX_ WARN_UNPACK, + "Invalid type in unpack: '%c'", (int)datumtype); break; case '%': if (len == 1 && pat[-1] != '1') @@ -2992,20 +3411,32 @@ PP(pp_unpack) break; case '@': if (len > strend - strbeg) - DIE("@ outside of string"); + DIE(aTHX_ "@ outside of string"); s = strbeg + len; break; case 'X': if (len > s - strbeg) - DIE("X outside of string"); + DIE(aTHX_ "X outside of string"); s -= len; break; case 'x': if (len > strend - s) - DIE("x outside of string"); + DIE(aTHX_ "x outside of string"); s += len; break; + case '/': + if (start_sp_offset >= SP - PL_stack_base) + DIE(aTHX_ "/ must follow a numeric type"); + datumtype = *pat++; + if (*pat == '*') + pat++; /* ignore '*' for compatibility with pack */ + if (isDIGIT(*pat)) + DIE(aTHX_ "/ cannot take a count" ); + len = POPi; + star = 0; + goto redo_switch; case 'A': + case 'Z': case 'a': if (len > strend - s) len = strend - s; @@ -3014,12 +3445,19 @@ PP(pp_unpack) sv = NEWSV(35, len); sv_setpvn(sv, s, len); s += len; - if (datumtype == 'A') { + if (datumtype == 'A' || datumtype == 'Z') { aptr = s; /* borrow register */ - s = SvPVX(sv) + len - 1; - while (s >= SvPVX(sv) && (!*s || isSPACE(*s))) - s--; - *++s = '\0'; + if (datumtype == 'Z') { /* 'Z' strips stuff after first null */ + s = SvPVX(sv); + while (*s) + s++; + } + else { /* 'A' strips both nulls and spaces */ + s = SvPVX(sv) + len - 1; + while (s >= SvPVX(sv) && (!*s || isSPACE(*s))) + s--; + *++s = '\0'; + } SvCUR_set(sv, s - SvPVX(sv)); s = aptr; /* unborrow register */ } @@ -3027,24 +3465,24 @@ PP(pp_unpack) break; case 'B': case 'b': - if (pat[-1] == '*' || len > (strend - s) * 8) + if (star || len > (strend - s) * 8) len = (strend - s) * 8; if (checksum) { - if (!bitcount) { - Newz(601, bitcount, 256, char); + if (!PL_bitcount) { + Newz(601, PL_bitcount, 256, char); for (bits = 1; bits < 256; bits++) { - if (bits & 1) bitcount[bits]++; - if (bits & 2) bitcount[bits]++; - if (bits & 4) bitcount[bits]++; - if (bits & 8) bitcount[bits]++; - if (bits & 16) bitcount[bits]++; - if (bits & 32) bitcount[bits]++; - if (bits & 64) bitcount[bits]++; - if (bits & 128) bitcount[bits]++; + if (bits & 1) PL_bitcount[bits]++; + if (bits & 2) PL_bitcount[bits]++; + if (bits & 4) PL_bitcount[bits]++; + if (bits & 8) PL_bitcount[bits]++; + if (bits & 16) PL_bitcount[bits]++; + if (bits & 32) PL_bitcount[bits]++; + if (bits & 64) PL_bitcount[bits]++; + if (bits & 128) PL_bitcount[bits]++; } } while (len >= 8) { - culong += bitcount[*(unsigned char*)s++]; + culong += PL_bitcount[*(unsigned char*)s++]; len -= 8; } if (len) { @@ -3067,8 +3505,7 @@ PP(pp_unpack) sv = NEWSV(35, len + 1); SvCUR_set(sv, len); SvPOK_on(sv); - aptr = pat; /* borrow register */ - pat = SvPVX(sv); + str = SvPVX(sv); if (datumtype == 'b') { aint = len; for (len = 0; len < aint; len++) { @@ -3076,7 +3513,7 @@ PP(pp_unpack) bits >>= 1; else bits = *s++; - *pat++ = '0' + (bits & 1); + *str++ = '0' + (bits & 1); } } else { @@ -3086,22 +3523,20 @@ PP(pp_unpack) bits <<= 1; else bits = *s++; - *pat++ = '0' + ((bits & 128) != 0); + *str++ = '0' + ((bits & 128) != 0); } } - *pat = '\0'; - pat = aptr; /* unborrow register */ + *str = '\0'; XPUSHs(sv_2mortal(sv)); break; case 'H': case 'h': - if (pat[-1] == '*' || len > (strend - s) * 2) + if (star || len > (strend - s) * 2) len = (strend - s) * 2; sv = NEWSV(35, len + 1); SvCUR_set(sv, len); SvPOK_on(sv); - aptr = pat; /* borrow register */ - pat = SvPVX(sv); + str = SvPVX(sv); if (datumtype == 'h') { aint = len; for (len = 0; len < aint; len++) { @@ -3109,7 +3544,7 @@ PP(pp_unpack) bits >>= 4; else bits = *s++; - *pat++ = PL_hexdigit[bits & 15]; + *str++ = PL_hexdigit[bits & 15]; } } else { @@ -3119,11 +3554,10 @@ PP(pp_unpack) bits <<= 4; else bits = *s++; - *pat++ = PL_hexdigit[(bits >> 4) & 15]; + *str++ = PL_hexdigit[(bits >> 4) & 15]; } } - *pat = '\0'; - pat = aptr; /* unborrow register */ + *str = '\0'; XPUSHs(sv_2mortal(sv)); break; case 'c': @@ -3171,67 +3605,166 @@ PP(pp_unpack) } } break; + case 'U': + if (len > strend - s) + len = strend - s; + if (checksum) { + while (len-- > 0 && s < strend) { + auint = utf8_to_uv((U8*)s, &along); + s += along; + if (checksum > 32) + cdouble += (NV)auint; + else + culong += auint; + } + } + else { + EXTEND(SP, len); + EXTEND_MORTAL(len); + while (len-- > 0 && s < strend) { + auint = utf8_to_uv((U8*)s, &along); + s += along; + sv = NEWSV(37, 0); + sv_setuv(sv, (UV)auint); + PUSHs(sv_2mortal(sv)); + } + } + break; case 's': +#if SHORTSIZE == SIZE16 along = (strend - s) / SIZE16; +#else + along = (strend - s) / (natint ? sizeof(short) : SIZE16); +#endif if (len > along) len = along; if (checksum) { - while (len-- > 0) { - COPY16(s, &ashort); - s += SIZE16; - culong += ashort; +#if SHORTSIZE != SIZE16 + if (natint) { + short ashort; + while (len-- > 0) { + COPYNN(s, &ashort, sizeof(short)); + s += sizeof(short); + culong += ashort; + + } + } + else +#endif + { + while (len-- > 0) { + COPY16(s, &ashort); +#if SHORTSIZE > SIZE16 + if (ashort > 32767) + ashort -= 65536; +#endif + s += SIZE16; + culong += ashort; + } } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - while (len-- > 0) { - COPY16(s, &ashort); - s += SIZE16; - sv = NEWSV(38, 0); - sv_setiv(sv, (IV)ashort); - PUSHs(sv_2mortal(sv)); +#if SHORTSIZE != SIZE16 + if (natint) { + short ashort; + while (len-- > 0) { + COPYNN(s, &ashort, sizeof(short)); + s += sizeof(short); + sv = NEWSV(38, 0); + sv_setiv(sv, (IV)ashort); + PUSHs(sv_2mortal(sv)); + } + } + else +#endif + { + while (len-- > 0) { + COPY16(s, &ashort); +#if SHORTSIZE > SIZE16 + if (ashort > 32767) + ashort -= 65536; +#endif + s += SIZE16; + sv = NEWSV(38, 0); + sv_setiv(sv, (IV)ashort); + PUSHs(sv_2mortal(sv)); + } } } break; case 'v': case 'n': case 'S': +#if SHORTSIZE == SIZE16 along = (strend - s) / SIZE16; +#else + unatint = natint && datumtype == 'S'; + along = (strend - s) / (unatint ? sizeof(unsigned short) : SIZE16); +#endif if (len > along) len = along; if (checksum) { - while (len-- > 0) { - COPY16(s, &aushort); - s += SIZE16; +#if SHORTSIZE != SIZE16 + if (unatint) { + unsigned short aushort; + while (len-- > 0) { + COPYNN(s, &aushort, sizeof(unsigned short)); + s += sizeof(unsigned short); + culong += aushort; + } + } + else +#endif + { + while (len-- > 0) { + COPY16(s, &aushort); + s += SIZE16; #ifdef HAS_NTOHS - if (datumtype == 'n') - aushort = PerlSock_ntohs(aushort); + if (datumtype == 'n') + aushort = PerlSock_ntohs(aushort); #endif #ifdef HAS_VTOHS - if (datumtype == 'v') - aushort = vtohs(aushort); + if (datumtype == 'v') + aushort = vtohs(aushort); #endif - culong += aushort; + culong += aushort; + } } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - while (len-- > 0) { - COPY16(s, &aushort); - s += SIZE16; - sv = NEWSV(39, 0); +#if SHORTSIZE != SIZE16 + if (unatint) { + unsigned short aushort; + while (len-- > 0) { + COPYNN(s, &aushort, sizeof(unsigned short)); + s += sizeof(unsigned short); + sv = NEWSV(39, 0); + sv_setiv(sv, (UV)aushort); + PUSHs(sv_2mortal(sv)); + } + } + else +#endif + { + while (len-- > 0) { + COPY16(s, &aushort); + s += SIZE16; + sv = NEWSV(39, 0); #ifdef HAS_NTOHS - if (datumtype == 'n') - aushort = PerlSock_ntohs(aushort); + if (datumtype == 'n') + aushort = PerlSock_ntohs(aushort); #endif #ifdef HAS_VTOHS - if (datumtype == 'v') - aushort = vtohs(aushort); + if (datumtype == 'v') + aushort = vtohs(aushort); #endif - sv_setiv(sv, (IV)aushort); - PUSHs(sv_2mortal(sv)); + sv_setiv(sv, (UV)aushort); + PUSHs(sv_2mortal(sv)); + } } } break; @@ -3244,7 +3777,7 @@ PP(pp_unpack) Copy(s, &aint, 1, int); s += sizeof(int); if (checksum > 32) - cdouble += (double)aint; + cdouble += (NV)aint; else culong += aint; } @@ -3259,7 +3792,25 @@ PP(pp_unpack) #ifdef __osf__ /* Without the dummy below unpack("i", pack("i",-1)) * return 0xFFffFFff instead of -1 for Digital Unix V4.0 - * cc with optimization turned on */ + * cc with optimization turned on. + * + * The bug was detected in + * DEC C V5.8-009 on Digital UNIX V4.0 (Rev. 1091) (V4.0E) + * with optimization (-O4) turned on. + * DEC C V5.2-040 on Digital UNIX V4.0 (Rev. 564) (V4.0B) + * does not have this problem even with -O4. + * + * This bug was reported as DECC_BUGS 1431 + * and tracked internally as GEM_BUGS 7775. + * + * The bug is fixed in + * Tru64 UNIX V5.0: Compaq C V6.1-006 or later + * UNIX V4.0F support: DEC C V5.9-006 or later + * UNIX V4.0E support: DEC C V5.8-011 or later + * and also in DTK. + * + * See also few lines later for the same bug. + */ (aint) ? sv_setiv(sv, (IV)aint) : #endif @@ -3277,7 +3828,7 @@ PP(pp_unpack) Copy(s, &auint, 1, unsigned int); s += sizeof(unsigned int); if (checksum > 32) - cdouble += (double)auint; + cdouble += (NV)auint; else culong += auint; } @@ -3289,78 +3840,164 @@ PP(pp_unpack) Copy(s, &auint, 1, unsigned int); s += sizeof(unsigned int); sv = NEWSV(41, 0); +#ifdef __osf__ + /* Without the dummy below unpack("I", pack("I",0xFFFFFFFF)) + * returns 1.84467440737096e+19 instead of 0xFFFFFFFF. + * See details few lines earlier. */ + (auint) ? + sv_setuv(sv, (UV)auint) : +#endif sv_setuv(sv, (UV)auint); PUSHs(sv_2mortal(sv)); } } break; case 'l': +#if LONGSIZE == SIZE32 along = (strend - s) / SIZE32; +#else + along = (strend - s) / (natint ? sizeof(long) : SIZE32); +#endif if (len > along) len = along; if (checksum) { - while (len-- > 0) { - COPY32(s, &along); - s += SIZE32; - if (checksum > 32) - cdouble += (double)along; - else - culong += along; +#if LONGSIZE != SIZE32 + if (natint) { + long along; + while (len-- > 0) { + COPYNN(s, &along, sizeof(long)); + s += sizeof(long); + if (checksum > 32) + cdouble += (NV)along; + else + culong += along; + } + } + else +#endif + { + while (len-- > 0) { + COPY32(s, &along); +#if LONGSIZE > SIZE32 + if (along > 2147483647) + along -= 4294967296; +#endif + s += SIZE32; + if (checksum > 32) + cdouble += (NV)along; + else + culong += along; + } } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - while (len-- > 0) { - COPY32(s, &along); - s += SIZE32; - sv = NEWSV(42, 0); - sv_setiv(sv, (IV)along); - PUSHs(sv_2mortal(sv)); +#if LONGSIZE != SIZE32 + if (natint) { + long along; + while (len-- > 0) { + COPYNN(s, &along, sizeof(long)); + s += sizeof(long); + sv = NEWSV(42, 0); + sv_setiv(sv, (IV)along); + PUSHs(sv_2mortal(sv)); + } + } + else +#endif + { + while (len-- > 0) { + COPY32(s, &along); +#if LONGSIZE > SIZE32 + if (along > 2147483647) + along -= 4294967296; +#endif + s += SIZE32; + sv = NEWSV(42, 0); + sv_setiv(sv, (IV)along); + PUSHs(sv_2mortal(sv)); + } } } break; case 'V': case 'N': case 'L': +#if LONGSIZE == SIZE32 along = (strend - s) / SIZE32; +#else + unatint = natint && datumtype == 'L'; + along = (strend - s) / (unatint ? sizeof(unsigned long) : SIZE32); +#endif if (len > along) len = along; if (checksum) { - while (len-- > 0) { - COPY32(s, &aulong); - s += SIZE32; +#if LONGSIZE != SIZE32 + if (unatint) { + unsigned long aulong; + while (len-- > 0) { + COPYNN(s, &aulong, sizeof(unsigned long)); + s += sizeof(unsigned long); + if (checksum > 32) + cdouble += (NV)aulong; + else + culong += aulong; + } + } + else +#endif + { + while (len-- > 0) { + COPY32(s, &aulong); + s += SIZE32; #ifdef HAS_NTOHL - if (datumtype == 'N') - aulong = PerlSock_ntohl(aulong); + if (datumtype == 'N') + aulong = PerlSock_ntohl(aulong); #endif #ifdef HAS_VTOHL - if (datumtype == 'V') - aulong = vtohl(aulong); + if (datumtype == 'V') + aulong = vtohl(aulong); #endif - if (checksum > 32) - cdouble += (double)aulong; - else - culong += aulong; + if (checksum > 32) + cdouble += (NV)aulong; + else + culong += aulong; + } } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - while (len-- > 0) { - COPY32(s, &aulong); - s += SIZE32; +#if LONGSIZE != SIZE32 + if (unatint) { + unsigned long aulong; + while (len-- > 0) { + COPYNN(s, &aulong, sizeof(unsigned long)); + s += sizeof(unsigned long); + sv = NEWSV(43, 0); + sv_setuv(sv, (UV)aulong); + PUSHs(sv_2mortal(sv)); + } + } + else +#endif + { + while (len-- > 0) { + COPY32(s, &aulong); + s += SIZE32; #ifdef HAS_NTOHL - if (datumtype == 'N') - aulong = PerlSock_ntohl(aulong); + if (datumtype == 'N') + aulong = PerlSock_ntohl(aulong); #endif #ifdef HAS_VTOHL - if (datumtype == 'V') - aulong = vtohl(aulong); + if (datumtype == 'V') + aulong = vtohl(aulong); #endif - sv = NEWSV(43, 0); - sv_setuv(sv, (UV)aulong); - PUSHs(sv_2mortal(sv)); + sv = NEWSV(43, 0); + sv_setuv(sv, (UV)aulong); + PUSHs(sv_2mortal(sv)); + } } } break; @@ -3402,8 +4039,9 @@ PP(pp_unpack) } else if (++bytes >= sizeof(UV)) { /* promote to string */ char *t; + STRLEN n_a; - sv = newSVpvf("%.*Vu", (int)TYPE_DIGITS(UV), auv); + sv = Perl_newSVpvf(aTHX_ "%.*Vu", (int)TYPE_DIGITS(UV), auv); while (s < strend) { sv = mul128(sv, *s & 0x7f); if (!(*s++ & 0x80)) { @@ -3411,7 +4049,7 @@ PP(pp_unpack) break; } } - t = SvPV(sv, PL_na); + t = SvPV(sv, n_a); while (*t == '0') t++; sv_chop(sv, t); @@ -3421,7 +4059,7 @@ PP(pp_unpack) } } if ((s >= strend) && bytes) - croak("Unterminated compressed integer"); + DIE(aTHX_ "Unterminated compressed integer"); } break; case 'P': @@ -3455,7 +4093,7 @@ PP(pp_unpack) if (aquad >= IV_MIN && aquad <= IV_MAX) sv_setiv(sv, (IV)aquad); else - sv_setnv(sv, (double)aquad); + sv_setnv(sv, (NV)aquad); PUSHs(sv_2mortal(sv)); } break; @@ -3466,17 +4104,17 @@ PP(pp_unpack) EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0) { - if (s + sizeof(unsigned Quad_t) > strend) + if (s + sizeof(Uquad_t) > strend) auquad = 0; else { - Copy(s, &auquad, 1, unsigned Quad_t); - s += sizeof(unsigned Quad_t); + Copy(s, &auquad, 1, Uquad_t); + s += sizeof(Uquad_t); } sv = NEWSV(43, 0); if (auquad <= UV_MAX) sv_setuv(sv, (UV)auquad); else - sv_setnv(sv, (double)auquad); + sv_setnv(sv, (NV)auquad); PUSHs(sv_2mortal(sv)); } break; @@ -3501,7 +4139,7 @@ PP(pp_unpack) Copy(s, &afloat, 1, float); s += sizeof(float); sv = NEWSV(47, 0); - sv_setnv(sv, (double)afloat); + sv_setnv(sv, (NV)afloat); PUSHs(sv_2mortal(sv)); } } @@ -3525,37 +4163,54 @@ PP(pp_unpack) Copy(s, &adouble, 1, double); s += sizeof(double); sv = NEWSV(48, 0); - sv_setnv(sv, (double)adouble); + sv_setnv(sv, (NV)adouble); PUSHs(sv_2mortal(sv)); } } break; case 'u': + /* MKS: + * Initialise the decode mapping. By using a table driven + * algorithm, the code will be character-set independent + * (and just as fast as doing character arithmetic) + */ + if (PL_uudmap['M'] == 0) { + int i; + + for (i = 0; i < sizeof(PL_uuemap); i += 1) + PL_uudmap[(U8)PL_uuemap[i]] = i; + /* + * Because ' ' and '`' map to the same value, + * we need to decode them both the same. + */ + PL_uudmap[' '] = 0; + } + along = (strend - s) * 3 / 4; sv = NEWSV(42, along); if (along) SvPOK_on(sv); - while (s < strend && *s > ' ' && *s < 'a') { + while (s < strend && *s > ' ' && ISUUCHAR(*s)) { I32 a, b, c, d; char hunk[4]; hunk[3] = '\0'; - len = (*s++ - ' ') & 077; + len = PL_uudmap[*(U8*)s++] & 077; while (len > 0) { - if (s < strend && *s >= ' ') - a = (*s++ - ' ') & 077; - else - a = 0; - if (s < strend && *s >= ' ') - b = (*s++ - ' ') & 077; - else - b = 0; - if (s < strend && *s >= ' ') - c = (*s++ - ' ') & 077; - else - c = 0; - if (s < strend && *s >= ' ') - d = (*s++ - ' ') & 077; + if (s < strend && ISUUCHAR(*s)) + a = PL_uudmap[*(U8*)s++] & 077; + else + a = 0; + if (s < strend && ISUUCHAR(*s)) + b = PL_uudmap[*(U8*)s++] & 077; + else + b = 0; + if (s < strend && ISUUCHAR(*s)) + c = PL_uudmap[*(U8*)s++] & 077; + else + c = 0; + if (s < strend && ISUUCHAR(*s)) + d = PL_uudmap[*(U8*)s++] & 077; else d = 0; hunk[0] = (a << 2) | (b >> 4); @@ -3575,8 +4230,8 @@ PP(pp_unpack) if (checksum) { sv = NEWSV(42, 0); if (strchr("fFdD", datumtype) || - (checksum > 32 && strchr("iIlLN", datumtype)) ) { - double trouble; + (checksum > 32 && strchr("iIlLNU", datumtype)) ) { + NV trouble; adouble = 1.0; while (checksum >= 16) { @@ -3592,7 +4247,7 @@ PP(pp_unpack) along = (1 << checksum) - 1; while (cdouble < 0.0) cdouble += adouble; - cdouble = modf(cdouble / adouble, &trouble) * adouble; + cdouble = Perl_modf(cdouble / adouble, &trouble) * adouble; sv_setnv(sv, cdouble); } else { @@ -3606,40 +4261,45 @@ PP(pp_unpack) checksum = 0; } } - if (SP == oldsp && gimme == G_SCALAR) + if (SP - PL_stack_base == start_sp_offset && gimme == G_SCALAR) PUSHs(&PL_sv_undef); RETURN; } STATIC void -doencodes(register SV *sv, register char *s, register I32 len) +S_doencodes(pTHX_ register SV *sv, register char *s, register I32 len) { char hunk[5]; - *hunk = len + ' '; + *hunk = PL_uuemap[len]; sv_catpvn(sv, hunk, 1); hunk[4] = '\0'; - while (len > 0) { - hunk[0] = ' ' + (077 & (*s >> 2)); - hunk[1] = ' ' + (077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))); - hunk[2] = ' ' + (077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))); - hunk[3] = ' ' + (077 & (s[2] & 077)); + while (len > 2) { + hunk[0] = PL_uuemap[(077 & (*s >> 2))]; + hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017)))]; + hunk[2] = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))]; + hunk[3] = PL_uuemap[(077 & (s[2] & 077))]; sv_catpvn(sv, hunk, 4); s += 3; len -= 3; } - for (s = SvPVX(sv); *s; s++) { - if (*s == ' ') - *s = '`'; + if (len > 0) { + char r = (len > 1 ? s[1] : '\0'); + hunk[0] = PL_uuemap[(077 & (*s >> 2))]; + hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((r >> 4) & 017)))]; + hunk[2] = PL_uuemap[(077 & ((r << 2) & 074))]; + hunk[3] = PL_uuemap[0]; + sv_catpvn(sv, hunk, 4); } sv_catpvn(sv, "\n", 1); } -STATIC SV * -is_an_int(char *s, STRLEN l) +STATIC SV * +S_is_an_int(pTHX_ char *s, STRLEN l) { - SV *result = newSVpv("", l); - char *result_c = SvPV(result, PL_na); /* convenience */ + STRLEN n_a; + SV *result = newSVpvn(s, l); + char *result_c = SvPV(result, n_a); /* convenience */ char *out = result_c; bool skip = 1; bool ignore = 0; @@ -3683,10 +4343,9 @@ is_an_int(char *s, STRLEN l) return (result); } +/* pnum must be '\0' terminated */ STATIC int -div128(SV *pnum, bool *done) - /* must be '\0' terminated */ - +S_div128(pTHX_ SV *pnum, bool *done) { STRLEN len; char *s = SvPV(pnum, len); @@ -3719,6 +4378,7 @@ PP(pp_pack) register I32 items; STRLEN fromlen; register char *pat = SvPVx(*++MARK, fromlen); + char *patcopy; register char *patend = pat + fromlen; register I32 len; I32 datumtype; @@ -3736,41 +4396,82 @@ PP(pp_pack) U32 aulong; #ifdef HAS_QUAD Quad_t aquad; - unsigned Quad_t auquad; + Uquad_t auquad; #endif char *aptr; float afloat; double adouble; int commas = 0; +#ifdef PERL_NATINT_PACK + int natint; /* native integer */ +#endif items = SP - MARK; MARK++; sv_setpvn(cat, "", 0); + patcopy = pat; while (pat < patend) { -#define NEXTFROM (items-- > 0 ? *MARK++ : &PL_sv_no) + SV *lengthcode = Nullsv; +#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *MARK++ : &PL_sv_no) datumtype = *pat++ & 0xFF; - if (isSPACE(datumtype)) +#ifdef PERL_NATINT_PACK + natint = 0; +#endif + if (isSPACE(datumtype)) { + patcopy++; + continue; + } + if (datumtype == 'U' && pat == patcopy+1) + SvUTF8_on(cat); + if (datumtype == '#') { + while (pat < patend && *pat != '\n') + pat++; continue; + } + if (*pat == '!') { + char *natstr = "sSiIlL"; + + if (strchr(natstr, datumtype)) { +#ifdef PERL_NATINT_PACK + natint = 1; +#endif + pat++; + } + else + DIE(aTHX_ "'!' allowed only after types %s", natstr); + } if (*pat == '*') { len = strchr("@Xxu", datumtype) ? 0 : items; pat++; } else if (isDIGIT(*pat)) { len = *pat++ - '0'; - while (isDIGIT(*pat)) + while (isDIGIT(*pat)) { len = (len * 10) + (*pat++ - '0'); + if (len < 0) + DIE(aTHX_ "Repeat count in pack overflows"); + } } else len = 1; + if (*pat == '/') { + ++pat; + if ((*pat != 'a' && *pat != 'A' && *pat != 'Z') || pat[1] != '*') + DIE(aTHX_ "/ must be followed by a*, A* or Z*"); + lengthcode = sv_2mortal(newSViv(sv_len(items > 0 + ? *MARK : &PL_sv_no) + + (*pat == 'Z' ? 1 : 0))); + } switch(datumtype) { default: - croak("Invalid type in pack: '%c'", (int)datumtype); + DIE(aTHX_ "Invalid type in pack: '%c'", (int)datumtype); case ',': /* grandfather in commas but with a warning */ - if (commas++ == 0 && PL_dowarn) - warn("Invalid type in pack: '%c'", (int)datumtype); + if (commas++ == 0 && ckWARN(WARN_PACK)) + Perl_warner(aTHX_ WARN_PACK, + "Invalid type in pack: '%c'", (int)datumtype); break; case '%': - DIE("%% may only be used in unpack"); + DIE(aTHX_ "%% may only be used in unpack"); case '@': len -= SvCUR(cat); if (len > 0) @@ -3782,7 +4483,7 @@ PP(pp_pack) case 'X': shrink: if (SvCUR(cat) < len) - DIE("X outside of string"); + DIE(aTHX_ "X outside of string"); SvCUR(cat) -= len; *SvEND(cat) = '\0'; break; @@ -3795,13 +4496,20 @@ PP(pp_pack) sv_catpvn(cat, null10, len); break; case 'A': + case 'Z': case 'a': fromstr = NEXTFROM; aptr = SvPV(fromstr, fromlen); - if (pat[-1] == '*') + if (pat[-1] == '*') { len = fromlen; - if (fromlen > len) + if (datumtype == 'Z') + ++len; + } + if (fromlen >= len) { sv_catpvn(cat, aptr, len); + if (datumtype == 'Z') + *(SvEND(cat)-1) = '\0'; + } else { sv_catpvn(cat, aptr, fromlen); len -= fromlen; @@ -3824,15 +4532,14 @@ PP(pp_pack) case 'B': case 'b': { - char *savepat = pat; + register char *str; I32 saveitems; fromstr = NEXTFROM; saveitems = items; - aptr = SvPV(fromstr, fromlen); + str = SvPV(fromstr, fromlen); if (pat[-1] == '*') len = fromlen; - pat = aptr; aint = SvCUR(cat); SvCUR(cat) += (len+7)/8; SvGROW(cat, SvCUR(cat) + 1); @@ -3843,7 +4550,7 @@ PP(pp_pack) items = 0; if (datumtype == 'B') { for (len = 0; len++ < aint;) { - items |= *pat++ & 1; + items |= *str++ & 1; if (len & 7) items <<= 1; else { @@ -3854,7 +4561,7 @@ PP(pp_pack) } else { for (len = 0; len++ < aint;) { - if (*pat++ & 1) + if (*str++ & 1) items |= 128; if (len & 7) items >>= 1; @@ -3871,26 +4578,24 @@ PP(pp_pack) items >>= 7 - (aint & 7); *aptr++ = items & 0xff; } - pat = SvPVX(cat) + SvCUR(cat); - while (aptr <= pat) + str = SvPVX(cat) + SvCUR(cat); + while (aptr <= str) *aptr++ = '\0'; - pat = savepat; items = saveitems; } break; case 'H': case 'h': { - char *savepat = pat; + register char *str; I32 saveitems; fromstr = NEXTFROM; saveitems = items; - aptr = SvPV(fromstr, fromlen); + str = SvPV(fromstr, fromlen); if (pat[-1] == '*') len = fromlen; - pat = aptr; aint = SvCUR(cat); SvCUR(cat) += (len+1)/2; SvGROW(cat, SvCUR(cat) + 1); @@ -3901,10 +4606,10 @@ PP(pp_pack) items = 0; if (datumtype == 'H') { for (len = 0; len++ < aint;) { - if (isALPHA(*pat)) - items |= ((*pat++ & 15) + 9) & 15; + if (isALPHA(*str)) + items |= ((*str++ & 15) + 9) & 15; else - items |= *pat++ & 15; + items |= *str++ & 15; if (len & 1) items <<= 4; else { @@ -3915,10 +4620,10 @@ PP(pp_pack) } else { for (len = 0; len++ < aint;) { - if (isALPHA(*pat)) - items |= (((*pat++ & 15) + 9) & 15) << 4; + if (isALPHA(*str)) + items |= (((*str++ & 15) + 9) & 15) << 4; else - items |= (*pat++ & 15) << 4; + items |= (*str++ & 15) << 4; if (len & 1) items >>= 4; else { @@ -3929,11 +4634,10 @@ PP(pp_pack) } if (aint & 1) *aptr++ = items & 0xff; - pat = SvPVX(cat) + SvCUR(cat); - while (aptr <= pat) + str = SvPVX(cat) + SvCUR(cat); + while (aptr <= str) *aptr++ = '\0'; - pat = savepat; items = saveitems; } break; @@ -3946,6 +4650,16 @@ PP(pp_pack) sv_catpvn(cat, &achar, sizeof(char)); } break; + case 'U': + while (len-- > 0) { + fromstr = NEXTFROM; + auint = SvUV(fromstr); + SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN); + SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint) + - SvPVX(cat)); + } + *SvEND(cat) = '\0'; + break; /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */ case 'f': case 'F': @@ -3984,11 +4698,48 @@ PP(pp_pack) } break; case 'S': +#if SHORTSIZE != SIZE16 + if (natint) { + unsigned short aushort; + + while (len-- > 0) { + fromstr = NEXTFROM; + aushort = SvUV(fromstr); + sv_catpvn(cat, (char *)&aushort, sizeof(unsigned short)); + } + } + else +#endif + { + U16 aushort; + + while (len-- > 0) { + fromstr = NEXTFROM; + aushort = (U16)SvUV(fromstr); + CAT16(cat, &aushort); + } + + } + break; case 's': - while (len-- > 0) { - fromstr = NEXTFROM; - ashort = (I16)SvIV(fromstr); - CAT16(cat, &ashort); +#if SHORTSIZE != SIZE16 + if (natint) { + short ashort; + + while (len-- > 0) { + fromstr = NEXTFROM; + ashort = SvIV(fromstr); + sv_catpvn(cat, (char *)&ashort, sizeof(short)); + } + } + else +#endif + { + while (len-- > 0) { + fromstr = NEXTFROM; + ashort = (I16)SvIV(fromstr); + CAT16(cat, &ashort); + } } break; case 'I': @@ -4001,26 +4752,26 @@ PP(pp_pack) case 'w': while (len-- > 0) { fromstr = NEXTFROM; - adouble = floor(SvNV(fromstr)); + adouble = Perl_floor(SvNV(fromstr)); if (adouble < 0) - croak("Cannot compress negative numbers"); + DIE(aTHX_ "Cannot compress negative numbers"); if ( -#ifdef BW_BITS - adouble <= BW_MASK +#if UVSIZE > 4 && UVSIZE >= NVSIZE + adouble <= 0xffffffff #else -#ifdef CXUX_BROKEN_CONSTANT_CONVERT +# ifdef CXUX_BROKEN_CONSTANT_CONVERT adouble <= UV_MAX_cxux -#else +# else adouble <= UV_MAX -#endif +# endif #endif ) { char buf[1 + sizeof(UV)]; char *in = buf + sizeof(buf); - UV auv = U_V(adouble);; + UV auv = U_V(adouble); do { *--in = (auv & 0x7f) | 0x80; @@ -4038,7 +4789,7 @@ PP(pp_pack) /* Copy string and check for compliance */ from = SvPV(fromstr, len); if ((norm = is_an_int(from, len)) == NULL) - croak("can compress only unsigned integer"); + DIE(aTHX_ "can compress only unsigned integer"); New('w', result, len, char); in = result + len; @@ -4058,14 +4809,14 @@ PP(pp_pack) double next = floor(adouble / 128); *--in = (unsigned char)(adouble - (next * 128)) | 0x80; if (--in < buf) /* this cannot happen ;-) */ - croak ("Cannot compress integer"); + DIE(aTHX_ "Cannot compress integer"); adouble = next; } while (adouble > 0); buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ sv_catpvn(cat, in, (buf + sizeof(buf)) - in); } else - croak("Cannot compress non integer"); + DIE(aTHX_ "Cannot compress non integer"); } break; case 'i': @@ -4096,25 +4847,53 @@ PP(pp_pack) } break; case 'L': - while (len-- > 0) { - fromstr = NEXTFROM; - aulong = SvUV(fromstr); - CAT32(cat, &aulong); +#if LONGSIZE != SIZE32 + if (natint) { + unsigned long aulong; + + while (len-- > 0) { + fromstr = NEXTFROM; + aulong = SvUV(fromstr); + sv_catpvn(cat, (char *)&aulong, sizeof(unsigned long)); + } + } + else +#endif + { + while (len-- > 0) { + fromstr = NEXTFROM; + aulong = SvUV(fromstr); + CAT32(cat, &aulong); + } } break; case 'l': - while (len-- > 0) { - fromstr = NEXTFROM; - along = SvIV(fromstr); - CAT32(cat, &along); +#if LONGSIZE != SIZE32 + if (natint) { + long along; + + while (len-- > 0) { + fromstr = NEXTFROM; + along = SvIV(fromstr); + sv_catpvn(cat, (char *)&along, sizeof(long)); + } + } + else +#endif + { + while (len-- > 0) { + fromstr = NEXTFROM; + along = SvIV(fromstr); + CAT32(cat, &along); + } } break; #ifdef HAS_QUAD case 'Q': while (len-- > 0) { fromstr = NEXTFROM; - auquad = (unsigned Quad_t)SvIV(fromstr); - sv_catpvn(cat, (char*)&auquad, sizeof(unsigned Quad_t)); + auquad = (Uquad_t)SvUV(fromstr); + sv_catpvn(cat, (char*)&auquad, sizeof(Uquad_t)); } break; case 'q': @@ -4124,7 +4903,7 @@ PP(pp_pack) sv_catpvn(cat, (char*)&aquad, sizeof(Quad_t)); } break; -#endif /* HAS_QUAD */ +#endif case 'P': len = 1; /* assume SV is correct length */ /* FALL THROUGH */ @@ -4134,17 +4913,23 @@ PP(pp_pack) if (fromstr == &PL_sv_undef) aptr = NULL; else { + STRLEN n_a; /* XXX better yet, could spirit away the string to * a safe spot and hang on to it until the result * of pack() (and all copies of the result) are * gone. */ - if (PL_dowarn && (SvTEMP(fromstr) || SvPADTMP(fromstr))) - warn("Attempt to pack pointer to temporary value"); + if (ckWARN(WARN_PACK) && (SvTEMP(fromstr) + || (SvPADTMP(fromstr) + && !SvREADONLY(fromstr)))) + { + Perl_warner(aTHX_ WARN_PACK, + "Attempt to pack pointer to temporary value"); + } if (SvPOK(fromstr) || SvNIOK(fromstr)) - aptr = SvPV(fromstr,PL_na); + aptr = SvPV(fromstr,n_a); else - aptr = SvPV_force(fromstr,PL_na); + aptr = SvPV_force(fromstr,n_a); } sv_catpvn(cat, (char*)&aptr, sizeof(char*)); } @@ -4211,14 +4996,19 @@ PP(pp_split) pm = (PMOP*)POPs; #endif if (!pm || !s) - DIE("panic: do_split"); + DIE(aTHX_ "panic: do_split"); rx = pm->op_pmregexp; TAINT_IF((pm->op_pmflags & PMf_LOCALE) && (pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE))); - if (pm->op_pmreplroot) + if (pm->op_pmreplroot) { +#ifdef USE_ITHREADS + ary = GvAVn((GV*)PL_curpad[(PADOFFSET)pm->op_pmreplroot]); +#else ary = GvAVn((GV*)pm->op_pmreplroot); +#endif + } else if (gimme != G_ARRAY) #ifdef USE_THREADS ary = (AV*)PL_curpad[0]; @@ -4233,13 +5023,14 @@ PP(pp_split) av_extend(ary,0); av_clear(ary); SPAGAIN; - if (SvRMAGICAL(ary) && (mg = mg_find((SV *) ary, 'P'))) { + if ((mg = SvTIED_mg((SV*)ary, 'P'))) { PUSHMARK(SP); - XPUSHs(mg->mg_obj); + XPUSHs(SvTIED_obj((SV*)ary, mg)); } else { if (!AvREAL(ary)) { AvREAL_on(ary); + AvREIFY_off(ary); for (i = AvFILLp(ary); i >= 0; i--) AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */ } @@ -4305,15 +5096,19 @@ PP(pp_split) s = m; } } - else if (rx->check_substr && !rx->nparens + else if ((rx->reganch & RE_USE_INTUIT) && !rx->nparens && (rx->reganch & ROPT_CHECK_ALL) && !(rx->reganch & ROPT_ANCH)) { - i = SvCUR(rx->check_substr); - if (i == 1 && !SvTAIL(rx->check_substr)) { - i = *SvPVX(rx->check_substr); + int tail = (rx->reganch & RE_INTUIT_TAIL); + SV *csv = CALLREG_INTUIT_STRING(aTHX_ rx); + char c; + + len = rx->minlen; + if (len == 1 && !tail) { + c = *SvPV(csv,len); while (--limit) { /*SUPPRESS 530*/ - for (m = s; m < strend && *m != i; m++) ; + for (m = s; m < strend && *m != c; m++) ; if (m >= strend) break; dstr = NEWSV(30, m-s); @@ -4327,8 +5122,8 @@ PP(pp_split) else { #ifndef lint while (s < strend && --limit && - (m=fbm_instr((unsigned char*)s, (unsigned char*)strend, - rx->check_substr, 0)) ) + (m = fbm_instr((unsigned char*)s, (unsigned char*)strend, + csv, PL_multiline ? FBMrf_MULTILINE : 0)) ) #endif { dstr = NEWSV(31, m-s); @@ -4336,25 +5131,28 @@ PP(pp_split) if (make_mortal) sv_2mortal(dstr); XPUSHs(dstr); - s = m + i; + s = m + len; /* Fake \n at the end */ } } } else { maxiters += (strend - s) * rx->nparens; - while (s < strend && --limit && - CALLREGEXEC(rx, s, strend, orig, 1, Nullsv, NULL, 0)) + while (s < strend && --limit +/* && (!rx->check_substr + || ((s = CALLREG_INTUIT_START(aTHX_ rx, sv, s, strend, + 0, NULL)))) +*/ && CALLREGEXEC(aTHX_ rx, s, strend, orig, + 1 /* minend */, sv, NULL, 0)) { TAINT_IF(RX_MATCH_TAINTED(rx)); - if (rx->subbase - && rx->subbase != orig) { + if (RX_MATCH_COPIED(rx) && rx->subbeg != orig) { m = s; s = orig; - orig = rx->subbase; + orig = rx->subbeg; s = orig + (m - s); strend = s + (strend - m); } - m = rx->startp[0]; + m = rx->startp[0] + orig; dstr = NEWSV(32, m-s); sv_setpvn(dstr, s, m-s); if (make_mortal) @@ -4362,8 +5160,8 @@ PP(pp_split) XPUSHs(dstr); if (rx->nparens) { for (i = 1; i <= rx->nparens; i++) { - s = rx->startp[i]; - m = rx->endp[i]; + s = rx->startp[i] + orig; + m = rx->endp[i] + orig; if (m && s) { dstr = NEWSV(33, m-s); sv_setpvn(dstr, s, m-s); @@ -4375,14 +5173,14 @@ PP(pp_split) XPUSHs(dstr); } } - s = rx->endp[0]; + s = rx->endp[0] + orig; } } LEAVE_SCOPE(oldsave); iters = (SP - PL_stack_base) - base; if (iters > maxiters) - DIE("Split loop"); + DIE(aTHX_ "Split loop"); /* keep field after final delim? */ if (s < strend || (iters && origlimit)) { @@ -4416,7 +5214,7 @@ PP(pp_split) else { PUTBACK; ENTER; - perl_call_method("PUSH",G_SCALAR|G_DISCARD); + call_method("PUSH",G_SCALAR|G_DISCARD); LEAVE; SPAGAIN; if (gimme == G_ARRAY) { @@ -4444,20 +5242,20 @@ PP(pp_split) #ifdef USE_THREADS void -unlock_condpair(void *svv) +Perl_unlock_condpair(pTHX_ void *svv) { dTHR; MAGIC *mg = mg_find((SV*)svv, 'm'); if (!mg) - croak("panic: unlock_condpair unlocking non-mutex"); + Perl_croak(aTHX_ "panic: unlock_condpair unlocking non-mutex"); MUTEX_LOCK(MgMUTEXP(mg)); if (MgOWNER(mg) != thr) - croak("panic: unlock_condpair unlocking mutex that we don't own"); + Perl_croak(aTHX_ "panic: unlock_condpair unlocking mutex that we don't own"); MgOWNER(mg) = 0; COND_SIGNAL(MgOWNERCONDP(mg)); - DEBUG_L(PerlIO_printf(PerlIO_stderr(), "0x%lx: unlock 0x%lx\n", - (unsigned long)thr, (unsigned long)svv);) + DEBUG_S(PerlIO_printf(Perl_debug_log, "0x%"UVxf": unlock 0x%"UVxf"\n", + PTR2UV(thr), PTR2UV(svv));) MUTEX_UNLOCK(MgMUTEXP(mg)); } #endif /* USE_THREADS */ @@ -4468,25 +5266,7 @@ PP(pp_lock) dTOPss; SV *retsv = sv; #ifdef USE_THREADS - MAGIC *mg; - - if (SvROK(sv)) - sv = SvRV(sv); - - mg = condpair_magic(sv); - MUTEX_LOCK(MgMUTEXP(mg)); - if (MgOWNER(mg) == thr) - MUTEX_UNLOCK(MgMUTEXP(mg)); - else { - while (MgOWNER(mg)) - COND_WAIT(MgOWNERCONDP(mg), MgMUTEXP(mg)); - MgOWNER(mg) = thr; - DEBUG_L(PerlIO_printf(PerlIO_stderr(), "0x%lx: pp_lock lock 0x%lx\n", - (unsigned long)thr, (unsigned long)sv);) - MUTEX_UNLOCK(MgMUTEXP(mg)); - SvREFCNT_inc(sv); /* keep alive until magic_mutexfree */ - save_destructor(unlock_condpair, sv); - } + sv_lock(sv); #endif /* USE_THREADS */ if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV || SvTYPE(retsv) == SVt_PVCV) { @@ -4498,8 +5278,8 @@ PP(pp_lock) PP(pp_threadsv) { - djSP; #ifdef USE_THREADS + djSP; EXTEND(SP, 1); if (PL_op->op_private & OPpLVAL_INTRO) PUSHs(*save_threadsv(PL_op->op_targ)); @@ -4507,6 +5287,6 @@ PP(pp_threadsv) PUSHs(THREADSV(PL_op->op_targ)); RETURN; #else - DIE("tried to access per-thread data in non-threaded perl"); + DIE(aTHX_ "tried to access per-thread data in non-threaded perl"); #endif /* USE_THREADS */ }