/* pp.c
*
- * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
+ * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*/
/*
- * "It's a big house this, and very peculiar. Always a bit more to discover,
- * and no knowing what you'll find around a corner. And Elves, sir!" --Samwise
+ * 'It's a big house this, and very peculiar. Always a bit more
+ * to discover, and no knowing what you'll find round a corner.
+ * And Elves, sir!' --Samwise Gamgee
+ *
+ * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
*/
/* This file contains general pp ("push/pop") functions that execute the
{
dVAR; dSP; dTARGET;
I32 gimme;
+ assert(SvTYPE(TARG) == SVt_PVAV);
if (PL_op->op_private & OPpLVAL_INTRO)
if (!(PL_op->op_private & OPpPAD_STATE))
SAVECLEARSV(PAD_SVl(PL_op->op_targ));
}
gimme = GIMME_V;
if (gimme == G_ARRAY) {
- const I32 maxarg = AvFILL((AV*)TARG) + 1;
+ const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
EXTEND(SP, maxarg);
if (SvMAGICAL(TARG)) {
U32 i;
for (i=0; i < (U32)maxarg; i++) {
- SV * const * const svp = av_fetch((AV*)TARG, i, FALSE);
+ SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
SP[i+1] = (svp) ? *svp : &PL_sv_undef;
}
}
else {
- Copy(AvARRAY((AV*)TARG), SP+1, maxarg, SV*);
+ Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
}
SP += maxarg;
}
else if (gimme == G_SCALAR) {
SV* const sv = sv_newmortal();
- const I32 maxarg = AvFILL((AV*)TARG) + 1;
+ const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
sv_setiv(sv, maxarg);
PUSHs(sv);
}
dVAR; dSP; dTARGET;
I32 gimme;
+ assert(SvTYPE(TARG) == SVt_PVHV);
XPUSHs(TARG);
if (PL_op->op_private & OPpLVAL_INTRO)
if (!(PL_op->op_private & OPpPAD_STATE))
RETURNOP(do_kv());
}
else if (gimme == G_SCALAR) {
- SV* const sv = Perl_hv_scalar(aTHX_ (HV*)TARG);
+ SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
SETs(sv);
}
RETURN;
/* Translations. */
+const char S_no_symref_sv[] =
+ "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
+
PP(pp_rv2gv)
{
dVAR; dSP; dTOPss;
sv = SvRV(sv);
if (SvTYPE(sv) == SVt_PVIO) {
- GV * const gv = (GV*) sv_newmortal();
+ GV * const gv = MUTABLE_GV(sv_newmortal());
gv_init(gv, 0, "", 0, 0);
- GvIOp(gv) = (IO *)sv;
+ GvIOp(gv) = MUTABLE_IO(sv);
SvREFCNT_inc_void_NN(sv);
- sv = (SV*) gv;
+ sv = MUTABLE_SV(gv);
}
- else if (SvTYPE(sv) != SVt_PVGV)
+ else if (!isGV_with_GP(sv))
DIE(aTHX_ "Not a GLOB reference");
}
else {
- if (SvTYPE(sv) != SVt_PVGV) {
+ if (!isGV_with_GP(sv)) {
if (SvGMAGICAL(sv)) {
mg_get(sv);
if (SvROK(sv))
* NI-S 1999/05/07
*/
if (SvREADONLY(sv))
- Perl_croak(aTHX_ PL_no_modify);
+ Perl_croak(aTHX_ "%s", PL_no_modify);
if (PL_op->op_private & OPpDEREF) {
GV *gv;
if (cUNOP->op_targ) {
STRLEN len;
SV * const namesv = PAD_SV(cUNOP->op_targ);
const char * const name = SvPV(namesv, len);
- gv = (GV*)newSV(0);
+ gv = MUTABLE_GV(newSV(0));
gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
}
else {
gv = newGVgen(name);
}
prepare_SV_for_RV(sv);
- SvRV_set(sv, (SV*)gv);
+ SvRV_set(sv, MUTABLE_SV(gv));
SvROK_on(sv);
SvSETMAGIC(sv);
goto wasref;
if ((PL_op->op_flags & OPf_SPECIAL) &&
!(PL_op->op_flags & OPf_MOD))
{
- SV * const temp = (SV*)gv_fetchsv(sv, 0, SVt_PVGV);
+ SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
if (!temp
&& (!is_gv_magical_sv(sv,0)
- || !(sv = (SV*)gv_fetchsv(sv, GV_ADD, SVt_PVGV)))) {
+ || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
+ SVt_PVGV))))) {
RETSETUNDEF;
}
sv = temp;
}
else {
if (PL_op->op_private & HINT_STRICT_REFS)
- DIE(aTHX_ PL_no_symref_sv, sv, "a symbol");
+ DIE(aTHX_ S_no_symref_sv, sv, (SvCUR(sv)>32 ? "..." : ""), "a symbol");
if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
== OPpDONT_INIT_GV) {
/* We are the target of a coderef assignment. Return
things. */
RETURN;
}
- sv = (SV*)gv_fetchsv(sv, GV_ADD, SVt_PVGV);
+ sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV));
}
}
}
if (PL_op->op_private & OPpLVAL_INTRO)
- save_gp((GV*)sv, !(PL_op->op_flags & OPf_SPECIAL));
+ save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
SETs(sv);
RETURN;
}
/* Helper function for pp_rv2sv and pp_rv2av */
GV *
-Perl_softref2xv(pTHX_ SV *const sv, const char *const what, const U32 type,
- SV ***spp)
+Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
+ const svtype type, SV ***spp)
{
dVAR;
GV *gv;
+ PERL_ARGS_ASSERT_SOFTREF2XV;
+
if (PL_op->op_private & HINT_STRICT_REFS) {
if (SvOK(sv))
- Perl_die(aTHX_ PL_no_symref_sv, sv, what);
+ Perl_die(aTHX_ S_no_symref_sv, sv, (SvCUR(sv)>32 ? "..." : ""), what);
else
Perl_die(aTHX_ PL_no_usym, what);
}
}
}
else {
- gv = (GV*)sv;
+ gv = MUTABLE_GV(sv);
- if (SvTYPE(gv) != SVt_PVGV) {
+ if (!isGV_with_GP(gv)) {
if (SvGMAGICAL(sv)) {
mg_get(sv);
if (SvROK(sv))
if (PL_op->op_flags & OPf_MOD) {
if (PL_op->op_private & OPpLVAL_INTRO) {
if (cUNOP->op_first->op_type == OP_NULL)
- sv = save_scalar((GV*)TOPs);
+ sv = save_scalar(MUTABLE_GV(TOPs));
else if (gv)
sv = save_scalar(gv);
else
- Perl_croak(aTHX_ PL_no_localize_ref);
+ Perl_croak(aTHX_ "%s", PL_no_localize_ref);
}
else if (PL_op->op_private & OPpDEREF)
vivify_ref(sv, PL_op->op_private & OPpDEREF);
PP(pp_av2arylen)
{
dVAR; dSP;
- AV * const av = (AV*)TOPs;
- SV ** const sv = Perl_av_arylen_p(aTHX_ (AV*)av);
- if (!*sv) {
- *sv = newSV_type(SVt_PVMG);
- sv_magic(*sv, (SV*)av, PERL_MAGIC_arylen, NULL, 0);
+ AV * const av = MUTABLE_AV(TOPs);
+ const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
+ if (lvalue) {
+ SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
+ if (!*sv) {
+ *sv = newSV_type(SVt_PVMG);
+ sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
+ }
+ SETs(*sv);
+ } else {
+ SETs(sv_2mortal(newSViv(
+ AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
+ )));
}
- SETs(*sv);
RETURN;
}
LvTYPE(TARG) = '.';
if (LvTARG(TARG) != sv) {
- if (LvTARG(TARG))
- SvREFCNT_dec(LvTARG(TARG));
+ SvREFCNT_dec(LvTARG(TARG));
LvTARG(TARG) = SvREFCNT_inc_simple(sv);
}
PUSHs(TARG); /* no SvSETMAGIC */
CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
if (cv) {
if (CvCLONE(cv))
- cv = (CV*)sv_2mortal((SV*)cv_clone(cv));
+ cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
if ((PL_op->op_private & OPpLVAL_INTRO)) {
if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
cv = GvCV(gv);
}
}
else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
- cv = (CV*)gv;
+ cv = MUTABLE_CV(gv);
}
else
- cv = (CV*)&PL_sv_undef;
- SETs((SV*)cv);
+ cv = MUTABLE_CV(&PL_sv_undef);
+ SETs(MUTABLE_SV(cv));
RETURN;
}
ret = newSVpvs_flags("_;$", SVs_TEMP);
goto set;
}
+ if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) {
+ ret = newSVpvs_flags("\\[@%]", SVs_TEMP);
+ goto set;
+ }
if (code == -KEY_readpipe) {
s = "CORE::backtick";
}
PP(pp_anoncode)
{
dVAR; dSP;
- CV* cv = (CV*)PAD_SV(PL_op->op_targ);
+ CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
if (CvCLONE(cv))
- cv = (CV*)sv_2mortal((SV*)cv_clone(cv));
+ cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
EXTEND(SP,1);
- PUSHs((SV*)cv);
+ PUSHs(MUTABLE_SV(cv));
RETURN;
}
dVAR;
SV* rv;
+ PERL_ARGS_ASSERT_REFTO;
+
if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
if (LvTARGLEN(sv))
vivify_defelem(sv);
SvREFCNT_inc_void_NN(sv);
}
else if (SvTYPE(sv) == SVt_PVAV) {
- if (!AvREAL((AV*)sv) && AvREIFY((AV*)sv))
- av_reify((AV*)sv);
+ if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
+ av_reify(MUTABLE_AV(sv));
SvTEMP_off(sv);
SvREFCNT_inc_void_NN(sv);
}
if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
Perl_croak(aTHX_ "Attempt to bless into a reference");
ptr = SvPV_const(ssv,len);
- if (len == 0 && ckWARN(WARN_MISC))
- Perl_warner(aTHX_ packWARN(WARN_MISC),
- "Explicit blessing to '' (assuming package main)");
+ if (len == 0)
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
+ "Explicit blessing to '' (assuming package main)");
stash = gv_stashpvn(ptr, len, GV_ADD);
}
SV *sv = POPs;
const char * const elem = SvPV_nolen_const(sv);
- GV * const gv = (GV*)POPs;
+ GV * const gv = MUTABLE_GV(POPs);
SV * tmpRef = NULL;
sv = NULL;
switch (*elem) {
case 'A':
if (strEQ(second_letter, "RRAY"))
- tmpRef = (SV*)GvAV(gv);
+ tmpRef = MUTABLE_SV(GvAV(gv));
break;
case 'C':
if (strEQ(second_letter, "ODE"))
- tmpRef = (SV*)GvCVu(gv);
+ tmpRef = MUTABLE_SV(GvCVu(gv));
break;
case 'F':
if (strEQ(second_letter, "ILEHANDLE")) {
/* finally deprecated in 5.8.0 */
deprecate("*glob{FILEHANDLE}");
- tmpRef = (SV*)GvIOp(gv);
+ tmpRef = MUTABLE_SV(GvIOp(gv));
}
else
if (strEQ(second_letter, "ORMAT"))
- tmpRef = (SV*)GvFORM(gv);
+ tmpRef = MUTABLE_SV(GvFORM(gv));
break;
case 'G':
if (strEQ(second_letter, "LOB"))
- tmpRef = (SV*)gv;
+ tmpRef = MUTABLE_SV(gv);
break;
case 'H':
if (strEQ(second_letter, "ASH"))
- tmpRef = (SV*)GvHV(gv);
+ tmpRef = MUTABLE_SV(GvHV(gv));
break;
case 'I':
if (*second_letter == 'O' && !elem[2])
- tmpRef = (SV*)GvIOp(gv);
+ tmpRef = MUTABLE_SV(GvIOp(gv));
break;
case 'N':
if (strEQ(second_letter, "AME"))
case SVt_NULL:
break;
case SVt_PVAV:
- av_undef((AV*)sv);
+ av_undef(MUTABLE_AV(sv));
break;
case SVt_PVHV:
- hv_undef((HV*)sv);
+ hv_undef(MUTABLE_HV(sv));
break;
case SVt_PVCV:
- if (cv_const_sv((CV*)sv) && ckWARN(WARN_MISC))
- Perl_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
- CvANON((CV*)sv) ? "(anonymous)" : GvENAME(CvGV((CV*)sv)));
+ if (cv_const_sv((const CV *)sv))
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
+ CvANON((const CV *)sv) ? "(anonymous)"
+ : GvENAME(CvGV((const CV *)sv)));
/* FALLTHROUGH */
case SVt_PVFM:
{
/* let user-undef'd sub keep its identity */
- GV* const gv = CvGV((CV*)sv);
- cv_undef((CV*)sv);
- CvGV((CV*)sv) = gv;
+ GV* const gv = CvGV((const CV *)sv);
+ cv_undef(MUTABLE_CV(sv));
+ CvGV((const CV *)sv) = gv;
}
break;
case SVt_PVGV:
- if (SvFAKE(sv))
+ if (SvFAKE(sv)) {
SvSetMagicSV(sv, &PL_sv_undef);
- else {
+ break;
+ }
+ else if (isGV_with_GP(sv)) {
GP *gp;
HV *stash;
/* undef *Foo:: */
- if((stash = GvHV((GV*)sv)) && HvNAME_get(stash))
+ if((stash = GvHV((const GV *)sv)) && HvNAME_get(stash))
mro_isa_changed_in(stash);
/* undef *Pkg::meth_name ... */
- else if(GvCVu((GV*)sv) && (stash = GvSTASH((GV*)sv)) && HvNAME_get(stash))
+ else if(GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
+ && HvNAME_get(stash))
mro_method_changed_in(stash);
- gp_free((GV*)sv);
+ gp_free(MUTABLE_GV(sv));
Newxz(gp, 1, GP);
GvGP(sv) = gp_ref(gp);
GvSV(sv) = newSV(0);
GvLINE(sv) = CopLINE(PL_curcop);
- GvEGV(sv) = (GV*)sv;
+ GvEGV(sv) = MUTABLE_GV(sv);
GvMULTI_on(sv);
+ break;
}
- break;
+ /* FALL THROUGH */
default:
if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
SvPV_free(sv);
PP(pp_predec)
{
dVAR; dSP;
- if (SvTYPE(TOPs) >= SVt_PVGV && SvTYPE(TOPs) != SVt_PVLV)
- DIE(aTHX_ PL_no_modify);
+ if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
+ DIE(aTHX_ "%s", PL_no_modify);
if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
&& SvIVX(TOPs) != IV_MIN)
{
PP(pp_postinc)
{
dVAR; dSP; dTARGET;
- if (SvTYPE(TOPs) >= SVt_PVGV && SvTYPE(TOPs) != SVt_PVLV)
- DIE(aTHX_ PL_no_modify);
+ if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
+ DIE(aTHX_ "%s", PL_no_modify);
sv_setsv(TARG, TOPs);
if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
&& SvIVX(TOPs) != IV_MAX)
PP(pp_postdec)
{
dVAR; dSP; dTARGET;
- if (SvTYPE(TOPs) >= SVt_PVGV && SvTYPE(TOPs) != SVt_PVLV)
- DIE(aTHX_ PL_no_modify);
+ if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
+ DIE(aTHX_ "%s", PL_no_modify);
sv_setsv(TARG, TOPs);
if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
&& SvIVX(TOPs) != IV_MIN)
}
MARK++;
repeatcpy((char*)(MARK + items), (char*)MARK,
- items * sizeof(SV*), count - 1);
+ items * sizeof(const SV *), count - 1);
SP += max;
}
else if (count <= 0)
STRLEN len;
const char * const s = SvPV_const(sv, len);
if (isIDFIRST(*s)) {
- sv_setpvn(TARG, "-", 1);
+ sv_setpvs(TARG, "-");
sv_catsv(TARG, sv);
}
else if (*s == '+' || *s == '-') {
if (SvNOK(sv))
sv_setnv(TARG, -SvNV(sv));
else {
- sv_setpvn(TARG, "-", 1);
+ sv_setpvs(TARG, "-");
sv_catsv(TARG, sv);
}
}
sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
SvUTF8_off(TARG);
}
- SETs(TARG);
+ SETTARG;
RETURN;
}
#ifdef LIBERAL
#endif
for ( ; anum > 0; anum--, tmps++)
*tmps = ~*tmps;
-
- SETs(TARG);
+ SETTARG;
}
RETURN;
}
/* This is the i_modulo with the workaround for the _moddi3 bug
* in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
* See below for pp_i_modulo. */
- dVAR; dVAR; dSP; dATARGET; tryAMAGICbin(modulo,opASSIGN);
+ dVAR; dSP; dATARGET; tryAMAGICbin(modulo,opASSIGN);
{
dPOPTOPiirl;
if (!right)
dVAR; dSP; dTARGET;
SV * const sv = TOPs;
- if (SvAMAGIC(sv)) {
- /* For an overloaded scalar, we can't know in advance if it's going to
- be UTF-8 or not. Also, we can't call sv_len_utf8 as it likes to
- cache the length. Maybe that should be a documented feature of it.
+ if (SvGAMAGIC(sv)) {
+ /* For an overloaded or magic scalar, we can't know in advance if
+ it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
+ it likes to cache the length. Maybe that should be a documented
+ feature of it.
*/
STRLEN len;
- const char *const p = SvPV_const(sv, len);
+ const char *const p
+ = sv_2pv_flags(sv, &len,
+ SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
- if (DO_UTF8(sv)) {
+ if (!p)
+ SETs(&PL_sv_undef);
+ else if (DO_UTF8(sv)) {
SETi(utf8_length((U8*)p, (U8*)p + len));
}
else
SETi(len);
-
+ } else if (SvOK(sv)) {
+ /* Neither magic nor overloaded. */
+ if (DO_UTF8(sv))
+ SETi(sv_len_utf8(sv));
+ else
+ SETi(sv_len(sv));
+ } else {
+ SETs(&PL_sv_undef);
}
- else if (DO_UTF8(sv))
- SETi(sv_len_utf8(sv));
- else
- SETi(sv_len(sv));
RETURN;
}
if (fail < 0) {
if (lvalue || repl)
Perl_croak(aTHX_ "substr outside of string");
- if (ckWARN(WARN_SUBSTR))
- Perl_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
RETPUSHUNDEF;
}
else {
repl = SvPV_const(repl_sv_copy, repl_len);
repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
}
- sv_insert(sv, pos, rem, repl, repl_len);
+ if (!SvOK(sv))
+ sv_setpvs(sv, "");
+ sv_insert_flags(sv, pos, rem, repl, repl_len, 0);
if (repl_is_utf8)
SvUTF8_on(sv);
- if (repl_sv_copy)
- SvREFCNT_dec(repl_sv_copy);
+ SvREFCNT_dec(repl_sv_copy);
}
else if (lvalue) { /* it's an lvalue! */
if (!SvGMAGICAL(sv)) {
if (SvROK(sv)) {
SvPV_force_nolen(sv);
- if (ckWARN(WARN_SUBSTR))
- Perl_warner(aTHX_ packWARN(WARN_SUBSTR),
- "Attempt to use reference as lvalue in substr");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
+ "Attempt to use reference as lvalue in substr");
}
if (isGV_with_GP(sv))
SvPV_force_nolen(sv);
else if (SvOK(sv)) /* is it defined ? */
(void)SvPOK_only_UTF8(sv);
else
- sv_setpvn(sv,"",0); /* avoid lexical reincarnation */
+ sv_setpvs(sv, ""); /* avoid lexical reincarnation */
}
if (SvTYPE(TARG) < SVt_PVLV) {
LvTYPE(TARG) = 'x';
if (LvTARG(TARG) != sv) {
- if (LvTARG(TARG))
- SvREFCNT_dec(LvTARG(TARG));
+ SvREFCNT_dec(LvTARG(TARG));
LvTARG(TARG) = SvREFCNT_inc_simple(sv);
}
LvTARGOFF(TARG) = upos;
}
LvTYPE(TARG) = 'v';
if (LvTARG(TARG) != src) {
- if (LvTARG(TARG))
- SvREFCNT_dec(LvTARG(TARG));
+ SvREFCNT_dec(LvTARG(TARG));
LvTARG(TARG) = SvREFCNT_inc_simple(src);
}
LvTARGOFF(TARG) = offset;
if (retval > 0 && big_utf8)
sv_pos_b2u(big, &retval);
}
- if (temp)
- SvREFCNT_dec(temp);
+ SvREFCNT_dec(temp);
fail:
PUSHi(retval + arybase);
RETURN;
# else
sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
# endif
- SETs(TARG);
+ SETTARG;
RETURN;
#else
DIE(aTHX_
"The crypt() function is unimplemented due to excessive paranoia.");
+ return NORMAL;
#endif
}
+/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
+ * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
+
+/* Both the characters below can be stored in two UTF-8 bytes. In UTF-8 the max
+ * character that 2 bytes can hold is U+07FF, and in UTF-EBCDIC it is U+03FF.
+ * See http://www.unicode.org/unicode/reports/tr16 */
+#define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */
+#define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRON */
+
+/* Below are several macros that generate code */
+/* Generates code to store a unicode codepoint c that is known to occupy
+ * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
+#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
+ STMT_START { \
+ *(p) = UTF8_TWO_BYTE_HI(c); \
+ *((p)+1) = UTF8_TWO_BYTE_LO(c); \
+ } STMT_END
+
+/* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
+ * available byte after the two bytes */
+#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
+ STMT_START { \
+ *(p)++ = UTF8_TWO_BYTE_HI(c); \
+ *((p)++) = UTF8_TWO_BYTE_LO(c); \
+ } STMT_END
+
+/* Generates code to store the upper case of latin1 character l which is known
+ * to have its upper case be non-latin1 into the two bytes p and p+1. There
+ * are only two characters that fit this description, and this macro knows
+ * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
+ * bytes */
+#define STORE_NON_LATIN1_UC(p, l) \
+STMT_START { \
+ if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
+ STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
+ } else { /* Must be the following letter */ \
+ STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
+ } \
+} STMT_END
+
+/* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
+ * after the character stored */
+#define CAT_NON_LATIN1_UC(p, l) \
+STMT_START { \
+ if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
+ CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
+ } else { \
+ CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
+ } \
+} STMT_END
+
+/* Generates code to add the two UTF-8 bytes (probably u) that are the upper
+ * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
+ * and must require two bytes to store it. Advances p to point to the next
+ * available position */
+#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
+STMT_START { \
+ if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
+ CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
+ } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
+ *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
+ } else {/* else is one of the other two special cases */ \
+ CAT_NON_LATIN1_UC((p), (l)); \
+ } \
+} STMT_END
+
PP(pp_ucfirst)
{
+ /* Actually is both lcfirst() and ucfirst(). Only the first character
+ * changes. This means that possibly we can change in-place, ie., just
+ * take the source and change that one character and store it back, but not
+ * if read-only etc, or if the length changes */
+
dVAR;
dSP;
SV *source = TOPs;
- STRLEN slen;
+ STRLEN slen; /* slen is the byte length of the whole SV. */
STRLEN need;
SV *dest;
- bool inplace = TRUE;
- bool doing_utf8;
+ bool inplace; /* ? Convert first char only, in-place */
+ bool doing_utf8 = FALSE; /* ? using utf8 */
+ bool convert_source_to_utf8 = FALSE; /* ? need to convert */
const int op_type = PL_op->op_type;
const U8 *s;
U8 *d;
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- STRLEN ulen;
- STRLEN tculen;
+ STRLEN ulen; /* ulen is the byte length of the original Unicode character
+ * stored as UTF-8 at s. */
+ STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
+ * lowercased) character stored in tmpbuf. May be either
+ * UTF-8 or not, but in either case is the number of bytes */
SvGETMAGIC(source);
if (SvOK(source)) {
s = (const U8*)SvPV_nomg_const(source, slen);
} else {
+ if (ckWARN(WARN_UNINITIALIZED))
+ report_uninit(source);
s = (const U8*)"";
slen = 0;
}
- if (slen && DO_UTF8(source) && UTF8_IS_START(*s)) {
+ /* We may be able to get away with changing only the first character, in
+ * place, but not if read-only, etc. Later we may discover more reasons to
+ * not convert in-place. */
+ inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
+
+ /* First calculate what the changed first character should be. This affects
+ * whether we can just swap it out, leaving the rest of the string unchanged,
+ * or even if have to convert the dest to UTF-8 when the source isn't */
+
+ if (! slen) { /* If empty */
+ need = 1; /* still need a trailing NUL */
+ }
+ else if (DO_UTF8(source)) { /* Is the source utf8? */
doing_utf8 = TRUE;
- utf8_to_uvchr(s, &ulen);
- if (op_type == OP_UCFIRST) {
- toTITLE_utf8(s, tmpbuf, &tculen);
- } else {
- toLOWER_utf8(s, tmpbuf, &tculen);
+
+/* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
+ * and doesn't allow for the user to specify their own. When code is added to
+ * detect if there is a user-defined mapping in force here, and if so to use
+ * that, then the code below can be compiled. The detection would be a good
+ * thing anyway, as currently the user-defined mappings only work on utf8
+ * strings, and thus depend on the chosen internal storage method, which is a
+ * bad thing */
+#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
+ if (UTF8_IS_INVARIANT(*s)) {
+
+ /* An invariant source character is either ASCII or, in EBCDIC, an
+ * ASCII equivalent or a caseless C1 control. In both these cases,
+ * the lower and upper cases of any character are also invariants
+ * (and title case is the same as upper case). So it is safe to
+ * use the simple case change macros which avoid the overhead of
+ * the general functions. Note that if perl were to be extended to
+ * do locale handling in UTF-8 strings, this wouldn't be true in,
+ * for example, Lithuanian or Turkic. */
+ *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
+ tculen = ulen = 1;
+ need = slen + 1;
}
- /* If the two differ, we definately cannot do inplace. */
- inplace = (ulen == tculen);
- need = slen + 1 - ulen + tculen;
- } else {
- doing_utf8 = FALSE;
- need = slen + 1;
+ else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+ U8 chr;
+
+ /* Similarly, if the source character isn't invariant but is in the
+ * latin1 range (or EBCDIC equivalent thereof), we have the case
+ * changes compiled into perl, and can avoid the overhead of the
+ * general functions. In this range, the characters are stored as
+ * two UTF-8 bytes, and it so happens that any changed-case version
+ * is also two bytes (in both ASCIIish and EBCDIC machines). */
+ tculen = ulen = 2;
+ need = slen + 1;
+
+ /* Convert the two source bytes to a single Unicode code point
+ * value, change case and save for below */
+ chr = UTF8_ACCUMULATE(*s, *(s+1));
+ if (op_type == OP_LCFIRST) { /* lower casing is easy */
+ U8 lower = toLOWER_LATIN1(chr);
+ STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
+ }
+ else { /* ucfirst */
+ U8 upper = toUPPER_LATIN1_MOD(chr);
+
+ /* Most of the latin1 range characters are well-behaved. Their
+ * title and upper cases are the same, and are also in the
+ * latin1 range. The macro above returns their upper (hence
+ * title) case, and all that need be done is to save the result
+ * for below. However, several characters are problematic, and
+ * have to be handled specially. The MOD in the macro name
+ * above means that these tricky characters all get mapped to
+ * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
+ * This mapping saves some tests for the majority of the
+ * characters */
+
+ if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
+
+ /* Not tricky. Just save it. */
+ STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
+ }
+ else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
+
+ /* This one is tricky because it is two characters long,
+ * though the UTF-8 is still two bytes, so the stored
+ * length doesn't change */
+ *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
+ *(tmpbuf + 1) = 's';
+ }
+ else {
+
+ /* The other two have their title and upper cases the same,
+ * but are tricky because the changed-case characters
+ * aren't in the latin1 range. They, however, do fit into
+ * two UTF-8 bytes */
+ STORE_NON_LATIN1_UC(tmpbuf, chr);
+ }
+ }
+ }
+ else {
+#endif /* end of dont want to break user-defined casing */
+
+ /* Here, can't short-cut the general case */
+
+ utf8_to_uvchr(s, &ulen);
+ if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
+ else toLOWER_utf8(s, tmpbuf, &tculen);
+
+ /* we can't do in-place if the length changes. */
+ if (ulen != tculen) inplace = FALSE;
+ need = slen + 1 - ulen + tculen;
+#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
+ }
+#endif
}
+ else { /* Non-zero length, non-UTF-8, Need to consider locale and if
+ * latin1 is treated as caseless. Note that a locale takes
+ * precedence */
+ tculen = 1; /* Most characters will require one byte, but this will
+ * need to be overridden for the tricky ones */
+ need = slen + 1;
+
+ if (op_type == OP_LCFIRST) {
+
+ /* lower case the first letter: no trickiness for any character */
+ *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
+ ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
+ }
+ /* is ucfirst() */
+ else if (IN_LOCALE_RUNTIME) {
+ *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
+ * have upper and title case different
+ */
+ }
+ else if (! IN_UNI_8_BIT) {
+ *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
+ * on EBCDIC machines whatever the
+ * native function does */
+ }
+ else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
+ *tmpbuf = toUPPER_LATIN1_MOD(*s);
+
+ /* tmpbuf now has the correct title case for all latin1 characters
+ * except for the several ones that have tricky handling. All
+ * of these are mapped by the MOD to the letter below. */
+ if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
+
+ /* The length is going to change, with all three of these, so
+ * can't replace just the first character */
+ inplace = FALSE;
+
+ /* We use the original to distinguish between these tricky
+ * cases */
+ if (*s == LATIN_SMALL_LETTER_SHARP_S) {
+ /* Two character title case 'Ss', but can remain non-UTF-8 */
+ need = slen + 2;
+ *tmpbuf = 'S';
+ *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
+ tculen = 2;
+ }
+ else {
+
+ /* The other two tricky ones have their title case outside
+ * latin1. It is the same as their upper case. */
+ doing_utf8 = TRUE;
+ STORE_NON_LATIN1_UC(tmpbuf, *s);
+
+ /* The UTF-8 and UTF-EBCDIC lengths of both these characters
+ * and their upper cases is 2. */
+ tculen = ulen = 2;
+
+ /* The entire result will have to be in UTF-8. Assume worst
+ * case sizing in conversion. (all latin1 characters occupy
+ * at most two bytes in utf8) */
+ convert_source_to_utf8 = TRUE;
+ need = slen * 2 + 1;
+ }
+ } /* End of is one of the three special chars */
+ } /* End of use Unicode (Latin1) semantics */
+ } /* End of changing the case of the first character */
- if (SvPADTMP(source) && !SvREADONLY(source) && inplace && SvTEMP(source)) {
- /* We can convert in place. */
+ /* Here, have the first character's changed case stored in tmpbuf. Ready to
+ * generate the result */
+ if (inplace) {
+ /* We can convert in place. This means we change just the first
+ * character without disturbing the rest; no need to grow */
dest = source;
s = d = (U8*)SvPV_force_nomg(source, slen);
} else {
dest = TARG;
+ /* Here, we can't convert in place; we earlier calculated how much
+ * space we will need, so grow to accommodate that */
SvUPGRADE(dest, SVt_PV);
d = (U8*)SvGROW(dest, need);
(void)SvPOK_only(dest);
SETs(dest);
-
- inplace = FALSE;
}
if (doing_utf8) {
- if(!inplace) {
- /* slen is the byte length of the whole SV.
- * ulen is the byte length of the original Unicode character
- * stored as UTF-8 at s.
- * tculen is the byte length of the freshly titlecased (or
- * lowercased) Unicode character stored as UTF-8 at tmpbuf.
- * We first set the result to be the titlecased (/lowercased)
- * character, and then append the rest of the SV data. */
- sv_setpvn(dest, (char*)tmpbuf, tculen);
- if (slen > ulen)
- sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
+ if (! inplace) {
+ if (! convert_source_to_utf8) {
+
+ /* Here both source and dest are in UTF-8, but have to create
+ * the entire output. We initialize the result to be the
+ * title/lower cased first character, and then append the rest
+ * of the string. */
+ sv_setpvn(dest, (char*)tmpbuf, tculen);
+ if (slen > ulen) {
+ sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
+ }
+ }
+ else {
+ const U8 *const send = s + slen;
+
+ /* Here the dest needs to be in UTF-8, but the source isn't,
+ * except we earlier UTF-8'd the first character of the source
+ * into tmpbuf. First put that into dest, and then append the
+ * rest of the source, converting it to UTF-8 as we go. */
+
+ /* Assert tculen is 2 here because the only two characters that
+ * get to this part of the code have 2-byte UTF-8 equivalents */
+ *d++ = *tmpbuf;
+ *d++ = *(tmpbuf + 1);
+ s++; /* We have just processed the 1st char */
+
+ for (; s < send; s++) {
+ d = uvchr_to_utf8(d, *s);
+ }
+ *d = '\0';
+ SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+ }
SvUTF8_on(dest);
}
- else {
+ else { /* in-place UTF-8. Just overwrite the first character */
Copy(tmpbuf, d, tculen, U8);
SvCUR_set(dest, need - 1);
}
}
- else {
- if (*s) {
+ else { /* Neither source nor dest are in or need to be UTF-8 */
+ if (slen) {
if (IN_LOCALE_RUNTIME) {
TAINT;
SvTAINTED_on(dest);
- *d = (op_type == OP_UCFIRST)
- ? toUPPER_LC(*s) : toLOWER_LC(*s);
}
- else
- *d = (op_type == OP_UCFIRST) ? toUPPER(*s) : toLOWER(*s);
- } else {
- /* See bug #39028 */
+ if (inplace) { /* in-place, only need to change the 1st char */
+ *d = *tmpbuf;
+ }
+ else { /* Not in-place */
+
+ /* Copy the case-changed character(s) from tmpbuf */
+ Copy(tmpbuf, d, tculen, U8);
+ d += tculen - 1; /* Code below expects d to point to final
+ * character stored */
+ }
+ }
+ else { /* empty source */
+ /* See bug #39028: Don't taint if empty */
*d = *s;
}
+ /* In a "use bytes" we don't treat the source as UTF-8, but, still want
+ * the destination to retain that flag */
if (SvUTF8(source))
SvUTF8_on(dest);
- if (!inplace) {
+ if (!inplace) { /* Finish the rest of the string, unchanged */
/* This will copy the trailing NUL */
Copy(s + 1, d + 1, slen, U8);
SvCUR_set(dest, need - 1);
/* There's so much setup/teardown code common between uc and lc, I wonder if
it would be worth merging the two, and just having a switch outside each
- of the three tight loops. */
+ of the three tight loops. There is less and less commonality though */
PP(pp_uc)
{
dVAR;
SvGETMAGIC(source);
if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
- && SvTEMP(source) && !DO_UTF8(source)) {
- /* We can convert in place. */
-
+ && SvTEMP(source) && !DO_UTF8(source)
+ && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
+
+ /* We can convert in place. The reason we can't if in UNI_8_BIT is to
+ * make the loop tight, so we overwrite the source with the dest before
+ * looking at it, and we need to look at the original source
+ * afterwards. There would also need to be code added to handle
+ * switching to not in-place in midstream if we run into characters
+ * that change the length.
+ */
dest = source;
s = d = (U8*)SvPV_force_nomg(source, len);
min = len + 1;
if (SvOK(source)) {
s = (const U8*)SvPV_nomg_const(source, len);
} else {
+ if (ckWARN(WARN_UNINITIALIZED))
+ report_uninit(source);
s = (const U8*)"";
len = 0;
}
const U8 *const send = s + len;
U8 tmpbuf[UTF8_MAXBYTES+1];
+/* This is ifdefd out because it needs more work and thought. It isn't clear
+ * that we should do it. These are hard-coded rules from the Unicode standard,
+ * and may change. 5.2 gives new guidance on the iota subscript, for example,
+ * which has not been checked against this; and secondly it may be that we are
+ * passed a subset of the context, via a \U...\E, for example, and its not
+ * clear what the best approach is to that */
+#ifdef CONTEXT_DEPENDENT_CASING
+ bool in_iota_subscript = FALSE;
+#endif
+
while (s < send) {
- const STRLEN u = UTF8SKIP(s);
- STRLEN ulen;
-
- toUPPER_utf8(s, tmpbuf, &ulen);
- if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
- /* If the eventually required minimum size outgrows
- * the available space, we need to grow. */
- const UV o = d - (U8*)SvPVX_const(dest);
-
- /* If someone uppercases one million U+03B0s we SvGROW() one
- * million times. Or we could try guessing how much to
- allocate without allocating too much. Such is life. */
- SvGROW(dest, min);
- d = (U8*)SvPVX(dest) + o;
+#ifdef CONTEXT_DEPENDENT_CASING
+ if (in_iota_subscript && ! is_utf8_mark(s)) {
+ /* A non-mark. Time to output the iota subscript */
+#define GREEK_CAPITAL_LETTER_IOTA 0x0399
+#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
+
+ CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+ in_iota_subscript = FALSE;
+ }
+#endif
+
+
+/* See comments at the first instance in this file of this ifdef */
+#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
+
+ /* If the UTF-8 character is invariant, then it is in the range
+ * known by the standard macro; result is only one byte long */
+ if (UTF8_IS_INVARIANT(*s)) {
+ *d++ = toUPPER(*s);
+ s++;
+ }
+ else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+
+ /* Likewise, if it fits in a byte, its case change is in our
+ * table */
+ U8 orig = UTF8_ACCUMULATE(*s, *(s+1));
+ U8 upper = toUPPER_LATIN1_MOD(orig);
+ CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
+ s += 2;
+ }
+ else {
+#else
+ {
+#endif
+
+ /* Otherwise, need the general UTF-8 case. Get the changed
+ * case value and copy it to the output buffer */
+
+ const STRLEN u = UTF8SKIP(s);
+ STRLEN ulen;
+
+#ifndef CONTEXT_DEPENDENT_CASING
+ toUPPER_utf8(s, tmpbuf, &ulen);
+#else
+ const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
+ if (uv == GREEK_CAPITAL_LETTER_IOTA && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI) {
+ in_iota_subscript = TRUE;
+ }
+ else {
+#endif
+ if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
+ /* If the eventually required minimum size outgrows
+ * the available space, we need to grow. */
+ const UV o = d - (U8*)SvPVX_const(dest);
+
+ /* If someone uppercases one million U+03B0s we
+ * SvGROW() one million times. Or we could try
+ * guessing how much to allocate without allocating too
+ * much. Such is life. See corresponding comment in lc code
+ * for another option */
+ SvGROW(dest, min);
+ d = (U8*)SvPVX(dest) + o;
+ }
+ Copy(tmpbuf, d, ulen, U8);
+ d += ulen;
+#ifdef CONTEXT_DEPENDENT_CASING
+ }
+#endif
+ s += u;
}
- Copy(tmpbuf, d, ulen, U8);
- d += ulen;
- s += u;
}
+#ifdef CONTEXT_DEPENDENT_CASING
+ if (in_iota_subscript) CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+#endif
SvUTF8_on(dest);
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- } else {
+ } else { /* Not UTF-8 */
if (len) {
const U8 *const send = s + len;
+
+ /* Use locale casing if in locale; regular style if not treating
+ * latin1 as having case; otherwise the latin1 casing. Do the
+ * whole thing in a tight loop, for speed, */
if (IN_LOCALE_RUNTIME) {
TAINT;
SvTAINTED_on(dest);
for (; s < send; d++, s++)
*d = toUPPER_LC(*s);
}
- else {
- for (; s < send; d++, s++)
+ else if (! IN_UNI_8_BIT) {
+ for (; s < send; d++, s++) {
*d = toUPPER(*s);
+ }
}
- }
+ else {
+ for (; s < send; d++, s++) {
+ *d = toUPPER_LATIN1_MOD(*s);
+ if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
+
+ /* The mainstream case is the tight loop above. To avoid
+ * extra tests in that, all three characters that require
+ * special handling are mapped by the MOD to the one tested
+ * just above.
+ * Use the source to distinguish between the three cases */
+
+ if (*s == LATIN_SMALL_LETTER_SHARP_S) {
+
+ /* uc() of this requires 2 characters, but they are
+ * ASCII. If not enough room, grow the string */
+ if (SvLEN(dest) < ++min) {
+ const UV o = d - (U8*)SvPVX_const(dest);
+ SvGROW(dest, min);
+ d = (U8*)SvPVX(dest) + o;
+ }
+ *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
+ continue; /* Back to the tight loop; still in ASCII */
+ }
+
+ /* The other two special handling characters have their
+ * upper cases outside the latin1 range, hence need to be
+ * in UTF-8, so the whole result needs to be in UTF-8. So,
+ * here we are somewhere in the middle of processing a
+ * non-UTF-8 string, and realize that we will have to convert
+ * the whole thing to UTF-8. What to do? There are
+ * several possibilities. The simplest to code is to
+ * convert what we have so far, set a flag, and continue on
+ * in the loop. The flag would be tested each time through
+ * the loop, and if set, the next character would be
+ * converted to UTF-8 and stored. But, I (khw) didn't want
+ * to slow down the mainstream case at all for this fairly
+ * rare case, so I didn't want to add a test that didn't
+ * absolutely have to be there in the loop, besides the
+ * possibility that it would get too complicated for
+ * optimizers to deal with. Another possibility is to just
+ * give up, convert the source to UTF-8, and restart the
+ * function that way. Another possibility is to convert
+ * both what has already been processed and what is yet to
+ * come separately to UTF-8, then jump into the loop that
+ * handles UTF-8. But the most efficient time-wise of the
+ * ones I could think of is what follows, and turned out to
+ * not require much extra code. */
+
+ /* Convert what we have so far into UTF-8, telling the
+ * function that we know it should be converted, and to
+ * allow extra space for what we haven't processed yet.
+ * Assume the worst case space requirements for converting
+ * what we haven't processed so far: that it will require
+ * two bytes for each remaining source character, plus the
+ * NUL at the end. This may cause the string pointer to
+ * move, so re-find it. */
+
+ len = d - (U8*)SvPVX_const(dest);
+ SvCUR_set(dest, len);
+ len = sv_utf8_upgrade_flags_grow(dest,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ (send -s) * 2 + 1);
+ d = (U8*)SvPVX(dest) + len;
+
+ /* And append the current character's upper case in UTF-8 */
+ CAT_NON_LATIN1_UC(d, *s);
+
+ /* Now process the remainder of the source, converting to
+ * upper and UTF-8. If a resulting byte is invariant in
+ * UTF-8, output it as-is, otherwise convert to UTF-8 and
+ * append it to the output. */
+
+ s++;
+ for (; s < send; s++) {
+ U8 upper = toUPPER_LATIN1_MOD(*s);
+ if UTF8_IS_INVARIANT(upper) {
+ *d++ = upper;
+ }
+ else {
+ CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
+ }
+ }
+
+ /* Here have processed the whole source; no need to continue
+ * with the outer loop. Each character has been converted
+ * to upper case and converted to UTF-8 */
+
+ break;
+ } /* End of processing all latin1-style chars */
+ } /* End of processing all chars */
+ } /* End of source is not empty */
+
if (source != dest) {
- *d = '\0';
+ *d = '\0'; /* Here d points to 1 after last char, add NUL */
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
}
- }
+ } /* End of isn't utf8 */
SvSETMAGIC(dest);
RETURN;
}
if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
&& SvTEMP(source) && !DO_UTF8(source)) {
- /* We can convert in place. */
+ /* We can convert in place, as lowercasing anything in the latin1 range
+ * (or else DO_UTF8 would have been on) doesn't lengthen it */
dest = source;
s = d = (U8*)SvPV_force_nomg(source, len);
min = len + 1;
if (SvOK(source)) {
s = (const U8*)SvPV_nomg_const(source, len);
} else {
+ if (ckWARN(WARN_UNINITIALIZED))
+ report_uninit(source);
s = (const U8*)"";
len = 0;
}
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
while (s < send) {
- const STRLEN u = UTF8SKIP(s);
- STRLEN ulen;
- const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
+/* See comments at the first instance in this file of this ifdef */
+#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
+ if (UTF8_IS_INVARIANT(*s)) {
-#define GREEK_CAPITAL_LETTER_SIGMA 0x03A3 /* Unicode U+03A3 */
- if (uv == GREEK_CAPITAL_LETTER_SIGMA) {
- NOOP;
- /*
- * Now if the sigma is NOT followed by
- * /$ignorable_sequence$cased_letter/;
- * and it IS preceded by /$cased_letter$ignorable_sequence/;
- * where $ignorable_sequence is [\x{2010}\x{AD}\p{Mn}]*
- * and $cased_letter is [\p{Ll}\p{Lo}\p{Lt}]
- * then it should be mapped to 0x03C2,
- * (GREEK SMALL LETTER FINAL SIGMA),
- * instead of staying 0x03A3.
- * "should be": in other words, this is not implemented yet.
- * See lib/unicore/SpecialCasing.txt.
+ /* Invariant characters use the standard mappings compiled in.
*/
+ *d++ = toLOWER(*s);
+ s++;
}
- if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
- /* If the eventually required minimum size outgrows
- * the available space, we need to grow. */
- const UV o = d - (U8*)SvPVX_const(dest);
-
- /* If someone lowercases one million U+0130s we SvGROW() one
- * million times. Or we could try guessing how much to
- allocate without allocating too much. Such is life. */
- SvGROW(dest, min);
- d = (U8*)SvPVX(dest) + o;
+ else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+
+ /* As do the ones in the Latin1 range */
+ U8 lower = toLOWER_LATIN1(UTF8_ACCUMULATE(*s, *(s+1)));
+ CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
+ s += 2;
}
- Copy(tmpbuf, d, ulen, U8);
- d += ulen;
- s += u;
- }
+ else {
+#endif
+ /* Here, is utf8 not in Latin-1 range, have to go out and get
+ * the mappings from the tables. */
+
+ const STRLEN u = UTF8SKIP(s);
+ STRLEN ulen;
+
+/* See comments at the first instance in this file of this ifdef */
+#ifndef CONTEXT_DEPENDENT_CASING
+ toLOWER_utf8(s, tmpbuf, &ulen);
+#else
+ /* Here is context dependent casing, not compiled in currently;
+ * needs more thought and work */
+
+ const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
+
+ /* If the lower case is a small sigma, it may be that we need
+ * to change it to a final sigma. This happens at the end of
+ * a word that contains more than just this character, and only
+ * when we started with a capital sigma. */
+ if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
+ s > send - len && /* Makes sure not the first letter */
+ utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
+ ) {
+
+ /* We use the algorithm in:
+ * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
+ * is a CAPITAL SIGMA): If C is preceded by a sequence
+ * consisting of a cased letter and a case-ignorable
+ * sequence, and C is not followed by a sequence consisting
+ * of a case ignorable sequence and then a cased letter,
+ * then when lowercasing C, C becomes a final sigma */
+
+ /* To determine if this is the end of a word, need to peek
+ * ahead. Look at the next character */
+ const U8 *peek = s + u;
+
+ /* Skip any case ignorable characters */
+ while (peek < send && is_utf8_case_ignorable(peek)) {
+ peek += UTF8SKIP(peek);
+ }
+
+ /* If we reached the end of the string without finding any
+ * non-case ignorable characters, or if the next such one
+ * is not-cased, then we have met the conditions for it
+ * being a final sigma with regards to peek ahead, and so
+ * must do peek behind for the remaining conditions. (We
+ * know there is stuff behind to look at since we tested
+ * above that this isn't the first letter) */
+ if (peek >= send || ! is_utf8_cased(peek)) {
+ peek = utf8_hop(s, -1);
+
+ /* Here are at the beginning of the first character
+ * before the original upper case sigma. Keep backing
+ * up, skipping any case ignorable characters */
+ while (is_utf8_case_ignorable(peek)) {
+ peek = utf8_hop(peek, -1);
+ }
+
+ /* Here peek points to the first byte of the closest
+ * non-case-ignorable character before the capital
+ * sigma. If it is cased, then by the Unicode
+ * algorithm, we should use a small final sigma instead
+ * of what we have */
+ if (is_utf8_cased(peek)) {
+ STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
+ UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
+ }
+ }
+ }
+ else { /* Not a context sensitive mapping */
+#endif /* End of commented out context sensitive */
+ if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
+
+ /* If the eventually required minimum size outgrows
+ * the available space, we need to grow. */
+ const UV o = d - (U8*)SvPVX_const(dest);
+
+ /* If someone lowercases one million U+0130s we
+ * SvGROW() one million times. Or we could try
+ * guessing how much to allocate without allocating too
+ * much. Such is life. Another option would be to
+ * grow an extra byte or two more each time we need to
+ * grow, which would cut down the million to 500K, with
+ * little waste */
+ SvGROW(dest, min);
+ d = (U8*)SvPVX(dest) + o;
+ }
+#ifdef CONTEXT_DEPENDENT_CASING
+ }
+#endif
+ /* Copy the newly lowercased letter to the output buffer we're
+ * building */
+ Copy(tmpbuf, d, ulen, U8);
+ d += ulen;
+ s += u;
+#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
+ }
+#endif
+ } /* End of looping through the source string */
SvUTF8_on(dest);
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- } else {
+ } else { /* Not utf8 */
if (len) {
const U8 *const send = s + len;
+
+ /* Use locale casing if in locale; regular style if not treating
+ * latin1 as having case; otherwise the latin1 casing. Do the
+ * whole thing in a tight loop, for speed, */
if (IN_LOCALE_RUNTIME) {
TAINT;
SvTAINTED_on(dest);
for (; s < send; d++, s++)
*d = toLOWER_LC(*s);
}
- else {
- for (; s < send; d++, s++)
+ else if (! IN_UNI_8_BIT) {
+ for (; s < send; d++, s++) {
*d = toLOWER(*s);
+ }
+ }
+ else {
+ for (; s < send; d++, s++) {
+ *d = toLOWER_LATIN1(*s);
+ }
}
}
if (source != dest) {
}
else
sv_setpvn(TARG, s, len);
- SETs(TARG);
- if (SvSMAGICAL(TARG))
- mg_set(TARG);
+ SETTARG;
RETURN;
}
PP(pp_aslice)
{
dVAR; dSP; dMARK; dORIGMARK;
- register AV* const av = (AV*)POPs;
+ register AV *const av = MUTABLE_AV(POPs);
register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
if (SvTYPE(av) == SVt_PVAV) {
const I32 arybase = CopARYBASE_get(PL_curcop);
- if (lval && PL_op->op_private & OPpLVAL_INTRO) {
+ const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
+ bool can_preserve = FALSE;
+
+ if (localizing) {
+ MAGIC *mg;
+ HV *stash;
+
+ can_preserve = SvCANEXISTDELETE(av);
+ }
+
+ if (lval && localizing) {
register SV **svp;
I32 max = -1;
for (svp = MARK + 1; svp <= SP; svp++) {
if (max > AvMAX(av))
av_extend(av, max);
}
+
while (++MARK <= SP) {
register SV **svp;
I32 elem = SvIV(*MARK);
+ bool preeminent = TRUE;
if (elem > 0)
elem -= arybase;
+ if (localizing && can_preserve) {
+ /* If we can determine whether the element exist,
+ * Try to preserve the existenceness of a tied array
+ * element by using EXISTS and DELETE if possible.
+ * Fallback to FETCH and STORE otherwise. */
+ preeminent = av_exists(av, elem);
+ }
+
svp = av_fetch(av, elem, lval);
if (lval) {
if (!svp || *svp == &PL_sv_undef)
DIE(aTHX_ PL_no_aelem, elem);
- if (PL_op->op_private & OPpLVAL_INTRO)
- save_aelem(av, elem, svp);
+ if (localizing) {
+ if (preeminent)
+ save_aelem(av, elem, svp);
+ else
+ SAVEADELETE(av, elem);
+ }
}
*MARK = svp ? *svp : &PL_sv_undef;
}
{
dVAR;
dSP;
- AV *array = (AV*)POPs;
+ AV *array = MUTABLE_AV(POPs);
const I32 gimme = GIMME_V;
IV *iterp = Perl_av_iter_p(aTHX_ array);
const IV current = (*iterp)++;
{
dVAR;
dSP;
- AV *array = (AV*)POPs;
+ AV *array = MUTABLE_AV(POPs);
const I32 gimme = GIMME_V;
*Perl_av_iter_p(aTHX_ array) = 0;
{
dVAR;
dSP;
- HV * hash = (HV*)POPs;
+ HV * hash = MUTABLE_HV(POPs);
HE *entry;
const I32 gimme = GIMME_V;
RETURN;
}
-PP(pp_delete)
+STATIC OP *
+S_do_delete_local(pTHX)
{
dVAR;
dSP;
const I32 gimme = GIMME_V;
- const I32 discard = (gimme == G_VOID) ? G_DISCARD : 0;
+ const MAGIC *mg;
+ HV *stash;
+
+ if (PL_op->op_private & OPpSLICE) {
+ dMARK; dORIGMARK;
+ SV * const osv = POPs;
+ const bool tied = SvRMAGICAL(osv)
+ && mg_find((const SV *)osv, PERL_MAGIC_tied);
+ const bool can_preserve = SvCANEXISTDELETE(osv)
+ || mg_find((const SV *)osv, PERL_MAGIC_env);
+ const U32 type = SvTYPE(osv);
+ if (type == SVt_PVHV) { /* hash element */
+ HV * const hv = MUTABLE_HV(osv);
+ while (++MARK <= SP) {
+ SV * const keysv = *MARK;
+ SV *sv = NULL;
+ bool preeminent = TRUE;
+ if (can_preserve)
+ preeminent = hv_exists_ent(hv, keysv, 0);
+ if (tied) {
+ HE *he = hv_fetch_ent(hv, keysv, 1, 0);
+ if (he)
+ sv = HeVAL(he);
+ else
+ preeminent = FALSE;
+ }
+ else {
+ sv = hv_delete_ent(hv, keysv, 0, 0);
+ SvREFCNT_inc_simple_void(sv); /* De-mortalize */
+ }
+ if (preeminent) {
+ save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
+ if (tied) {
+ *MARK = sv_mortalcopy(sv);
+ mg_clear(sv);
+ } else
+ *MARK = sv;
+ }
+ else {
+ SAVEHDELETE(hv, keysv);
+ *MARK = &PL_sv_undef;
+ }
+ }
+ }
+ else if (type == SVt_PVAV) { /* array element */
+ if (PL_op->op_flags & OPf_SPECIAL) {
+ AV * const av = MUTABLE_AV(osv);
+ while (++MARK <= SP) {
+ I32 idx = SvIV(*MARK);
+ SV *sv = NULL;
+ bool preeminent = TRUE;
+ if (can_preserve)
+ preeminent = av_exists(av, idx);
+ if (tied) {
+ SV **svp = av_fetch(av, idx, 1);
+ if (svp)
+ sv = *svp;
+ else
+ preeminent = FALSE;
+ }
+ else {
+ sv = av_delete(av, idx, 0);
+ SvREFCNT_inc_simple_void(sv); /* De-mortalize */
+ }
+ if (preeminent) {
+ save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
+ if (tied) {
+ *MARK = sv_mortalcopy(sv);
+ mg_clear(sv);
+ } else
+ *MARK = sv;
+ }
+ else {
+ SAVEADELETE(av, idx);
+ *MARK = &PL_sv_undef;
+ }
+ }
+ }
+ }
+ else
+ DIE(aTHX_ "Not a HASH reference");
+ if (gimme == G_VOID)
+ SP = ORIGMARK;
+ else if (gimme == G_SCALAR) {
+ MARK = ORIGMARK;
+ if (SP > MARK)
+ *++MARK = *SP;
+ else
+ *++MARK = &PL_sv_undef;
+ SP = MARK;
+ }
+ }
+ else {
+ SV * const keysv = POPs;
+ SV * const osv = POPs;
+ const bool tied = SvRMAGICAL(osv)
+ && mg_find((const SV *)osv, PERL_MAGIC_tied);
+ const bool can_preserve = SvCANEXISTDELETE(osv)
+ || mg_find((const SV *)osv, PERL_MAGIC_env);
+ const U32 type = SvTYPE(osv);
+ SV *sv = NULL;
+ if (type == SVt_PVHV) {
+ HV * const hv = MUTABLE_HV(osv);
+ bool preeminent = TRUE;
+ if (can_preserve)
+ preeminent = hv_exists_ent(hv, keysv, 0);
+ if (tied) {
+ HE *he = hv_fetch_ent(hv, keysv, 1, 0);
+ if (he)
+ sv = HeVAL(he);
+ else
+ preeminent = FALSE;
+ }
+ else {
+ sv = hv_delete_ent(hv, keysv, 0, 0);
+ SvREFCNT_inc_simple_void(sv); /* De-mortalize */
+ }
+ if (preeminent) {
+ save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
+ if (tied) {
+ SV *nsv = sv_mortalcopy(sv);
+ mg_clear(sv);
+ sv = nsv;
+ }
+ }
+ else
+ SAVEHDELETE(hv, keysv);
+ }
+ else if (type == SVt_PVAV) {
+ if (PL_op->op_flags & OPf_SPECIAL) {
+ AV * const av = MUTABLE_AV(osv);
+ I32 idx = SvIV(keysv);
+ bool preeminent = TRUE;
+ if (can_preserve)
+ preeminent = av_exists(av, idx);
+ if (tied) {
+ SV **svp = av_fetch(av, idx, 1);
+ if (svp)
+ sv = *svp;
+ else
+ preeminent = FALSE;
+ }
+ else {
+ sv = av_delete(av, idx, 0);
+ SvREFCNT_inc_simple_void(sv); /* De-mortalize */
+ }
+ if (preeminent) {
+ save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
+ if (tied) {
+ SV *nsv = sv_mortalcopy(sv);
+ mg_clear(sv);
+ sv = nsv;
+ }
+ }
+ else
+ SAVEADELETE(av, idx);
+ }
+ else
+ DIE(aTHX_ "panic: avhv_delete no longer supported");
+ }
+ else
+ DIE(aTHX_ "Not a HASH reference");
+ if (!sv)
+ sv = &PL_sv_undef;
+ if (gimme != G_VOID)
+ PUSHs(sv);
+ }
+
+ RETURN;
+}
+
+PP(pp_delete)
+{
+ dVAR;
+ dSP;
+ I32 gimme;
+ I32 discard;
+
+ if (PL_op->op_private & OPpLVAL_INTRO)
+ return do_delete_local();
+
+ gimme = GIMME_V;
+ discard = (gimme == G_VOID) ? G_DISCARD : 0;
if (PL_op->op_private & OPpSLICE) {
dMARK; dORIGMARK;
- HV * const hv = (HV*)POPs;
+ HV * const hv = MUTABLE_HV(POPs);
const U32 hvtype = SvTYPE(hv);
if (hvtype == SVt_PVHV) { /* hash element */
while (++MARK <= SP) {
else if (hvtype == SVt_PVAV) { /* array element */
if (PL_op->op_flags & OPf_SPECIAL) {
while (++MARK <= SP) {
- SV * const sv = av_delete((AV*)hv, SvIV(*MARK), discard);
+ SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
*MARK = sv ? sv : &PL_sv_undef;
}
}
}
else {
SV *keysv = POPs;
- HV * const hv = (HV*)POPs;
- SV *sv;
+ HV * const hv = MUTABLE_HV(POPs);
+ SV *sv = NULL;
if (SvTYPE(hv) == SVt_PVHV)
sv = hv_delete_ent(hv, keysv, discard, 0);
else if (SvTYPE(hv) == SVt_PVAV) {
if (PL_op->op_flags & OPf_SPECIAL)
- sv = av_delete((AV*)hv, SvIV(keysv), discard);
+ sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
else
DIE(aTHX_ "panic: avhv_delete no longer supported");
}
RETPUSHNO;
}
tmpsv = POPs;
- hv = (HV*)POPs;
+ hv = MUTABLE_HV(POPs);
if (SvTYPE(hv) == SVt_PVHV) {
if (hv_exists_ent(hv, tmpsv, 0))
RETPUSHYES;
}
else if (SvTYPE(hv) == SVt_PVAV) {
if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
- if (av_exists((AV*)hv, SvIV(tmpsv)))
+ if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
RETPUSHYES;
}
}
PP(pp_hslice)
{
dVAR; dSP; dMARK; dORIGMARK;
- register HV * const hv = (HV*)POPs;
+ register HV * const hv = MUTABLE_HV(POPs);
register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
- bool other_magic = FALSE;
+ bool can_preserve = FALSE;
if (localizing) {
MAGIC *mg;
HV *stash;
- other_magic = mg_find((SV*)hv, PERL_MAGIC_env) ||
- ((mg = mg_find((SV*)hv, PERL_MAGIC_tied))
- /* Try to preserve the existenceness of a tied hash
- * element by using EXISTS and DELETE if possible.
- * Fallback to FETCH and STORE otherwise */
- && (stash = SvSTASH(SvRV(SvTIED_obj((SV*)hv, mg))))
- && gv_fetchmethod_autoload(stash, "EXISTS", TRUE)
- && gv_fetchmethod_autoload(stash, "DELETE", TRUE));
+ if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
+ can_preserve = TRUE;
}
while (++MARK <= SP) {
SV * const keysv = *MARK;
SV **svp;
HE *he;
- bool preeminent = FALSE;
-
- if (localizing) {
- preeminent = SvRMAGICAL(hv) && !other_magic ? 1 :
- hv_exists_ent(hv, keysv, 0);
+ bool preeminent = TRUE;
+
+ if (localizing && can_preserve) {
+ /* If we can determine whether the element exist,
+ * try to preserve the existenceness of a tied hash
+ * element by using EXISTS and DELETE if possible.
+ * Fallback to FETCH and STORE otherwise. */
+ preeminent = hv_exists_ent(hv, keysv, 0);
}
he = hv_fetch_ent(hv, keysv, lval, 0);
}
if (localizing) {
if (HvNAME_get(hv) && isGV(*svp))
- save_gp((GV*)*svp, !(PL_op->op_flags & OPf_SPECIAL));
- else {
- if (preeminent)
- save_helem(hv, keysv, svp);
- else {
- STRLEN keylen;
- const char * const key = SvPV_const(keysv, keylen);
- SAVEDELETE(hv, savepvn(key,keylen),
- SvUTF8(keysv) ? -(I32)keylen : (I32)keylen);
- }
- }
+ save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
+ else if (preeminent)
+ save_helem_flags(hv, keysv, svp,
+ (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
+ else
+ SAVEHDELETE(hv, keysv);
}
}
*MARK = svp ? *svp : &PL_sv_undef;
{
dVAR; dSP; dMARK; dORIGMARK;
const I32 items = SP - MARK;
- SV * const av = (SV *) av_make(items, MARK+1);
+ SV * const av = MUTABLE_SV(av_make(items, MARK+1));
SP = ORIGMARK; /* av_make() might realloc stack_sp */
mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
? newRV_noinc(av) : av);
SV * const val = newSV(0);
if (MARK < SP)
sv_setsv(val, *++MARK);
- else if (ckWARN(WARN_MISC))
- Perl_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
+ else
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
(void)hv_store_ent(hv,key,val,0);
}
SP = ORIGMARK;
mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
- ? newRV_noinc((SV*) hv) : (SV*) hv);
+ ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
RETURN;
}
PP(pp_splice)
{
dVAR; dSP; dMARK; dORIGMARK;
- register AV *ary = (AV*)*++MARK;
+ register AV *ary = MUTABLE_AV(*++MARK);
register SV **src;
register SV **dst;
register I32 i;
I32 newlen;
I32 after;
I32 diff;
- const MAGIC * const mg = SvTIED_mg((SV*)ary, PERL_MAGIC_tied);
+ const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
if (mg) {
- *MARK-- = SvTIED_obj((SV*)ary, mg);
+ *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
PUSHMARK(MARK);
PUTBACK;
- ENTER;
+ ENTER_with_name("call_SPLICE");
call_method("SPLICE",GIMME_V);
- LEAVE;
+ LEAVE_with_name("call_SPLICE");
SPAGAIN;
RETURN;
}
length = AvMAX(ary) + 1;
}
if (offset > AvFILLp(ary) + 1) {
- if (ckWARN(WARN_MISC))
- Perl_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
offset = AvFILLp(ary) + 1;
}
after = AvFILLp(ary) + 1 - (offset + length);
PP(pp_push)
{
dVAR; dSP; dMARK; dORIGMARK; dTARGET;
- register AV * const ary = (AV*)*++MARK;
- const MAGIC * const mg = SvTIED_mg((SV*)ary, PERL_MAGIC_tied);
+ register AV * const ary = MUTABLE_AV(*++MARK);
+ const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
if (mg) {
- *MARK-- = SvTIED_obj((SV*)ary, mg);
+ *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
PUSHMARK(MARK);
PUTBACK;
- ENTER;
+ ENTER_with_name("call_PUSH");
call_method("PUSH",G_SCALAR|G_DISCARD);
- LEAVE;
+ LEAVE_with_name("call_PUSH");
SPAGAIN;
- SP = ORIGMARK;
- PUSHi( AvFILL(ary) + 1 );
}
else {
PL_delaymagic = DM_DELAY;
av_store(ary, AvFILLp(ary)+1, sv);
}
if (PL_delaymagic & DM_ARRAY)
- mg_set((SV*)ary);
+ mg_set(MUTABLE_SV(ary));
PL_delaymagic = 0;
- SP = ORIGMARK;
- PUSHi( AvFILLp(ary) + 1 );
+ }
+ SP = ORIGMARK;
+ if (OP_GIMME(PL_op, 0) != G_VOID) {
+ PUSHi( AvFILL(ary) + 1 );
}
RETURN;
}
{
dVAR;
dSP;
- AV * const av = (AV*)POPs;
+ AV * const av = MUTABLE_AV(POPs);
SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
EXTEND(SP, 1);
assert (sv);
PP(pp_unshift)
{
dVAR; dSP; dMARK; dORIGMARK; dTARGET;
- register AV *ary = (AV*)*++MARK;
- const MAGIC * const mg = SvTIED_mg((SV*)ary, PERL_MAGIC_tied);
+ register AV *ary = MUTABLE_AV(*++MARK);
+ const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
if (mg) {
- *MARK-- = SvTIED_obj((SV*)ary, mg);
+ *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
PUSHMARK(MARK);
PUTBACK;
- ENTER;
+ ENTER_with_name("call_UNSHIFT");
call_method("UNSHIFT",G_SCALAR|G_DISCARD);
- LEAVE;
+ LEAVE_with_name("call_UNSHIFT");
SPAGAIN;
}
else {
}
}
SP = ORIGMARK;
- PUSHi( AvFILL(ary) + 1 );
+ if (OP_GIMME(PL_op, 0) != G_VOID) {
+ PUSHi( AvFILL(ary) + 1 );
+ }
RETURN;
}
PP(pp_reverse)
{
dVAR; dSP; dMARK;
- SV ** const oldsp = SP;
if (GIMME == G_ARRAY) {
- MARK++;
- while (MARK < SP) {
- register SV * const tmp = *MARK;
- *MARK++ = *SP;
- *SP-- = tmp;
+ if (PL_op->op_private & OPpREVERSE_INPLACE) {
+ AV *av;
+
+ /* See pp_sort() */
+ assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
+ (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
+ av = MUTABLE_AV((*SP));
+ /* In-place reversing only happens in void context for the array
+ * assignment. We don't need to push anything on the stack. */
+ SP = MARK;
+
+ if (SvMAGICAL(av)) {
+ I32 i, j;
+ register SV *tmp = sv_newmortal();
+ /* For SvCANEXISTDELETE */
+ HV *stash;
+ const MAGIC *mg;
+ bool can_preserve = SvCANEXISTDELETE(av);
+
+ for (i = 0, j = av_len(av); i < j; ++i, --j) {
+ register SV *begin, *end;
+
+ if (can_preserve) {
+ if (!av_exists(av, i)) {
+ if (av_exists(av, j)) {
+ register SV *sv = av_delete(av, j, 0);
+ begin = *av_fetch(av, i, TRUE);
+ sv_setsv_mg(begin, sv);
+ }
+ continue;
+ }
+ else if (!av_exists(av, j)) {
+ register SV *sv = av_delete(av, i, 0);
+ end = *av_fetch(av, j, TRUE);
+ sv_setsv_mg(end, sv);
+ continue;
+ }
+ }
+
+ begin = *av_fetch(av, i, TRUE);
+ end = *av_fetch(av, j, TRUE);
+ sv_setsv(tmp, begin);
+ sv_setsv_mg(begin, end);
+ sv_setsv_mg(end, tmp);
+ }
+ }
+ else {
+ SV **begin = AvARRAY(av);
+ SV **end = begin + AvFILLp(av);
+
+ while (begin < end) {
+ register SV * const tmp = *begin;
+ *begin++ = *end;
+ *end-- = tmp;
+ }
+ }
+ }
+ else {
+ SV **oldsp = SP;
+ MARK++;
+ while (MARK < SP) {
+ register SV * const tmp = *MARK;
+ *MARK++ = *SP;
+ *SP-- = tmp;
+ }
+ /* safe as long as stack cannot get extended in the above */
+ SP = oldsp;
}
- /* safe as long as stack cannot get extended in the above */
- SP = oldsp;
}
else {
register char *up;
SvUTF8_off(TARG); /* decontaminate */
if (SP - MARK > 1)
do_join(TARG, &PL_sv_no, MARK, SP);
- else
+ else {
sv_setsv(TARG, (SP > MARK)
? *SP
: (padoff_du = find_rundefsvoffset(),
(padoff_du == NOT_IN_PAD
|| PAD_COMPNAME_FLAGS_isOUR(padoff_du))
? DEFSV : PAD_SVl(padoff_du)));
+
+ if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
+ report_uninit(TARG);
+ }
+
up = SvPV_force(TARG, len);
if (len > 1) {
if (DO_UTF8(TARG)) { /* first reverse each character */
I32 iters = 0;
const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
I32 maxiters = slen + 10;
+ I32 trailing_empty = 0;
const char *orig;
const I32 origlimit = limit;
I32 realarray = 0;
I32 base;
const I32 gimme = GIMME_V;
+ bool gimme_scalar;
const I32 oldsave = PL_savestack_ix;
- I32 make_mortal = 1;
+ U32 make_mortal = SVs_TEMP;
bool multiline = 0;
MAGIC *mg = NULL;
#ifdef USE_ITHREADS
if (pm->op_pmreplrootu.op_pmtargetoff) {
- ary = GvAVn((GV*)PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff));
+ ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
}
#else
if (pm->op_pmreplrootu.op_pmtargetgv) {
ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
}
#endif
- else if (gimme != G_ARRAY)
- ary = GvAVn(PL_defgv);
else
ary = NULL;
if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
av_extend(ary,0);
av_clear(ary);
SPAGAIN;
- if ((mg = SvTIED_mg((SV*)ary, PERL_MAGIC_tied))) {
+ if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
PUSHMARK(SP);
- XPUSHs(SvTIED_obj((SV*)ary, mg));
+ XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
}
else {
if (!AvREAL(ary)) {
multiline = 1;
}
+ gimme_scalar = gimme == G_SCALAR && !ary;
+
if (!limit)
limit = maxiters + 2;
if (RX_EXTFLAGS(rx) & RXf_WHITE) {
if (m >= strend)
break;
- dstr = newSVpvn_utf8(s, m-s, do_utf8);
- if (make_mortal)
- sv_2mortal(dstr);
- XPUSHs(dstr);
+ if (gimme_scalar) {
+ iters++;
+ if (m-s == 0)
+ trailing_empty++;
+ else
+ trailing_empty = 0;
+ } else {
+ dstr = newSVpvn_flags(s, m-s,
+ (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
+ XPUSHs(dstr);
+ }
/* skip the whitespace found last */
if (do_utf8)
m++;
if (m >= strend)
break;
- dstr = newSVpvn_utf8(s, m-s, do_utf8);
- if (make_mortal)
- sv_2mortal(dstr);
- XPUSHs(dstr);
+
+ if (gimme_scalar) {
+ iters++;
+ if (m-s == 0)
+ trailing_empty++;
+ else
+ trailing_empty = 0;
+ } else {
+ dstr = newSVpvn_flags(s, m-s,
+ (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
+ XPUSHs(dstr);
+ }
s = m;
}
}
or
split //, $str, $i;
*/
- const U32 items = limit - 1;
- if (items < slen)
- EXTEND(SP, items);
- else
- EXTEND(SP, slen);
+ if (!gimme_scalar) {
+ const U32 items = limit - 1;
+ if (items < slen)
+ EXTEND(SP, items);
+ else
+ EXTEND(SP, slen);
+ }
if (do_utf8) {
while (--limit) {
/* keep track of how many bytes we skip over */
m = s;
s += UTF8SKIP(s);
- dstr = newSVpvn_utf8(m, s-m, TRUE);
-
- if (make_mortal)
- sv_2mortal(dstr);
+ if (gimme_scalar) {
+ iters++;
+ if (s-m == 0)
+ trailing_empty++;
+ else
+ trailing_empty = 0;
+ } else {
+ dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
- PUSHs(dstr);
+ PUSHs(dstr);
+ }
if (s >= strend)
break;
}
} else {
while (--limit) {
- dstr = newSVpvn(s, 1);
+ if (gimme_scalar) {
+ iters++;
+ } else {
+ dstr = newSVpvn(s, 1);
- s++;
- if (make_mortal)
- sv_2mortal(dstr);
+ if (make_mortal)
+ sv_2mortal(dstr);
- PUSHs(dstr);
+ PUSHs(dstr);
+ }
+
+ s++;
if (s >= strend)
break;
}
}
}
- else if (do_utf8 == ((RX_EXTFLAGS(rx) & RXf_UTF8) != 0) &&
+ else if (do_utf8 == (RX_UTF8(rx) != 0) &&
(RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
&& (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
&& !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
SV * const csv = CALLREG_INTUIT_STRING(rx);
len = RX_MINLENRET(rx);
- if (len == 1 && !(RX_EXTFLAGS(rx) & RXf_UTF8) && !tail) {
+ if (len == 1 && !RX_UTF8(rx) && !tail) {
const char c = *SvPV_nolen_const(csv);
while (--limit) {
for (m = s; m < strend && *m != c; m++)
;
if (m >= strend)
break;
- dstr = newSVpvn_utf8(s, m-s, do_utf8);
- if (make_mortal)
- sv_2mortal(dstr);
- XPUSHs(dstr);
+ if (gimme_scalar) {
+ iters++;
+ if (m-s == 0)
+ trailing_empty++;
+ else
+ trailing_empty = 0;
+ } else {
+ dstr = newSVpvn_flags(s, m-s,
+ (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
+ XPUSHs(dstr);
+ }
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
if (do_utf8)
(m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
csv, multiline ? FBMrf_MULTILINE : 0)) )
{
- dstr = newSVpvn_utf8(s, m-s, do_utf8);
- if (make_mortal)
- sv_2mortal(dstr);
- XPUSHs(dstr);
+ if (gimme_scalar) {
+ iters++;
+ if (m-s == 0)
+ trailing_empty++;
+ else
+ trailing_empty = 0;
+ } else {
+ dstr = newSVpvn_flags(s, m-s,
+ (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
+ XPUSHs(dstr);
+ }
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
if (do_utf8)
strend = s + (strend - m);
}
m = RX_OFFS(rx)[0].start + orig;
- dstr = newSVpvn_utf8(s, m-s, do_utf8);
- if (make_mortal)
- sv_2mortal(dstr);
- XPUSHs(dstr);
+
+ if (gimme_scalar) {
+ iters++;
+ if (m-s == 0)
+ trailing_empty++;
+ else
+ trailing_empty = 0;
+ } else {
+ dstr = newSVpvn_flags(s, m-s,
+ (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
+ XPUSHs(dstr);
+ }
if (RX_NPARENS(rx)) {
I32 i;
for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
/* japhy (07/27/01) -- the (m && s) test doesn't catch
parens that didn't match -- they should be set to
undef, not the empty string */
- if (m >= orig && s >= orig) {
- dstr = newSVpvn_utf8(s, m-s, do_utf8);
+ if (gimme_scalar) {
+ iters++;
+ if (m-s == 0)
+ trailing_empty++;
+ else
+ trailing_empty = 0;
+ } else {
+ if (m >= orig && s >= orig) {
+ dstr = newSVpvn_flags(s, m-s,
+ (do_utf8 ? SVf_UTF8 : 0)
+ | make_mortal);
+ }
+ else
+ dstr = &PL_sv_undef; /* undef, not "" */
+ XPUSHs(dstr);
}
- else
- dstr = &PL_sv_undef; /* undef, not "" */
- if (make_mortal)
- sv_2mortal(dstr);
- XPUSHs(dstr);
+
}
}
s = RX_OFFS(rx)[0].end + orig;
}
}
- iters = (SP - PL_stack_base) - base;
+ if (!gimme_scalar) {
+ iters = (SP - PL_stack_base) - base;
+ }
if (iters > maxiters)
DIE(aTHX_ "Split loop");
/* keep field after final delim? */
if (s < strend || (iters && origlimit)) {
- const STRLEN l = strend - s;
- dstr = newSVpvn_utf8(s, l, do_utf8);
- if (make_mortal)
- sv_2mortal(dstr);
- XPUSHs(dstr);
+ if (!gimme_scalar) {
+ const STRLEN l = strend - s;
+ dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
+ XPUSHs(dstr);
+ }
iters++;
}
else if (!origlimit) {
- while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
- if (TOPs && !make_mortal)
- sv_2mortal(TOPs);
- iters--;
- *SP-- = &PL_sv_undef;
+ if (gimme_scalar) {
+ iters -= trailing_empty;
+ } else {
+ while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
+ if (TOPs && !make_mortal)
+ sv_2mortal(TOPs);
+ *SP-- = &PL_sv_undef;
+ iters--;
+ }
}
}
if (!mg) {
if (SvSMAGICAL(ary)) {
PUTBACK;
- mg_set((SV*)ary);
+ mg_set(MUTABLE_SV(ary));
SPAGAIN;
}
if (gimme == G_ARRAY) {
}
else {
PUTBACK;
- ENTER;
+ ENTER_with_name("call_PUSH");
call_method("PUSH",G_SCALAR|G_DISCARD);
- LEAVE;
+ LEAVE_with_name("call_PUSH");
SPAGAIN;
if (gimme == G_ARRAY) {
I32 i;
dSP;
dTOPss;
SV *retsv = sv;
+ assert(SvTYPE(retsv) != SVt_PVCV);
SvLOCK(sv);
- if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV
- || SvTYPE(retsv) == SVt_PVCV) {
+ if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV) {
retsv = refto(retsv);
}
SETs(retsv);
dVAR;
DIE(aTHX_ "panic: unimplemented op %s (#%d) called", OP_NAME(PL_op),
PL_op->op_type);
+ return NORMAL;
+}
+
+PP(pp_boolkeys)
+{
+ dVAR;
+ dSP;
+ HV * const hv = (HV*)POPs;
+
+ if (SvRMAGICAL(hv)) {
+ MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
+ if (mg) {
+ XPUSHs(magic_scalarpack(hv, mg));
+ RETURN;
+ }
+ }
+
+ XPUSHs(boolSV(HvKEYS(hv) != 0));
+ RETURN;
}
/*