From: Zefram Date: Sun, 8 Nov 2009 14:03:45 +0000 (+0100) Subject: Bareword sub lookups X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f7461760003db2ce68155c97ea6c1658e96fcd27;p=p5sagit%2Fp5-mst-13.2.git Bareword sub lookups Attached is a patch that changes how the tokeniser looks up subroutines, when they're referenced by a bareword, for prototype and const-sub purposes. Formerly, it has looked up bareword subs directly in the package, which is contrary to the way the generated op tree looks up the sub, via an rv2cv op. The patch makes the tokeniser generate the rv2cv op earlier, and dig around in that. The motivation for this is to allow modules to hook the rv2cv op creation, to affect the name->subroutine lookup process. Currently, such hooking affects op execution as intended, but everything goes wrong with a bareword ref where the tokeniser looks at some unrelated CV, or a blank space, in the package. With the patch in place, an rv2cv hook correctly affects the tokeniser and therefore the prototype-based aspects of parsing. The patch also changes ck_subr (which applies the argument context and checking parts of prototype behaviour) to handle subs referenced by an RV const op inside the rv2cv, where formerly it would only handle a gv op inside the rv2cv. This is to support the most likely kind of modified rv2cv op. The attached patch is the resulting revised version of the bareword sub patch. It incorporates the original patch (allowing rv2cv op hookers to control prototype processing), the GV-downgrading addition, and a mention in perldelta. --- diff --git a/embed.fnc b/embed.fnc index cbea291..755c42d 100644 --- a/embed.fnc +++ b/embed.fnc @@ -365,6 +365,7 @@ Ap |void |gv_fullname4 |NN SV* sv|NN const GV* gv|NULLOK const char* prefix|bool pMox |GP * |newGP |NN GV *const gv Ap |void |gv_init |NN GV* gv|NULLOK HV* stash|NN const char* name|STRLEN len|int multi Ap |void |gv_name_set |NN GV* gv|NN const char *name|U32 len|U32 flags +Apd |void |gv_try_downgrade|NN GV* gv Apd |HV* |gv_stashpv |NN const char* name|I32 flags Apd |HV* |gv_stashpvn |NN const char* name|U32 namelen|I32 flags Apd |HV* |gv_stashsv |NN SV* sv|I32 flags diff --git a/embed.h b/embed.h index 636a87b..e80384a 100644 --- a/embed.h +++ b/embed.h @@ -291,6 +291,7 @@ #define gv_fullname4 Perl_gv_fullname4 #define gv_init Perl_gv_init #define gv_name_set Perl_gv_name_set +#define gv_try_downgrade Perl_gv_try_downgrade #define gv_stashpv Perl_gv_stashpv #define gv_stashpvn Perl_gv_stashpvn #define gv_stashsv Perl_gv_stashsv @@ -2654,6 +2655,7 @@ #endif #define gv_init(a,b,c,d,e) Perl_gv_init(aTHX_ a,b,c,d,e) #define gv_name_set(a,b,c,d) Perl_gv_name_set(aTHX_ a,b,c,d) +#define gv_try_downgrade(a) Perl_gv_try_downgrade(aTHX_ a) #define gv_stashpv(a,b) Perl_gv_stashpv(aTHX_ a,b) #define gv_stashpvn(a,b,c) Perl_gv_stashpvn(aTHX_ a,b,c) #define gv_stashsv(a,b) Perl_gv_stashsv(aTHX_ a,b) diff --git a/global.sym b/global.sym index 4928017..6000af7 100644 --- a/global.sym +++ b/global.sym @@ -148,6 +148,7 @@ Perl_gv_fullname3 Perl_gv_fullname4 Perl_gv_init Perl_gv_name_set +Perl_gv_try_downgrade Perl_gv_stashpv Perl_gv_stashpvn Perl_gv_stashsv diff --git a/gv.c b/gv.c index 3e225bc..932b2b8 100644 --- a/gv.c +++ b/gv.c @@ -2372,6 +2372,53 @@ Perl_gv_name_set(pTHX_ GV *gv, const char *name, U32 len, U32 flags) } /* +=for apidoc gv_try_downgrade + +If C is a typeglob containing only a constant sub, and is only +referenced from its package, and both the typeglob and the sub are +sufficiently ordinary, replace the typeglob (in the package) with a +placeholder that more compactly represents the same thing. This is meant +to be used when a placeholder has been upgraded, most likely because +something wanted to look at a proper code object, and it has turned out +to be a constant sub to which a proper reference is no longer required. + +=cut +*/ + +void +Perl_gv_try_downgrade(pTHX_ GV *gv) +{ + HV *stash; + CV *cv; + HEK *namehek; + SV **gvp; + PERL_ARGS_ASSERT_GV_TRY_DOWNGRADE; + if (SvREFCNT(gv) == 1 && SvTYPE(gv) == SVt_PVGV && !SvFAKE(gv) && + !SvOBJECT(gv) && !SvMAGICAL(gv) && !SvREADONLY(gv) && + isGV_with_GP(gv) && GvGP(gv) && + GvMULTI(gv) && !GvINTRO(gv) && GvREFCNT(gv) == 1 && + !GvSV(gv) && !GvAV(gv) && !GvHV(gv) && !GvIOp(gv) && !GvFORM(gv) && + GvEGV(gv) == gv && (stash = GvSTASH(gv)) && (cv = GvCV(gv)) && + !SvOBJECT(cv) && !SvMAGICAL(cv) && !SvREADONLY(cv) && + CvSTASH(cv) == stash && CvGV(cv) == gv && + CvCONST(cv) && !CvMETHOD(cv) && !CvLVALUE(cv) && !CvUNIQUE(cv) && + !CvNODEBUG(cv) && !CvCLONE(cv) && !CvCLONED(cv) && !CvANON(cv) && + (namehek = GvNAME_HEK(gv)) && + (gvp = hv_fetch(stash, HEK_KEY(namehek), + HEK_LEN(namehek)*(HEK_UTF8(namehek) ? -1 : 1), 0)) && + *gvp == (SV*)gv) { + SV *value = SvREFCNT_inc(CvXSUBANY(cv).any_ptr); + SvREFCNT(gv) = 0; + sv_clear((SV*)gv); + SvREFCNT(gv) = 1; + SvFLAGS(gv) = SVt_IV|SVf_ROK; + SvANY(gv) = (XPVGV*)((char*)&(gv->sv_u.svu_iv) - + STRUCT_OFFSET(XPVIV, xiv_iv)); + SvRV_set(gv, value); + } +} + +/* * Local variables: * c-indentation-style: bsd * c-basic-offset: 4 diff --git a/op.c b/op.c index 1e869c1..43cbc07 100644 --- a/op.c +++ b/op.c @@ -570,6 +570,13 @@ Perl_op_clear(pTHX_ OP *o) case OP_AELEMFAST: if (! (o->op_type == OP_AELEMFAST && o->op_flags & OPf_SPECIAL)) { /* not an OP_PADAV replacement */ + GV *gv = (o->op_type == OP_GV || o->op_type == OP_GVSV) +#ifdef USE_ITHREADS + && PL_curpad +#endif + ? cGVOPo_gv : NULL; + if (gv) + SvREFCNT_inc(gv); #ifdef USE_ITHREADS if (cPADOPo->op_padix > 0) { /* No GvIN_PAD_off(cGVOPo_gv) here, because other references @@ -581,6 +588,12 @@ Perl_op_clear(pTHX_ OP *o) SvREFCNT_dec(cSVOPo->op_sv); cSVOPo->op_sv = NULL; #endif + if (gv) { + int try_downgrade = SvREFCNT(gv) == 2; + SvREFCNT_dec(gv); + if (try_downgrade) + gv_try_downgrade(gv); + } } break; case OP_METHOD_NAMED: @@ -7945,22 +7958,29 @@ Perl_ck_subr(pTHX_ OP *o) o->op_private |= OPpENTERSUB_HASTARG; for (cvop = o2; cvop->op_sibling; cvop = cvop->op_sibling) ; if (cvop->op_type == OP_RV2CV) { - SVOP* tmpop; o->op_private |= (cvop->op_private & OPpENTERSUB_AMPER); op_null(cvop); /* disable rv2cv */ - tmpop = (SVOP*)((UNOP*)cvop)->op_first; - if (tmpop->op_type == OP_GV && !(o->op_private & OPpENTERSUB_AMPER)) { - GV *gv = cGVOPx_gv(tmpop); - cv = GvCVu(gv); - if (!cv) - tmpop->op_private |= OPpEARLY_CV; - else { - if (SvPOK(cv)) { - STRLEN len; - namegv = CvANON(cv) ? gv : CvGV(cv); - proto = SvPV(MUTABLE_SV(cv), len); - proto_end = proto + len; - } + if (!(o->op_private & OPpENTERSUB_AMPER)) { + SVOP *tmpop = (SVOP*)((UNOP*)cvop)->op_first; + GV *gv = NULL; + switch (tmpop->op_type) { + case OP_GV: { + gv = cGVOPx_gv(tmpop); + cv = GvCVu(gv); + if (!cv) + tmpop->op_private |= OPpEARLY_CV; + } break; + case OP_CONST: { + SV *sv = cSVOPx_sv(tmpop); + if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV) + cv = (CV*)SvRV(sv); + } break; + } + if (cv && SvPOK(cv)) { + STRLEN len; + namegv = gv && CvANON(cv) ? gv : CvGV(cv); + proto = SvPV(MUTABLE_SV(cv), len); + proto_end = proto + len; } } } diff --git a/pod/perl5112delta.pod b/pod/perl5112delta.pod index ca8c809..d6dad5d 100644 --- a/pod/perl5112delta.pod +++ b/pod/perl5112delta.pod @@ -74,6 +74,16 @@ implements reverse Polish notation arithmetic via pluggable keywords. This module is mainly used for test purposes, and is not normally installed, but also serves as an example of how to use the new mechanism. +=head2 Overridable function lookup + +Where an extension module hooks the creation of rv2cv ops, to modify +the subroutine lookup process, this now works correctly for bareword +subroutine calls. This means that prototypes on subroutines referenced +this way will be processed correctly. (Previously bareword subroutine +names were initially looked up, for parsing purposes, by an unhookable +mechanism, so extensions could only properly influence subroutine names +that appeared with an C<&> sigil.) + =head1 New Platforms XXX List any platforms that this version of perl compiles on, that previous diff --git a/proto.h b/proto.h index c3322b8..2a3b118 100644 --- a/proto.h +++ b/proto.h @@ -943,6 +943,11 @@ PERL_CALLCONV void Perl_gv_name_set(pTHX_ GV* gv, const char *name, U32 len, U32 #define PERL_ARGS_ASSERT_GV_NAME_SET \ assert(gv); assert(name) +PERL_CALLCONV void Perl_gv_try_downgrade(pTHX_ GV* gv) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_GV_TRY_DOWNGRADE \ + assert(gv) + PERL_CALLCONV HV* Perl_gv_stashpv(pTHX_ const char* name, I32 flags) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_GV_STASHPV \ diff --git a/toke.c b/toke.c index 0bfa970..680d8a2 100644 --- a/toke.c +++ b/toke.c @@ -5361,6 +5361,7 @@ Perl_yylex(pTHX) SV *sv; int pkgname = 0; const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); + OP *rv2cv_op; CV *cv; #ifdef PERL_MAD SV *nextPL_nextwhite = 0; @@ -5454,19 +5455,29 @@ Perl_yylex(pTHX) if (len) goto safe_bareword; - /* Do the explicit type check so that we don't need to force - the initialisation of the symbol table to have a real GV. - Beware - gv may not really be a PVGV, cv may not really be - a PVCV, (because of the space optimisations that gv_init - understands) But they're true if for this symbol there is - respectively a typeglob and a subroutine. - */ - cv = gv ? ((SvTYPE(gv) == SVt_PVGV) - /* Real typeglob, so get the real subroutine: */ - ? GvCVu(gv) - /* A proxy for a subroutine in this package? */ - : SvOK(gv) ? MUTABLE_CV(gv) : NULL) - : NULL; + cv = NULL; + { + OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv)); + const_op->op_private = OPpCONST_BARE; + rv2cv_op = newCVREF(0, const_op); + } + if (rv2cv_op->op_type == OP_RV2CV && + (rv2cv_op->op_flags & OPf_KIDS)) { + OP *rv_op = cUNOPx(rv2cv_op)->op_first; + switch (rv_op->op_type) { + case OP_CONST: { + SV *sv = cSVOPx_sv(rv_op); + if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV) + cv = (CV*)SvRV(sv); + } break; + case OP_GV: { + GV *gv = cGVOPx_gv(rv_op); + CV *maybe_cv = GvCVu(gv); + if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV) + cv = maybe_cv; + } break; + } + } /* See if it's the indirect object for a list operator. */ @@ -5489,8 +5500,10 @@ Perl_yylex(pTHX) /* Two barewords in a row may indicate method call. */ if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && - (tmp = intuit_method(s, gv, cv))) + (tmp = intuit_method(s, gv, cv))) { + op_free(rv2cv_op); return REPORT(tmp); + } /* If not a declared subroutine, it's an indirect object. */ /* (But it's an indir obj regardless for sort.) */ @@ -5498,7 +5511,7 @@ Perl_yylex(pTHX) if ( ( !immediate_paren && (PL_last_lop_op == OP_SORT || - ((!gv || !cv) && + (!cv && (PL_last_lop_op != OP_MAPSTART && PL_last_lop_op != OP_GREPSTART)))) || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' @@ -5521,6 +5534,7 @@ Perl_yylex(pTHX) /* Is this a word before a => operator? */ if (*s == '=' && s[1] == '>' && !pkgname) { + op_free(rv2cv_op); CLINE; sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) @@ -5535,7 +5549,7 @@ Perl_yylex(pTHX) d = s + 1; while (SPACE_OR_TAB(*d)) d++; - if (*d == ')' && (sv = gv_const_sv(gv))) { + if (*d == ')' && (sv = cv_const_sv(cv))) { s = d + 1; goto its_constant; } @@ -5556,6 +5570,7 @@ Perl_yylex(pTHX) PL_thistoken = newSVpvs(""); } #endif + op_free(rv2cv_op); force_next(WORD); pl_yylval.ival = 0; TOKEN('&'); @@ -5563,7 +5578,8 @@ Perl_yylex(pTHX) /* If followed by var or block, call it a method (unless sub) */ - if ((*s == '$' || *s == '{') && (!gv || !cv)) { + if ((*s == '$' || *s == '{') && !cv) { + op_free(rv2cv_op); PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_METHOD; PREBLOCK(METHOD); @@ -5573,8 +5589,10 @@ Perl_yylex(pTHX) if (!orig_keyword && (isIDFIRST_lazy_if(s,UTF) || *s == '$') - && (tmp = intuit_method(s, gv, cv))) + && (tmp = intuit_method(s, gv, cv))) { + op_free(rv2cv_op); return REPORT(tmp); + } /* Not a method, so call it a subroutine (if defined) */ @@ -5584,25 +5602,17 @@ Perl_yylex(pTHX) "Ambiguous use of -%s resolved as -&%s()", PL_tokenbuf, PL_tokenbuf); /* Check for a constant sub */ - if ((sv = gv_const_sv(gv))) { + if ((sv = cv_const_sv(cv))) { its_constant: + op_free(rv2cv_op); SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); pl_yylval.opval->op_private = 0; TOKEN(WORD); } - /* Resolve to GV now. */ - if (SvTYPE(gv) != SVt_PVGV) { - gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV); - assert (SvTYPE(gv) == SVt_PVGV); - /* cv must have been some sort of placeholder, so - now needs replacing with a real code reference. */ - cv = GvCV(gv); - } - op_free(pl_yylval.opval); - pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv)); + pl_yylval.opval = rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -5670,7 +5680,7 @@ Perl_yylex(pTHX) if (probable_sub) { gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV); op_free(pl_yylval.opval); - pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv)); + pl_yylval.opval = rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -5722,6 +5732,7 @@ Perl_yylex(pTHX) } } } + op_free(rv2cv_op); safe_bareword: if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {