From: Yves Orton Date: Wed, 21 Mar 2007 10:39:24 +0000 (+0100) Subject: Resolve PL_curpm issues with (??{}) and fix corruption of match results when pattern... X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=28d8d7f41ab202dd5f7611033d27ecad44cadd60;p=p5sagit%2Fp5-mst-13.2.git Resolve PL_curpm issues with (??{}) and fix corruption of match results when pattern is a qr. Message-ID: <9b18b3110703210239x540f5ad9mdb41c2ea6229ac31@mail.gmail.com> plus two follow-up patches (minor tweaks) p4raw-id: //depot/perl@30678 --- diff --git a/dump.c b/dump.c index 3bfdeeb..e9fa1ae 100644 --- a/dump.c +++ b/dump.c @@ -1263,8 +1263,20 @@ Perl_do_magic_dump(pTHX_ I32 level, PerlIO *file, const MAGIC *mg, I32 nest, I32 Perl_dump_indent(aTHX_ level, file, " MINMATCH\n"); } if (mg->mg_obj) { - Perl_dump_indent(aTHX_ level, file, " MG_OBJ = 0x%"UVxf"\n", PTR2UV(mg->mg_obj)); - if (mg->mg_flags & MGf_REFCOUNTED) + Perl_dump_indent(aTHX_ level, file, " MG_OBJ = 0x%"UVxf"\n", + PTR2UV(mg->mg_obj)); + if (mg->mg_type == PERL_MAGIC_qr) { + regexp *re=(regexp *)mg->mg_obj; + SV *dsv= sv_newmortal(); + const char * const s = pv_pretty(dsv, re->wrapped, re->wraplen, + 60, NULL, NULL, + ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_PRETTY_ELIPSES | + ((re->extflags & RXf_UTF8) ? PERL_PV_ESCAPE_UNI : 0)) + ); + Perl_dump_indent(aTHX_ level+1, file, " PAT = %s\n", s); + Perl_dump_indent(aTHX_ level+1, file, " REFCNT = %"IVdf"\n", (IV*)re->refcnt); + } + if (mg->mg_flags & MGf_REFCOUNTED) do_sv_dump(level+2, file, mg->mg_obj, nest+1, maxnest, dumpops, pvlim); /* MG is already +1 */ } if (mg->mg_len) diff --git a/embed.fnc b/embed.fnc index 1686b3c..9d48950 100644 --- a/embed.fnc +++ b/embed.fnc @@ -677,6 +677,7 @@ Ap |I32 |pregexec |NN regexp* prog|NN char* stringarg \ |NN char* strend|NN char* strbeg|I32 minend \ |NN SV* screamer|U32 nosave Ap |void |pregfree |NULLOK struct regexp* r +EXp |struct regexp* |reg_temp_copy |NN struct regexp* r Ap |void |regfree_internal|NULLOK struct regexp* r Ap |char * |reg_stringify |NN MAGIC *mg|NULLOK STRLEN *lp|NULLOK U32 *flags|NULLOK I32 *haseval #if defined(USE_ITHREADS) diff --git a/embed.h b/embed.h index c930c91..e7d8887 100644 --- a/embed.h +++ b/embed.h @@ -685,6 +685,9 @@ #define regclass_swash Perl_regclass_swash #define pregexec Perl_pregexec #define pregfree Perl_pregfree +#if defined(PERL_CORE) || defined(PERL_EXT) +#define reg_temp_copy Perl_reg_temp_copy +#endif #define regfree_internal Perl_regfree_internal #define reg_stringify Perl_reg_stringify #if defined(USE_ITHREADS) @@ -2906,6 +2909,9 @@ #define regclass_swash(a,b,c,d,e) Perl_regclass_swash(aTHX_ a,b,c,d,e) #define pregexec(a,b,c,d,e,f,g) Perl_pregexec(aTHX_ a,b,c,d,e,f,g) #define pregfree(a) Perl_pregfree(aTHX_ a) +#if defined(PERL_CORE) || defined(PERL_EXT) +#define reg_temp_copy(a) Perl_reg_temp_copy(aTHX_ a) +#endif #define regfree_internal(a) Perl_regfree_internal(aTHX_ a) #define reg_stringify(a,b,c,d) Perl_reg_stringify(aTHX_ a,b,c,d) #if defined(USE_ITHREADS) diff --git a/ext/Devel/Peek/t/Peek.t b/ext/Devel/Peek/t/Peek.t index d3a957a..43dcb1c 100644 --- a/ext/Devel/Peek/t/Peek.t +++ b/ext/Devel/Peek/t/Peek.t @@ -282,6 +282,8 @@ do_test(15, MG_VIRTUAL = $ADDR MG_TYPE = PERL_MAGIC_qr\(r\) MG_OBJ = $ADDR + PAT = "\(\?-xism:tic\)" + REFCNT = 2 STASH = $ADDR\\t"Regexp"'); do_test(16, diff --git a/ext/Encode/t/Aliases.t b/ext/Encode/t/Aliases.t index ff86ed1..9c70944 100644 --- a/ext/Encode/t/Aliases.t +++ b/ext/Encode/t/Aliases.t @@ -122,6 +122,7 @@ use Test::More tests => (scalar keys %a2c) * 4; print "# alias test; \$ON_EBCDIC == $ON_EBCDIC\n"; foreach my $a (keys %a2c){ + print "# $a => $a2c{$a}\n"; my $e = Encode::find_encoding($a); is((defined($e) and $e->name), $a2c{$a},$a) or warn "alias was $a";; diff --git a/ext/re/re.pm b/ext/re/re.pm index c33ca3c..e06602d 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -473,45 +473,39 @@ floating string. This will be what the optimiser of the Perl that you are using thinks is the longest. If you believe that the result is wrong please report it via the L utility. -=item regname($name,$qr,$all) +=item regname($name,$all) -Returns the contents of a named buffer. If $qr is missing, or is not the -result of a qr// then returns the result of the last successful match. If -$all is true then returns an array ref containing one entry per buffer, +Returns the contents of a named buffer of the last successful match. If +$all is true, then returns an array ref containing one entry per buffer, otherwise returns the first defined buffer. -=item regnames($qr,$all) +=item regnames($all) -Returns a list of all of the named buffers defined in a pattern. If -$all is true then it returns all names defined, if not returns only -names which were involved in the last successful match. If $qr is omitted -or is not the result of a qr// then returns the details for the last -successful match. +Returns a list of all of the named buffers defined in the last successful +match. If $all is true, then it returns all names defined, if not it returns +only names which were involved in the match. -=item regnames_iterinit($qr) +=item regnames_iterinit() -Initializes the internal hash iterator associated to a regexps named capture -buffers. If $qr is omitted resets the iterator associated with the regexp used -in the last successful match. +Initializes the internal hash iterator associated to the last successful +matches named capture buffers. -=item regnames_iternext($qr,$all) +=item regnames_iternext($all) -Gets the next key from the hash associated with a regexp. If $qr -is omitted resets the iterator associated with the regexp used in the -last successful match. If $all is true returns the keys of all of the +Gets the next key from the named capture buffer hash associated with the +last successful match. If $all is true returns the keys of all of the distinct named buffers in the pattern, if not returns only those names used in the last successful match. -=item regnames_count($qr) +=item regnames_count() -Returns the number of distinct names defined in the regexp $qr. If -$qr is omitted or not a regexp returns the count of names in the -last successful match. +Returns the number of distinct names defined in the pattern used +for the last successful match. -B that this result is always the actual number of distinct -named buffers defined, it may not actually match that which is -returned by C and related routines when those routines -have not been called with the $all parameter set.. +B this result is always the actual number of distinct +named buffers defined, it may not actually match that which is +returned by C and related routines when those routines +have not been called with the $all parameter set. =back diff --git a/ext/re/t/re_funcs.t b/ext/re/t/re_funcs.t index 6bdafcb..0d9092a 100644 --- a/ext/re/t/re_funcs.t +++ b/ext/re/t/re_funcs.t @@ -42,19 +42,14 @@ use re qw(is_regexp regexp_pattern regmust if ('1234'=~/(?:(?\d)|(?!))(?\d)(?\d)(?\d)/){ - my $qr = qr/(?foo)(?bar)/; - my @names = sort +regnames($qr); - is("@names","","regnames"); - @names = sort +regnames($qr,1); - is("@names","bar foo","regnames - all"); - @names = sort +regnames(); + my @names = sort +regnames(); is("@names","A B","regnames"); - @names = sort +regnames(undef,1); + @names = sort +regnames(1); is("@names","A B C","regnames"); - is(join("", @{regname("A",undef,1)}),"13"); - is(join("", @{regname("B",undef,1)}),"24"); + is(join("", @{regname("A",1)}),"13"); + is(join("", @{regname("B",1)}),"24"); { - if ('foobar'=~/$qr/) { + if ('foobar'=~/(?foo)(?bar)/) { regnames_iterinit(); my @res; while (defined(my $key=regnames_iternext)) { @@ -68,20 +63,7 @@ if ('1234'=~/(?:(?\d)|(?!))(?\d)(?\d)(?\d)/){ } } is(regnames_count(),3); - is(regnames_count($qr),2); -} -{ - use warnings; - require Tie::Hash::NamedCapture; - my $qr = qr/(?foo)/; - if ( 'foo' =~ /$qr/ ) { - tie my %hash,"Tie::Hash::NamedCapture",re => $qr; - if ('bar'=~/bar/) { - # last successful match is now different - is($hash{foo},'foo'); # prints foo - } - } } # New tests above this line, don't forget to update the test count below! -use Test::More tests => 23; +use Test::More tests => 19; # No tests here! diff --git a/global.sym b/global.sym index 023e030..097e3b8 100644 --- a/global.sym +++ b/global.sym @@ -390,6 +390,7 @@ Perl_regdump Perl_regclass_swash Perl_pregexec Perl_pregfree +Perl_reg_temp_copy Perl_regfree_internal Perl_reg_stringify Perl_regdupe_internal diff --git a/lib/Tie/Hash/NamedCapture.pm b/lib/Tie/Hash/NamedCapture.pm index 3383f16..73bc20b 100644 --- a/lib/Tie/Hash/NamedCapture.pm +++ b/lib/Tie/Hash/NamedCapture.pm @@ -3,27 +3,18 @@ package Tie::Hash::NamedCapture; use strict; use warnings; -our $VERSION = "0.04"; +our $VERSION = "0.05"; sub TIEHASH { my $classname = shift; my %opts = @_; - if ($opts{re} && !re::is_regexp($opts{re})) { - require Carp; - Carp::croak("'re' parameter to " . __PACKAGE__ - . "->TIEHASH must be a qr//."); - } - - my $self = bless { - all => $opts{all}, - re => $opts{re}, - }, $classname; + my $self = bless { all => $opts{all} }, $classname; return $self; } sub FETCH { - return re::regname($_[1],$_[0]->{re},$_[0]->{all}); + return re::regname($_[1],$_[0]->{all}); } sub STORE { @@ -32,16 +23,16 @@ sub STORE { } sub FIRSTKEY { - re::regnames_iterinit($_[0]->{re}); + re::regnames_iterinit(); return $_[0]->NEXTKEY; } sub NEXTKEY { - return re::regnames_iternext($_[0]->{re},$_[0]->{all}); + return re::regnames_iternext($_[0]->{all}); } sub EXISTS { - return defined re::regname( $_[1], $_[0]->{re},$_[0]->{all}); + return defined re::regname( $_[1], $_[0]->{all}); } sub DELETE { @@ -55,7 +46,7 @@ sub CLEAR { } sub SCALAR { - return scalar re::regnames($_[0]->{re},$_[0]->{all}); + return scalar re::regnames($_[0]->{all}); } tie %+, __PACKAGE__; @@ -74,19 +65,13 @@ Tie::Hash::NamedCapture - Named regexp capture buffers tie my %hash, "Tie::Hash::NamedCapture"; # %hash now behaves like %+ - tie my %hash, "Tie::Hash::NamedCapture", re => $qr, all => 1; + tie my %hash, "Tie::Hash::NamedCapture", all => 1; # %hash now access buffers from regexp in $qr like %- =head1 DESCRIPTION This module is used to implement the special hashes C<%+> and C<%->, but it -can be used independently. - -When the C parameter is set to a C expression, then the tied -hash is bound to that particular regexp and will return the results of its -last successful match. If the parameter is omitted, then the hash behaves -just as C<$1> does by referencing the last successful match in the -currently active dynamic scope. +can be used to tie other variables as you choose. When the C parameter is provided, then the tied hash elements will be array refs listing the contents of each capture buffer whose name is the @@ -104,20 +89,6 @@ The keys of C<%->-like hashes correspond to all buffer names found in the regular expression; the keys of C<%+>-like hashes list only the names of buffers that have captured (and that are thus associated to defined values). -For instance: - - my $qr = qr/(?bar)/; - if ( 'bar' =~ $qr ) { - tie my %hash, "Tie::Hash::NamedCapture", re => $qr; - print $+{foo}; # prints "bar" - print $hash{foo}; # prints "bar" too - if ( 'bar' =~ /bar/ ) { - # last successful match is now different - print $+{foo}; # prints nothing (undef) - print $hash{foo}; # still prints "bar" - } - } - =head1 SEE ALSO L, L, L, L. diff --git a/pp_ctl.c b/pp_ctl.c index 25cfe5f..124a40f 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -118,9 +118,9 @@ PP(pp_regcomp) mg = mg_find(sv, PERL_MAGIC_qr); } if (mg) { - regexp * const re = (regexp *)mg->mg_obj; + regexp * const re = reg_temp_copy((regexp *)mg->mg_obj); ReREFCNT_dec(PM_GETRE(pm)); - PM_SETRE(pm, ReREFCNT_inc(re)); + PM_SETRE(pm, re); } else { STRLEN len; diff --git a/proto.h b/proto.h index 54c98f4..0fc070e 100644 --- a/proto.h +++ b/proto.h @@ -1853,6 +1853,9 @@ PERL_CALLCONV I32 Perl_pregexec(pTHX_ regexp* prog, char* stringarg, char* stren __attribute__nonnull__(pTHX_6); PERL_CALLCONV void Perl_pregfree(pTHX_ struct regexp* r); +PERL_CALLCONV struct regexp* Perl_reg_temp_copy(pTHX_ struct regexp* r) + __attribute__nonnull__(pTHX_1); + PERL_CALLCONV void Perl_regfree_internal(pTHX_ struct regexp* r); PERL_CALLCONV char * Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval) __attribute__nonnull__(pTHX_1); diff --git a/regcomp.c b/regcomp.c index 7c08840..663d288 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4183,7 +4183,7 @@ redo_first_pass: + (sizeof(STD_PAT_MODS) - 1) + (sizeof("(?:)") - 1); - Newx(r->wrapped, r->wraplen, char ); + Newx(r->wrapped, r->wraplen + 1, char ); p = r->wrapped; *p++='('; *p++='?'; if (has_k) @@ -4206,13 +4206,14 @@ redo_first_pass: } } - *p++=':'; + *p++ = ':'; Copy(RExC_precomp, p, r->prelen, char); r->precomp = p; p += r->prelen; if (has_runon) - *p++='\n'; - *p=')'; + *p++ = '\n'; + *p++ = ')'; + *p = 0; } r->intflags = 0; @@ -8665,31 +8666,93 @@ Perl_pregfree(pTHX_ struct regexp *r) if (!r || (--r->refcnt > 0)) return; - - CALLREGFREE_PVT(r); /* free the private data */ + if (r->mother_re) { + ReREFCNT_dec(r->mother_re); + } else { + CALLREGFREE_PVT(r); /* free the private data */ + if (r->paren_names) + SvREFCNT_dec(r->paren_names); + Safefree(r->wrapped); + } + if (r->substrs) { + if (r->anchored_substr) + SvREFCNT_dec(r->anchored_substr); + if (r->anchored_utf8) + SvREFCNT_dec(r->anchored_utf8); + if (r->float_substr) + SvREFCNT_dec(r->float_substr); + if (r->float_utf8) + SvREFCNT_dec(r->float_utf8); + Safefree(r->substrs); + } RX_MATCH_COPY_FREE(r); #ifdef PERL_OLD_COPY_ON_WRITE if (r->saved_copy) - SvREFCNT_dec(r->saved_copy); + SvREFCNT_dec(r->saved_copy); #endif - if (r->substrs) { - if (r->anchored_substr) - SvREFCNT_dec(r->anchored_substr); - if (r->anchored_utf8) - SvREFCNT_dec(r->anchored_utf8); - if (r->float_substr) - SvREFCNT_dec(r->float_substr); - if (r->float_utf8) - SvREFCNT_dec(r->float_utf8); - Safefree(r->substrs); + if (r->swap) { + Safefree(r->swap->startp); + Safefree(r->swap->endp); + Safefree(r->swap); } - if (r->paren_names) - SvREFCNT_dec(r->paren_names); - Safefree(r->wrapped); Safefree(r->startp); Safefree(r->endp); Safefree(r); } + +/* reg_temp_copy() + + This is a hacky workaround to the structural issue of match results + being stored in the regexp structure which is in turn stored in + PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern + could be PL_curpm in multiple contexts, and could require multiple + result sets being associated with the pattern simultaneously, such + as when doing a recursive match with (??{$qr}) + + The solution is to make a lightweight copy of the regexp structure + when a qr// is returned from the code executed by (??{$qr}) this + lightweight copy doesnt actually own any of its data except for + the starp/end and the actual regexp structure itself. + +*/ + + +regexp * +Perl_reg_temp_copy (pTHX_ struct regexp *r) { + regexp *ret; + register const I32 npar = r->nparens+1; + (void)ReREFCNT_inc(r); + Newx(ret, 1, regexp); + StructCopy(r, ret, regexp); + Newx(ret->startp, npar, I32); + Copy(r->startp, ret->startp, npar, I32); + Newx(ret->endp, npar, I32); + Copy(r->endp, ret->endp, npar, I32); + ret->refcnt = 1; + if (r->substrs) { + struct reg_substr_datum *s; + I32 i; + Newx(ret->substrs, 1, struct reg_substr_data); + for (s = ret->substrs->data, i = 0; i < 3; i++, s++) { + s->min_offset = r->substrs->data[i].min_offset; + s->max_offset = r->substrs->data[i].max_offset; + s->end_shift = r->substrs->data[i].end_shift; + s->substr = SvREFCNT_inc(r->substrs->data[i].substr); + s->utf8_substr = SvREFCNT_inc(r->substrs->data[i].utf8_substr); + } + } + RX_MATCH_COPIED_off(ret); +#ifdef PERL_OLD_COPY_ON_WRITE + /* this is broken. */ + assert(0); + if (ret->saved_copy) + ret->saved_copy=NULL; +#endif + ret->mother_re = r; + ret->swap = NULL; + + return ret; +} #endif /* regfree_internal() @@ -8814,11 +8877,7 @@ Perl_regfree_internal(pTHX_ struct regexp *r) Safefree(ri->data->what); Safefree(ri->data); } - if (ri->swap) { - Safefree(ri->swap->startp); - Safefree(ri->swap->endp); - Safefree(ri->swap); - } + Safefree(ri); } @@ -8848,7 +8907,7 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param) { dVAR; regexp *ret; - int i, npar; + I32 i, npar; struct reg_substr_datum *s; if (!r) @@ -8864,6 +8923,14 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param) Copy(r->startp, ret->startp, npar, I32); Newx(ret->endp, npar, I32); Copy(r->endp, ret->endp, npar, I32); + if(r->swap) { + Newx(ret->swap, 1, regexp_paren_ofs); + /* no need to copy these */ + Newx(ret->swap->startp, npar, I32); + Newx(ret->swap->endp, npar, I32); + } else { + ret->swap = NULL; + } if (r->substrs) { Newx(ret->substrs, 1, struct reg_substr_data); @@ -8877,11 +8944,12 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param) } else ret->substrs = NULL; - ret->wrapped = SAVEPVN(r->wrapped, r->wraplen); + ret->wrapped = SAVEPVN(r->wrapped, r->wraplen+1); ret->precomp = ret->wrapped + (r->precomp - r->wrapped); ret->prelen = r->prelen; ret->wraplen = r->wraplen; + ret->mother_re = NULL; ret->refcnt = r->refcnt; ret->minlen = r->minlen; ret->minlenret = r->minlenret; @@ -8942,14 +9010,6 @@ Perl_regdupe_internal(pTHX_ const regexp *r, CLONE_PARAMS *param) Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal); Copy(ri->program, reti->program, len+1, regnode); - if(ri->swap) { - Newx(reti->swap, 1, regexp_paren_ofs); - /* no need to copy these */ - Newx(reti->swap->startp, npar, I32); - Newx(reti->swap->endp, npar, I32); - } else { - reti->swap = NULL; - } reti->regstclass = NULL; diff --git a/regcomp.h b/regcomp.h index 72f415a..4799b1f 100644 --- a/regcomp.h +++ b/regcomp.h @@ -101,11 +101,7 @@ typedef OP OP_4tree; /* Will be redefined later. */ /* This is the stuff that used to live in regexp.h that was truly private to the engine itself. It now lives here. */ -/* swap buffer for paren structs */ -typedef struct regexp_paren_ofs { - I32 *startp; - I32 *endp; -} regexp_paren_ofs; + typedef struct regexp_internal { int name_list_idx; /* Optional data index of an array of paren names */ @@ -118,7 +114,6 @@ typedef struct regexp_paren_ofs { U32 proglen; } u; - regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */ regnode *regstclass; /* Optional startclass as identified or constructed by the optimiser */ struct reg_data *data; /* Additional miscellaneous data used by the program. diff --git a/regexec.c b/regexec.c index c9efaae..3d64f20 100644 --- a/regexec.c +++ b/regexec.c @@ -1652,9 +1652,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, static void S_swap_match_buff (pTHX_ regexp *prog) { I32 *t; - RXi_GET_DECL(prog,progi); - if (!progi->swap) { + if (!prog->swap) { /* We have to be careful. If the previous successful match was from this regex we don't want a subsequent paritally successful match to clobber the old results. @@ -1662,16 +1661,16 @@ S_swap_match_buff (pTHX_ regexp *prog) { to the re, and switch the buffer each match. If we fail we switch it back, otherwise we leave it swapped. */ - Newxz(progi->swap, 1, regexp_paren_ofs); + Newxz(prog->swap, 1, regexp_paren_ofs); /* no need to copy these */ - Newxz(progi->swap->startp, prog->nparens + 1, I32); - Newxz(progi->swap->endp, prog->nparens + 1, I32); + Newxz(prog->swap->startp, prog->nparens + 1, I32); + Newxz(prog->swap->endp, prog->nparens + 1, I32); } - t = progi->swap->startp; - progi->swap->startp = prog->startp; + t = prog->swap->startp; + prog->swap->startp = prog->startp; prog->startp = t; - t = progi->swap->endp; - progi->swap->endp = prog->endp; + t = prog->swap->endp; + prog->swap->endp = prog->endp; prog->endp = t; } @@ -2611,6 +2610,10 @@ S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) { return 0; } +#define SETREX(Re1,Re2) \ + if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \ + Re1 = (Re2) + STATIC I32 /* 0 failure, 1 success */ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) { @@ -3654,8 +3657,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) } if (mg) { - re = (regexp *)mg->mg_obj; - (void)ReREFCNT_inc(re); + re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/ } else { STRLEN len; @@ -3674,6 +3676,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_regsize = osize; } } + RX_MATCH_COPIED_off(re); + re->subbeg = rex->subbeg; + re->sublen = rex->sublen; rei = RXi_GET(re); DEBUG_EXECUTE_r( debug_start_match(re, do_utf8, locinput, PL_regeol, @@ -3715,7 +3720,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.prev_rex = rex; ST.prev_curlyx = cur_curlyx; - rex = re; + SETREX(rex,re); rexi = rei; cur_curlyx = NULL; ST.B = next; @@ -3735,7 +3740,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* note: this is called twice; first after popping B, then A */ PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); - rex = ST.prev_rex; + SETREX(rex,ST.prev_rex); rexi = RXi_GET(rex); regcpblow(ST.cp); cur_eval = ST.prev_eval; @@ -3751,7 +3756,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* note: this is called twice; first after popping B, then A */ PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); - rex = ST.prev_rex; + SETREX(rex,ST.prev_rex); rexi = RXi_GET(rex); PL_reginput = locinput; REGCP_UNWIND(ST.lastcp); @@ -4760,7 +4765,7 @@ NULL PL_reg_flags ^= st->u.eval.toggle_reg_flags; st->u.eval.prev_rex = rex; /* inner */ - rex = cur_eval->u.eval.prev_rex; /* outer */ + SETREX(rex,cur_eval->u.eval.prev_rex); rexi = RXi_GET(rex); cur_curlyx = cur_eval->u.eval.prev_curlyx; ReREFCNT_inc(rex); diff --git a/regexp.h b/regexp.h index d12df92..9cf324f 100644 --- a/regexp.h +++ b/regexp.h @@ -31,6 +31,7 @@ struct reg_substr_data; struct reg_data; struct regexp_engine; +struct regexp; struct reg_substr_datum { I32 min_offset; @@ -48,11 +49,19 @@ struct reg_substr_data { #else #define SV_SAVED_COPY #endif + +/* swap buffer for paren structs */ +typedef struct regexp_paren_ofs { + I32 *startp; + I32 *endp; +} regexp_paren_ofs; + /* this is ordered such that the most commonly used fields are at the start of the struct */ typedef struct regexp { /* what engine created this regexp? */ const struct regexp_engine* engine; + struct regexp* mother_re; /* what re is this a lightweight copy of? */ /* Information about the match that the perl core uses to manage things */ U32 extflags; /* Flags used both externally and internally */ @@ -71,8 +80,10 @@ typedef struct regexp { /* Data about the last/current match. These are modified during matching*/ U32 lastparen; /* last open paren matched */ U32 lastcloseparen; /* last close paren matched */ + regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */ I32 *startp; /* Array of offsets from start of string (@-) */ I32 *endp; /* Array of offsets from start of string (@+) */ + char *subbeg; /* saved or original string so \digit works forever. */ I32 sublen; /* Length of string pointed by subbeg */ @@ -216,7 +227,6 @@ typedef struct regexp_engine { #define RXf_TAINTED_SEEN 0x20000000 /* two bits here */ - #define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN) #define RX_MATCH_TAINTED(prog) ((prog)->extflags & RXf_TAINTED_SEEN) #define RX_MATCH_TAINTED_on(prog) ((prog)->extflags |= RXf_TAINTED_SEEN) diff --git a/t/op/pat.t b/t/op/pat.t index 423822a..71ddbe9 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -4267,11 +4267,11 @@ sub kt $re = qr/^ ( (??{ $grabit }) ) $ /x; my @res = '0902862349' =~ $re; iseq(join("-",@res),"0902862349", - 'PL_curpm is set properly on nested eval # TODO'); + 'PL_curpm is set properly on nested eval'); our $qr = qr/ (o) (??{ $1 }) /x; ok( 'boob'=~/( b (??{ $qr }) b )/x && 1, - "PL_curpm, nested eval # TODO"); + "PL_curpm, nested eval"); } { @@ -4325,7 +4325,17 @@ sub kt ok($c=~/${c}|\x{100}/); ok(@w==0); } - +{ + local $Message = "corruption of match results of qr// across scopes"; + my $qr=qr/(fo+)(ba+r)/; + 'foobar'=~/$qr/; + iseq("$1$2","foobar"); + { + 'foooooobaaaaar'=~/$qr/; + iseq("$1$2",'foooooobaaaaar'); + } + iseq("$1$2","foobar"); +} # Test counter is at bottom of file. Put new tests above here. #------------------------------------------------------------------- # Keep the following tests last -- they may crash perl @@ -4395,7 +4405,7 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/); iseq(0+$::test,$::TestCount,"Got the right number of tests!"); # Don't forget to update this! BEGIN { - $::TestCount = 1652; + $::TestCount = 1655; print "1..$::TestCount\n"; } diff --git a/universal.c b/universal.c index 0d2ec1c..d4de858 100644 --- a/universal.c +++ b/universal.c @@ -333,11 +333,11 @@ Perl_boot_core_UNIVERSAL(pTHX) newXSproto("Internals::inc_sub_generation",XS_Internals_inc_sub_generation, file, ""); newXSproto("re::is_regexp", XS_re_is_regexp, file, "$"); - newXSproto("re::regname", XS_re_regname, file, ";$$$"); - newXSproto("re::regnames", XS_re_regnames, file, ";$$"); - newXSproto("re::regnames_iterinit", XS_re_regnames_iterinit, file, ";$"); - newXSproto("re::regnames_iternext", XS_re_regnames_iternext, file, ";$$"); - newXSproto("re::regnames_count", XS_re_regnames_count, file, ";$"); + newXSproto("re::regname", XS_re_regname, file, ";$$"); + newXSproto("re::regnames", XS_re_regnames, file, ";$"); + newXSproto("re::regnames_iterinit", XS_re_regnames_iterinit, file, ""); + newXSproto("re::regnames_iternext", XS_re_regnames_iternext, file, ";$"); + newXSproto("re::regnames_count", XS_re_regnames_count, file, ""); } @@ -1143,31 +1143,23 @@ XS(XS_re_regname) dVAR; dXSARGS; - if (items < 1 || items > 3) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regname", "sv, qr = NULL, all = NULL"); + if (items < 1 || items > 2) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regname", "name[, all ]"); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; { SV * sv = ST(0); - SV * qr; SV * all; - regexp *re = NULL; + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; SV *bufs = NULL; if (items < 2) - qr = NULL; - else { - qr = ST(1); - } - - if (items < 3) all = NULL; else { - all = ST(2); + all = ST(1); } { - re = Perl_get_re_arg( aTHX_ qr, 1, NULL); if (SvPOK(sv) && re && re->paren_names) { bufs = CALLREG_NAMEDBUF(re,sv,all && SvTRUE(all)); if (bufs) { @@ -1189,30 +1181,22 @@ XS(XS_re_regnames) { dVAR; dXSARGS; - if (items < 0 || items > 2) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames", "sv = NULL, all = NULL"); + if (items < 0 || items > 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames", "[all]"); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; { - SV * sv; SV * all; - regexp *re = NULL; + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; IV count = 0; if (items < 1) - sv = NULL; - else { - sv = ST(0); - } - - if (items < 2) all = NULL; else { - all = ST(1); + all = ST(0); } { - re = Perl_get_re_arg( aTHX_ sv, 1, NULL ); if (re && re->paren_names) { HV *hv= re->paren_names; (void)hv_iterinit(hv); @@ -1259,29 +1243,19 @@ XS(XS_re_regnames_iterinit) { dVAR; dXSARGS; - if (items < 0 || items > 1) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_iterinit", "sv = NULL"); + if (items != 0 ) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_iterinit"); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; { - SV * sv; - regexp *re = NULL; - - if (items < 1) - sv = NULL; - else { - sv = ST(0); - } - { - re = Perl_get_re_arg( aTHX_ sv, 1, NULL ); - if (re && re->paren_names) { - (void)hv_iterinit(re->paren_names); - XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); - } else { - XSRETURN_UNDEF; - } - } + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + if (re && re->paren_names) { + (void)hv_iterinit(re->paren_names); + XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); + } else { + XSRETURN_UNDEF; + } PUTBACK; return; } @@ -1292,60 +1266,50 @@ XS(XS_re_regnames_iternext) { dVAR; dXSARGS; - if (items < 0 || items > 2) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_iternext", "sv = NULL, all = NULL"); + if (items < 0 || items > 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_iternext", "[all]"); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; { - SV * sv; SV * all; - regexp *re; + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; if (items < 1) - sv = NULL; - else { - sv = ST(0); - } - - if (items < 2) all = NULL; else { - all = ST(1); + all = ST(0); } - { - re = Perl_get_re_arg( aTHX_ sv, 1, NULL ); - if (re && re->paren_names) { - HV *hv= re->paren_names; - while (1) { - HE *temphe = hv_iternext_flags(hv,0); - if (temphe) { - IV i; - IV parno = 0; - SV* sv_dat = HeVAL(temphe); - I32 *nums = (I32*)SvPVX(sv_dat); - for ( i = 0; i < SvIVX(sv_dat); i++ ) { - if ((I32)(re->lastcloseparen) >= nums[i] && - re->startp[nums[i]] != -1 && - re->endp[nums[i]] != -1) - { - parno = nums[i]; - break; - } - } - if (parno || (all && SvTRUE(all))) { - STRLEN len; - char *pv = HePV(temphe, len); - XPUSHs(newSVpvn(pv,len)); - XSRETURN(1); + if (re && re->paren_names) { + HV *hv= re->paren_names; + while (1) { + HE *temphe = hv_iternext_flags(hv,0); + if (temphe) { + IV i; + IV parno = 0; + SV* sv_dat = HeVAL(temphe); + I32 *nums = (I32*)SvPVX(sv_dat); + for ( i = 0; i < SvIVX(sv_dat); i++ ) { + if ((I32)(re->lastcloseparen) >= nums[i] && + re->startp[nums[i]] != -1 && + re->endp[nums[i]] != -1) + { + parno = nums[i]; + break; } - } else { - break; } + if (parno || (all && SvTRUE(all))) { + STRLEN len; + char *pv = HePV(temphe, len); + XPUSHs(newSVpvn(pv,len)); + XSRETURN(1); + } + } else { + break; } } - XSRETURN_UNDEF; - } + } + XSRETURN_UNDEF; PUTBACK; return; } @@ -1354,22 +1318,16 @@ XS(XS_re_regnames_iternext) XS(XS_re_regnames_count) { - SV * sv; - regexp *re = NULL; + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; dVAR; dXSARGS; - if (items < 0 || items > 1) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_count", "sv = NULL"); + if (items != 0) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_count", ""); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; - if (items < 1) - sv = NULL; - else { - sv = ST(0); - } - re = Perl_get_re_arg( aTHX_ sv, 1, NULL ); + if (re && re->paren_names) { XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); } else {