From: Ilya Zakharevich <ilya@math.berkeley.edu>
Date: Wed, 10 Jun 1998 03:51:47 +0000 (-0400)
Subject: Bugs with (?{}), $^R and many-to-many subst
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ce862d02de7e5d8ac2078735cf4bd004193e837d;p=p5sagit%2Fp5-mst-13.2.git

Bugs with (?{}), $^R and many-to-many subst

	Message-Id: <199806100751.DAA05441@monk.mps.ohio-state.edu>

p4raw-id: //depot/perl@1117
---

diff --git a/interp.sym b/interp.sym
index 1a13e67..7bbb11e 100644
--- a/interp.sym
+++ b/interp.sym
@@ -145,6 +145,7 @@ regsize
 regstartp
 regtill
 regxend
+replgv
 restartop
 rightgv
 rs
diff --git a/intrpvar.h b/intrpvar.h
index 6ee52ca..74c914b 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -68,6 +68,7 @@ PERLVAR(Iscreamfirst,	I32 *)
 PERLVAR(Iscreamnext,	I32 *)		
 PERLVARI(Imaxscream,	I32,	-1)	
 PERLVAR(Ilastscream,	SV *)		
+PERLVAR(Ireplgv,	GV *)		
 
 /* shortcuts to misc objects */
 PERLVAR(Ierrgv,		GV *)		
diff --git a/op.c b/op.c
index 61001a4..3440b1c 100644
--- a/op.c
+++ b/op.c
@@ -2099,6 +2099,7 @@ pmruntime(OP *o, OP *expr, OP *repl)
 {
     PMOP *pm;
     LOGOP *rcop;
+    I32 repl_has_vars = 0;
 
     if (o->op_type == OP_TRANS)
 	return pmtrans(o, expr, repl);
@@ -2165,13 +2166,15 @@ pmruntime(OP *o, OP *expr, OP *repl)
 	    for (curop = LINKLIST(repl); curop!=repl; curop = LINKLIST(curop)) {
 		if (opargs[curop->op_type] & OA_DANGEROUS) {
 #ifdef USE_THREADS
-		    if (curop->op_type == OP_THREADSV
-			&& strchr("&`'123456789+", curop->op_private)) {
-			break;
+		    if (curop->op_type == OP_THREADSV) {
+			repl_has_vars = 1;
+			if (strchr("&`'123456789+", curop->op_private)) {
+			    break;
 		    }
 #else
 		    if (curop->op_type == OP_GV) {
 			GV *gv = ((GVOP*)curop)->op_gv;
+			repl_has_vars = 1;
 			if (strchr("&`'123456789+", *GvENAME(gv)))
 			    break;
 		    }
@@ -2189,7 +2192,7 @@ pmruntime(OP *o, OP *expr, OP *repl)
 			     curop->op_type == OP_PADAV ||
 			     curop->op_type == OP_PADHV ||
 			     curop->op_type == OP_PADANY) {
-			     /* is okay */
+			repl_has_vars = 1;
 		    }
 		    else
 			break;
@@ -2197,12 +2200,19 @@ pmruntime(OP *o, OP *expr, OP *repl)
 		lastop = curop;
 	    }
 	}
-	if (curop == repl) {
+	if (curop == repl
+	    && !(repl_has_vars 
+		 && (!pm->op_pmregexp 
+		     || pm->op_pmregexp->reganch & ROPT_EVAL_SEEN))) {
 	    pm->op_pmflags |= PMf_CONST;	/* const for long enough */
 	    pm->op_pmpermflags |= PMf_CONST;	/* const for long enough */
 	    prepend_elem(o->op_type, scalar(repl), o);
 	}
 	else {
+	    if (curop == repl && !pm->op_pmregexp) { /* Has variables. */
+		pm->op_pmflags |= PMf_MAYBE_CONST;
+		pm->op_pmpermflags |= PMf_MAYBE_CONST;
+	    }
 	    Newz(1101, rcop, 1, LOGOP);
 	    rcop->op_type = OP_SUBSTCONT;
 	    rcop->op_ppaddr = ppaddr[OP_SUBSTCONT];
diff --git a/op.h b/op.h
index 0cc6be7..7c60aec 100644
--- a/op.h
+++ b/op.h
@@ -189,7 +189,7 @@ struct pmop {
 #define PMf_TAINTMEM	0x0001		/* taint $1 etc. if target tainted */
 #define PMf_ONCE	0x0002		/* use pattern only once per reset */
 #define PMf_REVERSED	0x0004		/* Should be matched right->left */
-/*#define PMf_ALL		0x0008*/		/* initial constant is whole pat */
+#define PMf_MAYBE_CONST	0x0008		/* replacement contains variables */
 #define PMf_SKIPWHITE	0x0010		/* skip leading whitespace for split */
 #define PMf_FOLD	0x0020		/* case insensitivity */
 #define PMf_CONST	0x0040		/* subst replacement is constant */
diff --git a/perl.c b/perl.c
index 9d70377..f436f44 100644
--- a/perl.c
+++ b/perl.c
@@ -476,6 +476,7 @@ perl_destruct(register PerlInterpreter *sv_interp)
     argvoutgv = Nullgv;
     stdingv = Nullgv;
     last_in_gv = Nullgv;
+    replgv = Nullgv;
 
     /* reset so print() ends up where we expect */
     setdefout(Nullgv);
@@ -1818,6 +1819,8 @@ init_main_stash(void)
     defgv = gv_fetchpv("_",TRUE, SVt_PVAV);
     errgv = gv_HVadd(gv_fetchpv("@", TRUE, SVt_PV));
     GvMULTI_on(errgv);
+    replgv = gv_HVadd(gv_fetchpv("\022", TRUE, SVt_PV)); /* ^R */
+    GvMULTI_on(replgv);
     (void)form("%240s","");	/* Preallocate temp - for immediate signals. */
     sv_grow(ERRSV, 240);	/* Preallocate - for immediate signals. */
     sv_setpvn(ERRSV, "", 0);
diff --git a/regcomp.c b/regcomp.c
index 05c3a80..ceb5a29 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -954,6 +954,8 @@ pregcomp(char *exp, char *xend, PMOP *pm)
 	r->reganch |= ROPT_GPOS_SEEN;
     if (regseen & REG_SEEN_LOOKBEHIND)
 	r->reganch |= ROPT_LOOKBEHIND_SEEN;
+    if (regseen & REG_SEEN_EVAL)
+	r->reganch |= ROPT_EVAL_SEEN;
     Newz(1002, r->startp, regnpar, char*);
     Newz(1002, r->endp, regnpar, char*);
     DEBUG_r(regdump(r));
@@ -1028,6 +1030,7 @@ reg(I32 paren, I32 *flagp)
 		OP_4tree *sop, *rop;
 
 		seen_zerolen++;
+		regseen |= REG_SEEN_EVAL;
 		while (count && (c = *regcomp_parse)) {
 		    if (c == '\\' && regcomp_parse[1])
 			regcomp_parse++;
@@ -2354,6 +2357,8 @@ regdump(regexp *r)
     if (r->reganch & ROPT_IMPLICIT)
 	PerlIO_printf(Perl_debug_log, "implicit ");
     PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
+    if (r->reganch & ROPT_EVAL_SEEN)
+	PerlIO_printf(Perl_debug_log, "with eval ");
     PerlIO_printf(Perl_debug_log, "\n");
 #endif	/* DEBUGGING */
 }
diff --git a/regcomp.h b/regcomp.h
index bc7977d..7b0a12e 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -452,4 +452,5 @@ const static char reg_off_by_arg[] = {
 #define REG_SEEN_ZERO_LEN	1
 #define REG_SEEN_LOOKBEHIND	2
 #define REG_SEEN_GPOS		4
+#define REG_SEEN_EVAL		8
 
diff --git a/regexec.c b/regexec.c
index ac9f37b..5ef0313 100644
--- a/regexec.c
+++ b/regexec.c
@@ -57,7 +57,10 @@
 
 #define RF_tainted	1		/* tainted information used? */
 #define RF_warned	2		/* warned about big count? */
-#define RF_evaled	4		/* Did an EVAL? */
+#define RF_evaled	4		/* Did an EVAL with setting? */
+
+#define RS_init		1		/* eval environment created */
+#define RS_set		2		/* replsv value is set */
 
 #ifndef STATIC
 #define	STATIC	static
@@ -194,6 +197,7 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend, cha
     I32 end_shift = 0;			/* Same for the end. */
     I32 scream_pos = -1;		/* Internal iterator of scream. */
     char *scream_olds;
+    SV* oreplsv = GvSV(replgv);
 
     cc.cur = 0;
     cc.oldcc = 0;
@@ -632,6 +636,12 @@ got_it:
 	    }
 	}
     }
+    /* Preserve the current value of $^R */
+    if (oreplsv != GvSV(replgv)) {
+	sv_setsv(oreplsv, GvSV(replgv));/* So that when GvSV(replgv) is
+					   restored, the value remains
+					   the same. */
+    }
     return 1;
 
 phooey:
@@ -650,6 +660,19 @@ regtry(regexp *prog, char *startpos)
     register char **ep;
     CHECKPOINT lastcp;
 
+    if ((prog->reganch & ROPT_EVAL_SEEN) && !reg_eval_set) {
+	reg_eval_set = RS_init;
+	DEBUG_r(DEBUG_s(
+	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %i\n", stack_sp - stack_base);
+	    ));
+	SAVEINT(cxstack[cxstack_ix].blk_oldsp);
+	cxstack[cxstack_ix].blk_oldsp = stack_sp - stack_base;
+	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
+	SAVETMPS;
+	/* Apparently this is not needed, judging by wantarray. */
+	/* SAVEINT(cxstack[cxstack_ix].blk_gimme);
+	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
+    }
     reginput = startpos;
     regstartp = prog->startp;
     regendp = prog->endp;
@@ -980,22 +1003,6 @@ regmatch(regnode *prog)
 	    op = (OP_4tree*)regdata->data[n];
 	    DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%x\n", op) );
 	    curpad = AvARRAY((AV*)regdata->data[n + 1]);
-	    if (!reg_eval_set) {
-		/* Preserve whatever is on stack now, otherwise
-		   OP_NEXTSTATE will overwrite it. */
-		SAVEINT(reg_eval_set);	/* Protect against unwinding. */
-		reg_eval_set = 1;
-		DEBUG_r(DEBUG_s(
-		    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %i\n", stack_sp - stack_base);
-		    ));
-		SAVEINT(cxstack[cxstack_ix].blk_oldsp);
-		cxstack[cxstack_ix].blk_oldsp = stack_sp - stack_base;
-		/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
-		SAVETMPS;
-		/* Apparently this is not needed, judging by wantarray. */
-		/* SAVEINT(cxstack[cxstack_ix].blk_gimme);
-		   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
-	    }
 
 	    CALLRUNOPS();			/* Scalar context. */
 	    SPAGAIN;
@@ -1005,7 +1012,8 @@ regmatch(regnode *prog)
 	    if (logical) {
 		logical = 0;
 		sw = SvTRUE(ret);
-	    }
+	    } else
+		sv_setsv(save_scalar(replgv), ret);
 	    op = oop;
 	    curpad = ocurpad;
 	    curcop = ocurcop;
diff --git a/regexp.h b/regexp.h
index cb6b0c6..f1301d9 100644
--- a/regexp.h
+++ b/regexp.h
@@ -85,6 +85,7 @@ typedef struct regexp {
 #define ROPT_GPOS_SEEN		0x40
 #define ROPT_CHECK_ALL		0x80
 #define ROPT_LOOKBEHIND_SEEN	0x100
+#define ROPT_EVAL_SEEN		0x200
 
 #define ROPT_TAINTED_SEEN	0x8000
 
diff --git a/t/op/pat.t b/t/op/pat.t
index e6b9015..5516ce5 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -2,7 +2,7 @@
 
 # $RCSfile: pat.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:12 $
 
-print "1..104\n";
+print "1..107\n";
 
 $x = "abc\ndef\n";
 
@@ -355,6 +355,24 @@ print "not " unless f(pos($x)) == 4;
 print "ok $test\n";
 $test++;
 
+$x = $^R = 67;
+'foot' =~ /foo(?{$x = 12; 75})[t]/;
+print "not " unless $^R eq '75';
+print "ok $test\n";
+$test++;
+
+$x = $^R = 67;
+'foot' =~ /foo(?{$x = 12; 75})[xy]/;
+print "not " unless $^R eq '67' and $x eq '12';
+print "ok $test\n";
+$test++;
+
+$x = $^R = 67;
+'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/;
+print "not " unless $^R eq '79' and $x eq '12';
+print "ok $test\n";
+$test++;
+
 sub must_warn_pat {
     my $warn_pat = shift;
     return sub { print "not " unless $_[0] =~ /$warn_pat/ }
diff --git a/t/op/subst.t b/t/op/subst.t
index 248aa71..92a848f 100755
--- a/t/op/subst.t
+++ b/t/op/subst.t
@@ -2,7 +2,7 @@
 
 # $RCSfile: s.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:22 $
 
-print "1..69\n";
+print "1..70\n";
 
 $x = 'foo';
 $_ = "x";
@@ -270,3 +270,31 @@ print $_ eq "bbcbb" ? "ok 68\n" : "not ok 68 # `$_' ne `bbcbb'\n";
 # XXX TODO: Most tests above don't test return values of the ops. They should.
 $_ = "ab";
 print (s/a/b/ == 1 ? "ok 69\n" : "not ok 69\n");
+
+$_ = <<'EOL';
+     $url = new URI::URL "http://www/";   die if $url eq "xXx";
+EOL
+$^R = 'junk';
+
+$foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' .
+  ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' .
+  ' lowercase $@%#MiXeD$@%# ';
+
+s{  \d+          \b [,.;]? (?{ 'digits' })
+   |
+    [a-z]+       \b [,.;]? (?{ 'lowercase' })
+   |
+    [A-Z]+       \b [,.;]? (?{ 'UPPERCASE' })
+   |
+    [A-Z] [a-z]+ \b [,.;]? (?{ 'Capitalized' })
+   |
+    [A-Za-z]+    \b [,.;]? (?{ 'MiXeD' })
+   |
+    [A-Za-z0-9]+ \b [,.;]? (?{ 'alphanumeric' })
+   |
+    \s+                    (?{ ' ' })
+   |
+    [^A-Za-z0-9\s]+          (?{ '$@%#' })
+}{$^R}xg;
+print ($_ eq $foo ? "ok 70\n" : "not ok 70\n#'$_'\n#'$foo'\n");
+