From: Yitzchak Scott-Thoennes <sthoenna@efn.org>
Date: Wed, 11 Sep 2002 22:22:45 +0000 (-0700)
Subject: Re: sv_2pv_flags and ROK and UTF8 flags
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=cb50f42d44feb5486b1014e87f10579f0b7cddbf;p=p5sagit%2Fp5-mst-13.2.git

Re: sv_2pv_flags and ROK and UTF8 flags
Message-ID: <lSCg9gzkgymX092yn@efn.org>

p4raw-id: //depot/perl@17947
---

diff --git a/dump.c b/dump.c
index e287a79..520b210 100644
--- a/dump.c
+++ b/dump.c
@@ -768,7 +768,7 @@ static struct { char type; char *name; } magic_names[] = {
 	{ PERL_MAGIC_taint,          "taint(t)" },
 	{ PERL_MAGIC_uvar_elem,      "uvar_elem(v)" },
 	{ PERL_MAGIC_vec,            "vec(v)" },
-	{ PERL_MAGIC_vstring,        "v-string(V)" },
+	{ PERL_MAGIC_vstring,        "vstring(V)" },
 	{ PERL_MAGIC_substr,         "substr(x)" },
 	{ PERL_MAGIC_defelem,        "defelem(y)" },
 	{ PERL_MAGIC_ext,            "ext(~)" },
@@ -842,13 +842,15 @@ Perl_do_magic_dump(pTHX_ I32 level, PerlIO *file, MAGIC *mg, I32 nest, I32 maxne
 
         if (mg->mg_flags) {
             Perl_dump_indent(aTHX_ level, file, "    MG_FLAGS = 0x%02X\n", mg->mg_flags);
-	    if (mg->mg_flags & MGf_TAINTEDDIR)
+	    if (mg->mg_type == PERL_MAGIC_envelem &&
+		mg->mg_flags & MGf_TAINTEDDIR)
 	        Perl_dump_indent(aTHX_ level, file, "      TAINTEDDIR\n");
 	    if (mg->mg_flags & MGf_REFCOUNTED)
 	        Perl_dump_indent(aTHX_ level, file, "      REFCOUNTED\n");
             if (mg->mg_flags & MGf_GSKIP)
 	        Perl_dump_indent(aTHX_ level, file, "      GSKIP\n");
-	    if (mg->mg_flags & MGf_MINMATCH)
+	    if (mg->mg_type == PERL_MAGIC_regex_global &&
+		mg->mg_flags & MGf_MINMATCH)
 	        Perl_dump_indent(aTHX_ level, file, "      MINMATCH\n");
         }
 	if (mg->mg_obj) {
diff --git a/mg.h b/mg.h
index e99b52c..bbd675b 100644
--- a/mg.h
+++ b/mg.h
@@ -33,14 +33,13 @@ struct magic {
     I32		mg_len;
 };
 
-#define MGf_TAINTEDDIR 1
+#define MGf_TAINTEDDIR 1        /* PERL_MAGIC_envelem only */
+#define MGf_MINMATCH   1        /* PERL_MAGIC_regex_global only */
 #define MGf_REFCOUNTED 2
 #define MGf_GSKIP      4
 #define MGf_COPY       8
 #define MGf_DUP        16
 
-#define MGf_MINMATCH   1
-
 #define MgTAINTEDDIR(mg)	(mg->mg_flags & MGf_TAINTEDDIR)
 #define MgTAINTEDDIR_on(mg)	(mg->mg_flags |= MGf_TAINTEDDIR)
 #define MgTAINTEDDIR_off(mg)	(mg->mg_flags &= ~MGf_TAINTEDDIR)
diff --git a/regexec.c b/regexec.c
index b69fd2b..c93df5d 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2821,6 +2821,7 @@ S_regmatch(pTHX_ regnode *prog)
 		    MAGIC *mg = Null(MAGIC*);
 		    re_cc_state state;
 		    CHECKPOINT cp, lastcp;
+                    int toggleutf;
 
 		    if(SvROK(ret) || SvRMAGICAL(ret)) {
 			SV *sv = SvROK(ret) ? SvRV(ret) : ret;
@@ -2841,6 +2842,7 @@ S_regmatch(pTHX_ regnode *prog)
 			I32 onpar = PL_regnpar;
 
 			Zero(&pm, 1, PMOP);
+                        if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
 			re = CALLREGCOMP(aTHX_ t, t + len, &pm);
 			if (!(SvFLAGS(ret)
 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
@@ -2873,6 +2875,9 @@ S_regmatch(pTHX_ regnode *prog)
 		    *PL_reglastcloseparen = 0;
 		    PL_reg_call_cc = &state;
 		    PL_reginput = locinput;
+		    toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+				((re->reganch & ROPT_UTF8) != 0);
+		    if (toggleutf) PL_reg_flags ^= RF_utf8;
 
 		    /* XXXX This is too dramatic a measure... */
 		    PL_reg_maxiter = 0;
@@ -2887,6 +2892,7 @@ S_regmatch(pTHX_ regnode *prog)
 			PL_regcc = state.cc;
 			PL_reg_re = state.re;
 			cache_re(PL_reg_re);
+			if (toggleutf) PL_reg_flags ^= RF_utf8;
 
 			/* XXXX This is too dramatic a measure... */
 			PL_reg_maxiter = 0;
@@ -2903,6 +2909,7 @@ S_regmatch(pTHX_ regnode *prog)
 		    PL_regcc = state.cc;
 		    PL_reg_re = state.re;
 		    cache_re(PL_reg_re);
+		    if (toggleutf) PL_reg_flags ^= RF_utf8;
 
 		    /* XXXX This is too dramatic a measure... */
 		    PL_reg_maxiter = 0;
diff --git a/sv.c b/sv.c
index b4b7dba..78048c0 100644
--- a/sv.c
+++ b/sv.c
@@ -2890,7 +2890,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
 {
     register char *s;
     int olderrno;
-    SV *tsv;
+    SV *tsv, *origsv;
     char tbuf[64];	/* Must fit sprintf/Gconvert of longest IV/NV */
     char *tmpbuf = tbuf;
 
@@ -2939,6 +2939,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
                     SvUTF8_off(sv);
                 return pv;
             }
+	    origsv = sv;
 	    sv = (SV*)SvRV(sv);
 	    if (!sv)
 		s = "NULLREF";
@@ -3020,6 +3021,11 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
 			    mg->mg_ptr[mg->mg_len] = 0;
 			}
 			PL_reginterp_cnt += re->program[0].next_off;
+
+			if (re->reganch & ROPT_UTF8)
+			    SvUTF8_on(origsv);
+			else
+			    SvUTF8_off(origsv);
 			*lp = mg->mg_len;
 			return mg->mg_ptr;
 		    }
@@ -3188,16 +3194,14 @@ would lose the UTF-8'ness of the PV.
 void
 Perl_sv_copypv(pTHX_ SV *dsv, register SV *ssv)
 {
-    SV *tmpsv = sv_newmortal();
     STRLEN len;
     char *s;
     s = SvPV(ssv,len);
-    sv_setpvn(tmpsv,s,len);
+    sv_setpvn(dsv,s,len);
     if (SvUTF8(ssv))
-	SvUTF8_on(tmpsv);
+	SvUTF8_on(dsv);
     else
-	SvUTF8_off(tmpsv);
-    SvSetSV(dsv,tmpsv);
+	SvUTF8_off(dsv);
 }
 
 /*
diff --git a/t/op/pat.t b/t/op/pat.t
index ed61015..4ef860c 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..932\n";
+print "1..940\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2913,22 +2913,62 @@ print(($a eq '(?-xism:foo)' ? '' : 'not '),
 ++$test;
 
 $x = "\x{3fe}";
+$z=$y = "\317\276"; # $y is byte representation of $x
+
 $a = qr/$x/;
 print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n");
 ++$test;
 
 print(("a$a" =~ $x ? '' : 'not '),
-      "ok $test - stringifed qr// preserves utf8 # TODO\n");
+      "ok $test - stringifed qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a$a\z/ ? '' : 'not '),
+      "ok $test - interpolated qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '),
+      "ok $test - postponed interpolation of qr// preserves utf8\n");
+++$test;
+
+{ use re 'eval';
+
+print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '),
+      "ok $test - postponed utf8 string in utf8 re matches utf8\n");
+++$test;
+
+print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '),
+      "ok $test - postponed utf8 string in non-utf8 re matches utf8\n");
 ++$test;
 
-print(("a$x" =~ qr/a$a/ ? '' : 'not '),
-      "ok $test - interpolated qr// preserves utf8 # TODO\n");
+print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '),
+      "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n");
 ++$test;
 
-print(("a$x" =~ qr/a(??{$a})/ ? '' : 'not '),
-      "ok $test - postponed interpolation of qr// preserves utf8 # TODO\n");
+print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '),
+      "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n");
 ++$test;
 
+print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '),
+      "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n");
+++$test;
+
+print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '),
+      "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade
+
+print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '),
+      "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade
+
+print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '),
+      "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n");
+++$test;
+
+} # no re 'eval'
+
 print "# more user-defined character properties\n";
 
 sub IsSyriac1 {
@@ -2951,4 +2991,4 @@ END
 print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
 print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
 
-# last test 932
+# last test 940