From: Yitzchak Scott-Thoennes Date: Wed, 11 Sep 2002 22:22:45 +0000 (-0700) Subject: Re: sv_2pv_flags and ROK and UTF8 flags X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=cb50f42d44feb5486b1014e87f10579f0b7cddbf;p=p5sagit%2Fp5-mst-13.2.git Re: sv_2pv_flags and ROK and UTF8 flags Message-ID: p4raw-id: //depot/perl@17947 --- diff --git a/dump.c b/dump.c index e287a79..520b210 100644 --- a/dump.c +++ b/dump.c @@ -768,7 +768,7 @@ static struct { char type; char *name; } magic_names[] = { { PERL_MAGIC_taint, "taint(t)" }, { PERL_MAGIC_uvar_elem, "uvar_elem(v)" }, { PERL_MAGIC_vec, "vec(v)" }, - { PERL_MAGIC_vstring, "v-string(V)" }, + { PERL_MAGIC_vstring, "vstring(V)" }, { PERL_MAGIC_substr, "substr(x)" }, { PERL_MAGIC_defelem, "defelem(y)" }, { PERL_MAGIC_ext, "ext(~)" }, @@ -842,13 +842,15 @@ Perl_do_magic_dump(pTHX_ I32 level, PerlIO *file, MAGIC *mg, I32 nest, I32 maxne if (mg->mg_flags) { Perl_dump_indent(aTHX_ level, file, " MG_FLAGS = 0x%02X\n", mg->mg_flags); - if (mg->mg_flags & MGf_TAINTEDDIR) + if (mg->mg_type == PERL_MAGIC_envelem && + mg->mg_flags & MGf_TAINTEDDIR) Perl_dump_indent(aTHX_ level, file, " TAINTEDDIR\n"); if (mg->mg_flags & MGf_REFCOUNTED) Perl_dump_indent(aTHX_ level, file, " REFCOUNTED\n"); if (mg->mg_flags & MGf_GSKIP) Perl_dump_indent(aTHX_ level, file, " GSKIP\n"); - if (mg->mg_flags & MGf_MINMATCH) + if (mg->mg_type == PERL_MAGIC_regex_global && + mg->mg_flags & MGf_MINMATCH) Perl_dump_indent(aTHX_ level, file, " MINMATCH\n"); } if (mg->mg_obj) { diff --git a/mg.h b/mg.h index e99b52c..bbd675b 100644 --- a/mg.h +++ b/mg.h @@ -33,14 +33,13 @@ struct magic { I32 mg_len; }; -#define MGf_TAINTEDDIR 1 +#define MGf_TAINTEDDIR 1 /* PERL_MAGIC_envelem only */ +#define MGf_MINMATCH 1 /* PERL_MAGIC_regex_global only */ #define MGf_REFCOUNTED 2 #define MGf_GSKIP 4 #define MGf_COPY 8 #define MGf_DUP 16 -#define MGf_MINMATCH 1 - #define MgTAINTEDDIR(mg) (mg->mg_flags & MGf_TAINTEDDIR) #define MgTAINTEDDIR_on(mg) (mg->mg_flags |= MGf_TAINTEDDIR) #define MgTAINTEDDIR_off(mg) (mg->mg_flags &= ~MGf_TAINTEDDIR) diff --git a/regexec.c b/regexec.c index b69fd2b..c93df5d 100644 --- a/regexec.c +++ b/regexec.c @@ -2821,6 +2821,7 @@ S_regmatch(pTHX_ regnode *prog) MAGIC *mg = Null(MAGIC*); re_cc_state state; CHECKPOINT cp, lastcp; + int toggleutf; if(SvROK(ret) || SvRMAGICAL(ret)) { SV *sv = SvROK(ret) ? SvRV(ret) : ret; @@ -2841,6 +2842,7 @@ S_regmatch(pTHX_ regnode *prog) I32 onpar = PL_regnpar; Zero(&pm, 1, PMOP); + if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8; re = CALLREGCOMP(aTHX_ t, t + len, &pm); if (!(SvFLAGS(ret) & (SVs_TEMP | SVs_PADTMP | SVf_READONLY))) @@ -2873,6 +2875,9 @@ S_regmatch(pTHX_ regnode *prog) *PL_reglastcloseparen = 0; PL_reg_call_cc = &state; PL_reginput = locinput; + toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^ + ((re->reganch & ROPT_UTF8) != 0); + if (toggleutf) PL_reg_flags ^= RF_utf8; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; @@ -2887,6 +2892,7 @@ S_regmatch(pTHX_ regnode *prog) PL_regcc = state.cc; PL_reg_re = state.re; cache_re(PL_reg_re); + if (toggleutf) PL_reg_flags ^= RF_utf8; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; @@ -2903,6 +2909,7 @@ S_regmatch(pTHX_ regnode *prog) PL_regcc = state.cc; PL_reg_re = state.re; cache_re(PL_reg_re); + if (toggleutf) PL_reg_flags ^= RF_utf8; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; diff --git a/sv.c b/sv.c index b4b7dba..78048c0 100644 --- a/sv.c +++ b/sv.c @@ -2890,7 +2890,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) { register char *s; int olderrno; - SV *tsv; + SV *tsv, *origsv; char tbuf[64]; /* Must fit sprintf/Gconvert of longest IV/NV */ char *tmpbuf = tbuf; @@ -2939,6 +2939,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) SvUTF8_off(sv); return pv; } + origsv = sv; sv = (SV*)SvRV(sv); if (!sv) s = "NULLREF"; @@ -3020,6 +3021,11 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) mg->mg_ptr[mg->mg_len] = 0; } PL_reginterp_cnt += re->program[0].next_off; + + if (re->reganch & ROPT_UTF8) + SvUTF8_on(origsv); + else + SvUTF8_off(origsv); *lp = mg->mg_len; return mg->mg_ptr; } @@ -3188,16 +3194,14 @@ would lose the UTF-8'ness of the PV. void Perl_sv_copypv(pTHX_ SV *dsv, register SV *ssv) { - SV *tmpsv = sv_newmortal(); STRLEN len; char *s; s = SvPV(ssv,len); - sv_setpvn(tmpsv,s,len); + sv_setpvn(dsv,s,len); if (SvUTF8(ssv)) - SvUTF8_on(tmpsv); + SvUTF8_on(dsv); else - SvUTF8_off(tmpsv); - SvSetSV(dsv,tmpsv); + SvUTF8_off(dsv); } /* diff --git a/t/op/pat.t b/t/op/pat.t index ed61015..4ef860c 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..932\n"; +print "1..940\n"; BEGIN { chdir 't' if -d 't'; @@ -2913,22 +2913,62 @@ print(($a eq '(?-xism:foo)' ? '' : 'not '), ++$test; $x = "\x{3fe}"; +$z=$y = "\317\276"; # $y is byte representation of $x + $a = qr/$x/; print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n"); ++$test; print(("a$a" =~ $x ? '' : 'not '), - "ok $test - stringifed qr// preserves utf8 # TODO\n"); + "ok $test - stringifed qr// preserves utf8\n"); +++$test; + +print(("a$x" =~ /^a$a\z/ ? '' : 'not '), + "ok $test - interpolated qr// preserves utf8\n"); +++$test; + +print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '), + "ok $test - postponed interpolation of qr// preserves utf8\n"); +++$test; + +{ use re 'eval'; + +print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in utf8 re matches utf8\n"); +++$test; + +print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in non-utf8 re matches utf8\n"); ++$test; -print(("a$x" =~ qr/a$a/ ? '' : 'not '), - "ok $test - interpolated qr// preserves utf8 # TODO\n"); +print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n"); ++$test; -print(("a$x" =~ qr/a(??{$a})/ ? '' : 'not '), - "ok $test - postponed interpolation of qr// preserves utf8 # TODO\n"); +print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n"); ++$test; +print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n"); +++$test; + +print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n"); +++$test; +$y = $z; # reset $y after upgrade + +print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n"); +++$test; +$y = $z; # reset $y after upgrade + +print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n"); +++$test; + +} # no re 'eval' + print "# more user-defined character properties\n"; sub IsSyriac1 { @@ -2951,4 +2991,4 @@ END print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; -# last test 932 +# last test 940