From: Jarkko Hietaniemi Date: Mon, 24 Feb 2003 18:44:41 +0000 (+0000) Subject: Patching magic from Inaba-san's keyboard: fix for [perl #8769]: X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=db79b45b3c913399aef4d2f3647453e63c4772a8;p=p5sagit%2Fp5-mst-13.2.git Patching magic from Inaba-san's keyboard: fix for [perl #8769]: "scalar upgraded to UTF-8 as a side effect of quote-interpolation when 'use encoding' is engaged"-- wasn't actually encoding's fault. p4raw-id: //depot/perl@18764 --- diff --git a/pp_ctl.c b/pp_ctl.c index 9a807a5..cdcbb30 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -156,6 +156,7 @@ PP(pp_substcont) register char *m = cx->sb_m; char *orig = cx->sb_orig; register REGEXP *rx = cx->sb_rx; + SV *nsv = Nullsv; rxres_restore(&cx->sb_rxres, rx); RX_MATCH_UTF8_set(rx, SvUTF8(cx->sb_targ)); @@ -178,7 +179,10 @@ PP(pp_substcont) { SV *targ = cx->sb_targ; - sv_catpvn(dstr, s, cx->sb_strend - s); + if (DO_UTF8(dstr) && !SvUTF8(targ)) + sv_catpvn_utf8_upgrade(dstr, s, cx->sb_strend - s, nsv); + else + sv_catpvn(dstr, s, cx->sb_strend - s); cx->sb_rxtainted |= RX_MATCH_TAINTED(rx); #ifdef PERL_COPY_ON_WRITE @@ -221,8 +225,12 @@ PP(pp_substcont) cx->sb_strend = s + (cx->sb_strend - m); } cx->sb_m = m = rx->startp[0] + orig; - if (m > s) - sv_catpvn(dstr, s, m-s); + if (m > s) { + if (DO_UTF8(dstr) && !SvUTF8(cx->sb_targ)) + sv_catpvn_utf8_upgrade(dstr, s, m - s, nsv); + else + sv_catpvn(dstr, s, m-s); + } cx->sb_s = rx->endp[0] + orig; { /* Update the pos() information. */ SV *sv = cx->sb_targ; diff --git a/pp_hot.c b/pp_hot.c index 62b5c5c..2991900 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -136,11 +136,12 @@ PP(pp_concat) bool lbyte; STRLEN rlen; char* rpv = SvPV(right, rlen); /* mg_get(right) happens here */ - bool rbyte = !SvUTF8(right); + bool rbyte = !SvUTF8(right), rcopied = FALSE; if (TARG == right && right != left) { right = sv_2mortal(newSVpvn(rpv, rlen)); rpv = SvPV(right, rlen); /* no point setting UTF8 here */ + rcopied = TRUE; } if (TARG != left) { @@ -176,6 +177,8 @@ PP(pp_concat) if (lbyte) sv_utf8_upgrade_nomg(TARG); else { + if (!rcopied) + right = sv_2mortal(newSVpvn(rpv, rlen)); sv_utf8_upgrade_nomg(right); rpv = SvPV(right, rlen); } @@ -1901,6 +1904,7 @@ PP(pp_subst) #ifdef PERL_COPY_ON_WRITE bool is_cow; #endif + SV *nsv = Nullsv; /* known replacement string? */ dstr = (pm->op_pmflags & PMf_CONST) ? POPs : Nullsv; @@ -1986,7 +1990,7 @@ PP(pp_subst) if (dstr) { /* replacement needing upgrading? */ if (DO_UTF8(TARG) && !doutf8) { - SV *nsv = sv_newmortal(); + nsv = sv_newmortal(); SvSetSV(nsv, dstr); if (PL_encoding) sv_recode_to_utf8(nsv, PL_encoding); @@ -2011,7 +2015,8 @@ PP(pp_subst) && !is_cow #endif && (I32)clen <= rx->minlen && (once || !(r_flags & REXEC_COPY_STR)) - && !(rx->reganch & ROPT_LOOKBEHIND_SEEN)) { + && !(rx->reganch & ROPT_LOOKBEHIND_SEEN) + && (!doutf8 || SvUTF8(TARG))) { if (!CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL, r_flags | REXEC_CHECKED)) { @@ -2155,7 +2160,10 @@ PP(pp_subst) strend = s + (strend - m); } m = rx->startp[0] + orig; - sv_catpvn(dstr, s, m-s); + if (doutf8 && !SvUTF8(dstr)) + sv_catpvn_utf8_upgrade(dstr, s, m - s, nsv); + else + sv_catpvn(dstr, s, m-s); s = rx->endp[0] + orig; if (clen) sv_catpvn(dstr, c, clen); @@ -2163,12 +2171,8 @@ PP(pp_subst) break; } while (CALLREGEXEC(aTHX_ rx, s, strend, orig, s == m, TARG, NULL, r_flags)); - if (doutf8 && !DO_UTF8(dstr)) { - SV* nsv = sv_2mortal(newSVpvn(s, strend - s)); - - sv_utf8_upgrade(nsv); - sv_catpvn(dstr, SvPVX(nsv), SvCUR(nsv)); - } + if (doutf8 && !DO_UTF8(TARG)) + sv_catpvn_utf8_upgrade(dstr, s, strend - s, nsv); else sv_catpvn(dstr, s, strend - s); diff --git a/sv.c b/sv.c index 73fef98..b132a1e 100644 --- a/sv.c +++ b/sv.c @@ -8288,7 +8288,11 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV I32 svix = 0; static char nullstr[] = "(null)"; SV *argsv = Nullsv; - bool has_utf8 = FALSE; /* has the result utf8? */ + bool has_utf8; /* has the result utf8? */ + bool pat_utf8; /* the pattern is in utf8? */ + SV *nsv = Nullsv; + + has_utf8 = pat_utf8 = DO_UTF8(sv); /* no matter what, this is a string now */ (void)SvPV_force(sv, origlen); @@ -8389,7 +8393,10 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV /* echo everything up to the next format specification */ for (q = p; q < patend && *q != '%'; ++q) ; if (q > p) { - sv_catpvn(sv, p, q - p); + if (has_utf8 && !pat_utf8) + sv_catpvn_utf8_upgrade(sv, p, q - p, nsv); + else + sv_catpvn(sv, p, q - p); p = q; } if (q++ >= patend) diff --git a/sv.h b/sv.h index 956340a..f94f57c 100644 --- a/sv.h +++ b/sv.h @@ -1102,6 +1102,17 @@ otherwise. #define sv_pvn_force(sv, lp) sv_pvn_force_flags(sv, lp, SV_GMAGIC) #define sv_utf8_upgrade(sv) sv_utf8_upgrade_flags(sv, SV_GMAGIC) +/* Should be named SvCatPVN_utf8_upgrade? */ +#define sv_catpvn_utf8_upgrade(dsv, sstr, slen, nsv) \ + STMT_START { \ + if (!(nsv)) \ + nsv = sv_2mortal(newSVpvn(sstr, slen)); \ + else \ + sv_setpvn(nsv, sstr, slen); \ + SvUTF8_off(nsv); \ + sv_utf8_upgrade(nsv); \ + sv_catsv(dsv, nsv); \ + } STMT_END /* =for apidoc Am|SV*|newRV_inc|SV* sv @@ -1199,6 +1210,7 @@ Returns a pointer to the character buffer. #define SvSetMagicSV_nosteal(dst,src) \ SvSetSV_nosteal_and(dst,src,SvSETMAGIC(dst)) + #if !defined(SKIP_DEBUGGING) #define SvPEEK(sv) sv_peek(sv) #else diff --git a/t/op/concat.t b/t/op/concat.t index 4813690..c1a6e23 100644 --- a/t/op/concat.t +++ b/t/op/concat.t @@ -18,7 +18,7 @@ sub ok { return $ok; } -print "1..12\n"; +print "1..18\n"; ($a, $b, $c) = qw(foo bar); @@ -87,3 +87,20 @@ ok("$c$a$c" eq "foo", "concatenate undef, fore and aft"); eval{"\x{1234}$pi"}; ok(!$@, "bug id 20001020.006, constant right"); } + +sub beq { use bytes; $_[0] eq $_[1]; } + +{ + # concat should not upgrade its arguments. + my($l, $r, $c); + + ($l, $r, $c) = ("\x{101}", "\x{fe}", "\x{101}\x{fe}"); + ok(beq($l.$r, $c), "concat utf8 and byte"); + ok(beq($l, "\x{101}"), "right not changed after concat u+b"); + ok(beq($r, "\x{fe}"), "left not changed after concat u+b"); + + ($l, $r, $c) = ("\x{fe}", "\x{101}", "\x{fe}\x{101}"); + ok(beq($l.$r, $c), "concat byte and utf8"); + ok(beq($l, "\x{fe}"), "right not changed after concat b+u"); + ok(beq($r, "\x{101}"), "left not changed after concat b+u"); +}