register char *m = cx->sb_m;
char *orig = cx->sb_orig;
register REGEXP *rx = cx->sb_rx;
+ SV *nsv = Nullsv;
rxres_restore(&cx->sb_rxres, rx);
RX_MATCH_UTF8_set(rx, SvUTF8(cx->sb_targ));
{
SV *targ = cx->sb_targ;
- sv_catpvn(dstr, s, cx->sb_strend - s);
+ if (DO_UTF8(dstr) && !SvUTF8(targ))
+ sv_catpvn_utf8_upgrade(dstr, s, cx->sb_strend - s, nsv);
+ else
+ sv_catpvn(dstr, s, cx->sb_strend - s);
cx->sb_rxtainted |= RX_MATCH_TAINTED(rx);
#ifdef PERL_COPY_ON_WRITE
cx->sb_strend = s + (cx->sb_strend - m);
}
cx->sb_m = m = rx->startp[0] + orig;
- if (m > s)
- sv_catpvn(dstr, s, m-s);
+ if (m > s) {
+ if (DO_UTF8(dstr) && !SvUTF8(cx->sb_targ))
+ sv_catpvn_utf8_upgrade(dstr, s, m - s, nsv);
+ else
+ sv_catpvn(dstr, s, m-s);
+ }
cx->sb_s = rx->endp[0] + orig;
{ /* Update the pos() information. */
SV *sv = cx->sb_targ;
bool lbyte;
STRLEN rlen;
char* rpv = SvPV(right, rlen); /* mg_get(right) happens here */
- bool rbyte = !SvUTF8(right);
+ bool rbyte = !SvUTF8(right), rcopied = FALSE;
if (TARG == right && right != left) {
right = sv_2mortal(newSVpvn(rpv, rlen));
rpv = SvPV(right, rlen); /* no point setting UTF8 here */
+ rcopied = TRUE;
}
if (TARG != left) {
if (lbyte)
sv_utf8_upgrade_nomg(TARG);
else {
+ if (!rcopied)
+ right = sv_2mortal(newSVpvn(rpv, rlen));
sv_utf8_upgrade_nomg(right);
rpv = SvPV(right, rlen);
}
#ifdef PERL_COPY_ON_WRITE
bool is_cow;
#endif
+ SV *nsv = Nullsv;
/* known replacement string? */
dstr = (pm->op_pmflags & PMf_CONST) ? POPs : Nullsv;
if (dstr) {
/* replacement needing upgrading? */
if (DO_UTF8(TARG) && !doutf8) {
- SV *nsv = sv_newmortal();
+ nsv = sv_newmortal();
SvSetSV(nsv, dstr);
if (PL_encoding)
sv_recode_to_utf8(nsv, PL_encoding);
&& !is_cow
#endif
&& (I32)clen <= rx->minlen && (once || !(r_flags & REXEC_COPY_STR))
- && !(rx->reganch & ROPT_LOOKBEHIND_SEEN)) {
+ && !(rx->reganch & ROPT_LOOKBEHIND_SEEN)
+ && (!doutf8 || SvUTF8(TARG))) {
if (!CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL,
r_flags | REXEC_CHECKED))
{
strend = s + (strend - m);
}
m = rx->startp[0] + orig;
- sv_catpvn(dstr, s, m-s);
+ if (doutf8 && !SvUTF8(dstr))
+ sv_catpvn_utf8_upgrade(dstr, s, m - s, nsv);
+ else
+ sv_catpvn(dstr, s, m-s);
s = rx->endp[0] + orig;
if (clen)
sv_catpvn(dstr, c, clen);
break;
} while (CALLREGEXEC(aTHX_ rx, s, strend, orig, s == m,
TARG, NULL, r_flags));
- if (doutf8 && !DO_UTF8(dstr)) {
- SV* nsv = sv_2mortal(newSVpvn(s, strend - s));
-
- sv_utf8_upgrade(nsv);
- sv_catpvn(dstr, SvPVX(nsv), SvCUR(nsv));
- }
+ if (doutf8 && !DO_UTF8(TARG))
+ sv_catpvn_utf8_upgrade(dstr, s, strend - s, nsv);
else
sv_catpvn(dstr, s, strend - s);
I32 svix = 0;
static char nullstr[] = "(null)";
SV *argsv = Nullsv;
- bool has_utf8 = FALSE; /* has the result utf8? */
+ bool has_utf8; /* has the result utf8? */
+ bool pat_utf8; /* the pattern is in utf8? */
+ SV *nsv = Nullsv;
+
+ has_utf8 = pat_utf8 = DO_UTF8(sv);
/* no matter what, this is a string now */
(void)SvPV_force(sv, origlen);
/* echo everything up to the next format specification */
for (q = p; q < patend && *q != '%'; ++q) ;
if (q > p) {
- sv_catpvn(sv, p, q - p);
+ if (has_utf8 && !pat_utf8)
+ sv_catpvn_utf8_upgrade(sv, p, q - p, nsv);
+ else
+ sv_catpvn(sv, p, q - p);
p = q;
}
if (q++ >= patend)
#define sv_pvn_force(sv, lp) sv_pvn_force_flags(sv, lp, SV_GMAGIC)
#define sv_utf8_upgrade(sv) sv_utf8_upgrade_flags(sv, SV_GMAGIC)
+/* Should be named SvCatPVN_utf8_upgrade? */
+#define sv_catpvn_utf8_upgrade(dsv, sstr, slen, nsv) \
+ STMT_START { \
+ if (!(nsv)) \
+ nsv = sv_2mortal(newSVpvn(sstr, slen)); \
+ else \
+ sv_setpvn(nsv, sstr, slen); \
+ SvUTF8_off(nsv); \
+ sv_utf8_upgrade(nsv); \
+ sv_catsv(dsv, nsv); \
+ } STMT_END
/*
=for apidoc Am|SV*|newRV_inc|SV* sv
#define SvSetMagicSV_nosteal(dst,src) \
SvSetSV_nosteal_and(dst,src,SvSETMAGIC(dst))
+
#if !defined(SKIP_DEBUGGING)
#define SvPEEK(sv) sv_peek(sv)
#else
return $ok;
}
-print "1..12\n";
+print "1..18\n";
($a, $b, $c) = qw(foo bar);
eval{"\x{1234}$pi"};
ok(!$@, "bug id 20001020.006, constant right");
}
+
+sub beq { use bytes; $_[0] eq $_[1]; }
+
+{
+ # concat should not upgrade its arguments.
+ my($l, $r, $c);
+
+ ($l, $r, $c) = ("\x{101}", "\x{fe}", "\x{101}\x{fe}");
+ ok(beq($l.$r, $c), "concat utf8 and byte");
+ ok(beq($l, "\x{101}"), "right not changed after concat u+b");
+ ok(beq($r, "\x{fe}"), "left not changed after concat u+b");
+
+ ($l, $r, $c) = ("\x{fe}", "\x{101}", "\x{fe}\x{101}");
+ ok(beq($l.$r, $c), "concat byte and utf8");
+ ok(beq($l, "\x{fe}"), "right not changed after concat b+u");
+ ok(beq($r, "\x{101}"), "left not changed after concat b+u");
+}