From: Jarkko Hietaniemi Date: Sun, 18 Mar 2001 05:15:06 +0000 (+0000) Subject: NI-S' cunning idea of how to de-UTF8 the "\C-broken" submatches. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=60425c380d8403607be85706bb016ae8d577acf4;p=p5sagit%2Fp5-mst-13.2.git NI-S' cunning idea of how to de-UTF8 the "\C-broken" submatches. p4raw-id: //depot/perl@9193 --- diff --git a/mg.c b/mg.c index eb79dc4..931b1a1 100644 --- a/mg.c +++ b/mg.c @@ -412,7 +412,9 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) char *s = rx->subbeg + s1; char *send = rx->subbeg + t1; - i = Perl_utf8_length(aTHX_ (U8*)s, (U8*)send); + i = t1 - s1; + if (is_utf8_string((U8*)s, i)) + i = Perl_utf8_length(aTHX_ (U8*)s, (U8*)send); } if (i < 0) Perl_croak(aTHX_ "panic: magic_len: %"IVdf, (IV)i); @@ -630,7 +632,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) PL_tainted = FALSE; } sv_setpvn(sv, s, i); - if (DO_UTF8(PL_reg_sv)) + if (DO_UTF8(PL_reg_sv) && is_utf8_string((U8*)s, i)) SvUTF8_on(sv); else SvUTF8_off(sv); diff --git a/t/op/pat.t b/t/op/pat.t index a66ea45..8575ca8 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -1133,8 +1133,6 @@ $test++; $_ = "a\x{100}b"; if (/(.)(\C)(\C)(.)/) { print "ok 232\n"; - # currently \C are still tagged as UTF-8 - use bytes; if ($1 eq "a") { print "ok 233\n"; } else { @@ -1164,7 +1162,6 @@ $_ = "\x{100}"; if (/(\C)/g) { print "ok 237\n"; # currently \C are still tagged as UTF-8 - use bytes; if ($1 eq "\xC4") { print "ok 238\n"; } else { @@ -1178,7 +1175,6 @@ if (/(\C)/g) { if (/(\C)/g) { print "ok 239\n"; # currently \C are still tagged as UTF-8 - use bytes; if ($1 eq "\x80") { print "ok 240\n"; } else { @@ -1231,7 +1227,7 @@ if (ord('i') == 0x89 && ord('J') == 0xd1) { # EBCDIC } } else { for (244..245) { - print "ok $_ # Skip: not EBCDIC\n"; + print "ok $_ # Skip: only in EBCDIC\n"; } }