From: SADAHIRO Tomoyuki Date: Sun, 22 Feb 2004 16:09:33 +0000 (+0900) Subject: Re: [perl #26905] "use bytes" doesn't apply byte semantics to concatenation X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=90f5826e78891b7633d6b153f416059ce7d36f9e;p=p5sagit%2Fp5-mst-13.2.git Re: [perl #26905] "use bytes" doesn't apply byte semantics to concatenation Date: Sun, 22 Feb 2004 16:09:33 +0900 Message-Id: <20040222160505.98E5.BQW10602@nifty.com> Subject: [PATCH] Encode::CN::HZ (was Re: [perl #26905] "use bytes" doesn't apply byte semantics to concatenation) From: SADAHIRO Tomoyuki Date: Sun, 22 Feb 2004 18:41:43 +0900 Message-Id: <20040222182357.6B39.BQW10602@nifty.com> Plus, add a "_01" to the theoretical version number of Encode::CN::HZ. p4raw-id: //depot/perl@22363 --- diff --git a/ext/Encode/lib/Encode/CN/HZ.pm b/ext/Encode/lib/Encode/CN/HZ.pm index 1ea1e45..02c764c 100644 --- a/ext/Encode/lib/Encode/CN/HZ.pm +++ b/ext/Encode/lib/Encode/CN/HZ.pm @@ -3,7 +3,8 @@ package Encode::CN::HZ; use strict; use vars qw($VERSION); -$VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +#$VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +$VERSION = 1.05_01; use Encode qw(:fallbacks); @@ -17,11 +18,8 @@ __PACKAGE__->Define('hz'); sub needs_lines { 1 } -sub perlio_ok { 1 } - sub decode ($$;$) { - use bytes; my ($obj,$str,$chk) = @_; my $GB = Encode::find_encoding('gb2312-raw'); @@ -59,13 +57,11 @@ sub decode ($$;$) } } } - $_[1] = $str if $chk; + $_[1] = '' if $chk; # needs_lines guarantees no partial character return $ret; } sub cat_decode { - use bytes; - my ($obj, undef, $src, $pos, $trm, $chk) = @_; my ($rdst, $rsrc, $rpos) = \@_[1..3]; diff --git a/pp_hot.c b/pp_hot.c index 4d87255..c9fe4f0 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -137,7 +137,7 @@ PP(pp_concat) bool lbyte; STRLEN rlen; char* rpv = SvPV(right, rlen); /* mg_get(right) happens here */ - bool rbyte = !SvUTF8(right), rcopied = FALSE; + bool rbyte = !DO_UTF8(right), rcopied = FALSE; if (TARG == right && right != left) { right = sv_2mortal(newSVpvn(rpv, rlen)); @@ -147,7 +147,7 @@ PP(pp_concat) if (TARG != left) { lpv = SvPV(left, llen); /* mg_get(left) may happen here */ - lbyte = !SvUTF8(left); + lbyte = !DO_UTF8(left); sv_setpvn(TARG, lpv, llen); if (!lbyte) SvUTF8_on(TARG); @@ -160,7 +160,9 @@ PP(pp_concat) if (!SvOK(TARG)) sv_setpv(left, ""); lpv = SvPV_nomg(left, llen); - lbyte = !SvUTF8(left); + lbyte = !DO_UTF8(left); + if (IN_BYTES) + SvUTF8_off(TARG); } #if defined(PERL_Y2KWARN) diff --git a/t/op/concat.t b/t/op/concat.t index 865a498..5ef40dd 100644 --- a/t/op/concat.t +++ b/t/op/concat.t @@ -18,7 +18,7 @@ sub ok { return $ok; } -print "1..20\n"; +print "1..28\n"; ($a, $b, $c) = qw(foo bar); @@ -117,3 +117,32 @@ sub beq { use bytes; $_[0] eq $_[1]; } $y = ($x = '' . strfoo()) . "y"; ok( "$x,$y" eq "x,xy", 'figures out correct target' ); } + +{ + # [perl #26905] "use bytes" doesn't apply byte semantics to concatenation + + my $p = "\xB6"; # PILCROW SIGN (ASCII/EBCDIC), 2bytes in UTF-X + my $u = "\x{100}"; + my $b = pack 'a*', "\x{100}"; + my $pu = "\xB6\x{100}"; + my $up = "\x{100}\xB6"; + my $x1 = $p; + my $y1 = $u; + + use bytes; + ok(beq($p.$u, $p.$b), "perl #26905, left eq bytes"); + ok(beq($u.$p, $b.$p), "perl #26905, right eq bytes"); + ok(!beq($p.$u, $pu), "perl #26905, left ne unicode"); + ok(!beq($u.$p, $up), "perl #26905, right ne unicode"); + + $x1 .= $u; + $x2 = $p . $u; + $y1 .= $p; + $y2 = $u . $p; + + no bytes; + ok(beq($x1, $x2), "perl #26905, left, .= vs = . in bytes"); + ok(beq($y1, $y2), "perl #26905, right, .= vs = . in bytes"); + ok(($x1 eq $x2), "perl #26905, left, .= vs = . in chars"); + ok(($y1 eq $y2), "perl #26905, right, .= vs = . in chars"); +}