From: Nicholas Clark Date: Fri, 28 Apr 2006 16:34:14 +0000 (+0000) Subject: Fix bug 34297 (length of overloaded UTF-8 strings) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=923318002ce505ec75344304b64394034456b5b8;p=p5sagit%2Fp5-mst-13.2.git Fix bug 34297 (length of overloaded UTF-8 strings) p4raw-id: //depot/perl@28006 --- diff --git a/MANIFEST b/MANIFEST index d130e2e..7d34252 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3532,6 +3532,7 @@ t/uni/chomp.t See if Unicode chomp works t/uni/class.t See if Unicode classes work (\p) t/uni/fold.t See if Unicode folding works t/uni/lower.t See if Unicode casing works +t/uni/overload.t See if Unicode overloading works t/uni/sprintf.t See if Unicode sprintf works t/uni/title.t See if Unicode casing works t/uni/tr_7jis.t See if Unicode tr/// works diff --git a/pp.c b/pp.c index 356bfec..718f0f0 100644 --- a/pp.c +++ b/pp.c @@ -2950,7 +2950,22 @@ PP(pp_length) dVAR; dSP; dTARGET; SV * const sv = TOPs; - if (DO_UTF8(sv)) + if (SvAMAGIC(sv)) { + /* For an overloaded scalar, we can't know in advance if it's going to + be UTF-8 or not. Also, we can't call sv_len_utf8 as it likes to + cache the length. Maybe that should be a documented feature of it. + */ + STRLEN len; + const char *const p = SvPV_const(sv, len); + + if (DO_UTF8(sv)) { + SETi(Perl_utf8_length(aTHX_ p, p + len)); + } + else + SETi(len); + + } + else if (DO_UTF8(sv)) SETi(sv_len_utf8(sv)); else SETi(sv_len(sv)); diff --git a/t/uni/overload.t b/t/uni/overload.t new file mode 100644 index 0000000..9338f75 --- /dev/null +++ b/t/uni/overload.t @@ -0,0 +1,44 @@ +#!perl -w + +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + @INC = '../lib'; + } +} + +use Test::More tests => 8; + +package UTF8Field; +use strict; + +use overload '""' => 'stringify'; + +sub new { + my $class = shift; + return bless [shift, 0], $class; +} + +sub stringify { + my $self = shift; + $self->[1] = ! $self->[1]; + if ($self->[1]) { + utf8::downgrade($self->[0]); + } else { + utf8::upgrade($self->[0]); + } + $self->[0]; +} + +package main; + +# Bug 34297 +foreach my $t ("ASCII", "B\366se") { + my $length = length $t; + + my $u = UTF8Field->new($t); + is (length $u, $length, "length of '$t'"); + is (length $u, $length, "length of '$t'"); + is (length $u, $length, "length of '$t'"); + is (length $u, $length, "length of '$t'"); +}