From: Rafael Garcia-Suarez Date: Tue, 7 Dec 2004 18:26:48 +0000 (+0000) Subject: Upgrade to Encode 2.09 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=78589665ff174f509516817976373678c66e4f7b;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 2.09 p4raw-id: //depot/perl@23624 --- diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 4991796..249d298 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,14 +1,38 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.8 2004/10/24 13:00:29 dankogai Exp dankogai $ +# $Id: Changes,v 2.9 2004/12/03 19:16:53 dankogai Exp dankogai $ # -$Revision: 2.8 $ $Date: 2004/10/24 13:00:29 $ +$Revision: 2.9 $ $Date: 2004/12/03 19:16:53 $ +! Encode.pm Encode.xs + Addressed " :encoding(utf8) broken in perl-5.8.6". + Message-Id: +! Encode.pm + Addressed "(de|en)code($valid_encoding, undef) does not warn". + http://rt.cpan.org/NoAuth/Bug.html?id=8723 +! Encode.pm t/Encode.t + Addressed "Can't encode URI". When a reference is fed to (en|de)code, + Encode now stringifies instead of returning undef. + http://rt.cpan.org/NoAuth/Bug.html?id=8725 +! Encode.xs t/fallback.t + Addressed "FB_HTMLCREF and FB_XMLCREF for the UTF-8 decoder". + http://rt.cpan.org/NoAuth/Bug.html?id=8694 +! Encode.pm + Addressed "s/digit/number/". + http://rt.cpan.org/NoAuth/Bug.html?id=8695 +! Encode.pm + Addressed "while (defined(read )) { ... } is an infinite loop". + http://rt.cpan.org/NoAuth/Bug.html?id=8696 +! Encode.pm + Addressed "What the heck is UCM?". + Document fixed so that it no longer contains "UCM-Based Encodings". + http://rt.cpan.org/NoAuth/Bug.html?id=8697 + +2.08 2004/10/24 13:00:29 ! Encode.xs lib/Encode/Encoding.pm Unicode/Unicode.{pm,xs} - Resolved the issue that was raised by the Encode::utf8 fallbacks vs. - PerlIO::encoding issue that was introduced in 2.07. This is done by - making use of ->renew() method that used to be used only by - Encode::Unicode. 
->renewed() method was also introduced to fetch - the value thereof. + Resolved the issue that was raised by 2.07 -- Encode::utf8 fallbacks + that was introduced messed up PerlIO::encoding. + * To do so, ->renew() is renewed and ->renewed() was introduced to + tell whether the caller is PerlIO or not. Message-Id: <94B2EB12-25B7-11D9-9E6A-000A95DBB50A@dan.co.jp> 2.07 2004/10/22 19:35:52 diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 29dde91..5e67e4c 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,9 +1,9 @@ # -# $Id: Encode.pm,v 2.8 2004/10/24 12:32:06 dankogai Exp $ +# $Id: Encode.pm,v 2.9 2004/12/03 19:16:40 dankogai Exp $ # package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 2.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.9 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; sub DEBUG () { 0 } use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); @@ -140,7 +140,7 @@ sub encode($$;$) { my ($name, $string, $check) = @_; return undef unless defined $string; - return undef if ref $string; + $string .= '' if ref $string; # stringify; $check ||=0; my $enc = find_encoding($name); unless(defined $enc){ @@ -156,7 +156,7 @@ sub decode($$;$) { my ($name,$octets,$check) = @_; return undef unless defined $octets; - return undef if ref $octets; + $octets .= '' if ref $octets; $check ||=0; my $enc = find_encoding($name); unless(defined $enc){ @@ -401,9 +401,7 @@ for $octets is B<always> off. When you encode anything, utf8 flag of the result is always off, even when it contains completely valid utf8 string. See L</"The UTF-8 flag"> below. -encode($valid_encoding, undef) is harmless but warns you for -C<Use of uninitialized value in subroutine entry>. -encode($valid_encoding, '') is harmless and warnless. +If the $string is C<undef> or a reference then C<undef> is returned. =item $string = decode(ENCODING, $octets [, CHECK]) @@ -423,9 +421,7 @@ the utf8 flag for $string is on unless $octets entirely consists of ASCII data (or EBCDIC on EBCDIC machines). See L</"The UTF-8 flag"> below. 
-decode($valid_encoding, undef) is harmless but warns you for -C<Use of uninitialized value in subroutine entry>. -decode($valid_encoding, '') is harmless and warnless. +If the $string is C<undef> or a reference then C<undef> is returned. =item [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK]) @@ -578,10 +574,10 @@ Now here is the list of I<CHECK> values available =item I<CHECK> = Encode::FB_DEFAULT ( == 0) If I<CHECK> is 0, (en|de)code will put a I<substitution character> in -place of a malformed character. When you encode to UCM-based encodings, -E<lt>subcharE<gt> will be used. When you decode from UCM-based -encodings, the code point C<0xFFFD> is used. If the data is supposed -to be UTF-8, an optional lexical warning (category utf8) is given. +place of a malformed character. When you encode, E<lt>subcharE<gt> +will be used. When you decode the code point C<0xFFFD> is used. If +the data is supposed to be UTF-8, an optional lexical warning +(category utf8) is given. =item I<CHECK> = Encode::FB_CROAK ( == 1) @@ -600,12 +596,10 @@ source data may contain partial multi-byte character sequences, (i.e. you are reading with a fixed-width buffer). Here is a sample code that does exactly this: - my $data = ''; my $utf8 = ''; - while(defined(read $fh, $buffer, 256)){ - # buffer may end in a partial character so we append - $data .= $buffer; - $utf8 .= decode($encoding, $data, Encode::FB_QUIET); - # $data now contains the unprocessed partial character + my $buffer = ''; my $string = ''; + while(read $fh, $buffer, 256, length($buffer)){ + $string .= decode($encoding, $buffer, Encode::FB_QUIET); + # $buffer now contains the unprocessed partial character } =item I<CHECK> = Encode::FB_WARN @@ -629,8 +623,8 @@ where I<HHHH> is the Unicode ID of the character that cannot be found in the character repertoire of the encoding. HTML/XML character reference modes are about the same, in place of -C<\x{I<HHHH>}>, HTML uses C<&#I<NNN>;> where I<NNN> is a decimal digit and -XML uses C<&#xI<HHHH>;> where I<HHHH> is the hexadecimal digit. +C<\x{I<HHHH>}>, HTML uses C<&#I<NNN>;> where I<NNN> is a decimal number and +XML uses C<&#xI<HHHH>;> where I<HHHH> is the hexadecimal number. 
=item The bitmask diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index d7a25ff..4d64fb1 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.2 2004/10/24 13:00:29 dankogai Exp dankogai $ + $Id: Encode.xs,v 2.3 2004/12/03 19:16:53 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -279,7 +279,6 @@ CODE: #if 0 fprintf(stderr, "renewed == %d\n", renewed); #endif - if (renewed){ check |= ENCODE_RETURN_ON_ERR; } } FREETMPS; LEAVE; /* end PerlIO check */ @@ -302,6 +301,8 @@ CODE: U8 skip = UTF8SKIP(s); if ((s + skip) > e) { /* Partial character - done */ + if (renewed) + break; goto decode_utf8_fallback; } else if (is_utf8_char(s)) { @@ -331,7 +332,9 @@ CODE: break; } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ - SV* subchar = newSVpvf("\\x%02" UVXf, (UV)*s); + SV* subchar = newSVpvf(check & ENCODE_PERLQQ ? "\\x%02" UVXf : + check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : + "&#x%" UVxf ";", (UV)*s); sv_catsv(dst, subchar); SvREFCNT_dec(subchar); } else { diff --git a/ext/Encode/META.yml b/ext/Encode/META.yml index cea68e5..6a52035 100644 --- a/ext/Encode/META.yml +++ b/ext/Encode/META.yml @@ -1,7 +1,7 @@ # http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! 
XXXXX# name: Encode -version: 2.08 +version: 2.09 version_from: Encode.pm installdirs: perl requires: diff --git a/ext/Encode/Unicode/Unicode.xs b/ext/Encode/Unicode/Unicode.xs index acecd9c..b17be85 100644 --- a/ext/Encode/Unicode/Unicode.xs +++ b/ext/Encode/Unicode/Unicode.xs @@ -1,5 +1,5 @@ /* - $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp dankogai $ + $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp $ */ #define PERL_NO_GET_CONTEXT diff --git a/ext/Encode/t/Encode.t b/ext/Encode/t/Encode.t index 63e913a..528f75f 100644 --- a/ext/Encode/t/Encode.t +++ b/ext/Encode/t/Encode.t @@ -143,6 +143,9 @@ chop $a; ok( is_utf8($a)); # weird but true: an empty UTF-8 string # non-string arguments -ok(decode(latin1 => bless {}, "x"), undef); -ok(encode(utf8 => bless {}, "x"), undef); - +package Encode::Dummy; +use overload q("") => sub { $_[0]->[0] }; +sub new { my $class = shift; bless [ @_ ] => $class } +package main; +ok(decode(latin1 => Encode::Dummy->new("foobar")), "foobar"); +ok(encode(utf8 => Encode::Dummy->new("foobar")), "foobar"); diff --git a/ext/Encode/t/fallback.t b/ext/Encode/t/fallback.t index e319357..e030414 100644 --- a/ext/Encode/t/fallback.t +++ b/ext/Encode/t/fallback.t @@ -17,7 +17,7 @@ BEGIN { use strict; #use Test::More qw(no_plan); -use Test::More tests => 36; +use Test::More tests => 40; use Encode q(:all); my $uo = ''; @@ -137,17 +137,17 @@ $dst = $ascii->encode($src, FB_HTMLCREF); is($dst, $ah, "FB_HTMLCREF ascii"); is($src, '', "FB_HTMLCREF residue ascii"); -#$src = $ao; -#$dst = $utf8->decode($src, FB_HTMLCREF); -#is($dst, $uh, "FB_HTMLCREF utf8"); -#is($src, '', "FB_HTMLCREF residue utf8"); +$src = $ao; +$dst = $utf8->decode($src, FB_HTMLCREF); +is($dst, $uh, "FB_HTMLCREF utf8"); +is($src, '', "FB_HTMLCREF residue utf8"); $src = $uo; $dst = $ascii->encode($src, FB_XMLCREF); is($dst, $ax, "FB_XMLCREF ascii"); is($src, '', "FB_XMLCREF residue ascii"); -#$src = $ao; -#$dst = $utf8->decode($src, FB_XMLCREF); -#is($dst, $ax, 
"FB_XMLCREF utf8"); -#is($src, '', "FB_XMLCREF residue utf8"); +$src = $ao; +$dst = $utf8->decode($src, FB_XMLCREF); +is($dst, $ax, "FB_XMLCREF utf8"); +is($src, '', "FB_XMLCREF residue utf8");