From: Rafael Garcia-Suarez Date: Mon, 16 Jan 2006 14:09:29 +0000 (+0000) Subject: Upgrade to Encode 2.14 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=41c240f59398510e3a736bd441215c051e190e68;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 2.14 p4raw-id: //depot/perl@26863 --- diff --git a/MANIFEST b/MANIFEST index 7f9225d..a7d1f20 100644 --- a/MANIFEST +++ b/MANIFEST @@ -590,6 +590,7 @@ ext/Encode/t/Encode.t test script ext/Encode/t/encoding.t test script ext/Encode/t/enc_utf8.t test script ext/Encode/t/fallback.t test script +ext/Encode/t/from_to.t test script ext/Encode/t/gb2312.enc test data ext/Encode/t/gb2312.utf test data ext/Encode/t/grow.t test script diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index edb016c..60a7da0 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -53,7 +53,7 @@ Simon Cozens Spider Boardman Steve Hay Steve Peters -Tatsuhiko Miyagawa +Tatsuhiko Miyagawa Tels Vadim Konovalov Yitzchak Scott-Thoennes diff --git a/ext/Encode/Changes b/ext/Encode/Changes index acdead6..6cd82b7 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,8 +1,42 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.12 2005/09/08 14:17:17 dankogai Exp dankogai $ +# $Id: Changes,v 2.14 2006/01/15 15:43:36 dankogai Exp dankogai $ # -$Revision: 2.12 $ $Date: 2005/09/08 14:17:17 $ + +$Revision: 2.14 $ $Date: 2006/01/15 15:43:36 $ +2.14 2006/01/15 15:06:36 $ +! Makefile.PL + Change 26295: Don't build manpages for Encode and Unicode::Normalize + Message-Id: <200512071540.jB7Fe4Gt017960@smtp3.ActiveState.com> +! Encode.pm + Change 26081: Pod nit in Encode.pm, found by Marc Lehmann in RT #36949. + Message-Id: <200511110357.jAB3vZcP023647@smtp3.ActiveState.com> +! Encode.xs Encode/encode.h bin/enc2xs encengine.c + Change 25821: Mark more static Encode data structures as const. + Change 25823: use more 'const' in the Encode data structures. 
+ Message-Id: <200510221243.j9MChTSu027711@smtp3.ActiveState.com> + Message-Id: <200510221343.j9MDhTk9001245@smtp3.ActiveState.com> + +2.13 2006/01/15 15:06:36 +! AUTHORS + Miyagawa's mail address updated + Message-Id: <693254b90601150535o767e10bai4f4732c275b4ebe0@mail.gmail.com> +! lib/Encode/MIME/Header.pm + #16413: Encode::MIME::Headers patch to solve what is probably someone else's bug + http://rt.cpan.org/NoAuth/Bug.html?id=16413 +! lib/Encode/MIME/Header.pm t/mime-header.t + Applied: RT #16258: Support for RFC 2184 language tag + http://rt.cpan.org/NoAuth/Bug.html?id=16258 +! Encode.pm + Fixed RT #14559: fix for #8872 introduces new "bug" + http://rt.cpan.org/NoAuth/Bug.html?id=14559 +! Encode.pm ++ t/from_to.t + from_to() now makes use of $check more naturally. + Message-Id: <693254b90601150535o767e10bai4f4732c275b4ebe0@mail.gmail.com> + + +2.12 2005/09/08 14:17:17 ! Encode.xs Encode.pm t/fallback.t Now accepts coderef for CHECK! ! ucm/8859-7.ucm diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 4a4411b..7785f5a 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,9 +1,9 @@ # -# $Id: Encode.pm,v 2.12 2005/09/08 14:17:17 dankogai Exp dankogai $ +# $Id: Encode.pm,v 2.14 2006/01/15 15:43:36 dankogai Exp dankogai $ # package Encode; use strict; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.12 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.14 $ =~ /(\d+)/g; sub DEBUG () { 0 } use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); @@ -183,11 +183,10 @@ sub from_to($$$;$) require Carp; Carp::croak("Unknown encoding '$to'"); } - my $uni = $f->decode($string,$check); - return undef if ($check && length($string)); - $string = $t->encode($uni,$check); + my $uni = $f->decode($string); + $_[0] = $string = $t->encode($uni,$check); return undef if ($check && length($uni)); - return defined($_[0] = $string) ? length($string) : undef ; + return defined($_[0]) ? 
length($string) : undef ; } sub encode_utf8($) @@ -200,6 +199,7 @@ sub encode_utf8($) sub decode_utf8($;$) { my ($str, $check) = @_; + return $str if is_utf8($str); if ($check){ return decode("utf8", $str, $check); }else{ diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 274fae0..6cc4a6a 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.6 2005/09/08 14:17:17 dankogai Exp dankogai $ + $Id: Encode.xs,v 2.7 2006/01/15 15:43:36 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index 35aefdd..d54232f 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -72,6 +72,7 @@ t/enc_module.t test script t/enc_utf8.t test script t/encoding.t test script t/fallback.t test script +t/from_to.t test script t/gb2312.enc test data t/gb2312.utf test data t/grow.t test script diff --git a/ext/Encode/META.yml b/ext/Encode/META.yml index 9373f60..fa6e843 100644 --- a/ext/Encode/META.yml +++ b/ext/Encode/META.yml @@ -1,7 +1,7 @@ # http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: Encode -version: 2.12 +version: 2.14 version_from: Encode.pm installdirs: perl requires: diff --git a/ext/Encode/bin/enc2xs b/ext/Encode/bin/enc2xs index 82ff01c..7930ece 100644 --- a/ext/Encode/bin/enc2xs +++ b/ext/Encode/bin/enc2xs @@ -9,7 +9,7 @@ use strict; use warnings; use Getopt::Std; my @orig_ARGV = @ARGV; -our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # These may get re-ordered. 
# RAW is a do_now as inserted by &enter diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm index d0b083a..4db0401 100644 --- a/ext/Encode/encoding.pm +++ b/ext/Encode/encoding.pm @@ -1,4 +1,4 @@ -# $Id: encoding.pm,v 2.2 2005/09/08 14:17:17 dankogai Exp dankogai $ +# $Id: encoding.pm,v 2.2 2005/09/08 14:17:17 dankogai Exp $ package encoding; our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; diff --git a/ext/Encode/lib/Encode/CN/HZ.pm b/ext/Encode/lib/Encode/CN/HZ.pm index fbc6ba6..94b372c 100644 --- a/ext/Encode/lib/Encode/CN/HZ.pm +++ b/ext/Encode/lib/Encode/CN/HZ.pm @@ -3,7 +3,7 @@ package Encode::CN::HZ; use strict; use vars qw($VERSION); -$VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +$VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -45,6 +45,7 @@ sub decode ($$;$) } } else { # GB mode; the byte ranges are as in RFC 1843. 
+ no warnings 'uninitialized'; if ($str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)//) { $ret .= $GB->decode($1, $chk); } diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm index f4e2ad6..29fc858 100644 --- a/ext/Encode/lib/Encode/MIME/Header.pm +++ b/ext/Encode/lib/Encode/MIME/Header.pm @@ -1,7 +1,7 @@ package Encode::MIME::Header; use strict; # use warnings; -our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode qw(find_encoding encode_utf8 decode_utf8); use MIME::Base64; use Carp; @@ -47,10 +47,15 @@ sub decode($$;$){ $str =~ s/\?=\s+=\?/\?==\?/gos; # multi-line header to single line $str =~ s/(:?\r|\n|\r\n)[ \t]//gos; + + 1 while ($str =~ s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/); # Concat consecutive QP encoded mime headers + # Fixes breaking inside multi-byte characters + $str =~ s{ =\? # begin encoded word ([0-9A-Za-z\-_]+) # charset (encoding) + (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) \?([QqBb])\? # delimiter (.*?) # Base64-encodede contents \?= # end encoded word @@ -96,6 +101,7 @@ my $re_encoded_word = (?: =\? # begin encoded word (?:[0-9A-Za-z\-_]+) # charset (encoding) + (?:\*\w+(?:-\w+)*)? # language (RFC 2231) \?(?:[QqBb])\? # delimiter (?:.*?) 
# Base64-encodede contents \?= # end encoded word diff --git a/ext/Encode/t/from_to.t b/ext/Encode/t/from_to.t new file mode 100644 index 0000000..dd1d9ec --- /dev/null +++ b/ext/Encode/t/from_to.t @@ -0,0 +1,12 @@ +# $Id: from_to.t,v 1.1 2006/01/15 15:06:36 dankogai Exp $ +use strict; +use Test::More tests => 3; +use Encode qw(encode from_to); + +my $foo = encode("utf-8", "\x{5abe}"); +from_to($foo, "utf-8" => "latin1", Encode::FB_HTMLCREF); +ok !Encode::is_utf8($foo); +is $foo, '&#23230;'; + +my $bar = encode("latin-1", "\x{5abe}", Encode::FB_HTMLCREF); +is $bar, '&#23230;'; diff --git a/ext/Encode/t/mime-header.t b/ext/Encode/t/mime-header.t index 5f80347..4e3ac56 100644 --- a/ext/Encode/t/mime-header.t +++ b/ext/Encode/t/mime-header.t @@ -1,5 +1,5 @@ # -# $Id: mime-header.t,v 2.0 2004/05/16 20:55:19 dankogai Exp $ +# $Id: mime-header.t,v 2.1 2006/01/15 15:06:36 dankogai Exp $ # This script is written in utf8 # BEGIN { @@ -23,7 +23,7 @@ no utf8; use strict; #use Test::More qw(no_plan); -use Test::More tests => 10; +use Test::More tests => 11; use_ok("Encode::MIME::Header"); my $eheader =<<'EOS'; @@ -55,6 +55,16 @@ EOS is(Encode::decode('MIME-Header', $uheader), $dheader, "decode UTF-8 (RFC2047)"); +my $lheader =<<'EOS'; +From: =?US-ASCII*en-US?Q?Keith_Moore?= +To: =?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?= +CC: =?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard +Subject: =?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= + =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= +EOS + +is(Encode::decode('MIME-Header', $lheader), $dheader, "decode language tag (RFC2231)"); + $dheader=<<'EOS'; From: 小飼 弾 diff --git a/ext/Encode/ucm/8859-7.ucm b/ext/Encode/ucm/8859-7.ucm index 69eab84..4dd4c6a 100644 --- a/ext/Encode/ucm/8859-7.ucm +++ b/ext/Encode/ucm/8859-7.ucm @@ -1,5 +1,5 @@ # -# $Id: 8859-7.ucm,v 2.1 2005/09/08 14:17:17 dankogai Exp dankogai $ +# $Id: 8859-7.ucm,v 2.1 2005/09/08 14:17:17 dankogai Exp $ # # Original table can be obtained at #
http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT