From: Nicholas Clark Date: Sat, 7 Apr 2007 12:45:44 +0000 (+0000) Subject: Upgrade to Encode 2.19 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=51e4e64df3bc05a7c291521c55c2654beda56c26;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 2.19 p4raw-id: //depot/perl@30866 --- diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index 60a7da0..647b356 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -50,6 +50,7 @@ SADAHIRO Tomoyuki SUGAWARA Hajime SUZUKI Norio Simon Cozens +Slaven Rezic Spider Boardman Steve Hay Steve Peters diff --git a/ext/Encode/Changes b/ext/Encode/Changes index e7b2d7a..5039a73 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,8 +1,33 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.17 2006/06/03 20:28:48 dankogai Exp dankogai $ +# $Id: Changes,v 2.19 2007/04/06 12:53:41 dankogai Exp dankogai $ # -$Revision: 2.17 $ $Date: 2006/06/03 20:28:48 $ +$Revision: 2.19 $ $Date: 2007/04/06 12:53:41 $ +! lib/Encode/JP/JIS7.pm ++ t/jis7-fallback.t + encode('iso-2022-jp') fallback support added by MIYAGAWA++ + decode()'s fallback remains unchanged (FB_PERLQQ) since UTF-8 + contains all characters in iso-2022-jp so there's no need for fancy stuff. + Message-Id: <693254b90704060526s6d850320h71cdda50dfbf7eba@mail.gmail.com> +! Encode.pm + #25216 ([PATCH] Encode.pm: postpone the load of Encode::Encoding) + http://rt.cpan.org/NoAuth/Bug.html?id=#25216 +! lib/Encode/MIME/Header.pm t/mime-header.t + #24418 (Encode::MIME::Header: wrong encoding with latin1 characters) + http://rt.cpan.org/NoAuth/Bug.html?id=#24418 +! Encode.pm + #23876 (Add documentation for LEAVE_SRC) + http://rt.cpan.org/NoAuth/Bug.html?id=#23876 +! lib/Encode/Alias.pm t/Aliases.t + #20781: Thai encoding needs alias for tis-620 + http://rt.cpan.org/NoAuth/Bug.html?id=#20781 +! bin/piconv AUTHORS + #20344: piconv: wrong conversion of utf-16le encoded files (with PATCH) + http://rt.cpan.org/NoAuth/Bug.html?id=#20344 +! 
Encode.pm Encode.xs bin/enc2xs encoding.pm t/Aliases.t t/utf8strict.t + Imported from bleedperl's 2.18_01 + +2.18 2006/06/03 20:28:48 ! bin/enc2xs overhauled the -C option - added ascii-ctrl', 'null', 'utf-8-strict' to core diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index bdfa695..8b0f4a6 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,10 +1,10 @@ # -# $Id: Encode.pm,v 2.18 2006/06/03 20:28:48 dankogai Exp dankogai $ +# $Id: Encode.pm,v 2.19 2007/04/06 12:53:41 dankogai Exp dankogai $ # package Encode; use strict; use warnings; -our $VERSION = "2.18_01"; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.19 $ =~ /(\d+)/g; sub DEBUG () { 0 } use XSLoader (); XSLoader::load( __PACKAGE__, $VERSION ); @@ -210,7 +210,7 @@ predefine_encodings(1); # sub predefine_encodings { - use Encode::Encoding; + require Encode::Encoding; no warnings 'redefine'; my $use_xs = shift; if ($ON_EBCDIC) { @@ -659,6 +659,12 @@ constants via C<use Encode qw(:fallback_all)>. =back +=item Encode::LEAVE_SRC + +If the C<Encode::LEAVE_SRC> bit is not set, but I<CHECK> is, then the second +argument to C<encode()> or C<decode()> may be assigned to by the functions. If +you're not interested in this, then bitwise-or the bitmask with it. 
+ =head2 coderef for CHECK As of Encode 2.12 CHECK can also be a code reference which takes the diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index ba2bf62..30ede3f 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.10 2006/06/03 20:28:48 dankogai Exp dankogai $ + $Id: Encode.xs,v 2.11 2007/04/06 12:53:41 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT diff --git a/ext/Encode/bin/piconv b/ext/Encode/bin/piconv index 0a2f6f9..37dd153 100644 --- a/ext/Encode/bin/piconv +++ b/ext/Encode/bin/piconv @@ -1,5 +1,5 @@ #!./perl -# $Id: piconv,v 2.2 2006/05/03 18:24:10 dankogai Exp $ +# $Id: piconv,v 2.3 2007/04/06 12:53:41 dankogai Exp dankogai $ # use 5.8.0; use strict; @@ -40,7 +40,7 @@ $Opt{from} || $Opt{to} || help(); my $from = $Opt{from} || $locale or help("from_encoding unspecified"); my $to = $Opt{to} || $locale or help("to_encoding unspecified"); $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit; -my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to'; +my $scheme = exists $Scheme{$Opt{scheme}} ? $Opt{scheme} : 'from_to'; $Opt{check} ||= $Opt{c}; $Opt{perlqq} and $Opt{check} = Encode::PERLQQ; $Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF; @@ -246,6 +246,9 @@ implementation. The new perlIO layer is used. NI-S' favorite. +You should use this option if you are using UTF-16 and others which +linefeed is not $/. + =back Like the I<-D> option, this is also for Encode hackers. diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm index 1f418e3..fff7adb 100644 --- a/ext/Encode/encoding.pm +++ b/ext/Encode/encoding.pm @@ -1,6 +1,6 @@ -# $Id: encoding.pm,v 2.4 2006/06/03 20:28:48 dankogai Exp dankogai $ +# $Id: encoding.pm,v 2.5 2007/04/06 12:53:41 dankogai Exp dankogai $ package encoding; -our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . 
"%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode; use strict; @@ -315,6 +315,14 @@ always the same as the length of C<$/> in the native encoding. This pragma affects utf8::upgrade, but not utf8::downgrade. +=head2 Side effects + +If the C<encoding> pragma is in scope then the lengths returned are +calculated from the length of C<$/> in Unicode characters, which is not +always the same as the length of C<$/> in the native encoding. + +This pragma affects utf8::upgrade, but not utf8::downgrade. + =head1 FEATURES THAT REQUIRE 5.8.1 Some of the features offered by this pragma requires perl 5.8.1. Most diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm index 858f60c..b865f0d 100644 --- a/ext/Encode/lib/Encode/Alias.pm +++ b/ext/Encode/lib/Encode/Alias.pm @@ -3,7 +3,7 @@ use strict; use warnings; no warnings 'redefine'; use Encode; -our $VERSION = do { my @r = ( q$Revision: 2.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; sub DEBUG () { 0 } use base qw(Exporter); @@ -189,8 +189,9 @@ sub init_aliases { 'greek' => 'iso-8859-7', 'hebrew' => 'iso-8859-8', 'thai' => 'iso-8859-11', - 'tis620' => 'iso-8859-11', ); + # RT #20781 + define_alias(qr/\btis-?620\b/i => '"iso-8859-11"'); # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN. # And Microsoft has their own naming (again, surprisingly). 
diff --git a/ext/Encode/lib/Encode/CJKConstants.pm b/ext/Encode/lib/Encode/CJKConstants.pm index ccc5231..43d2033 100644 --- a/ext/Encode/lib/Encode/CJKConstants.pm +++ b/ext/Encode/lib/Encode/CJKConstants.pm @@ -1,12 +1,12 @@ # -# $Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp dankogai $ +# $Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $ # package Encode::CJKConstants; use strict; use warnings; -our $RCSID = q$Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp dankogai $; +our $RCSID = q$Id: CJKConstants.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $; our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Carp; diff --git a/ext/Encode/lib/Encode/JP/H2Z.pm b/ext/Encode/lib/Encode/JP/H2Z.pm index 81ce9ac..f8e2230 100644 --- a/ext/Encode/lib/Encode/JP/H2Z.pm +++ b/ext/Encode/lib/Encode/JP/H2Z.pm @@ -1,5 +1,5 @@ # -# $Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp dankogai $ +# $Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $ # package Encode::JP::H2Z; @@ -7,7 +7,7 @@ package Encode::JP::H2Z; use strict; use warnings; -our $RCSID = q$Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp dankogai $; +our $RCSID = q$Id: H2Z.pm,v 2.2 2006/06/03 20:28:48 dankogai Exp $; our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode::CJKConstants qw(:all); diff --git a/ext/Encode/lib/Encode/JP/JIS7.pm b/ext/Encode/lib/Encode/JP/JIS7.pm index f2e0eca..bb048fd 100644 --- a/ext/Encode/lib/Encode/JP/JIS7.pm +++ b/ext/Encode/lib/Encode/JP/JIS7.pm @@ -1,7 +1,7 @@ package Encode::JP::JIS7; use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . 
"%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -49,7 +49,7 @@ sub encode($$;$) { # empty the input string in the stack so perlio is ok $_[1] = '' if $chk; my ( $h2z, $jis0212 ) = @$obj{qw(h2z jis0212)}; - my $octet = Encode::encode( 'euc-jp', $utf8, FB_PERLQQ ); + my $octet = Encode::encode( 'euc-jp', $utf8, $chk ); $h2z and &Encode::JP::H2Z::h2z( \$octet ); euc_jis( \$octet, $jis0212 ); return $octet; diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm index 7e8264a..b664d88 100644 --- a/ext/Encode/lib/Encode/MIME/Header.pm +++ b/ext/Encode/lib/Encode/MIME/Header.pm @@ -3,7 +3,7 @@ use strict; use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(find_encoding encode_utf8 decode_utf8); use MIME::Base64; use Carp; @@ -174,12 +174,13 @@ sub _encode_b { sub _encode_q { my $chunk = shift; + $chunk = encode_utf8($chunk); $chunk =~ s{ ([^0-9A-Za-z]) }{ join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) }egox; - return decode_utf8( HEAD . 'Q?' . $chunk . TAIL ); + return HEAD . 'Q?' . $chunk . 
TAIL; } 1; diff --git a/ext/Encode/lib/Encode/Unicode/UTF7.pm b/ext/Encode/lib/Encode/Unicode/UTF7.pm index f8cb169..6ee4619 100644 --- a/ext/Encode/lib/Encode/Unicode/UTF7.pm +++ b/ext/Encode/lib/Encode/Unicode/UTF7.pm @@ -1,5 +1,5 @@ # -# $Id: UTF7.pm,v 2.4 2006/06/03 20:28:48 dankogai Exp dankogai $ +# $Id: UTF7.pm,v 2.4 2006/06/03 20:28:48 dankogai Exp $ # package Encode::Unicode::UTF7; use strict; diff --git a/ext/Encode/t/Aliases.t b/ext/Encode/t/Aliases.t index 9c70944..6fd0fe2 100644 --- a/ext/Encode/t/Aliases.t +++ b/ext/Encode/t/Aliases.t @@ -42,6 +42,7 @@ sub init_a2c{ 'hebrew' => 'iso-8859-8', 'thai' => 'iso-8859-11', 'tis620' => 'iso-8859-11', + 'tis-620' => 'iso-8859-11', 'WinLatin1' => 'cp1252', 'WinLatin2' => 'cp1250', 'WinCyrillic' => 'cp1251', @@ -141,6 +142,7 @@ define_alias( print "# alias test with alias overrides\n"; foreach my $a (keys %a2c){ + print "# $a => $a2c{$a}\n"; my $e = Encode::find_encoding($a); is((defined($e) and $e->name), $a2c{$a}, "Override $a") or warn "alias was $a"; diff --git a/ext/Encode/t/mime-header.t b/ext/Encode/t/mime-header.t index 3c8a559..9c63630 100644 --- a/ext/Encode/t/mime-header.t +++ b/ext/Encode/t/mime-header.t @@ -1,5 +1,5 @@ # -# $Id: mime-header.t,v 2.2 2006/05/03 18:24:10 dankogai Exp $ +# $Id: mime-header.t,v 2.3 2007/04/06 12:53:41 dankogai Exp dankogai $ # This script is written in utf8 # BEGIN { @@ -23,7 +23,7 @@ no utf8; use strict; #use Test::More qw(no_plan); -use Test::More tests => 11; +use Test::More tests => 12; use_ok("Encode::MIME::Header"); my $eheader =<<'EOS'; @@ -116,4 +116,7 @@ is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q"); is(Encode::encode('MIME-Q' => $pound_1024), '=?UTF-8?Q?=C2=A31024?=', 'pound 1024'); } + +is(Encode::encode('MIME-Q', "\x{fc}"), '=?UTF-8?Q?=C3=BC?=', 'Encode latin1 characters'); + __END__;