From: Jarkko Hietaniemi Date: Sat, 4 May 2002 15:58:53 +0000 (+0000) Subject: Upgrade to Encode 1.69. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=11067275d7d22484009dab2a975f2c8c28f4daf3;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 1.69. p4raw-id: //depot/perl@16392 --- diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index a61622c..0c7dda8 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -17,6 +17,7 @@ Craig A. Berry Dan Kogai Elizabeth Mattijsen Gerrit P. Haase +Graham Barr Gurusamy Sarathy H.Merijn Brand Jarkko Hietaniemi diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 966a53e..c1a2772 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,9 +1,29 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 1.68 2002/05/03 12:20:13 dankogai Exp $ +# $Id: Changes,v 1.69 2002/05/04 16:41:18 dankogai Exp dankogai $ # -$Revision: 1.68 $ $Date: 2002/05/03 12:20:13 $ +$Revision: 1.69 $ $Date: 2002/05/04 16:41:18 $ +! lib/Encode/MIME/Header + Floating-point coerced for UNICOS (in integer arithmetics it folds + line one character too early). Verification by Mark is pending. + Message-Id: +! Unicode/Unicode.pm + more doc patch from Elizabeth + Message-Id: <4.2.0.58.20020503210946.02f4ed30@mickey.dijkmat.nl> +! Encode/Makefile_PL.e2x + More platform-independent patch from Benjamin + Message-Id: <3CD31BE0.69F79B06@earthlink.net> +! lib/Encode/Guess AUTHORS + split regex fix by Graham Barr. Adds him to AUTHORS. + Message-Id: <20020504085419.E95940@valueclick.com> +! Encode/Makefile_PL.e2x + enc2xs script discovery made smarter and more sensible, first cited + by Miyagawa-kun and further suggestions by Rafael and Andreas +! Encode.pm lib/Encode/Guess.pm t/fallback.t t/guess.t t/mime-header.t + "The EBCDIC remapping of the low 256 bites again" #16372 by jhi + +1.68 2002/05/03 12:20:13 ! lib/Encode/Alias.pm lib/Encode/Supported.pod t/Alias.t AUTHORS UCS-4 added to aliases of UTF-32 by Elizabeth Mattijsen. Alias.t and Supported.pod modified to reflect the change. Elizabeth added @@ -583,7 +603,7 @@ $Revision: 1.68 $ $Date: 2002/05/03 12:20:13 $ Typo fixes and improvements by jhi Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al. -1.11 $Date: 2002/05/03 12:20:13 $ +1.11 $Date: 2002/05/04 16:41:18 $ + t/encoding.t + t/jperl.t ! MANIFEST diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index b9febc9..9ad7003 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,9 +1,9 @@ # -# $Id: Encode.pm,v 1.68 2002/05/03 12:20:31 dankogai Exp $ +# $Id: Encode.pm,v 1.69 2002/05/04 16:41:18 dankogai Exp dankogai $ # package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 1.68 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.69 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); diff --git a/ext/Encode/Encode/Makefile_PL.e2x b/ext/Encode/Encode/Makefile_PL.e2x index 78cf91b..0e73823 100644 --- a/ext/Encode/Encode/Makefile_PL.e2x +++ b/ext/Encode/Encode/Makefile_PL.e2x @@ -18,8 +18,8 @@ my %tables = ( require File::Spec; my ($enc2xs, $encode_h) = (); PATHLOOP: -for my $d (@Config{qw/bin sitebin vendorbin/}, - (split /:/, $ENV{PATH})){ +for my $d (@Config{qw/bin sitebin vendorbin/}, + (split /$Config{path_sep}/o, $ENV{PATH})){ for my $f (qw/enc2xs enc2xs5.7.3/){ my $path = File::Spec->catfile($d, $f); -x $path and $enc2xs = $path and last PATHLOOP; diff --git a/ext/Encode/Unicode/Unicode.pm b/ext/Encode/Unicode/Unicode.pm index 67241af..fa508eb 100644 --- a/ext/Encode/Unicode/Unicode.pm +++ b/ext/Encode/Unicode/Unicode.pm @@ -3,7 +3,7 @@ package Encode::Unicode; use strict; use warnings; -our $VERSION = do { my @r = (q$Revision: 1.36 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.37 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; XSLoader::load(__PACKAGE__,$VERSION); @@ -288,7 +288,8 @@ for UTF-8, which is a native format in perl). I A character encoding form plus byte serialization. There are seven character encoding schemes in Unicode: -UTF-8, UTF-16, UTF-16BE, UTF-16LE, UTF-32, UTF-32BE and UTF-32LE. +UTF-8, UTF-16, UTF-16BE, UTF-16LE, UTF-32 (UCS-4), UTF-32BE (UCS-4BE) and +UTF-32LE (UCS-4LE). =item Quick Reference @@ -330,7 +331,7 @@ form a character. Bogus surrogates result in death. When \x{10000} or above is encountered during encode(), it Cs them and pushes the surrogate pair to the output stream. -UTF-32 is a fixed-length encoding with each character taking 32 bits. +UTF-32 (UCS-4) is a fixed-length encoding with each character taking 32 bits. Since it is 32-bit, there is no need for I. =head2 by endianness @@ -434,10 +435,11 @@ every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I. =head1 SEE ALSO L, L, +L, RFC 2781 L, -L +The whole Unicode standard L Ch. 15, pp. 403 of C by Larry Wall, Tom Christiansen, Jon Orwant; diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm index a6172ed..1cc54bf 100644 --- a/ext/Encode/lib/Encode/Alias.pm +++ b/ext/Encode/lib/Encode/Alias.pm @@ -128,13 +128,13 @@ sub init_aliases define_alias( qr/^(.*)$/ => '"\L$1"' ); # UTF/UCS stuff - define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' ); - define_alias( qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"', + define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' ); + define_alias( qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"', qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")', - qr/^iso-10646-1$/i => '"UCS-2BE"' ); - define_alias( qr/^UTF(16|32)-?BE$/i => '"UTF-$1BE"', - qr/^UTF(16|32)-?LE$/i => '"UTF-$1LE"', - qr/^UTF(16|32)$/i => '"UTF-$1"', + qr/^iso-10646-1$/i => '"UCS-2BE"' ); + define_alias( qr/^UTF(16|32)-?BE$/i => '"UTF-$1BE"', + qr/^UTF(16|32)-?LE$/i => '"UTF-$1LE"', + qr/^UTF(16|32)$/i => '"UTF-$1"', ); # ASCII define_alias(qr/^(?:US-?)ascii$/i => '"ascii"'); diff --git a/ext/Encode/lib/Encode/Guess.pm b/ext/Encode/lib/Encode/Guess.pm index b106a04..f4bfbfa 100644 --- a/ext/Encode/lib/Encode/Guess.pm +++ b/ext/Encode/lib/Encode/Guess.pm @@ -2,7 +2,7 @@ package Encode::Guess; use strict; use Encode qw(:fallbacks find_encoding); -our $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.6 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; my $Canon = 'Guess'; our $DEBUG = 0; @@ -87,7 +87,7 @@ sub guess { $DEBUG and warn "Added: ", $e->name; } my $nline = 1; - for my $line (split /\r|\n|\r\n/, $octet){ + for my $line (split /\r\n?|\n/, $octet){ # cheat 2 -- \e in the string if ($line =~ /\e/o){ my @keys = keys %try; diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm index 683348a..09dd55e 100644 --- a/ext/Encode/lib/Encode/MIME/Header.pm +++ b/ext/Encode/lib/Encode/MIME/Header.pm @@ -1,7 +1,7 @@ package Encode::MIME::Header; use strict; # use warnings; -our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode qw(find_encoding encode_utf8); use MIME::Base64; @@ -128,7 +128,9 @@ sub _encode{ my ($o, $str) = @_; my $enc = $o->{encode}; my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL)); - $llen *= $enc eq 'B' ? 3/4 : 1/3; + # to coerce a floating-point arithmetics, the following contains + # .0 in numbers -- dankogai + $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0; my @result = (); my $chunk = ''; while(my $chr = substr($str, 0, 1, '')){ diff --git a/ext/Encode/t/mime-header.t b/ext/Encode/t/mime-header.t index 238be30..5da24be 100644 --- a/ext/Encode/t/mime-header.t +++ b/ext/Encode/t/mime-header.t @@ -1,5 +1,5 @@ # -# $Id: mime-header.t,v 1.3 2002/04/26 03:07:59 dankogai Exp $ +# $Id: mime-header.t,v 1.4 2002/05/04 16:41:18 dankogai Exp dankogai $ # This script is written in utf8 # BEGIN {