From: Jarkko Hietaniemi Date: Tue, 30 Apr 2002 15:46:38 +0000 (+0000) Subject: Upgrade to Encode 1.65. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e8c86ba6ca66f86dc4c8f4de0abf70f53c2484f4;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 1.65. p4raw-id: //depot/perl@16282 --- diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 75d9208..595595e 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,9 +1,21 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 1.64 2002/04/29 06:54:06 dankogai Exp $ +# $Id: Changes,v 1.65 2002/04/30 16:13:37 dankogai Exp dankogai $ # -$Revision: 1.64 $ $Date: 2002/04/29 06:54:06 $ +$Revision: 1.65 $ $Date: 2002/04/30 16:13:37 $ +! Encode.pm + encode(undef) no longer warns for C. Suggested by Paul. + Message-Id: +! lib/Encode/Supported.pod + Encode::MIME::Header and Encode::Guess mentioned + Updated for Encode::HanExtra 0.05 and Encode::JIS2K +! lib/Encode/Guess.pm + POD fix by Miyagawa-kun + Message-Id: <86k7qqx8p7.wl@mail.edge.co.jp> + +1.64 2002/04/29 06:54:06 ! ucm/euc-jp.ucm Now decodes euc-jisx0213 also. CAVEAT: encode("euc-jp"...) and encocde("euc-jisx0213") are still DIFFERENT. @@ -541,7 +553,7 @@ $Revision: 1.64 $ $Date: 2002/04/29 06:54:06 $ Typo fixes and improvements by jhi Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al. -1.11 $Date: 2002/04/29 06:54:06 $ +1.11 $Date: 2002/04/30 16:13:37 $ + t/encoding.t + t/jperl.t ! 
MANIFEST diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 45a66f6..4b0b1fe 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,9 +1,9 @@ # -# $Id: Encode.pm,v 1.64 2002/04/29 06:54:06 dankogai Exp $ +# $Id: Encode.pm,v 1.65 2002/04/30 16:13:37 dankogai Exp dankogai $ # package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 1.64 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.65 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); @@ -130,7 +130,8 @@ sub resolve_alias { sub encode($$;$) { - my ($name,$string,$check) = @_; + my ($name, $string, $check) = @_; + defined $string or return; $check ||=0; my $enc = find_encoding($name); unless(defined $enc){ @@ -145,6 +146,7 @@ sub encode($$;$) sub decode($$;$) { my ($name,$octets,$check) = @_; + defined $octets or return; $check ||=0; my $enc = find_encoding($name); unless(defined $enc){ @@ -159,6 +161,7 @@ sub decode($$;$) sub from_to($$$;$) { my ($string,$from,$to,$check) = @_; + defined $string or return; $check ||=0; my $f = find_encoding($from); unless (defined $f){ @@ -180,6 +183,7 @@ sub from_to($$$;$) sub encode_utf8($) { my ($str) = @_; + defined $str or return; utf8::encode($str); return $str; } @@ -187,6 +191,7 @@ sub encode_utf8($) sub decode_utf8($) { my ($str) = @_; + defined $str or return; return undef unless utf8::decode($str); return $str; } diff --git a/ext/Encode/bin/ucm2table b/ext/Encode/bin/ucm2table index ab44573..094ebe0 100644 --- a/ext/Encode/bin/ucm2table +++ b/ext/Encode/bin/ucm2table @@ -1,5 +1,5 @@ #!/usr/bin/perl -# $Id: ucm2table,v 1.1 2002/04/22 23:57:10 dankogai Exp $ +# $Id: ucm2table,v 1.2 2002/04/30 16:13:37 dankogai Exp dankogai $ # use 5.006; @@ -13,7 +13,7 @@ my $Hex = '[0-9A-Fa-f]'; while(<>){ chomp; my ($uni, $enc, $fb) = - /^\s+(\S+)\s+\|(\d)$/o or next; + /^\s+(\S+)\s+\|(\d)/o or next; $fb eq '0' or next; my @byte = (); my 
$ord = 0; @@ -32,7 +32,7 @@ while(<>){ my $start = $Opt{a} ? 0x20 : 0xa0; -for (my $x = $start; $x <= 0xffff; $ x+= 32) { +for (my $x = $start; $x <= 0xffff; $x += 32) { my $line = ''; for my $i (0..31){ my $num = $x+$i; $num eq 0x7f and next; # skip delete diff --git a/ext/Encode/lib/Encode/Guess.pm b/ext/Encode/lib/Encode/Guess.pm index 1efa5cd..35cc1e1 100644 --- a/ext/Encode/lib/Encode/Guess.pm +++ b/ext/Encode/lib/Encode/Guess.pm @@ -2,7 +2,7 @@ package Encode::Guess; use strict; use Encode qw(:fallbacks find_encoding); -our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; my $Canon = 'Guess'; our $DEBUG = 0; @@ -133,6 +133,7 @@ Encode::Guess -- Guesses encoding from data # if you are sure $data won't contain anything bogus + use Encode; use Encode::Guess qw/euc-jp shiftjis 7bit-jis/; my $utf8 = decode("Guess", $data); my $data = encode("Guess", $utf8); # this doesn't work! diff --git a/ext/Encode/lib/Encode/Supported.pod b/ext/Encode/lib/Encode/Supported.pod index 806f85b..8cdcec1 100644 --- a/ext/Encode/lib/Encode/Supported.pod +++ b/ext/Encode/lib/Encode/Supported.pod @@ -202,7 +202,6 @@ to 'CN', continental China, while traditional Chinese is mapped to euc-jp shiftjis cp932 macJapanese 7bit-jis - euc-jp iso-2022-jp [RFC1468] iso-2022-jp-1 [RFC2237] jis0201-raw { JIS X 0201 (roman + halfwidth kana) without CES } @@ -239,9 +238,25 @@ distributed separately on CPAN, under the name Encode::HanExtra. 
Standard DOS/Win Macintosh Comment/Reference ---------------------------------------------------------------- - gb18030 - euc-tw - big5plus + big5ext CMEX's Big5e Extension + big5plus CMEX's Big5+ Extension + cccii Chinese Character Code for Information Interchange + euc-tw EUC (Extended Unix Character) + gb18030 GBK with Traditional Characters + ---------------------------------------------------------------- + +=item Encode::JIS2K -- JIS X 0213 encodings via CPAN + +Due to size concerns, additional Japanese encodings below are +distributed separately on CPAN, under the name Encode::JIS2K. + + Standard DOS/Win Macintosh Comment/Reference + ---------------------------------------------------------------- + euc-jisx0213 + shiftjisx0213 + iso-2022-jp-3 + jis0213-1-raw + jis0213-2-raw ---------------------------------------------------------------- =back @@ -275,6 +290,23 @@ For symbols and dingbats. AdobeSymbol ---------------------------------------------------------------- +=item Encode::MIME::Header + +Strictly speaking, MIME header encoding documented in RFC 2047 is more +of encapsulation than encoding. But included anyway. + + ---------------------------------------------------------------- + MIME-Header [RFC2047] + MIME-B [RFC2047] + MIME-Q [RFC2047] + ---------------------------------------------------------------- + +=item Encode::Guess + +This one is not a name of encoding but a utility that lets you pick up +the most appropriate encoding for a data out of given I<suspects>. See +L<Encode::Guess> for details. + =back =head1 Unsupported encodings @@ -530,7 +562,6 @@ pages! The rule of thumb is to use C unless you know what you're doing and unless you really benefit from using C. 
- ISO-IR-165 [RFC1345] VISCII GB 12345 @@ -701,6 +732,7 @@ L, L, L, L, L, L, L, L +L, L =head1 References diff --git a/ext/Encode/ucm/euc-jp.ucm b/ext/Encode/ucm/euc-jp.ucm index 9479163..cc1379b 100644 --- a/ext/Encode/ucm/euc-jp.ucm +++ b/ext/Encode/ucm/euc-jp.ucm @@ -1,5 +1,5 @@ # -# $Id: euc-jp.ucm,v 1.2 2002/04/29 07:01:58 dankogai Exp dankogai $ +# $Id: euc-jp.ucm,v 1.2 2002/04/29 07:01:58 dankogai Exp $ # "euc-jp" 1