# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.64 2002/04/29 06:54:06 dankogai Exp $
+# $Id: Changes,v 1.65 2002/04/30 16:13:37 dankogai Exp dankogai $
#
-$Revision: 1.64 $ $Date: 2002/04/29 06:54:06 $
+$Revision: 1.65 $ $Date: 2002/04/30 16:13:37 $
+! Encode.pm
+ encode(undef) no longer warns for C<Use of uninitialized value in
+ subroutine entry>. Suggested by Paul.
+ Message-Id: <AIEAJICLCBDNAAOLLOKLMEEEEJAA.Paul.Marquess@ntlworld.com>
+! lib/Encode/Supported.pod
+ Encode::MIME::Header and Encode::Guess mentioned
+ Updated for Encode::HanExtra 0.05 and Encode::JIS2K
+! lib/Encode/Guess.pm
+ POD fix by Miyagawa-kun
+ Message-Id: <86k7qqx8p7.wl@mail.edge.co.jp>
+
+1.64 2002/04/29 06:54:06
! ucm/euc-jp.ucm
Now decodes euc-jisx0213 also. CAVEAT: encode("euc-jp"...) and
-encocde("euc-jisx0213") are still DIFFERENT.
+encode("euc-jisx0213") are still DIFFERENT.
Typo fixes and improvements by jhi
Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al.
-1.11 $Date: 2002/04/29 06:54:06 $
+1.11 $Date: 2002/04/30 16:13:37 $
+ t/encoding.t
+ t/jperl.t
! MANIFEST
#
-# $Id: Encode.pm,v 1.64 2002/04/29 06:54:06 dankogai Exp $
+# $Id: Encode.pm,v 1.65 2002/04/30 16:13:37 dankogai Exp dankogai $
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.64 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.65 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
sub encode($$;$)
{
- my ($name,$string,$check) = @_;
+ my ($name, $string, $check) = @_;
+ defined $string or return;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
sub decode($$;$)
{
my ($name,$octets,$check) = @_;
+ defined $octets or return;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
sub from_to($$$;$)
{
my ($string,$from,$to,$check) = @_;
+ defined $string or return;
$check ||=0;
my $f = find_encoding($from);
unless (defined $f){
sub encode_utf8($)
{
my ($str) = @_;
+ defined $str or return;
utf8::encode($str);
return $str;
}
sub decode_utf8($)
{
my ($str) = @_;
+ defined $str or return;
return undef unless utf8::decode($str);
return $str;
}
#!/usr/bin/perl
-# $Id: ucm2table,v 1.1 2002/04/22 23:57:10 dankogai Exp $
+# $Id: ucm2table,v 1.2 2002/04/30 16:13:37 dankogai Exp dankogai $
#
use 5.006;
while(<>){
chomp;
my ($uni, $enc, $fb) =
- /^<U($Hex+)>\s+(\S+)\s+\|(\d)$/o or next;
+ /^<U($Hex+)>\s+(\S+)\s+\|(\d)/o or next;
$fb eq '0' or next;
my @byte = ();
my $ord = 0;
my $start = $Opt{a} ? 0x20 : 0xa0;
-for (my $x = $start; $x <= 0xffff; $ x+= 32) {
+for (my $x = $start; $x <= 0xffff; $x += 32) {
my $line = '';
for my $i (0..31){
my $num = $x+$i; $num eq 0x7f and next; # skip delete
use strict;
use Encode qw(:fallbacks find_encoding);
-our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
my $Canon = 'Guess';
our $DEBUG = 0;
# if you are sure $data won't contain anything bogus
+ use Encode;
use Encode::Guess qw/euc-jp shiftjis 7bit-jis/;
my $utf8 = decode("Guess", $data);
my $data = encode("Guess", $utf8); # this doesn't work!
euc-jp
shiftjis cp932 macJapanese
7bit-jis
- euc-jp
iso-2022-jp [RFC1468]
iso-2022-jp-1 [RFC2237]
jis0201-raw { JIS X 0201 (roman + halfwidth kana) without CES }
Standard DOS/Win Macintosh Comment/Reference
----------------------------------------------------------------
- gb18030
- euc-tw
- big5plus
+ big5ext CMEX's Big5e Extension
+ big5plus CMEX's Big5+ Extension
+ cccii Chinese Character Code for Information Interchange
+ euc-tw EUC (Extended Unix Code)
+ gb18030 GBK with Traditional Characters
+ ----------------------------------------------------------------
+
+=item Encode::JIS2K -- JIS X 0213 encodings via CPAN
+
+Due to size concerns, additional Japanese encodings below are
+distributed separately on CPAN, under the name Encode::JIS2K.
+
+ Standard DOS/Win Macintosh Comment/Reference
+ ----------------------------------------------------------------
+ euc-jisx0213
+ shiftjisx0213
+ iso-2022-jp-3
+ jis0213-1-raw
+ jis0213-2-raw
----------------------------------------------------------------
=back
AdobeSymbol
----------------------------------------------------------------
+=item Encode::MIME::Header
+
+Strictly speaking, the MIME header encoding documented in RFC 2047 is
+more of an encapsulation than an encoding, but it is included anyway.
+
+ ----------------------------------------------------------------
+ MIME-Header [RFC2047]
+ MIME-B [RFC2047]
+ MIME-Q [RFC2047]
+ ----------------------------------------------------------------
+
+=item Encode::Guess
+
+This one is not the name of an encoding but a utility that lets you pick
+the most appropriate encoding for a piece of data out of the given
+I<suspects>. See L<Encode::Guess> for details.
+
=back
=head1 Unsupported encodings
The rule of thumb is to use C<UTF-8> unless you know what
you're doing and unless you really benefit from using C<UTF-16>.
-
ISO-IR-165 [RFC1345]
VISCII
GB 12345
L<Encode::Byte>,
L<Encode::CN>, L<Encode::JP>, L<Encode::KR>, L<Encode::TW>,
-L<Encode::EBCDIC>, L<Encode::Symbol>
+L<Encode::EBCDIC>, L<Encode::Symbol>,
+L<Encode::MIME::Header>, L<Encode::Guess>
=head1 References
#
-# $Id: euc-jp.ucm,v 1.2 2002/04/29 07:01:58 dankogai Exp dankogai $
+# $Id: euc-jp.ucm,v 1.2 2002/04/29 07:01:58 dankogai Exp $
#
<code_set_name> "euc-jp"
<mb_cur_min> 1