From: Jarkko Hietaniemi Date: Fri, 22 Mar 2002 21:48:28 +0000 (+0000) Subject: Upgrade to Encode 0.96, from Dan Kogai. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5129552cc421a69f6981a03ac0ecc86b5722d1e6;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 0.96, from Dan Kogai. p4raw-id: //depot/perl@15430 --- diff --git a/MANIFEST b/MANIFEST index 1403dfd..dd32c99 100644 --- a/MANIFEST +++ b/MANIFEST @@ -197,10 +197,14 @@ ext/DynaLoader/Makefile.PL Dynamic Loader makefile writer ext/DynaLoader/README Dynamic Loader notes and intro ext/DynaLoader/XSLoader_pm.PL Simple XS Loader perl module ext/Encode/AUTHORS Encode extension +ext/Encode/Byte/Byte.pm Encode extension +ext/Encode/Byte/Makefile.PL Encode extension ext/Encode/Changes Encode extension ext/Encode/CN/CN.pm Encode extension ext/Encode/CN/Makefile.PL Encode extension ext/Encode/compile Encode extension +ext/Encode/EBCDIC/EBCDIC.pm Encode extension +ext/Encode/EBCDIC/Makefile.PL Encode extension ext/Encode/encengine.c Encode extension ext/Encode/encode.h Encode extension ext/Encode/Encode.pm Encode extension @@ -370,6 +374,8 @@ ext/Encode/lib/EncodeFormat.pod Encode extension ext/Encode/Makefile.PL Encode extension makefile writer ext/Encode/MANIFEST Encode extension ext/Encode/README Encode extension +ext/Encode/Symbol/Makefile.PL Encode extension +ext/Encode/Symbol/Symbol.pm Encode extension ext/Encode/t/Aliases.t Encode extension ext/Encode/t/CN.t Encode extension test ext/Encode/t/Encode.t Encode extension test @@ -1301,8 +1307,8 @@ lib/Pod/t/basic.txt podlators test lib/Pod/t/Functions.t See if Pod::Functions works lib/Pod/t/htmlescp.pod pod2html escape test input data lib/Pod/t/htmlescp.t pod2html escape test -lib/Pod/t/htmlview.t pod2html render test lib/Pod/t/htmlview.pod pod2html render test input data +lib/Pod/t/htmlview.t pod2html render test lib/Pod/t/InputObjects.t See if Pod::InputObjects works lib/Pod/t/latex.t See if Pod::LaTeX works lib/Pod/t/man.t podlators test 
diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index 9109238..5b65c8a 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -13,6 +13,7 @@ Anton Tagunov Autrijus Tang Dan Kogai Jarkko Hietaniemi +Michael G Schwern Nick Ing-Simmons Paul Marquess SADAHIRO Tomoyuki diff --git a/ext/Encode/Byte/Byte.pm b/ext/Encode/Byte/Byte.pm new file mode 100644 index 0000000..5b49dd9 --- /dev/null +++ b/ext/Encode/Byte/Byte.pm @@ -0,0 +1,83 @@ +package Encode::Byte; +use Encode; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; + +use XSLoader; +XSLoader::load('Encode::Byte',$VERSION); + +1; +__END__ +=head1 NAME + +Encode::Byte - Single Byte Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $latin1 = encode("iso-8859-1", $utf8); # loads Encode::Byte implicitly + $utf8 = decode("iso-8859-1", $latin1); # ditto + +=head1 ABSTRACT + +This module implements various single byte encodings. For most cases it uses +\x80-\xff (upper half) to map non-ASCII characters. Encodings +supported are as follows. 
+ + Canonical Alias Description + -------------------------------------------------------------------- + iso-8859-1 latin1 + iso-8859-2 latin2 + iso-8859-3 latin3 + iso-8859-4 latin4 + iso-8859-5 latin + iso-8859-6 latin + iso-8859-7 + iso-8859-8 + iso-8859-9 latin5 + iso-8859-10 latin6 + iso-8859-11 + (iso-8859-12 is nonexistent) + iso-8859-13 latin7 + iso-8859-14 latin8 + iso-8859-15 latin9 + iso-8859-16 latin10 + + koi8-f + koi8-r + koi8-u + + viscii # ASCII + vietnamese + + cp1250 WinLatin2 + cp1251 WinCyrillic + cp1252 WinLatin1 + cp1253 WinGreek + cp1254 WinTurkish + cp1255 WinHebrew + cp1256 WinArabic + cp1257 WinBaltic + cp1258 WinVietnamese + # all cp* are also available as ibm-* and ms-* + + maccentraleuropean + maccroatian + macroman + maccyrillic + macromanian + macdingbats + macsami + macgreek + macthai + macicelandic + macturkish + macukraine + +=head1 DESCRIPTION + +To find how to use this module in detail, see L<Encode>. + +=head1 SEE ALSO + +L<Encode> + +=cut diff --git a/ext/Encode/Byte/Makefile.PL b/ext/Encode/Byte/Makefile.PL new file mode 100644 index 0000000..a49b4f1 --- /dev/null +++ b/ext/Encode/Byte/Makefile.PL @@ -0,0 +1,167 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; + +my $name = 'Byte'; +my %tables = ( + '8bit' => + [ + 'ascii.ucm', + 'koi8-r.ucm', + 'viscii.ucm', + ], + Mac => + [ + qw(macCentEuro.enc macCroatian.enc + macCyrillic.enc macDingbats.enc + macGreek.enc macIceland.enc + macRoman.enc macRumanian.enc + macSami.enc macThai.enc + macTurkish.enc macUkraine.enc), + ], + + ); + +opendir(ENC,'../Encode'); +while (defined(my $file = readdir(ENC))) +{ + if ($file =~ /(8859|ibm).*\.ucm/io) + { + push(@{$tables{$1}},$file); + } +} +closedir(ENC); + +WriteMakefile( + INC => "-I..", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? 
+ XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{'H'} = [$self->catfile($self->updir,'encode.h')]; + my %xs; + foreach my $table (keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. + foreach my $ext (qw($(OBJ_EXT) .c .h _def.h .fnm)) { + push (@files,$table.$ext); + } + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#include +#include +#include +#define U8 U8 +#include "../encode.h" +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); + int i = 0; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}_def.h"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'Encode'); + my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by compile\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= 
"$name\$(OBJ_EXT) : $name.c\n\n"; + + my $compile = $self->catfile($self->updir,'compile'); + foreach my $table (keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : $compile Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + $str .= $^O eq 'VMS' # In VMS quote to preserve case + ? qq{\n\t\$(PERL) $compile -"Q" -o \$\@ -f $table.fnm\n\n} + : qq{\n\t\$(PERL) $compile -Q -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . "\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/ext/Encode/CN/CN.pm b/ext/Encode/CN/CN.pm index 244f7c7..9fbe843 100644 --- a/ext/Encode/CN/CN.pm +++ b/ext/Encode/CN/CN.pm @@ -1,14 +1,11 @@ package Encode::CN; -our $VERSION = do { my @r = (q$Revision: 0.95 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode; use Encode::CN::HZ; use XSLoader; XSLoader::load('Encode::CN',$VERSION); -Encode::define_alias( qr/euc.*cn$/i => '"euc-cn"' ); -Encode::define_alias( qr/cn.*euc/i => '"euc-cn"' ); - # Relocated from Encode.pm # CP936 doesn't have vendor-addon for GBK, so they're identical. Encode::define_alias( qr/^gbk$/i => '"cp936"'); @@ -41,6 +38,7 @@ Encodings supported are as follows. cp936 Code Page 936, also known as GBK (Extended GuoBiao) hz 7-bit escaped GB2312 encoding + -------------------------------------------------------------------- To find how to use this module in detail, see L. 
diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 1922687..2f7ac07 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,8 +1,45 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 0.95 2002/03/21 15:52:20 dankogai Exp dankogai $ +# $Id: Changes,v 0.96 2002/03/22 22:22:53 dankogai Exp dankogai $ # -0.95 Thu Mar 22 2002 + +0.96 Sat Mar 23 2002 +! TW/TW.pm +! lib/Encode/Encoding.pm +! lib/Encode/Alias.pm +! lib/Encode/Supported.pod +! KR/KR.pm + Pod Fixes by Michael G Schwern via jhi + Message-ID: <20020322073908.GB10539@blackrider> +! Makefile.PL +! Encode.pm + "...I think we should include ISO 8859-1 as well." -- NI-S + Message-Id: <20020322120230.1332.8@bactrian.elixent.com> +! JP/JP.pm +! CN/CN.pm +! KR/KR.pm +! TW/TW.pm +! lib/Encode/Alias.pm + alias definitions relocated to Encode::Alias so module autoloading + works for aliases also. +! Encode.pm + encodings() now accepts args to check ExtModules. ++ Byte/Byte.pm ++ Byte/Makefile.PL ++ EBCDIC/EBCDIC.pm ++ EBCDIC/Makefile.PL ++ Symbol/Makefile.PL ++ Symbol/Symbol.pm +! Encode.pm +! Encode.xs + Latin and single byte encodings are reorganized so they are + demand-loaded like Encode::XX. Now only ascii is compiled into + Encode itself. +! lib/Encode/Alias.pm + for my $k (keys %hash){ delete $hash{$k}; } + is deprecated; fixed. + +0.95 Fri Mar 22 2002 In this update, pod rewrites and alias fixes are the main issues + lib/Encode/Supported.pod Describes supported encodings @@ -30,6 +67,7 @@ gbk => cp936 relocated to CN.pm ! t/CJKalias.t Test::More with plans (by jhi) + 0.94 Thu Mar 21 2002 + lib/Encode/Description.pod ! 
lib/Encode/Encoding.pm diff --git a/ext/Encode/EBCDIC/EBCDIC.pm b/ext/Encode/EBCDIC/EBCDIC.pm new file mode 100644 index 0000000..4064b32 --- /dev/null +++ b/ext/Encode/EBCDIC/EBCDIC.pm @@ -0,0 +1,39 @@ +package Encode::EBCDIC; +use Encode; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; + +use XSLoader; +XSLoader::load('Encode::EBCDIC',$VERSION); + +1; +__END__ +=head1 NAME + +Encode::EBCDIC - EBCDIC Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $posix_bc = encode("posix-bc", $utf8); # loads Encode::EBCDIC implicitly + $utf8 = decode("posix-bc", $posix_bc); # ditto + +=head1 ABSTRACT + +This module implements various EBCDIC-Based encodings. Encodings +supported are as follows. + + Canonical Alias Description + -------------------------------------------------------------------- + cp1047 + cp37 + posix-bc + +=head1 DESCRIPTION + +To find how to use this module in detail, see L<Encode>. + +=head1 SEE ALSO + +L<Encode>, L<perlebcdic> + +=cut diff --git a/ext/Encode/EBCDIC/Makefile.PL b/ext/Encode/EBCDIC/Makefile.PL new file mode 100644 index 0000000..607406c --- /dev/null +++ b/ext/Encode/EBCDIC/Makefile.PL @@ -0,0 +1,142 @@ +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; + +my $name = 'EBCDIC'; +my %tables = ( + ebcdic => ['cp1047.ucm','cp37.ucm','posix-bc.ucm'], + ); + +WriteMakefile( + INC => "-I..", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? 
+ XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{'H'} = [$self->catfile($self->updir,'encode.h')]; + my %xs; + foreach my $table (keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. + foreach my $ext (qw($(OBJ_EXT) .c .h _def.h .fnm)) { + push (@files,$table.$ext); + } + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#include +#include +#include +#define U8 U8 +#include "../encode.h" +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); + int i = 0; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}_def.h"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'Encode'); + my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by compile\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= 
"$name\$(OBJ_EXT) : $name.c\n\n"; + + my $compile = $self->catfile($self->updir,'compile'); + foreach my $table (keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : $compile Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + $str .= $^O eq 'VMS' # In VMS quote to preserve case + ? qq{\n\t\$(PERL) $compile -"Q" -o \$\@ -f $table.fnm\n\n} + : qq{\n\t\$(PERL) $compile -Q -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . "\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index bdfd686..be7547f 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,6 +1,7 @@ package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 0.95 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $DEBUG = 0; require DynaLoader; require Exporter; @@ -20,7 +21,6 @@ our @EXPORT = qw ( our @EXPORT_OK = qw( define_encoding - define_alias from_to is_utf8 is_8bit @@ -39,50 +39,78 @@ use Carp; use Encode::Alias; -# Make a %encoding package variable to allow a certain amount of cheating -our %encoding; +# Make a %Encoding package variable to allow a certain amount of cheating +our %Encoding; -our %external_tables = +our %ExtModule = ( - 'euc-cn' => 'Encode/CN.pm', - gb2312 => 'Encode/CN.pm', - gb12345 => 'Encode/CN.pm', - gbk => 'Encode/CN.pm', - cp936 => 'Encode/CN.pm', - 'iso-ir-165' => 'Encode/CN.pm', - 'euc-jp' => 'Encode/JP.pm', - 'iso-2022-jp' => 'Encode/JP.pm', - '7bit-jis' => 'Encode/JP.pm', - shiftjis => 
'Encode/JP.pm', - macjapan => 'Encode/JP.pm', - cp932 => 'Encode/JP.pm', - 'euc-kr' => 'Encode/KR.pm', - ksc5601 => 'Encode/KR.pm', - cp949 => 'Encode/KR.pm', - big5 => 'Encode/TW.pm', - 'big5-hkscs' => 'Encode/TW.pm', - cp950 => 'Encode/TW.pm', - gb18030 => 'Encode/HanExtra.pm', - big5plus => 'Encode/HanExtra.pm', - 'euc-tw' => 'Encode/HanExtra.pm', + viscii => 'Encode/Byte.pm', + 'koi8-r' => 'Encode/Byte.pm', + cp1047 => 'Encode/EBCDIC.pm', + cp37 => 'Encode/EBCDIC.pm', + 'posix-bc' => 'Encode/EBCDIC.pm', + symbol => 'Encode/Symbol.pm', + dingbats => 'Encode/Symbol.pm', + 'euc-cn' => 'Encode/CN.pm', + gb2312 => 'Encode/CN.pm', + gb12345 => 'Encode/CN.pm', + gbk => 'Encode/CN.pm', + cp936 => 'Encode/CN.pm', + 'iso-ir-165' => 'Encode/CN.pm', + 'euc-jp' => 'Encode/JP.pm', + 'iso-2022-jp' => 'Encode/JP.pm', + '7bit-jis' => 'Encode/JP.pm', + shiftjis => 'Encode/JP.pm', + macjapan => 'Encode/JP.pm', + cp932 => 'Encode/JP.pm', + 'euc-kr' => 'Encode/KR.pm', + ksc5601 => 'Encode/KR.pm', + cp949 => 'Encode/KR.pm', + big5 => 'Encode/TW.pm', + 'big5-hkscs' => 'Encode/TW.pm', + cp950 => 'Encode/TW.pm', + gb18030 => 'Encode/HanExtra.pm', + big5plus => 'Encode/HanExtra.pm', + 'euc-tw' => 'Encode/HanExtra.pm', ); +for my $k (2..11,13..16){ + $ExtModule{"iso-8859-$k"} = 'Encode/Byte.pm'; +} + +for my $k (1250..1258){ + $ExtModule{"cp$k"} = 'Encode/Byte.pm'; +} + +for my $k (qw(centeuro croatian cyrillic dingbats greek + iceland roman rumanian sami + thai turkish ukraine)) +{ + $ExtModule{"mac$k"} = 'Encode/Byte.pm'; +} + + sub encodings { - my ($class) = @_; - return - map { $_->[0] } - sort { $a->[1] cmp $b->[1] } - map { [$_, lc $_] } - grep { $_ ne 'Internal' } - keys %encoding; + my $class = shift; + my @modules = ($_[0] eq ":all") ? 
values %ExtModule : @_; + for my $m (@modules) + { + $DEBUG and warn "about to require $m;"; + eval { require $m; }; + } + return + map({$_->[0]} + sort({$a->[1] cmp $b->[1]} + map({[$_, lc $_]} + grep({ $_ ne 'Internal' } keys %Encoding)))); } sub define_encoding { my $obj = shift; my $name = shift; - $encoding{$name} = $obj; + $Encoding{$name} = $obj; my $lc = lc($name); define_alias($lc => $obj) unless $lc eq $name; while (@_) @@ -102,25 +130,25 @@ sub getEncoding return $name; } my $lc = lc $name; - if (exists $encoding{$name}) + if (exists $Encoding{$name}) { - return $encoding{$name}; + return $Encoding{$name}; } - if (exists $encoding{$lc}) + if (exists $Encoding{$lc}) { - return $encoding{$lc}; + return $Encoding{$lc}; } - my $oc = $class->findAlias($name); + my $oc = $class->find_alias($name); return $oc if defined $oc; - $oc = $class->findAlias($lc) if $lc ne $name; + $oc = $class->find_alias($lc) if $lc ne $name; return $oc if defined $oc; - if (!$skip_external and exists $external_tables{$lc}) + if (!$skip_external and exists $ExtModule{$lc}) { - require $external_tables{$lc}; - return $encoding{$name} if exists $encoding{$name}; + eval{ require $ExtModule{$lc}; }; + return $Encoding{$name} if exists $Encoding{$name}; } return; @@ -350,17 +378,28 @@ For CHECK see L. =head2 Listing available encodings - use Encode qw(encodings); - @list = encodings(); + use Encode; + @list = Encode->encodings(); + +Returns a list of the canonical names of the available encodings that +are loaded. To get a list of all available encodings including the +ones that are not loaded yet, say + + @all_encodings = Encode->encodings(":all"); + +Or you can give the name of specific module. + + @with_jp = Encode->encodings("Encode/JP.pm"); -Returns a list of the canonical names of the available encodings. +Note in this case you have to say "Encode/JP.pm instead of Encode::JP. To find which encodings are suppoted by this package in details, see L. 
=head2 Defining Aliases - use Encode qw(define_alias); + use Encode; + use Encode::Alias; define_alias( newName => ENCODING); Allows newName to be used as am alias for ENCODING. ENCODING may be diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 44e5e22..9bd8a4c 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -5,10 +5,10 @@ #include "XSUB.h" #define U8 U8 #include "encode.h" -#include "8859.h" -#include "EBCDIC.h" -#include "Symbols.h" - +/* #include "8859.h" */ +/* #include "EBCDIC.h" */ +/* #include "Symbols.h" */ +#include "defcodes.h" #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ Perl_croak(aTHX_ "panic_unimplemented"); \ @@ -782,7 +782,8 @@ BOOT: #if defined(USE_PERLIO) && !defined(USE_SFIO) PerlIO_define_layer(aTHX_ &PerlIO_encode); #endif -#include "8859_def.h" -#include "EBCDIC_def.h" -#include "Symbols_def.h" +/* #include "8859_def.h" */ +/* #include "EBCDIC_def.h" */ +/* #include "Symbols_def.h" */ +#include "defcodes_def.h" } diff --git a/ext/Encode/JP/JP.pm b/ext/Encode/JP/JP.pm index 3091a99..cff0d98 100644 --- a/ext/Encode/JP/JP.pm +++ b/ext/Encode/JP/JP.pm @@ -5,7 +5,7 @@ BEGIN { } } use Encode; -our $VERSION = do { my @r = (q$Revision: 0.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; XSLoader::load('Encode::JP',$VERSION); @@ -13,13 +13,6 @@ XSLoader::load('Encode::JP',$VERSION); use Encode::JP::JIS; use Encode::JP::ISO_2022_JP; -Encode::define_alias( qr/euc.*jp$/i => '"euc-jp"' ); -Encode::define_alias( qr/jp.*euc/i => '"euc-jp"' ); -Encode::define_alias( qr/ujis$/i => '"euc-jp"' ); -Encode::define_alias( qr/shift.*jis$/i => '"shiftjis"' ); -Encode::define_alias( qr/sjis$/i => '"shiftjis"' ); -Encode::define_alias( qr/^jis$/i => '"7bit-jis"' ); - 1; __END__ =head1 NAME @@ -49,7 +42,8 @@ supported are as follows. 
(7bit JIS with all Halfwidth Kana converted to Fullwidth) macjapan Mac Japan (Shift JIS + Apple vendor mappings) - cp932 Code Page 932 (Shift JIS + Microsoft vendor mappings) + cp932 Code Page 932 (Shift JIS + MS/IBM vendor mappings) + -------------------------------------------------------------------- =head1 DESCRIPTION diff --git a/ext/Encode/KR/KR.pm b/ext/Encode/KR/KR.pm index 2a6507a..e920cf7 100644 --- a/ext/Encode/KR/KR.pm +++ b/ext/Encode/KR/KR.pm @@ -1,13 +1,10 @@ package Encode::KR; -our $VERSION = do { my @r = (q$Revision: 0.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode; use XSLoader; XSLoader::load('Encode::KR',$VERSION); -Encode::define_alias( qr/euc.*kr$/i => '"euc-kr"' ); -Encode::define_alias( qr/kr.*euc/i => '"euc-kr"' ); - 1; __END__ =head1 NAME @@ -33,6 +30,7 @@ are as follows. ksc5601 Korean standard code set cp949 Code Page 949 (EUC-KR + Unified Hangul Code) + -------------------------------------------------------------------- To find how to use this module in detail, see L. 
diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index 1dec5ce..24adaca 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -1,7 +1,11 @@ AUTHORS List of authors +Byte/Byte.pm Encode extension +Byte/Makefile.PL Encode extension CN/CN.pm Encode extension CN/Makefile.PL Encode extension Changes Change Log +EBCDIC/EBCDIC.pm Encode extension +EBCDIC/Makefile.PL Encode extension Encode.pm Encode extension Encode.xs Encode extension Encode/11643-1.enc Encode table @@ -150,6 +154,8 @@ KR/Makefile.PL Encode extension MANIFEST Encode extension Makefile.PL Encode extension makefile writer README Encode extension +Symbol/Makefile.PL Encode extension +Symbol/Symbol.pm Encode extension TW/Makefile.PL Encode extension TW/TW.pm Encode extension compile Encode extension diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL index ff80352..1afc725 100644 --- a/ext/Encode/Makefile.PL +++ b/ext/Encode/Makefile.PL @@ -4,34 +4,11 @@ use ExtUtils::MakeMaker; my %tables = ( - 8859 => ['ascii.ucm', 'koi8-r.ucm', 'viscii.ucm', - 'ibm-1250.ucm', 'ibm-1251.ucm', - 'ibm-1253.ucm', 'ibm-1254.ucm', - 'ibm-1255.ucm', 'ibm-1256.ucm', - 'ibm-1257.ucm', 'ibm-1258.ucm', - 'ibm-1252.ucm', - qw(macCentEuro.enc macCroatian.enc - macCyrillic.enc macDingbats.enc - macGreek.enc macIceland.enc - macRoman.enc macRumanian.enc - macSami.enc macThai.enc - macTurkish.enc macUkraine.enc), - ], - EBCDIC => ['cp1047.ucm','cp37.ucm','posix-bc.ucm'], - Symbols => ['symbol.ucm','dingbats.ucm'], + defcodes => ['ascii.ucm', + '8859-1.ucm', + ] ); -opendir(ENC,'Encode'); -while (defined(my $file = readdir(ENC))) -{ - if ($file =~ /8859.*\.ucm/) - { - push(@{$tables{8859}},$file); - } -} -closedir(ENC); - - WriteMakefile( NAME => "Encode", VERSION_FROM => 'Encode.pm', diff --git a/ext/Encode/Symbol/Makefile.PL b/ext/Encode/Symbol/Makefile.PL new file mode 100644 index 0000000..be99058 --- /dev/null +++ b/ext/Encode/Symbol/Makefile.PL @@ -0,0 +1,142 @@ +use 5.7.2; +use strict; +use 
ExtUtils::MakeMaker; + +my $name = 'Symbol'; +my %tables = ( + symbol => ['symbol.ucm','dingbats.ucm'], + ); + +WriteMakefile( + INC => "-I..", + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? + XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + $self->{'H'} = [$self->catfile($self->updir,'encode.h')]; + my %xs; + foreach my $table (keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. + foreach my $ext (qw($(OBJ_EXT) .c .h _def.h .fnm)) { + push (@files,$table.$ext); + } + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#include +#include +#include +#define U8 U8 +#include "../encode.h" +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); + int i = 0; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}_def.h"\n]; + } + print XS "}\n"; 
+ close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = $self->catdir($self->updir,'Encode'); + my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by compile\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + my $compile = $self->catfile($self->updir,'compile'); + foreach my $table (keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : $compile Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + $str .= $^O eq 'VMS' # In VMS quote to preserve case + ? qq{\n\t\$(PERL) $compile -"Q" -o \$\@ -f $table.fnm\n\n} + : qq{\n\t\$(PERL) $compile -Q -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . "\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/ext/Encode/Symbol/Symbol.pm b/ext/Encode/Symbol/Symbol.pm new file mode 100644 index 0000000..c0e0013 --- /dev/null +++ b/ext/Encode/Symbol/Symbol.pm @@ -0,0 +1,38 @@ +package Encode::Symbol; +use Encode; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; + +use XSLoader; +XSLoader::load('Encode::Symbol',$VERSION); + +1; +__END__ +=head1 NAME + +Encode::Symbol - Symbol Encodings + +=head1 SYNOPSIS + + use Encode qw/encode decode/; + $symbol = encode("symbol", $utf8); # loads Encode::Symbol implicitly + $utf8 = decode("symbol", $symbol); # ditto + +=head1 ABSTRACT + +This module implements symbol and dingbats encodings. Encodings +supported are as follows. 
+ + Canonical Alias Description + -------------------------------------------------------------------- + symbol + dingbats + +=head1 DESCRIPTION + +To find how to use this module in detail, see L<Encode>. + +=head1 SEE ALSO + +L<Encode> + +=cut diff --git a/ext/Encode/TW/TW.pm b/ext/Encode/TW/TW.pm index 3daa2a1..58764ba 100644 --- a/ext/Encode/TW/TW.pm +++ b/ext/Encode/TW/TW.pm @@ -1,13 +1,10 @@ package Encode::TW; -our $VERSION = do { my @r = (q$Revision: 0.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode; use XSLoader; XSLoader::load('Encode::TW',$VERSION); -Encode::define_alias( qr/big-?5$/i => '"big5"' ); -Encode::define_alias( qr/big5-hk(?:scs)?/i => '"big5-hkscs"' ); - 1; __END__ =head1 NAME @@ -32,7 +29,8 @@ Encodings supported are as follows. Hong Kong cp950 Code Page 950 (Big5 + Microsoft vendor mappings) - + -------------------------------------------------------------------- + To find how to use this module in detail, see L<Encode>. 
=head1 NOTES diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm index 83d09ae..5f7d345 100644 --- a/ext/Encode/lib/Encode/Alias.pm +++ b/ext/Encode/lib/Encode/Alias.pm @@ -1,22 +1,24 @@ package Encode::Alias; use strict; -use Encode qw(find_encoding); -our $VERSION = do { my @r = (q$Revision: 0.95 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +use Encode; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; require Exporter; our @ISA = qw(Exporter); # Public, encouraged API is exported by default -our @EXPORT = qw ( - findAlias - define_alias - ); + +our @EXPORT = + qw ( + define_alias + find_alias + ); our @Alias; # ordered matching list our %Alias; # cached known aliases -sub findAlias +sub find_alias { my $class = shift; local $_ = shift; @@ -63,7 +65,8 @@ sub define_alias unshift(@Alias, $alias => $name); # newer one has precedence # clear %Alias cache to allow overrides if (ref($alias)){ - for my $k (keys %Alias){ + my @a = keys %Alias; + for my $k (@a){ if (ref($alias) eq 'Regexp' && $k =~ $alias) { $DEBUG and warn $k; @@ -80,30 +83,9 @@ sub define_alias } } - -# Allow variants of iso-8859-1 etc. -define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' ); - -# At least HP-UX has these. -define_alias( qr/^iso8859(\d+)$/i => '"iso-8859-$1"' ); - -# More HP stuff. -define_alias( qr/^(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' ); - -# The Official name of ASCII. -define_alias( qr/^ANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' ); - -# This is a font issue, not an encoding issue. -# (The currency symbol of the Latin 1 upper half -# has been redefined as the euro symbol.) 
-define_alias( qr/^(.+)\@euro$/i => '"$1"' ); - # Allow latin-1 style names as well # 0 1 2 3 4 5 6 7 8 9 10 our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 ); -define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i - => '"iso-8859-$Encode::Alias::Latin2iso[$1]"' ); - # Allow winlatin1 style names as well our %Winlatin2cp = ( 'latin1' => 1252, @@ -117,34 +99,91 @@ our %Winlatin2cp = ( 'vietnamese' => 1258, ); -define_alias( qr/win(latin[12]|cyrillic|baltic|greek|turkish| - hebrew|arabic|baltic|vietnamese)$/ix => - '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' ); +init_aliases(); + +sub undef_aliases{ + @Alias = (); + %Alias = (); +} + +sub init_aliases +{ + undef_aliases(); +# Allow variants of iso-8859-1 etc. + define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' ); + +# At least HP-UX has these. + define_alias( qr/^iso8859(\d+)$/i => '"iso-8859-$1"' ); + +# More HP stuff. + define_alias( qr/^(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' ); + +# The Official name of ASCII. + define_alias( qr/^ANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' ); + +# This is a font issue, not an encoding issue. +# (The currency symbol of the Latin 1 upper half +# has been redefined as the euro symbol.) + define_alias( qr/^(.+)\@euro$/i => '"$1"' ); + + define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i + => '"iso-8859-$Encode::Alias::Latin2iso[$1]"' ); + + define_alias( qr/win(latin[12]|cyrillic|baltic|greek|turkish| + hebrew|arabic|baltic|vietnamese)$/ix => + '"cp" . 
$Encode::Alias::Winlatin2cp{lc($1)}' ); # Common names for non-latin prefered MIME names -define_alias( 'ascii' => 'US-ascii', - 'cyrillic' => 'iso-8859-5', - 'arabic' => 'iso-8859-6', - 'greek' => 'iso-8859-7', - 'hebrew' => 'iso-8859-8', - 'thai' => 'iso-8859-11', - 'tis620' => 'iso-8859-11', - ); + define_alias( 'ascii' => 'US-ascii', + 'cyrillic' => 'iso-8859-5', + 'arabic' => 'iso-8859-6', + 'greek' => 'iso-8859-7', + 'hebrew' => 'iso-8859-8', + 'thai' => 'iso-8859-11', + 'tis620' => 'iso-8859-11', + ); # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN. # And Microsoft has their own naming (again, surprisingly). -define_alias( qr/^(?:ibm|ms)[-_]?(\d\d\d\d?)$/i => '"cp$1"'); + define_alias( qr/^(?:ibm|ms)[-_]?(\d\d\d\d?)$/i => '"cp$1"'); # Sometimes seen with a leading zero. -define_alias( qr/^cp037$/i => '"cp37"'); + define_alias( qr/^cp037$/i => '"cp37"'); # Ououououou. -define_alias( qr/^macRomanian$/i => '"macRumanian"'); + define_alias( qr/^macRomanian$/i => '"macRumanian"'); # Standardize on the dashed versions. 
-define_alias( qr/^utf8$/i => 'utf-8' ); -define_alias( qr/^koi8r$/i => 'koi8-r' ); -define_alias( qr/^koi8u$/i => 'koi8-u' ); + define_alias( qr/^utf8$/i => 'utf-8' ); + define_alias( qr/^koi8r$/i => 'koi8-r' ); + define_alias( qr/^koi8u$/i => 'koi8-u' ); + +# for Encode::CN + define_alias( qr/euc.*cn$/i => '"euc-cn"' ); + define_alias( qr/cn.*euc/i => '"euc-cn"' ); + +# for Encode::JP + define_alias( qr/euc.*jp$/i => '"euc-jp"' ); + define_alias( qr/jp.*euc/i => '"euc-jp"' ); + define_alias( qr/ujis$/i => '"euc-jp"' ); + define_alias( qr/shift.*jis$/i => '"shiftjis"' ); + define_alias( qr/sjis$/i => '"shiftjis"' ); + define_alias( qr/^jis$/i => '"7bit-jis"' ); + +# for Encode::KR + define_alias( qr/euc.*kr$/i => '"euc-kr"' ); + define_alias( qr/kr.*euc/i => '"euc-kr"' ); + +# for Encode::TW + define_alias( qr/big-?5$/i => '"big5"' ); + define_alias( qr/big5-hk(?:scs)?/i => '"big5-hkscs"' ); + +# At last, Map white space and _ to '-' + define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); +} + +1; +__END__ # TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8 # TODO: HP-UX '15' encodings japanese15 korean15 roi15 @@ -160,18 +199,14 @@ define_alias( qr/^koi8u$/i => 'koi8-u' ); # Kannada Khmer Korean Laotian Malayalam Mongolian # Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese -# Map white space and _ to '-' -define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); - -1; -__END__ =head1 NAME Encode::Alias - alias defintions to encodings =head1 SYNOPSIS - use Encode qw(define_alias); + use Encode; + use Encode::Alias; define_alias( newName => ENCODING); =head1 DESCRIPTION @@ -207,7 +242,9 @@ I is passed to the sub as its first argument. The example is another way to names as used in X11 font names to alias the MIME names for the iso-8859-* family. -=item Alias overloading +=back + +=head2 Alias overloading You can override predefined aliases by simply applying define_alias(). 
New alias is always evaluated first and when necessary define_alias() @@ -216,12 +253,23 @@ flushes internal cache to make new definition available. # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a # superset of SHIFT_JIS - Encode::define_alias( qr/shift.*jis$/i => '"cp932"' ); - Encode::define_alias( qr/sjis$/i => '"cp932"' ); + define_alias( qr/shift.*jis$/i => '"cp932"' ); + define_alias( qr/sjis$/i => '"cp932"' ); + +If you want to zap all predefined aliases, you can + + Encode::Alias->undef_aliases; + +to do so. And + + Encode::Alias->init_aliases; + +gets the factory settings back. + =head1 SEE ALSO L<Encode>, L<Encode::Supported> -=back +=cut diff --git a/ext/Encode/lib/Encode/Encoding.pm b/ext/Encode/lib/Encode/Encoding.pm index 3327fa7..d2cb803 100644 --- a/ext/Encode/lib/Encode/Encoding.pm +++ b/ext/Encode/lib/Encode/Encoding.pm @@ -1,7 +1,7 @@ package Encode::Encoding; # Base class for classes which implement encodings use strict; -our $VERSION = do { my @r = (q$Revision: 0.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; sub Define { @@ -36,7 +36,7 @@ Encode::Encoding - Encode Implementation Base Class __PACKAGE__->Define(qw(myCanonical myAlias)); -=head 1 DESCRIPTION +=head1 DESCRIPTION As mentioned in L<Encode>, encodings are (in the current implementation at least) defined by objects.
The mapping of encoding diff --git a/ext/Encode/lib/Encode/Internal.pm b/ext/Encode/lib/Encode/Internal.pm index b510d42..c7db507 100644 --- a/ext/Encode/lib/Encode/Internal.pm +++ b/ext/Encode/lib/Encode/Internal.pm @@ -1,6 +1,6 @@ package Encode::Internal; use strict; -our $VERSION = do { my @r = (q$Revision: 0.92 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use base 'Encode::Encoding'; @@ -9,7 +9,9 @@ use base 'Encode::Encoding'; __PACKAGE__->Define('Internal'); -Encode::define_alias( 'Unicode' => 'Internal' ) if ord('A') == 65; +if (ord('A') == 65){ + Encode::define_alias( 'Unicode' => 'Internal' ); +} sub decode { diff --git a/ext/Encode/lib/Encode/Supported.pod b/ext/Encode/lib/Encode/Supported.pod index 2676db2..d48d14d 100644 --- a/ext/Encode/lib/Encode/Supported.pod +++ b/ext/Encode/lib/Encode/Supported.pod @@ -4,7 +4,7 @@ Encode::Supported -- Supported encodings by Encode =head1 DESCRIPTION -=Encoding Names +=head2 Encoding Names Encoding names are case insensitive. White space in names is ignored. In addition an encoding may have aliases. @@ -16,35 +16,42 @@ he first in the following sequence: o The name in the IANA registry. o The name used by the organization that defined it. -Because of all the alias issues, and because in the gen- -eral case encodings have state, "Encode" uses the encoding -object internally once an operation is in progress. +Because of all the alias issues, and because in the general case +encodings have state, "Encode" uses the encoding object internally +once an operation is in progress. -=head2 Supported Encodings +=head1 Supported Encodings As of Perl 5.8.0, at least the following encodings are recognized. Note that unless otherwise specified, they are all case insensitive (via alias) and all occurance of spaces are replaced with '-'. In other words, "ISO 8859 1" and "iso-8859-1" are identical. 
-=head3 ASCII +Encodings are categorized and implemented in several different modules +but you don't have to C<use Encode::XX> to make them available for +most cases. Encode.pm will automatically load those modules as needed. - Canonical Aliases - ----------------------- - ascii uc-ascii +=head2 Built-in Encodings -=head3 The Unicode +The following encodings are always available. - utf8 UTF-8 - utf16 UTF-16 - ucs2 UCS-2, iso-10646-1 + Canonical Aliases + ----------------------- + iso-8859-1 latin1 + US-ascii ascii + UCS-2 ucs2, iso-10646-1 + UCS-2le + UTF-8 utf8 + ----------------------- -=head3 The ISO 8859, KOI, and other 1-byte encodings +=head2 Encode::Byte -The following encodings are based upon ASCII. For most cases it uses -\x80-\xff (upper half) to map non-ASCII characters. +The following encodings are single-byte encodings implemented as +extended ASCII. For most cases it uses \x80-\xff (upper half) to map +non-ASCII characters. - iso-8859-1 latin1 + ----------------------- + iso-8859-1 latin1 iso-8859-2 latin2 iso-8859-3 latin3 iso-8859-4 latin4 @@ -90,52 +97,91 @@ The following encodings are based upon ASCII. For most cases it uses macicelandic macturkish macukraine + ----------------------- -=head3 The CJK: Chinese, Japanese, Korean (Multibyte) +=head2 The CJK: Chinese, Japanese, Korean (Multibyte) Note Vietnamese is listed above. Also read "Encoding vs Charset" below. Also note these are implemented in distinct module by languages, due to size concerns. See these perldocs also.
- cp936 gbk # Encode::CN - euc-cn # Encode::CN - gb12345 # Encode::CN - gb2312 # Encode::CN - gb2312 # Encode::CN - hz # Encode::CN - iso-ir-165 # Encode::CN - - 7bit-jis jis # Encode::JP - cp932 # Encode::JP - euc-jp ujis # Encode::JP - iso-2022-jp # Encode::JP - macjapan # Encode::JP - shiftjis Shift_JIS, sjis # Encode::JP - - euc-kr # Encode::KR - ksc5601 # Encode::KR - cp949 # Encode::KR - - big5 # Encode::TW - big5-hkscs # Encode::TW - cp950 # Encode::TW - -Due to size concerns, additional Chinese encodings including "GB -18030", "EUC-TW" and "BIG5PLUS" are distributed separately on CPAN, -under the name Encode::HanExtra. - -=head3 EBCDIC +=over 4 + +=item Encode::CN -- Continental China + + ----------------------- + cp936 gbk + euc-cn + gb12345 + gb2312 + hz + iso-ir-165 + ----------------------- + +=item Encode::JP -- Japan + + ----------------------- + 7bit-jis jis + cp932 + euc-jp ujis + iso-2022-jp + macjapan + shiftjis Shift_JIS, sjis + ----------------------- + +=item Encode::KR -- Korea + + ----------------------- + euc-kr + ksc5601 + cp949 + ----------------------- + +=item Encode::TW -- Taiwan + + ----------------------- + big5 + big5-hkscs + cp950 + ----------------------- + +=item Encode::HanExtra -- More Chinese via CPAN + +Due to size concerns, additional Chinese encodings below are +distributed separately on CPAN, under the name Encode::HanExtra. + + ----------------------- + gb18030 + euc-tw + big5plus + ----------------------- + +=back + +=head2 Miscellaneous encodings + +=over 4 + +=item Encode::EBCDIC See perlebcdic for details. + ----------------------- cp1047 cp37 posix-bc + ----------------------- + +=item Encode::Symbol -=head3 Symbols and dingbats +For symbols and dingbats. + ----------------------- symbol dingbats + ----------------------- + +=back =head1 Encoding vs.
Charset @@ -208,6 +254,9 @@ F (eq ISO-2022) =head1 See Also -L, L, L, L, L +L, +L, +L, L, L, L +L, L =cut diff --git a/ext/Encode/t/Aliases.t b/ext/Encode/t/Aliases.t index 3afaeae..70dd09f 100644 --- a/ext/Encode/t/Aliases.t +++ b/ext/Encode/t/Aliases.t @@ -1,10 +1,8 @@ #!../perl use strict; -use Encode::CN; -use Encode::JP; -use Encode::KR; -use Encode::TW; +use Encode; +use Encode::Alias; my %a2c; @@ -55,7 +53,7 @@ BEGIN { } } -use Test::More tests => (scalar keys %a2c) * 2; +use Test::More tests => (scalar keys %a2c) * 3; print "# alias test\n"; @@ -66,8 +64,8 @@ foreach my $a (keys %a2c){ # now we override some of the aliases and see if it works fine -Encode::define_alias( qr/shift.*jis$/i => '"macjapan"' ); -Encode::define_alias( qr/sjis$/i => '"cp932"' ); +define_alias( qr/shift.*jis$/i => '"macjapan"' ); +define_alias( qr/sjis$/i => '"cp932"' ); @a2c{qw(Shift_JIS x-sjis)} = qw(macjapan cp932); @@ -78,6 +76,14 @@ foreach my $a (keys %a2c){ is((defined($e) and $e->name), $a2c{$a}); } +print "# alias undef test\n"; + +Encode::Alias->undef_aliases; +foreach my $a (keys %a2c){ + my $e = Encode::find_encoding($a); + ok(!defined($e)); +} + __END__ for (my $i = 0; $i < @Encode::Alias::Alias; $i+=2){ my ($k, $v) = @Encode::Alias::Alias[$i, $i+1];