From: Jarkko Hietaniemi Date: Thu, 21 Mar 2002 16:16:57 +0000 (+0000) Subject: Upgrade to Encode 0.95, from Dan Kogai. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5d030b67970a2d62f2bb17edccb2e92ad0d9cad3;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 0.95, from Dan Kogai. p4raw-id: //depot/perl@15391 --- diff --git a/MANIFEST b/MANIFEST index 88c5c93..5a1e01a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -298,6 +298,15 @@ ext/Encode/Encode/gb12345.enc Encode table ext/Encode/Encode/gb1988.enc Encode table ext/Encode/Encode/gb2312.enc Encode table ext/Encode/Encode/gsm0338.enc Encode table +ext/Encode/Encode/ibm-1250.ucm Encode table +ext/Encode/Encode/ibm-1251.ucm Encode table +ext/Encode/Encode/ibm-1252.ucm Encode table +ext/Encode/Encode/ibm-1253.ucm Encode table +ext/Encode/Encode/ibm-1254.ucm Encode table +ext/Encode/Encode/ibm-1255.ucm Encode table +ext/Encode/Encode/ibm-1256.ucm Encode table +ext/Encode/Encode/ibm-1257.ucm Encode table +ext/Encode/Encode/ibm-1258.ucm Encode table ext/Encode/Encode/ir-197.enc Encode table ext/Encode/Encode/iso-ir-165.enc Encode table ext/Encode/Encode/jis0201.enc Encode table @@ -339,8 +348,9 @@ ext/Encode/JP/JP.pm Encode extension ext/Encode/JP/Makefile.PL Encode extension ext/Encode/KR/KR.pm Encode extension ext/Encode/KR/Makefile.PL Encode extension +ext/Encode/lib/Encode/Alias.pm Encode extension ext/Encode/lib/Encode/CN/HZ.pm Encode extension -ext/Encode/lib/Encode/Description.pod Encode extension +ext/Encode/lib/Encode/Details.pod Encode extension ext/Encode/lib/Encode/Encoding.pm Encode extension ext/Encode/lib/Encode/Internal.pm Encode extension ext/Encode/lib/Encode/iso10646_1.pm Encode extension @@ -348,6 +358,7 @@ ext/Encode/lib/Encode/JP/Constants.pm Encode extension ext/Encode/lib/Encode/JP/H2Z.pm Encode extension ext/Encode/lib/Encode/JP/ISO_2022_JP.pm Encode extension ext/Encode/lib/Encode/JP/JIS.pm Encode extension +ext/Encode/lib/Encode/Supported.pod Encode extension 
ext/Encode/lib/Encode/Tcl.pm Encode extension ext/Encode/lib/Encode/Tcl/Escape.pm Encode extension ext/Encode/lib/Encode/Tcl/Table.pm Encode extension @@ -359,7 +370,7 @@ ext/Encode/lib/EncodeFormat.pod Encode extension ext/Encode/Makefile.PL Encode extension makefile writer ext/Encode/MANIFEST Encode extension ext/Encode/README Encode extension -ext/Encode/t/CJKalias.t Encode extension +ext/Encode/t/Aliases.t Encode extension ext/Encode/t/CN.t Encode extension test ext/Encode/t/Encode.t Encode extension test ext/Encode/t/jisx0208.euc Encode extension test @@ -1287,14 +1298,14 @@ lib/Pod/t/basic.ovr podlators test lib/Pod/t/basic.pod podlators test lib/Pod/t/basic.t podlators test lib/Pod/t/basic.txt podlators test -lib/Pod/t/htmlescp.t pod2html escape test -lib/Pod/t/htmlescp.pod pod2html escape test input data -lib/Pod/t/pod2html-lib.pl pod2html testing library lib/Pod/t/Functions.t See if Pod::Functions works +lib/Pod/t/htmlescp.pod pod2html escape test input data +lib/Pod/t/htmlescp.t pod2html escape test lib/Pod/t/InputObjects.t See if Pod::InputObjects works lib/Pod/t/latex.t See if Pod::LaTeX works lib/Pod/t/man.t podlators test lib/Pod/t/parselink.t podlators test +lib/Pod/t/pod2html-lib.pl pod2html testing library lib/Pod/t/Select.t See if Pod::Select works lib/Pod/t/text-errors.t podlators test lib/Pod/t/Usage.t See if Pod::Usage works diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index a131c51..9109238 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -14,4 +14,5 @@ Autrijus Tang Dan Kogai Jarkko Hietaniemi Nick Ing-Simmons +Paul Marquess SADAHIRO Tomoyuki diff --git a/ext/Encode/CN/CN.pm b/ext/Encode/CN/CN.pm index 12a9fd3..244f7c7 100644 --- a/ext/Encode/CN/CN.pm +++ b/ext/Encode/CN/CN.pm @@ -1,5 +1,5 @@ package Encode::CN; -our $VERSION = do { my @r = (q$Revision: 0.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 0.95 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode; use 
Encode::CN::HZ; @@ -9,6 +9,10 @@ XSLoader::load('Encode::CN',$VERSION); Encode::define_alias( qr/euc.*cn$/i => '"euc-cn"' ); Encode::define_alias( qr/cn.*euc/i => '"euc-cn"' ); +# Relocated from Encode.pm +# CP936 doesn't have vendor-addon for GBK, so they're identical. +Encode::define_alias( qr/^gbk$/i => '"cp936"'); + 1; __END__ =head1 NAME diff --git a/ext/Encode/Changes b/ext/Encode/Changes index b48001e..1922687 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,8 +1,35 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 0.94 2002/03/20 19:59:38 dankogai Exp dankogai $ +# $Id: Changes,v 0.95 2002/03/21 15:52:20 dankogai Exp dankogai $ # - +0.95 Thu Mar 22 2002 + In this update, pod rewrites and alias fixes are the main issues ++ lib/Encode/Supported.pod + Describes supported encodings +! Makefile.PL + streamlined compiled-in encodings. +! lib/Encode/Description.pod -> lib/Encode/Details.pod + Renamed. ++ Encode/ibm-125?.ucm + Added from icu distibution with any occurance of + "IBM-125?" to "cp125?". Filenames remain unchanged to pay + some respect to icu staff, however. ++ lib/Encode/Alias.pm +! Encode.pm + Alias difinitions in Encode.pm relocated. +! AUTHORS +! Encode.xs + packWARN patch from Paul Marquess via jhi + Message-Id: <20020321010101.O28978@alpha.hut.fi> + Paul added to AUTHORS as a result. +! t/CJKalias.t -> t/Aliases.t + Renamed. Checks even more aliases and alias overloading +! Encode.pm +! CN/CN.pm + duplicate alias for ujis => euc-jp removed (Encode::JP has one) + gbk => cp936 relocated to CN.pm +! t/CJKalias.t + Test::More with plans (by jhi) 0.94 Thu Mar 21 2002 + lib/Encode/Description.pod ! 
lib/Encode/Encoding.pm diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 8dc14c3..bdfd686 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,6 +1,6 @@ package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 0.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 0.95 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; require DynaLoader; require Exporter; @@ -37,25 +37,10 @@ bootstrap Encode (); use Carp; +use Encode::Alias; + # Make a %encoding package variable to allow a certain amount of cheating our %encoding; -my @alias; # ordered matching list -my %alias; # cached known aliases - - # 0 1 2 3 4 5 6 7 8 9 10 -our @latin2iso_num = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 ); - -our %winlatin2cp = ( - 'Latin1' => 1252, - 'Latin2' => 1250, - 'Cyrillic' => 1251, - 'Greek' => 1253, - 'Turkish' => 1254, - 'Hebrew' => 1255, - 'Arabic' => 1256, - 'Baltic' => 1257, - 'Vietnamese' => 1258, - ); our %external_tables = ( @@ -93,125 +78,6 @@ sub encodings keys %encoding; } -sub findAlias -{ - my $class = shift; - local $_ = shift; - # print "# findAlias $_\n"; - unless (exists $alias{$_}) - { - for (my $i=0; $i < @alias; $i += 2) - { - my $alias = $alias[$i]; - my $val = $alias[$i+1]; - my $new; - if (ref($alias) eq 'Regexp' && $_ =~ $alias) - { - $new = eval $val; - } - elsif (ref($alias) eq 'CODE') - { - $new = &{$alias}($val) - } - elsif (lc($_) eq lc($alias)) - { - $new = $val; - } - if (defined($new)) - { - next if $new eq $_; # avoid (direct) recursion on bugs - my $enc = (ref($new)) ? $new : find_encoding($new); - if ($enc) - { - $alias{$_} = $enc; - last; - } - } - } - } - return $alias{$_}; -} - -sub define_alias -{ - while (@_) - { - my ($alias,$name) = splice(@_,0,2); - push(@alias, $alias => $name); - } -} - -# Allow variants of iso-8859-1 etc. -define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' ); - -# At least HP-UX has these. 
-define_alias( qr/^iso8859(\d+)$/i => '"iso-8859-$1"' ); - -# More HP stuff. -define_alias( qr/^(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' ); - -# The Official name of ASCII. -define_alias( qr/^ANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' ); - -# This is a font issue, not an encoding issue. -# (The currency symbol of the Latin 1 upper half -# has been redefined as the euro symbol.) -define_alias( qr/^(.+)\@euro$/i => '"$1"' ); - -# Allow latin-1 style names as well -define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i => '"iso-8859-$latin2iso_num[$1]"' ); - -# Allow winlatin1 style names as well -define_alias( qr/^win(latin[12]|cyrillic|baltic|greek|turkish|hebrew|arabic|baltic|vietnamese)$/i => '"cp$winlatin2cp{\u$1}"' ); - -# Common names for non-latin prefered MIME names -define_alias( 'ascii' => 'US-ascii', - 'cyrillic' => 'iso-8859-5', - 'arabic' => 'iso-8859-6', - 'greek' => 'iso-8859-7', - 'hebrew' => 'iso-8859-8', - 'thai' => 'iso-8859-11', - 'tis620' => 'iso-8859-11', - ); - -# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN. -# And Microsoft has their own naming (again, surprisingly). -define_alias( qr/^(?:ibm|ms)[-_]?(\d\d\d\d?)$/i => '"cp$1"'); - -# Sometimes seen with a leading zero. -define_alias( qr/^cp037$/i => '"cp37"'); - -# Ououououou. -define_alias( qr/^macRomanian$/i => '"macRumanian"'); - -# Standardize on the dashed versions. -define_alias( qr/^utf8$/i => 'utf-8' ); -define_alias( qr/^koi8r$/i => 'koi8-r' ); -define_alias( qr/^koi8u$/i => 'koi8-u' ); - -# Seen in some Linuxes. -define_alias( qr/^ujis$/i => 'euc-jp' ); - -# CP936 doesn't have vendor-addon for GBK, so they're identical. -define_alias( qr/^gbk$/i => '"cp936"'); - -# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8 -# TODO: HP-UX '15' encodings japanese15 korean15 roi15 -# TODO: Cyrillic encoding ISO-IR-111 (useful?) 
-# TODO: Armenian encoding ARMSCII-8 -# TODO: Hebrew encoding ISO-8859-8-1 -# TODO: Thai encoding TCVN -# TODO: Korean encoding Johab -# TODO: Vietnamese encodings VPS -# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese -# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic -# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese -# Kannada Khmer Korean Laotian Malayalam Mongolian -# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese - -# Map white space and _ to '-' -define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); - sub define_encoding { my $obj = shift; @@ -340,7 +206,7 @@ The C module provides the interfaces between Perl's strings and the rest of the system. Perl strings are sequences of B. To find more about character encodings, please consult -L . This document focuses on programming references. +L . This document focuses on programming references. =head1 PERL ENCODING API @@ -487,7 +353,10 @@ For CHECK see L. use Encode qw(encodings); @list = encodings(); -Returns a list of the canonical names of the available encodings. +Returns a list of the canonical names of the available encodings. + +To find which encodings are suppoted by this package in details, +see L. =head2 Defining Aliases @@ -497,32 +366,7 @@ Returns a list of the canonical names of the available encodings. Allows newName to be used as am alias for ENCODING. ENCODING may be either the name of an encoding or and encoding object (as above). -Currently I can be specified in the following ways: - -=over 4 - -=item As a simple string. - -=item As a qr// compiled regular expression, e.g.: - - define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' ); - -In this case if I is not a reference it is C-ed to -allow C<$1> etc. to be subsituted. The example is one way to names as -used in X11 font names to alias the MIME names for the iso-8859-* -family. Note the double quote inside the single quote. If you are -using regex here, you have to do so or it won't work in this case. 
- -=item As a code reference, e.g.: - - define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , ''); - -In this case C<$_> will be set to the name that is being looked up and -I is passed to the sub as its first argument. The example -is another way to names as used in X11 font names to alias the MIME -names for the iso-8859-* family. - -=back +See L on details. =head1 Defining Encodings @@ -636,9 +480,15 @@ not a string. =head1 SEE ALSO -L, L, L, L, L, -L, L, L the Perl Unicode Mailing List Eperl-unicode@perl.orgE - +L, +L, +L, +L, +L, +L, +L, +L, +L, +the Perl Unicode Mailing List Eperl-unicode@perl.orgE =cut - diff --git a/ext/Encode/Encode/ibm-1250.ucm b/ext/Encode/Encode/ibm-1250.ucm new file mode 100644 index 0000000..a43bb87 --- /dev/null +++ b/ext/Encode/Encode/ibm-1250.ucm @@ -0,0 +1,379 @@ +# ******************************************************************************* +# * +# * Copyright (C) 1995-2000, International Business Machines +# * Corporation and others. All Rights Reserved. 
+# * +# ******************************************************************************* +# +# File created on Fri Nov 10 13:07:49 2000 +# +# File created by dtcocm version 3.00 +# from source files 34B004E2.RPMAP100 and 04E234B0.TPMAP100 +# +# Table Version : 1.01 +# + "cp1250" + "AXXXX" + 1 + 1 + "SBCS" + \x1A +# +CHARMAP +# +# +#ISO 10646 IBM-1250 +#_________ _________ + \x00 |0 + \x01 |0 + \x02 |0 + \x03 |0 + \x04 |0 + \x05 |0 + \x06 |0 + \x07 |0 + \x08 |0 + \x09 |0 + \x0A |0 + \x0B |0 + \x0C |0 + \x0D |0 + \x0E |0 + \x0F |0 + \x10 |0 + \x11 |0 + \x12 |0 + \x13 |0 + \x14 |0 + \x15 |0 + \x16 |0 + \x17 |0 + \x18 |0 + \x19 |0 + \x1A |0 + \x1B |0 + \x1C |0 + \x1D |0 + \x1E |0 + \x1F |0 + \x20 |0 + \x21 |0 + \x22 |0 + \x23 |0 + \x24 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x40 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + \x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x5B |0 + \x5C |0 + \x5D |0 + \x5E |0 + \x5F |0 + \x60 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + \x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + \x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x7B |0 + \x7C |0 + \x7D |0 + \x7E |0 + \x7F |0 + \x80 |3 + \x81 |0 + \x83 |0 + \x88 |0 + \x90 |0 + \x98 |0 + \xA0 |0 + \xA4 |0 + \xA6 |0 + \xA7 |0 + \xA8 |0 + \xA9 |0 + \xAB |0 + \xAC |0 + \xAD |0 + \xAE |0 + \xB0 |0 + \xB1 |0 + \xB4 |0 + \xB5 |0 + \xB6 |0 + \xB7 |0 + \xB8 |0 + \xBB |0 + \xC1 |0 + \xC2 |0 + \xC4 |0 + \xC7 |0 + \xC9 |0 + \xCB |0 + \xCD |0 + \xCE |0 + \xD0 |1 + \xD3 |0 + 
\xD4 |0 + \xD6 |0 + \xD7 |0 + \xDA |0 + \xDC |0 + \xDD |0 + \xDF |0 + \xE1 |0 + \xE2 |0 + \xE4 |0 + \xE7 |0 + \xE9 |0 + \xEB |0 + \xED |0 + \xEE |0 + \xF3 |0 + \xF4 |0 + \xF6 |0 + \xF7 |0 + \xFA |0 + \xFC |0 + \xFD |0 + \xC3 |0 + \xE3 |0 + \xA5 |0 + \xB9 |0 + \xC6 |0 + \xE6 |0 + \xC8 |0 + \xE8 |0 + \xCF |0 + \xEF |0 + \xD0 |0 + \xF0 |0 + \xCA |0 + \xEA |0 + \xCC |0 + \xEC |0 + \xC5 |0 + \xE5 |0 + \xBC |0 + \xBE |0 + \xA3 |0 + \xB3 |0 + \xD1 |0 + \xF1 |0 + \xD2 |0 + \xF2 |0 + \xD5 |0 + \xF5 |0 + \xC0 |0 + \xE0 |0 + \xD8 |0 + \xF8 |0 + \x8C |0 + \x9C |0 + \xAA |0 + \xBA |0 + \x8A |0 + \x9A |0 + \xDE |0 + \xFE |0 + \x8D |0 + \x9D |0 + \xD9 |0 + \xF9 |0 + \xDB |0 + \xFB |0 + \x8F |0 + \x9F |0 + \xAF |0 + \xBF |0 + \x8E |0 + \x9E |0 + \xA1 |0 + \xA2 |0 + \xFF |0 + \xB2 |0 + \xBD |0 + \x96 |0 + \x97 |0 + \x91 |0 + \x92 |0 + \x82 |0 + \x93 |0 + \x94 |0 + \x84 |0 + \x86 |0 + \x87 |0 + \x95 |0 + \x85 |0 + \x89 |0 + \x8B |0 + \x9B |0 + \x80 |1 + \x99 |0 + \x21 |1 + \x22 |1 + \x23 |1 + \x24 |1 + \x25 |1 + \x26 |1 + \x27 |1 + \x28 |1 + \x29 |1 + \x2A |1 + \x2B |1 + \x2C |1 + \x2D |1 + \x2E |1 + \x2F |1 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x3A |1 + \x3B |1 + \x3C |1 + \x3D |1 + \x3E |1 + \x3F |1 + \x40 |1 + \x41 |1 + \x42 |1 + \x43 |1 + \x44 |1 + \x45 |1 + \x46 |1 + \x47 |1 + \x48 |1 + \x49 |1 + \x4A |1 + \x4B |1 + \x4C |1 + \x4D |1 + \x4E |1 + \x4F |1 + \x50 |1 + \x51 |1 + \x52 |1 + \x53 |1 + \x54 |1 + \x55 |1 + \x56 |1 + \x57 |1 + \x58 |1 + \x59 |1 + \x5A |1 + \x5B |1 + \x5C |1 + \x5D |1 + \x5E |1 + \x5F |1 + \x60 |1 + \x61 |1 + \x62 |1 + \x63 |1 + \x64 |1 + \x65 |1 + \x66 |1 + \x67 |1 + \x68 |1 + \x69 |1 + \x6A |1 + \x6B |1 + \x6C |1 + \x6D |1 + \x6E |1 + \x6F |1 + \x70 |1 + \x71 |1 + \x72 |1 + \x73 |1 + \x74 |1 + \x75 |1 + \x76 |1 + \x77 |1 + \x78 |1 + \x79 |1 + \x7A |1 + \x7B |1 + \x7C |1 + \x7D |1 + \x7E |1 +END CHARMAP diff --git a/ext/Encode/Encode/ibm-1251.ucm b/ext/Encode/Encode/ibm-1251.ucm new file 
mode 100644 index 0000000..26bfc2a --- /dev/null +++ b/ext/Encode/Encode/ibm-1251.ucm @@ -0,0 +1,377 @@ +# ******************************************************************************* +# * +# * Copyright (C) 1995-2000, International Business Machines +# * Corporation and others. All Rights Reserved. +# * +# ******************************************************************************* +# +# File created on Fri Nov 10 13:07:49 2000 +# +# File created by dtcocm version 3.00 +# from source files 34B004E3.RPMAP100 and 04E334B0.TPMAP100 +# +# Table Version : 1.01 +# + "cp1251" + "AXXXX" + 1 + 1 + "SBCS" + \x1A +# +CHARMAP +# +# +#ISO 10646 IBM-1251 +#_________ _________ + \x00 |0 + \x01 |0 + \x02 |0 + \x03 |0 + \x04 |0 + \x05 |0 + \x06 |0 + \x07 |0 + \x08 |0 + \x09 |0 + \x0A |0 + \x0B |0 + \x0C |0 + \x0D |0 + \x0E |0 + \x0F |0 + \x10 |0 + \x11 |0 + \x12 |0 + \x13 |0 + \x14 |0 + \x15 |0 + \x16 |0 + \x17 |0 + \x18 |0 + \x19 |0 + \x1A |0 + \x1B |0 + \x1C |0 + \x1D |0 + \x1E |0 + \x1F |0 + \x20 |0 + \x21 |0 + \x22 |0 + \x23 |0 + \x24 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x40 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + \x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x5B |0 + \x5C |0 + \x5D |0 + \x5E |0 + \x5F |0 + \x60 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + \x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + \x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x7B |0 + \x7C |0 + \x7D |0 + \x7E |0 + \x7F |0 + \x88 |0 + \x98 |0 + \xA0 |0 + \xA4 |0 + 
\xA6 |0 + \xA7 |0 + \xA9 |0 + \xAB |0 + \xAC |0 + \xAD |0 + \xAE |0 + \xB0 |0 + \xB1 |0 + \xB5 |0 + \xB6 |0 + \xB7 |0 + \xBB |0 + \xA8 |0 + \x80 |0 + \x81 |0 + \xAA |0 + \xBD |0 + \xB2 |0 + \xAF |0 + \xA3 |0 + \x8A |0 + \x8C |0 + \x8E |0 + \x8D |0 + \xA1 |0 + \x8F |0 + \xC0 |0 + \xC1 |0 + \xC2 |0 + \xC3 |0 + \xC4 |0 + \xC5 |0 + \xC6 |0 + \xC7 |0 + \xC8 |0 + \xC9 |0 + \xCA |0 + \xCB |0 + \xCC |0 + \xCD |0 + \xCE |0 + \xCF |0 + \xD0 |0 + \xD1 |0 + \xD2 |0 + \xD3 |0 + \xD4 |0 + \xD5 |0 + \xD6 |0 + \xD7 |0 + \xD8 |0 + \xD9 |0 + \xDA |0 + \xDB |0 + \xDC |0 + \xDD |0 + \xDE |0 + \xDF |0 + \xE0 |0 + \xE1 |0 + \xE2 |0 + \xE3 |0 + \xE4 |0 + \xE5 |0 + \xE6 |0 + \xE7 |0 + \xE8 |0 + \xE9 |0 + \xEA |0 + \xEB |0 + \xEC |0 + \xED |0 + \xEE |0 + \xEF |0 + \xF0 |0 + \xF1 |0 + \xF2 |0 + \xF3 |0 + \xF4 |0 + \xF5 |0 + \xF6 |0 + \xF7 |0 + \xF8 |0 + \xF9 |0 + \xFA |0 + \xFB |0 + \xFC |0 + \xFD |0 + \xFE |0 + \xFF |0 + \xB8 |0 + \x90 |0 + \x83 |0 + \xBA |0 + \xBE |0 + \xB3 |0 + \xBF |0 + \xBC |0 + \x9A |0 + \x9C |0 + \x9E |0 + \x9D |0 + \xA2 |0 + \x9F |0 + \xA5 |0 + \xB4 |0 + \x96 |0 + \x97 |0 + \x91 |0 + \x92 |0 + \x82 |0 + \x93 |0 + \x94 |0 + \x84 |0 + \x86 |0 + \x87 |0 + \x95 |0 + \x85 |0 + \x89 |0 + \x8B |0 + \x9B |0 + \xB9 |0 + \x99 |0 + \x21 |1 + \x22 |1 + \x23 |1 + \x24 |1 + \x25 |1 + \x26 |1 + \x27 |1 + \x28 |1 + \x29 |1 + \x2A |1 + \x2B |1 + \x2C |1 + \x2D |1 + \x2E |1 + \x2F |1 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x3A |1 + \x3B |1 + \x3C |1 + \x3D |1 + \x3E |1 + \x3F |1 + \x40 |1 + \x41 |1 + \x42 |1 + \x43 |1 + \x44 |1 + \x45 |1 + \x46 |1 + \x47 |1 + \x48 |1 + \x49 |1 + \x4A |1 + \x4B |1 + \x4C |1 + \x4D |1 + \x4E |1 + \x4F |1 + \x50 |1 + \x51 |1 + \x52 |1 + \x53 |1 + \x54 |1 + \x55 |1 + \x56 |1 + \x57 |1 + \x58 |1 + \x59 |1 + \x5A |1 + \x5B |1 + \x5C |1 + \x5D |1 + \x5E |1 + \x5F |1 + \x60 |1 + \x61 |1 + \x62 |1 + \x63 |1 + \x64 |1 + \x65 |1 + \x66 |1 + \x67 |1 + \x68 |1 + \x69 |1 + \x6A |1 + \x6B |1 + \x6C |1 + 
\x6D |1 + \x6E |1 + \x6F |1 + \x70 |1 + \x71 |1 + \x72 |1 + \x73 |1 + \x74 |1 + \x75 |1 + \x76 |1 + \x77 |1 + \x78 |1 + \x79 |1 + \x7A |1 + \x7B |1 + \x7C |1 + \x7D |1 + \x7E |1 +END CHARMAP diff --git a/ext/Encode/Encode/ibm-1252.ucm b/ext/Encode/Encode/ibm-1252.ucm new file mode 100644 index 0000000..2f4ec2e --- /dev/null +++ b/ext/Encode/Encode/ibm-1252.ucm @@ -0,0 +1,381 @@ +# ******************************************************************************* +# * +# * Copyright (C) 1997-2000, International Business Machines +# * Corporation and others. All Rights Reserved. +# * +# ******************************************************************************* +# +# File created on Fri Nov 10 13:07:49 2000 +# +# File created by dtcocm version 3.00 +# from source files 34B004E4.RPMAP100 and 04E434B0.TPMAP100 +# +# Table Version : 1.01 +# + "cp1252" + "AXXXX" + 1 + 1 + "SBCS" + \x1A +# +CHARMAP +# +# +#ISO 10646 IBM-1252 +#_________ _________ + \x00 |0 + \x01 |0 + \x02 |0 + \x03 |0 + \x04 |0 + \x05 |0 + \x06 |0 + \x07 |0 + \x08 |0 + \x09 |0 + \x0A |0 + \x0B |0 + \x0C |0 + \x0D |0 + \x0E |0 + \x0F |0 + \x10 |0 + \x11 |0 + \x12 |0 + \x13 |0 + \x14 |0 + \x15 |0 + \x16 |0 + \x17 |0 + \x18 |0 + \x19 |0 + \x1A |0 + \x1B |0 + \x1C |0 + \x1D |0 + \x1E |0 + \x1F |0 + \x20 |0 + \x21 |0 + \x22 |0 + \x23 |0 + \x24 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x40 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + \x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x5B |0 + \x5C |0 + \x5D |0 + \x5E |0 + \x5F |0 + \x60 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + 
\x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + \x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x7B |0 + \x7C |0 + \x7D |0 + \x7E |0 + \x7F |0 + \x80 |3 + \x81 |0 + \x8D |0 + \x8E |3 + \x8F |0 + \x90 |0 + \x9D |0 + \x9E |3 + \xA0 |0 + \xA1 |0 + \xA2 |0 + \xA3 |0 + \xA4 |0 + \xA5 |0 + \xA6 |0 + \xA7 |0 + \xA8 |0 + \xA9 |0 + \xAA |0 + \xAB |0 + \xAC |0 + \xAD |0 + \xAE |0 + \xAF |0 + \xB0 |0 + \xB1 |0 + \xB2 |0 + \xB3 |0 + \xB4 |0 + \xB5 |0 + \xB6 |0 + \xB7 |0 + \xB8 |0 + \xB9 |0 + \xBA |0 + \xBB |0 + \xBC |0 + \xBD |0 + \xBE |0 + \xBF |0 + \xC0 |0 + \xC1 |0 + \xC2 |0 + \xC3 |0 + \xC4 |0 + \xC5 |0 + \xC6 |0 + \xC7 |0 + \xC8 |0 + \xC9 |0 + \xCA |0 + \xCB |0 + \xCC |0 + \xCD |0 + \xCE |0 + \xCF |0 + \xD0 |0 + \xD1 |0 + \xD2 |0 + \xD3 |0 + \xD4 |0 + \xD5 |0 + \xD6 |0 + \xD7 |0 + \xD8 |0 + \xD9 |0 + \xDA |0 + \xDB |0 + \xDC |0 + \xDD |0 + \xDE |0 + \xDF |0 + \xE0 |0 + \xE1 |0 + \xE2 |0 + \xE3 |0 + \xE4 |0 + \xE5 |0 + \xE6 |0 + \xE7 |0 + \xE8 |0 + \xE9 |0 + \xEA |0 + \xEB |0 + \xEC |0 + \xED |0 + \xEE |0 + \xEF |0 + \xF0 |0 + \xF1 |0 + \xF2 |0 + \xF3 |0 + \xF4 |0 + \xF5 |0 + \xF6 |0 + \xF7 |0 + \xF8 |0 + \xF9 |0 + \xFA |0 + \xFB |0 + \xFC |0 + \xFD |0 + \xFE |0 + \xFF |0 + \xD0 |1 + \x8C |0 + \x9C |0 + \x8A |0 + \x9A |0 + \x9F |0 + \x8E |1 + \x9E |1 + \x83 |0 + \x88 |0 + \x98 |0 + \x96 |0 + \x97 |0 + \x91 |0 + \x92 |0 + \x82 |0 + \x93 |0 + \x94 |0 + \x84 |0 + \x86 |0 + \x87 |0 + \x95 |0 + \x85 |0 + \x89 |0 + \x8B |0 + \x9B |0 + \x80 |1 + \x99 |0 + \x21 |1 + \x22 |1 + \x23 |1 + \x24 |1 + \x25 |1 + \x26 |1 + \x27 |1 + \x28 |1 + \x29 |1 + \x2A |1 + \x2B |1 + \x2C |1 + \x2D |1 + \x2E |1 + \x2F |1 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x3A |1 + \x3B |1 + \x3C |1 + \x3D |1 + \x3E |1 + \x3F |1 + \x40 |1 + \x41 |1 + \x42 |1 + \x43 |1 + \x44 |1 + \x45 |1 + \x46 |1 + \x47 |1 + \x48 |1 + \x49 |1 + \x4A |1 + \x4B |1 + \x4C |1 + 
\x4D |1 + \x4E |1 + \x4F |1 + \x50 |1 + \x51 |1 + \x52 |1 + \x53 |1 + \x54 |1 + \x55 |1 + \x56 |1 + \x57 |1 + \x58 |1 + \x59 |1 + \x5A |1 + \x5B |1 + \x5C |1 + \x5D |1 + \x5E |1 + \x5F |1 + \x60 |1 + \x61 |1 + \x62 |1 + \x63 |1 + \x64 |1 + \x65 |1 + \x66 |1 + \x67 |1 + \x68 |1 + \x69 |1 + \x6A |1 + \x6B |1 + \x6C |1 + \x6D |1 + \x6E |1 + \x6F |1 + \x70 |1 + \x71 |1 + \x72 |1 + \x73 |1 + \x74 |1 + \x75 |1 + \x76 |1 + \x77 |1 + \x78 |1 + \x79 |1 + \x7A |1 + \x7B |1 + \x7C |1 + \x7D |1 + \x7E |1 +END CHARMAP diff --git a/ext/Encode/Encode/ibm-1253.ucm b/ext/Encode/Encode/ibm-1253.ucm new file mode 100644 index 0000000..847dd8b --- /dev/null +++ b/ext/Encode/Encode/ibm-1253.ucm @@ -0,0 +1,376 @@ +# ******************************************************************************* +# * +# * Copyright (C) 1995-2000, International Business Machines +# * Corporation and others. All Rights Reserved. +# * +# ******************************************************************************* +# +# File created on Fri Nov 10 13:07:49 2000 +# +# File created by dtcocm version 3.00 +# from source files 34B004E5.RPMAP100 and 04E534B0.TPMAP100 +# +# Table Version : 1.01 +# + "cp1253" + "AXXXX" + 1 + 1 + "SBCS" + \x1A +# +CHARMAP +# +# +#ISO 10646 IBM-1253 +#_________ _________ + \x00 |0 + \x01 |0 + \x02 |0 + \x03 |0 + \x04 |0 + \x05 |0 + \x06 |0 + \x07 |0 + \x08 |0 + \x09 |0 + \x0A |0 + \x0B |0 + \x0C |0 + \x0D |0 + \x0E |0 + \x0F |0 + \x10 |0 + \x11 |0 + \x12 |0 + \x13 |0 + \x14 |0 + \x15 |0 + \x16 |0 + \x17 |0 + \x18 |0 + \x19 |0 + \x1A |0 + \x1B |0 + \x1C |0 + \x1D |0 + \x1E |0 + \x1F |0 + \x20 |0 + \x21 |0 + \x22 |0 + \x23 |0 + \x24 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x40 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + 
\x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x5B |0 + \x5C |0 + \x5D |0 + \x5E |0 + \x5F |0 + \x60 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + \x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + \x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x7B |0 + \x7C |0 + \x7D |0 + \x7E |0 + \x7F |0 + \x80 |0 + \x81 |0 + \x88 |0 + \x8A |0 + \x8C |0 + \x8D |0 + \x8E |0 + \x8F |0 + \x90 |0 + \x98 |0 + \x9A |0 + \x9C |0 + \x9D |0 + \x9E |0 + \x9F |0 + \xA0 |0 + \xA3 |0 + \xA4 |0 + \xA5 |0 + \xA6 |0 + \xA7 |0 + \xA8 |0 + \xA9 |0 + \xAA |0 + \xAB |0 + \xAC |0 + \xAD |0 + \xAE |0 + \xB0 |0 + \xB1 |0 + \xB2 |0 + \xB3 |0 + \xB5 |0 + \xB6 |0 + \xB7 |0 + \xBB |0 + \xBD |0 + \x83 |0 + \xB4 |0 + \xA1 |0 + \xA2 |0 + \xB8 |0 + \xB9 |0 + \xBA |0 + \xBC |0 + \xBE |0 + \xBF |0 + \xC0 |0 + \xC1 |0 + \xC2 |0 + \xC3 |0 + \xC4 |0 + \xC5 |0 + \xC6 |0 + \xC7 |0 + \xC8 |0 + \xC9 |0 + \xCA |0 + \xCB |0 + \xCC |0 + \xCD |0 + \xCE |0 + \xCF |0 + \xD0 |0 + \xD1 |0 + \xD3 |0 + \xD4 |0 + \xD5 |0 + \xD6 |0 + \xD7 |0 + \xD8 |0 + \xD9 |0 + \xDA |0 + \xDB |0 + \xDC |0 + \xDD |0 + \xDE |0 + \xDF |0 + \xE0 |0 + \xE1 |0 + \xE2 |0 + \xE3 |0 + \xE4 |0 + \xE5 |0 + \xE6 |0 + \xE7 |0 + \xE8 |0 + \xE9 |0 + \xEA |0 + \xEB |0 + \xEC |0 + \xED |0 + \xEE |0 + \xEF |0 + \xF0 |0 + \xF1 |0 + \xF2 |0 + \xF3 |0 + \xF4 |0 + \xF5 |0 + \xF6 |0 + \xF7 |0 + \xF8 |0 + \xF9 |0 + \xFA |0 + \xFB |0 + \xFC |0 + \xFD |0 + \xFE |0 + \xF6 |1 + \x96 |0 + \x97 |0 + \xAF |0 + \x91 |0 + \x92 |0 + \x82 |0 + \x93 |0 + \x94 |0 + \x84 |0 + \x86 |0 + \x87 |0 + \x95 |0 + \x85 |0 + \x89 |0 + \x8B |0 + \x9B |0 + \x99 |0 + \x21 |1 + \x22 |1 + \x23 |1 + \x24 |1 + \x25 |1 + \x26 |1 + \x27 |1 + \x28 |1 + \x29 |1 + \x2A |1 + \x2B |1 + \x2C |1 + \x2D |1 + \x2E |1 + \x2F |1 + \x30 |1 + \x31 |1 + 
\x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x3A |1 + \x3B |1 + \x3C |1 + \x3D |1 + \x3E |1 + \x3F |1 + \x40 |1 + \x41 |1 + \x42 |1 + \x43 |1 + \x44 |1 + \x45 |1 + \x46 |1 + \x47 |1 + \x48 |1 + \x49 |1 + \x4A |1 + \x4B |1 + \x4C |1 + \x4D |1 + \x4E |1 + \x4F |1 + \x50 |1 + \x51 |1 + \x52 |1 + \x53 |1 + \x54 |1 + \x55 |1 + \x56 |1 + \x57 |1 + \x58 |1 + \x59 |1 + \x5A |1 + \x5B |1 + \x5C |1 + \x5D |1 + \x5E |1 + \x5F |1 + \x60 |1 + \x61 |1 + \x62 |1 + \x63 |1 + \x64 |1 + \x65 |1 + \x66 |1 + \x67 |1 + \x68 |1 + \x69 |1 + \x6A |1 + \x6B |1 + \x6C |1 + \x6D |1 + \x6E |1 + \x6F |1 + \x70 |1 + \x71 |1 + \x72 |1 + \x73 |1 + \x74 |1 + \x75 |1 + \x76 |1 + \x77 |1 + \x78 |1 + \x79 |1 + \x7A |1 + \x7B |1 + \x7C |1 + \x7D |1 + \x7E |1 +END CHARMAP diff --git a/ext/Encode/Encode/ibm-1254.ucm b/ext/Encode/Encode/ibm-1254.ucm new file mode 100644 index 0000000..0ad64ce --- /dev/null +++ b/ext/Encode/Encode/ibm-1254.ucm @@ -0,0 +1,377 @@ +# ******************************************************************************* +# * +# * Copyright (C) 1995-2000, International Business Machines +# * Corporation and others. All Rights Reserved. 
+# * +# ******************************************************************************* +# +# File created on Fri Nov 10 13:07:49 2000 +# +# File created by dtcocm version 3.00 +# from source files 34B004E6.RPMAP100 and 04E634B0.TPMAP100 +# +# Table Version : 1.01 +# + "cp1254" + "AXXXX" + 1 + 1 + "SBCS" + \x1A +# +CHARMAP +# +# +#ISO 10646 IBM-1254 +#_________ _________ + \x00 |0 + \x01 |0 + \x02 |0 + \x03 |0 + \x04 |0 + \x05 |0 + \x06 |0 + \x07 |0 + \x08 |0 + \x09 |0 + \x0A |0 + \x0B |0 + \x0C |0 + \x0D |0 + \x0E |0 + \x0F |0 + \x10 |0 + \x11 |0 + \x12 |0 + \x13 |0 + \x14 |0 + \x15 |0 + \x16 |0 + \x17 |0 + \x18 |0 + \x19 |0 + \x1A |0 + \x1B |0 + \x1C |0 + \x1D |0 + \x1E |0 + \x1F |0 + \x20 |0 + \x21 |0 + \x22 |0 + \x23 |0 + \x24 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x40 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + \x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x5B |0 + \x5C |0 + \x5D |0 + \x5E |0 + \x5F |0 + \x60 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + \x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + \x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x7B |0 + \x7C |0 + \x7D |0 + \x7E |0 + \x7F |0 + \x80 |0 + \x81 |0 + \x8D |0 + \x8E |0 + \x8F |0 + \x90 |0 + \x9D |0 + \x9E |0 + \xA0 |0 + \xA1 |0 + \xA2 |0 + \xA3 |0 + \xA4 |0 + \xA5 |0 + \xA6 |0 + \xA7 |0 + \xA8 |0 + \xA9 |0 + \xAA |0 + \xAB |0 + \xAC |0 + \xAD |0 + \xAE |0 + \xAF |0 + \xB0 |0 + \xB1 |0 + \xB2 |0 + \xB3 |0 + \xB4 |0 + \xB5 |0 + \xB6 |0 + \xB7 |0 + \xB8 |0 + \xB9 |0 + 
\xBA |0 + \xBB |0 + \xBC |0 + \xBD |0 + \xBE |0 + \xBF |0 + \xC0 |0 + \xC1 |0 + \xC2 |0 + \xC3 |0 + \xC4 |0 + \xC5 |0 + \xC6 |0 + \xC7 |0 + \xC8 |0 + \xC9 |0 + \xCA |0 + \xCB |0 + \xCC |0 + \xCD |0 + \xCE |0 + \xCF |0 + \xD1 |0 + \xD2 |0 + \xD3 |0 + \xD4 |0 + \xD5 |0 + \xD6 |0 + \xD7 |0 + \xD8 |0 + \xD9 |0 + \xDA |0 + \xDB |0 + \xDC |0 + \xDF |0 + \xE0 |0 + \xE1 |0 + \xE2 |0 + \xE3 |0 + \xE4 |0 + \xE5 |0 + \xE6 |0 + \xE7 |0 + \xE8 |0 + \xE9 |0 + \xEA |0 + \xEB |0 + \xEC |0 + \xED |0 + \xEE |0 + \xEF |0 + \xF1 |0 + \xF2 |0 + \xF3 |0 + \xF4 |0 + \xF5 |0 + \xF6 |0 + \xF7 |0 + \xF8 |0 + \xF9 |0 + \xFA |0 + \xFB |0 + \xFC |0 + \xFF |0 + \xD0 |0 + \xF0 |0 + \xDD |0 + \xFD |0 + \x8C |0 + \x9C |0 + \xDE |0 + \xFE |0 + \x8A |0 + \x9A |0 + \x9F |0 + \x83 |0 + \x88 |0 + \x98 |0 + \x96 |0 + \x97 |0 + \x91 |0 + \x92 |0 + \x82 |0 + \x93 |0 + \x94 |0 + \x84 |0 + \x86 |0 + \x87 |0 + \x95 |0 + \x85 |0 + \x89 |0 + \x8B |0 + \x9B |0 + \x99 |0 + \x21 |1 + \x22 |1 + \x23 |1 + \x24 |1 + \x25 |1 + \x26 |1 + \x27 |1 + \x28 |1 + \x29 |1 + \x2A |1 + \x2B |1 + \x2C |1 + \x2D |1 + \x2E |1 + \x2F |1 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x3A |1 + \x3B |1 + \x3C |1 + \x3D |1 + \x3E |1 + \x3F |1 + \x40 |1 + \x41 |1 + \x42 |1 + \x43 |1 + \x44 |1 + \x45 |1 + \x46 |1 + \x47 |1 + \x48 |1 + \x49 |1 + \x4A |1 + \x4B |1 + \x4C |1 + \x4D |1 + \x4E |1 + \x4F |1 + \x50 |1 + \x51 |1 + \x52 |1 + \x53 |1 + \x54 |1 + \x55 |1 + \x56 |1 + \x57 |1 + \x58 |1 + \x59 |1 + \x5A |1 + \x5B |1 + \x5C |1 + \x5D |1 + \x5E |1 + \x5F |1 + \x60 |1 + \x61 |1 + \x62 |1 + \x63 |1 + \x64 |1 + \x65 |1 + \x66 |1 + \x67 |1 + \x68 |1 + \x69 |1 + \x6A |1 + \x6B |1 + \x6C |1 + \x6D |1 + \x6E |1 + \x6F |1 + \x70 |1 + \x71 |1 + \x72 |1 + \x73 |1 + \x74 |1 + \x75 |1 + \x76 |1 + \x77 |1 + \x78 |1 + \x79 |1 + \x7A |1 + \x7B |1 + \x7C |1 + \x7D |1 + \x7E |1 +END CHARMAP diff --git a/ext/Encode/Encode/ibm-1255.ucm b/ext/Encode/Encode/ibm-1255.ucm new file mode 100644 index 
0000000..05051c7 --- /dev/null +++ b/ext/Encode/Encode/ibm-1255.ucm @@ -0,0 +1,359 @@ +# ******************************************************************************* +# * +# * Copyright (C) 1995-2000, International Business Machines +# * Corporation and others. All Rights Reserved. +# * +# ******************************************************************************* +# +# File created on Fri Nov 10 13:07:49 2000 +# +# File created by dtcocm version 3.00 +# from source files 34B004E7.RPMAP100 and 04E734B0.TPMAP100 +# +# Table Version : 1.01 +# + "cp1255" + "AXXXX" + 1 + 1 + "SBCS" + \x1A +# +CHARMAP +# +# +#ISO 10646 IBM-1255 +#_________ _________ + \x00 |0 + \x01 |0 + \x02 |0 + \x03 |0 + \x04 |0 + \x05 |0 + \x06 |0 + \x07 |0 + \x08 |0 + \x09 |0 + \x0A |0 + \x0B |0 + \x0C |0 + \x0D |0 + \x0E |0 + \x0F |0 + \x10 |0 + \x11 |0 + \x12 |0 + \x13 |0 + \x14 |0 + \x15 |0 + \x16 |0 + \x17 |0 + \x18 |0 + \x19 |0 + \x1A |0 + \x1B |0 + \x1C |0 + \x1D |0 + \x1E |0 + \x1F |0 + \x20 |0 + \x21 |0 + \x22 |0 + \x23 |0 + \x24 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x40 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + \x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x5B |0 + \x5C |0 + \x5D |0 + \x5E |0 + \x5F |0 + \x60 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + \x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + \x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x7B |0 + \x7C |0 + \x7D |0 + \x7E |0 + \x7F |0 + \x80 |0 + \x81 |0 + \x8A |0 + \x8C |0 + \x8D |0 + \x8E |0 
+ \x8F |0 + \x90 |0 + \x9A |0 + \x9C |0 + \x9D |0 + \x9E |0 + \x9F |0 + \xA0 |0 + \xA2 |0 + \xA3 |0 + \xA5 |0 + \xA6 |0 + \xA7 |0 + \xA8 |0 + \xA9 |0 + \xAB |0 + \xAC |0 + \xAD |0 + \xAE |0 + \xAF |0 + \xB0 |0 + \xB1 |0 + \xB2 |0 + \xB3 |0 + \xB4 |0 + \xB5 |0 + \xB6 |0 + \xB7 |0 + \xB9 |0 + \xBB |0 + \xBC |0 + \xBD |0 + \xBE |0 + \x83 |0 + \x88 |0 + \x98 |0 + \xC0 |0 + \xC1 |0 + \xC2 |0 + \xC3 |0 + \xC4 |0 + \xC5 |0 + \xC6 |0 + \xC7 |0 + \xC8 |0 + \xC9 |0 + \xCB |0 + \xCC |0 + \xCD |0 + \xCE |0 + \xCF |0 + \xD0 |0 + \xD1 |0 + \xD2 |0 + \xD3 |0 + \xE0 |0 + \xE1 |0 + \xE2 |0 + \xE3 |0 + \xE4 |0 + \xE5 |0 + \xE6 |0 + \xE7 |0 + \xE8 |0 + \xE9 |0 + \xEA |0 + \xEB |0 + \xEC |0 + \xED |0 + \xEE |0 + \xEF |0 + \xF0 |0 + \xF1 |0 + \xF2 |0 + \xF3 |0 + \xF4 |0 + \xF5 |0 + \xF6 |0 + \xF7 |0 + \xF8 |0 + \xF9 |0 + \xFA |0 + \xD4 |0 + \xD5 |0 + \xD6 |0 + \xFD |0 + \xFE |0 + \x96 |0 + \x97 |0 + \x91 |0 + \x92 |0 + \x82 |0 + \x93 |0 + \x94 |0 + \x84 |0 + \x86 |0 + \x87 |0 + \x95 |0 + \x85 |0 + \x89 |0 + \x8B |0 + \x9B |0 + \xA4 |0 + \x99 |0 + \x21 |1 + \x22 |1 + \x23 |1 + \x24 |1 + \x25 |1 + \x26 |1 + \x27 |1 + \x28 |1 + \x29 |1 + \x2A |1 + \x2B |1 + \x2C |1 + \x2D |1 + \x2E |1 + \x2F |1 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x3A |1 + \x3B |1 + \x3C |1 + \x3D |1 + \x3E |1 + \x3F |1 + \x40 |1 + \x41 |1 + \x42 |1 + \x43 |1 + \x44 |1 + \x45 |1 + \x46 |1 + \x47 |1 + \x48 |1 + \x49 |1 + \x4A |1 + \x4B |1 + \x4C |1 + \x4D |1 + \x4E |1 + \x4F |1 + \x50 |1 + \x51 |1 + \x52 |1 + \x53 |1 + \x54 |1 + \x55 |1 + \x56 |1 + \x57 |1 + \x58 |1 + \x59 |1 + \x5A |1 + \x5B |1 + \x5C |1 + \x5D |1 + \x5E |1 + \x5F |1 + \x60 |1 + \x61 |1 + \x62 |1 + \x63 |1 + \x64 |1 + \x65 |1 + \x66 |1 + \x67 |1 + \x68 |1 + \x69 |1 + \x6A |1 + \x6B |1 + \x6C |1 + \x6D |1 + \x6E |1 + \x6F |1 + \x70 |1 + \x71 |1 + \x72 |1 + \x73 |1 + \x74 |1 + \x75 |1 + \x76 |1 + \x77 |1 + \x78 |1 + \x79 |1 + \x7A |1 + \x7B |1 + \x7C |1 + \x7D |1 + \x7E |1 +END CHARMAP diff 
--git a/ext/Encode/Encode/ibm-1256.ucm b/ext/Encode/Encode/ibm-1256.ucm new file mode 100644 index 0000000..dcbfda9 --- /dev/null +++ b/ext/Encode/Encode/ibm-1256.ucm @@ -0,0 +1,534 @@ +# ******************************************************************************* +# * +# * Copyright (C) 1995-2000, International Business Machines +# * Corporation and others. All Rights Reserved. +# * +# ******************************************************************************* +# +# File created on Fri Nov 10 13:07:50 2000 +# +# File created by dtcocm version 3.00 +# from source files 34B004E8.RPMAP110 and 04E834B0.TPMAP110 +# +# Table Version : 1.01 +# + "cp1256" + "AXXXX" + 1 + 1 + "SBCS" + \x1A +# +CHARMAP +# +# +#ISO 10646 IBM-1256 +#_________ _________ + \x00 |0 + \x01 |0 + \x02 |0 + \x03 |0 + \x04 |0 + \x05 |0 + \x06 |0 + \x07 |0 + \x08 |0 + \x09 |0 + \x0A |0 + \x0B |0 + \x0C |0 + \x0D |0 + \x0E |0 + \x0F |0 + \x10 |0 + \x11 |0 + \x12 |0 + \x13 |0 + \x14 |0 + \x15 |0 + \x16 |0 + \x17 |0 + \x18 |0 + \x19 |0 + \x1A |0 + \x1B |0 + \x1C |0 + \x1D |0 + \x1E |0 + \x1F |0 + \x20 |0 + \x21 |0 + \x22 |0 + \x23 |0 + \x24 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x40 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + \x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x5B |0 + \x5C |0 + \x5D |0 + \x5E |0 + \x5F |0 + \x60 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + \x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + \x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x7B |0 
+ \x7C |0 + \x7D |0 + \x7E |0 + \x7F |0 + \x80 |0 + \x8A |0 + \x8F |0 + \x98 |0 + \x9A |0 + \x9F |0 + \xA0 |0 + \xA2 |0 + \xA3 |0 + \xA4 |0 + \xA5 |0 + \xA6 |0 + \xA7 |0 + \xA8 |0 + \xA9 |0 + \xAB |0 + \xAC |0 + \xAD |0 + \xAE |0 + \xAF |0 + \xB0 |0 + \xB1 |0 + \xB2 |0 + \xB3 |0 + \xB4 |0 + \xB5 |0 + \xB6 |0 + \xB7 |0 + \xB8 |0 + \xB9 |0 + \xBB |0 + \xBC |0 + \xBD |0 + \xBE |0 + \xD7 |0 + \xE0 |0 + \xE2 |0 + \xE7 |0 + \xE8 |0 + \xE9 |0 + \xEA |0 + \xEB |0 + \xEE |0 + \xEF |0 + \xF4 |0 + \xF7 |0 + \xF9 |0 + \xFB |0 + \xFC |0 + \x8C |0 + \x9C |0 + \x83 |0 + \x88 |0 + \xA1 |0 + \xBA |0 + \xBF |0 + \xC1 |0 + \xC2 |0 + \xC3 |0 + \xC4 |0 + \xC5 |0 + \xC6 |0 + \xC7 |0 + \xC8 |0 + \xC9 |0 + \xCA |0 + \xCB |0 + \xCC |0 + \xCD |0 + \xCE |0 + \xCF |0 + \xD0 |0 + \xD1 |0 + \xD2 |0 + \xD3 |0 + \xD4 |0 + \xD5 |0 + \xD6 |0 + \xD8 |0 + \xD9 |0 + \xDA |0 + \xDB |0 + \xDC |0 + \xDD |0 + \xDE |0 + \xDF |0 + \xE1 |0 + \xE3 |0 + \xE4 |0 + \xE5 |0 + \xE6 |0 + \xEC |0 + \xED |0 + \xF0 |0 + \xF1 |0 + \xF2 |0 + \xF3 |0 + \xF5 |0 + \xF6 |0 + \xF8 |0 + \xFA |0 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x2C |1 + \x2E |1 + \x81 |0 + \x8D |0 + \x8E |0 + \x90 |0 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x9D |0 + \x9E |0 + \xFD |0 + \xFE |0 + \x96 |0 + \x97 |0 + \x91 |0 + \x92 |0 + \x82 |0 + \x93 |0 + \x94 |0 + \x84 |0 + \x86 |0 + \x87 |0 + \x95 |0 + \x85 |0 + \x89 |0 + \x8B |0 + \x9B |0 + \x99 |0 + \x81 |1 + \x81 |1 + \x8D |1 + \x8D |1 + \x8E |1 + \x90 |1 + \x90 |1 + \xF0 |1 + \xF0 |1 + \xF1 |1 + \xF2 |1 + \xF3 |1 + \xF3 |1 + \xF5 |1 + \xF5 |1 + \xF6 |1 + \xF6 |1 + \xF8 |1 + \xF8 |1 + \xFA |1 + \xFA |1 + \xC1 |1 + \xC2 |1 + \xC2 |1 + \xC3 |1 + \xC3 |1 + \xC4 |1 + \xC4 |1 + \xC5 |1 + \xC5 |1 + \xC6 |1 + \xC6 |1 + \xC6 |1 + \xC6 |1 + \xC7 |1 + \xC7 |1 + \xC8 |1 + \xC8 |1 + \xC8 |1 + \xC8 |1 + \xC9 |1 + \xC9 |1 + \xCA |1 + \xCA |1 + \xCA |1 + \xCA |1 + \xCB |1 + \xCB |1 + \xCB |1 
+ \xCB |1 + \xCC |1 + \xCC |1 + \xCC |1 + \xCC |1 + \xCD |1 + \xCD |1 + \xCD |1 + \xCD |1 + \xCE |1 + \xCE |1 + \xCE |1 + \xCE |1 + \xCF |1 + \xCF |1 + \xD0 |1 + \xD0 |1 + \xD1 |1 + \xD1 |1 + \xD2 |1 + \xD2 |1 + \xD3 |1 + \xD3 |1 + \xD3 |1 + \xD3 |1 + \xD4 |1 + \xD4 |1 + \xD4 |1 + \xD4 |1 + \xD5 |1 + \xD5 |1 + \xD5 |1 + \xD5 |1 + \xD6 |1 + \xD6 |1 + \xD6 |1 + \xD6 |1 + \xD8 |1 + \xD8 |1 + \xD8 |1 + \xD8 |1 + \xD9 |1 + \xD9 |1 + \xD9 |1 + \xD9 |1 + \xDA |1 + \xDA |1 + \xDA |1 + \xDA |1 + \xDB |1 + \xDB |1 + \xDB |1 + \xDB |1 + \xDD |1 + \xDD |1 + \xDD |1 + \xDD |1 + \xDE |1 + \xDE |1 + \xDE |1 + \xDE |1 + \xDF |1 + \xDF |1 + \xDF |1 + \xDF |1 + \xE1 |1 + \xE1 |1 + \xE1 |1 + \xE1 |1 + \xE3 |1 + \xE3 |1 + \xE3 |1 + \xE3 |1 + \xE4 |1 + \xE4 |1 + \xE4 |1 + \xE4 |1 + \xE5 |1 + \xE5 |1 + \xE5 |1 + \xE5 |1 + \xE6 |1 + \xE6 |1 + \xEC |1 + \xEC |1 + \xED |1 + \xED |1 + \xED |1 + \xED |1 + \x21 |1 + \x22 |1 + \x23 |1 + \x24 |1 + \x25 |1 + \x26 |1 + \x27 |1 + \x28 |1 + \x29 |1 + \x2A |1 + \x2B |1 + \x2C |1 + \x2D |1 + \x2E |1 + \x2F |1 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x3A |1 + \x3B |1 + \x3C |1 + \x3D |1 + \x3E |1 + \x3F |1 + \x40 |1 + \x41 |1 + \x42 |1 + \x43 |1 + \x44 |1 + \x45 |1 + \x46 |1 + \x47 |1 + \x48 |1 + \x49 |1 + \x4A |1 + \x4B |1 + \x4C |1 + \x4D |1 + \x4E |1 + \x4F |1 + \x50 |1 + \x51 |1 + \x52 |1 + \x53 |1 + \x54 |1 + \x55 |1 + \x56 |1 + \x57 |1 + \x58 |1 + \x59 |1 + \x5A |1 + \x5B |1 + \x5C |1 + \x5D |1 + \x5E |1 + \x5F |1 + \x60 |1 + \x61 |1 + \x62 |1 + \x63 |1 + \x64 |1 + \x65 |1 + \x66 |1 + \x67 |1 + \x68 |1 + \x69 |1 + \x6A |1 + \x6B |1 + \x6C |1 + \x6D |1 + \x6E |1 + \x6F |1 + \x70 |1 + \x71 |1 + \x72 |1 + \x73 |1 + \x74 |1 + \x75 |1 + \x76 |1 + \x77 |1 + \x78 |1 + \x79 |1 + \x7A |1 + \x7B |1 + \x7C |1 + \x7D |1 + \x7E |1 +END CHARMAP diff --git a/ext/Encode/Encode/ibm-1257.ucm b/ext/Encode/Encode/ibm-1257.ucm new file mode 100644 index 0000000..f19a5c4 --- /dev/null +++ 
b/ext/Encode/Encode/ibm-1257.ucm @@ -0,0 +1,373 @@ +# ******************************************************************************* +# * +# * Copyright (C) 1995-2000, International Business Machines +# * Corporation and others. All Rights Reserved. +# * +# ******************************************************************************* +# +# File created on Fri Nov 10 13:07:50 2000 +# +# File created by dtcocm version 3.00 +# from source files 34B004E9.RPMAP100 and 04E934B0.TPMAP100 +# +# Table Version : 1.01 +# + "cp1257" + "AXXXX" + 1 + 1 + "SBCS" + \x1A +# +CHARMAP +# +# +#ISO 10646 IBM-1257 +#_________ _________ + \x00 |0 + \x01 |0 + \x02 |0 + \x03 |0 + \x04 |0 + \x05 |0 + \x06 |0 + \x07 |0 + \x08 |0 + \x09 |0 + \x0A |0 + \x0B |0 + \x0C |0 + \x0D |0 + \x0E |0 + \x0F |0 + \x10 |0 + \x11 |0 + \x12 |0 + \x13 |0 + \x14 |0 + \x15 |0 + \x16 |0 + \x17 |0 + \x18 |0 + \x19 |0 + \x1A |0 + \x1B |0 + \x1C |0 + \x1D |0 + \x1E |0 + \x1F |0 + \x20 |0 + \x21 |0 + \x22 |0 + \x23 |0 + \x24 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x40 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + \x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x5B |0 + \x5C |0 + \x5D |0 + \x5E |0 + \x5F |0 + \x60 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + \x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + \x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x7B |0 + \x7C |0 + \x7D |0 + \x7E |0 + \x7F |0 + \x80 |0 + \x81 |0 + \x83 |0 + \x88 |0 + \x8A |0 + \x8C |0 + \x8D |0 + \x8E |0 + \x8F |0 + 
\x90 |0 + \x98 |0 + \x9A |0 + \x9C |0 + \x9D |0 + \x9E |0 + \x9F |0 + \xA0 |0 + \xA2 |0 + \xA3 |0 + \xA4 |0 + \xA6 |0 + \xA7 |0 + \xA9 |0 + \xAB |0 + \xAC |0 + \xAD |0 + \xAE |0 + \xB0 |0 + \xB1 |0 + \xB2 |0 + \xB3 |0 + \xB5 |0 + \xB6 |0 + \xB7 |0 + \xB9 |0 + \xBB |0 + \xBC |0 + \xBD |0 + \xBE |0 + \xC4 |0 + \xC5 |0 + \xAF |0 + \xC9 |0 + \xD3 |0 + \xD5 |0 + \xD6 |0 + \xD7 |0 + \xA8 |0 + \xDC |0 + \xDF |0 + \xE4 |0 + \xE5 |0 + \xBF |0 + \xE9 |0 + \xF3 |0 + \xF5 |0 + \xF6 |0 + \xF7 |0 + \xB8 |0 + \xFC |0 + \xC2 |0 + \xE2 |0 + \xC0 |0 + \xE0 |0 + \xC3 |0 + \xE3 |0 + \xC8 |0 + \xE8 |0 + \xC7 |0 + \xE7 |0 + \xCB |0 + \xEB |0 + \xC6 |0 + \xE6 |0 + \xCC |0 + \xEC |0 + \xCE |0 + \xEE |0 + \xC1 |0 + \xE1 |0 + \xCD |0 + \xED |0 + \xCF |0 + \xEF |0 + \xD9 |0 + \xF9 |0 + \xD1 |0 + \xF1 |0 + \xD2 |0 + \xF2 |0 + \xD4 |0 + \xF4 |0 + \xAA |0 + \xBA |0 + \xDA |0 + \xFA |0 + \xD0 |0 + \xF0 |0 + \xDB |0 + \xFB |0 + \xD8 |0 + \xF8 |0 + \xCA |0 + \xEA |0 + \xDD |0 + \xFD |0 + \xDE |0 + \xFE |0 + \x96 |0 + \x97 |0 + \x91 |0 + \x92 |0 + \x82 |0 + \x93 |0 + \x94 |0 + \x84 |0 + \x86 |0 + \x87 |0 + \x95 |0 + \x85 |0 + \x89 |0 + \x8B |0 + \x9B |0 + \x99 |0 + \x21 |1 + \x22 |1 + \x23 |1 + \x24 |1 + \x25 |1 + \x26 |1 + \x27 |1 + \x28 |1 + \x29 |1 + \x2A |1 + \x2B |1 + \x2C |1 + \x2D |1 + \x2E |1 + \x2F |1 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x3A |1 + \x3B |1 + \x3C |1 + \x3D |1 + \x3E |1 + \x3F |1 + \x40 |1 + \x41 |1 + \x42 |1 + \x43 |1 + \x44 |1 + \x45 |1 + \x46 |1 + \x47 |1 + \x48 |1 + \x49 |1 + \x4A |1 + \x4B |1 + \x4C |1 + \x4D |1 + \x4E |1 + \x4F |1 + \x50 |1 + \x51 |1 + \x52 |1 + \x53 |1 + \x54 |1 + \x55 |1 + \x56 |1 + \x57 |1 + \x58 |1 + \x59 |1 + \x5A |1 + \x5B |1 + \x5C |1 + \x5D |1 + \x5E |1 + \x5F |1 + \x60 |1 + \x61 |1 + \x62 |1 + \x63 |1 + \x64 |1 + \x65 |1 + \x66 |1 + \x67 |1 + \x68 |1 + \x69 |1 + \x6A |1 + \x6B |1 + \x6C |1 + \x6D |1 + \x6E |1 + \x6F |1 + \x70 |1 + \x71 |1 + \x72 |1 + \x73 |1 + \x74 |1 + \x75 |1 + 
\x76 |1 + \x77 |1 + \x78 |1 + \x79 |1 + \x7A |1 + \x7B |1 + \x7C |1 + \x7D |1 + \x7E |1 +END CHARMAP diff --git a/ext/Encode/Encode/ibm-1258.ucm b/ext/Encode/Encode/ibm-1258.ucm new file mode 100644 index 0000000..6e24ab4 --- /dev/null +++ b/ext/Encode/Encode/ibm-1258.ucm @@ -0,0 +1,377 @@ +# ******************************************************************************* +# * +# * Copyright (C) 1995-2000, International Business Machines +# * Corporation and others. All Rights Reserved. +# * +# ******************************************************************************* +# +# File created on Fri Nov 10 13:07:50 2000 +# +# File created by dtcocm version 3.00 +# from source files 34B004EA.RPMAP100 and 04EA34B0.TPMAP100 +# +# Table Version : 1.01 +# + "cp1258" + "AXXXX" + 1 + 1 + "SBCS" + \x1A +# +CHARMAP +# +# +#ISO 10646 IBM-1258 +#_________ _________ + \x00 |0 + \x01 |0 + \x02 |0 + \x03 |0 + \x04 |0 + \x05 |0 + \x06 |0 + \x07 |0 + \x08 |0 + \x09 |0 + \x0A |0 + \x0B |0 + \x0C |0 + \x0D |0 + \x0E |0 + \x0F |0 + \x10 |0 + \x11 |0 + \x12 |0 + \x13 |0 + \x14 |0 + \x15 |0 + \x16 |0 + \x17 |0 + \x18 |0 + \x19 |0 + \x1A |0 + \x1B |0 + \x1C |0 + \x1D |0 + \x1E |0 + \x1F |0 + \x20 |0 + \x21 |0 + \x22 |0 + \x23 |0 + \x24 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x40 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + \x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x5B |0 + \x5C |0 + \x5D |0 + \x5E |0 + \x5F |0 + \x60 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + \x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + 
\x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x7B |0 + \x7C |0 + \x7D |0 + \x7E |0 + \x7F |0 + \x80 |0 + \x81 |0 + \x8A |0 + \x8D |0 + \x8E |0 + \x8F |0 + \x90 |0 + \x9A |0 + \x9D |0 + \x9E |0 + \xA0 |0 + \xA1 |0 + \xA2 |0 + \xA3 |0 + \xA4 |0 + \xA5 |0 + \xA6 |0 + \xA7 |0 + \xA8 |0 + \xA9 |0 + \xAA |0 + \xAB |0 + \xAC |0 + \xAD |0 + \xAE |0 + \xAF |0 + \xB0 |0 + \xB1 |0 + \xB2 |0 + \xB3 |0 + \xB4 |0 + \xB5 |0 + \xB6 |0 + \xB7 |0 + \xB8 |0 + \xB9 |0 + \xBA |0 + \xBB |0 + \xBC |0 + \xBD |0 + \xBE |0 + \xBF |0 + \xC0 |0 + \xC1 |0 + \xC2 |0 + \xC4 |0 + \xC5 |0 + \xC6 |0 + \xC7 |0 + \xC8 |0 + \xC9 |0 + \xCA |0 + \xCB |0 + \xCD |0 + \xCE |0 + \xCF |0 + \xD1 |0 + \xD3 |0 + \xD4 |0 + \xD6 |0 + \xD7 |0 + \xD8 |0 + \xD9 |0 + \xDA |0 + \xDB |0 + \xDC |0 + \xDF |0 + \xE0 |0 + \xE1 |0 + \xE2 |0 + \xE4 |0 + \xE5 |0 + \xE6 |0 + \xE7 |0 + \xE8 |0 + \xE9 |0 + \xEA |0 + \xEB |0 + \xED |0 + \xEE |0 + \xEF |0 + \xF1 |0 + \xF3 |0 + \xF4 |0 + \xF6 |0 + \xF7 |0 + \xF8 |0 + \xF9 |0 + \xFA |0 + \xFB |0 + \xFC |0 + \xFF |0 + \xC3 |0 + \xE3 |0 + \xD0 |0 + \xF0 |0 + \x8C |0 + \x9C |0 + \x9F |0 + \x83 |0 + \xD5 |0 + \xF5 |0 + \xDD |0 + \xFD |0 + \x88 |0 + \x98 |0 + \xCC |0 + \xEC |0 + \xDE |0 + \xD2 |0 + \xF2 |0 + \x96 |0 + \x97 |0 + \x91 |0 + \x92 |0 + \x82 |0 + \x93 |0 + \x94 |0 + \x84 |0 + \x86 |0 + \x87 |0 + \x95 |0 + \x85 |0 + \x89 |0 + \x8B |0 + \x9B |0 + \xFE |0 + \x99 |0 + \x21 |1 + \x22 |1 + \x23 |1 + \x24 |1 + \x25 |1 + \x26 |1 + \x27 |1 + \x28 |1 + \x29 |1 + \x2A |1 + \x2B |1 + \x2C |1 + \x2D |1 + \x2E |1 + \x2F |1 + \x30 |1 + \x31 |1 + \x32 |1 + \x33 |1 + \x34 |1 + \x35 |1 + \x36 |1 + \x37 |1 + \x38 |1 + \x39 |1 + \x3A |1 + \x3B |1 + \x3C |1 + \x3D |1 + \x3E |1 + \x3F |1 + \x40 |1 + \x41 |1 + \x42 |1 + \x43 |1 + \x44 |1 + \x45 |1 + \x46 |1 + \x47 |1 + \x48 |1 + \x49 |1 + \x4A |1 + \x4B |1 + \x4C |1 + \x4D |1 + \x4E |1 + \x4F |1 + \x50 |1 + \x51 |1 + \x52 |1 + \x53 |1 + \x54 |1 + \x55 |1 + \x56 |1 + \x57 |1 + \x58 |1 + \x59 |1 + 
\x5A |1 + \x5B |1 + \x5C |1 + \x5D |1 + \x5E |1 + \x5F |1 + \x60 |1 + \x61 |1 + \x62 |1 + \x63 |1 + \x64 |1 + \x65 |1 + \x66 |1 + \x67 |1 + \x68 |1 + \x69 |1 + \x6A |1 + \x6B |1 + \x6C |1 + \x6D |1 + \x6E |1 + \x6F |1 + \x70 |1 + \x71 |1 + \x72 |1 + \x73 |1 + \x74 |1 + \x75 |1 + \x76 |1 + \x77 |1 + \x78 |1 + \x79 |1 + \x7A |1 + \x7B |1 + \x7C |1 + \x7D |1 + \x7E |1 +END CHARMAP diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index 4d5c6fa..1dec5ce 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -97,6 +97,15 @@ Encode/gb12345.enc Encode table Encode/gb1988.enc Encode table Encode/gb2312.enc Encode table Encode/gsm0338.enc Encode table +Encode/ibm-1250.ucm Encode table +Encode/ibm-1251.ucm Encode table +Encode/ibm-1252.ucm Encode table +Encode/ibm-1253.ucm Encode table +Encode/ibm-1254.ucm Encode table +Encode/ibm-1255.ucm Encode table +Encode/ibm-1256.ucm Encode table +Encode/ibm-1257.ucm Encode table +Encode/ibm-1258.ucm Encode table Encode/ir-197.enc Encode table Encode/iso-ir-165.enc Encode table Encode/jis0201.enc Encode table @@ -146,14 +155,16 @@ TW/TW.pm Encode extension compile Encode extension encengine.c Encode extension encode.h Encode extension +lib/Encode/Alias.pm Encode extension lib/Encode/CN/HZ.pm Encode extension -lib/Encode/Description.pod General topics on character encodings +lib/Encode/Details.pod Detailed topics on character encodings lib/Encode/Encoding.pm Encode extension lib/Encode/Internal.pm Encode extension lib/Encode/JP/Constants.pm Encode extension lib/Encode/JP/H2Z.pm Encode extension lib/Encode/JP/ISO_2022_JP.pm Encode extension lib/Encode/JP/JIS.pm Encode extension +lib/Encode/Supported.pod Documents supported encodings lib/Encode/Tcl.pm Encode extension lib/Encode/Tcl/Escape.pm Encode extension lib/Encode/Tcl/Table.pm Encode extension @@ -163,7 +174,7 @@ lib/Encode/iso10646_1.pm Encode extension lib/Encode/ucs2_le.pm Encode extension lib/Encode/utf8.pm Encode extension lib/EncodeFormat.pod Encode extension 
-t/CJKalias.t Encode extension test +t/Aliases.t Encode extension test t/CN.t Encode extension test t/Encode.t Encode extension test t/JP.t Encode extension test diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL index 8a7458c..ff80352 100644 --- a/ext/Encode/Makefile.PL +++ b/ext/Encode/Makefile.PL @@ -2,10 +2,24 @@ use 5.7.2; use strict; use ExtUtils::MakeMaker; -my %tables = (8859 => ['ascii.ucm', 'cp1250.ucm', 'koi8-r.ucm', 'jis0201.ucm' ], - EBCDIC => ['cp1047.ucm','cp37.ucm','posix-bc.ucm'], - Symbols => ['symbol.ucm','dingbats.ucm'], - ); +my %tables = + ( + 8859 => ['ascii.ucm', 'koi8-r.ucm', 'viscii.ucm', + 'ibm-1250.ucm', 'ibm-1251.ucm', + 'ibm-1253.ucm', 'ibm-1254.ucm', + 'ibm-1255.ucm', 'ibm-1256.ucm', + 'ibm-1257.ucm', 'ibm-1258.ucm', + 'ibm-1252.ucm', + qw(macCentEuro.enc macCroatian.enc + macCyrillic.enc macDingbats.enc + macGreek.enc macIceland.enc + macRoman.enc macRumanian.enc + macSami.enc macThai.enc + macTurkish.enc macUkraine.enc), + ], + EBCDIC => ['cp1047.ucm','cp37.ucm','posix-bc.ucm'], + Symbols => ['symbol.ucm','dingbats.ucm'], + ); opendir(ENC,'Encode'); while (defined(my $file = readdir(ENC))) diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm new file mode 100644 index 0000000..83d09ae --- /dev/null +++ b/ext/Encode/lib/Encode/Alias.pm @@ -0,0 +1,227 @@ +package Encode::Alias; +use strict; +use Encode qw(find_encoding); +our $VERSION = do { my @r = (q$Revision: 0.95 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $DEBUG = 0; +require Exporter; + +our @ISA = qw(Exporter); + +# Public, encouraged API is exported by default +our @EXPORT = qw ( + findAlias + define_alias + ); + +our @Alias; # ordered matching list +our %Alias; # cached known aliases + +sub findAlias +{ + my $class = shift; + local $_ = shift; + unless (exists $Alias{$_}) + { + for (my $i=0; $i < @Alias; $i += 2) + { + my $alias = $Alias[$i]; + my $val = $Alias[$i+1]; + my $new; + if (ref($alias) eq 'Regexp' && $_ =~ $alias) + { + 
$new = eval $val; + # $@ and warn "$val, $@"; + } + elsif (ref($alias) eq 'CODE') + { + $new = $alias->($val); + } + elsif (lc($_) eq lc($alias)) + { + $new = $val; + } + if (defined($new)) + { + next if $new eq $_; # avoid (direct) recursion on bugs + my $enc = (ref($new)) ? $new : Encode::find_encoding($new); + if ($enc) + { + $Alias{$_} = $enc; + last; + } + } + } + } + return $Alias{$_}; +} + +sub define_alias +{ + while (@_) + { + my ($alias,$name) = splice(@_,0,2); + unshift(@Alias, $alias => $name); # newer one has precedence + # clear %Alias cache to allow overrides + if (ref($alias)){ + for my $k (keys %Alias){ + if (ref($alias) eq 'Regexp' && $k =~ $alias) + { + $DEBUG and warn $k; + delete $Alias{$k}; + } + elsif (ref($alias) eq 'CODE') + { + delete $Alias{$alias->($name)}; + } + } + }else{ + delete $Alias{$alias}; + } + } +} + + +# Allow variants of iso-8859-1 etc. +define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' ); + +# At least HP-UX has these. +define_alias( qr/^iso8859(\d+)$/i => '"iso-8859-$1"' ); + +# More HP stuff. +define_alias( qr/^(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' ); + +# The Official name of ASCII. +define_alias( qr/^ANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' ); + +# This is a font issue, not an encoding issue. +# (The currency symbol of the Latin 1 upper half +# has been redefined as the euro symbol.) 
+define_alias( qr/^(.+)\@euro$/i => '"$1"' ); + +# Allow latin-1 style names as well + # 0 1 2 3 4 5 6 7 8 9 10 +our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 ); +define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i + => '"iso-8859-$Encode::Alias::Latin2iso[$1]"' ); + +# Allow winlatin1 style names as well +our %Winlatin2cp = ( + 'latin1' => 1252, + 'latin2' => 1250, + 'cyrillic' => 1251, + 'greek' => 1253, + 'turkish' => 1254, + 'hebrew' => 1255, + 'arabic' => 1256, + 'baltic' => 1257, + 'vietnamese' => 1258, + ); + +define_alias( qr/win(latin[12]|cyrillic|baltic|greek|turkish| + hebrew|arabic|baltic|vietnamese)$/ix => + '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' ); + +# Common names for non-latin prefered MIME names +define_alias( 'ascii' => 'US-ascii', + 'cyrillic' => 'iso-8859-5', + 'arabic' => 'iso-8859-6', + 'greek' => 'iso-8859-7', + 'hebrew' => 'iso-8859-8', + 'thai' => 'iso-8859-11', + 'tis620' => 'iso-8859-11', + ); + +# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN. +# And Microsoft has their own naming (again, surprisingly). +define_alias( qr/^(?:ibm|ms)[-_]?(\d\d\d\d?)$/i => '"cp$1"'); + +# Sometimes seen with a leading zero. +define_alias( qr/^cp037$/i => '"cp37"'); + +# Ououououou. +define_alias( qr/^macRomanian$/i => '"macRumanian"'); + +# Standardize on the dashed versions. +define_alias( qr/^utf8$/i => 'utf-8' ); +define_alias( qr/^koi8r$/i => 'koi8-r' ); +define_alias( qr/^koi8u$/i => 'koi8-u' ); + +# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8 +# TODO: HP-UX '15' encodings japanese15 korean15 roi15 +# TODO: Cyrillic encoding ISO-IR-111 (useful?) 
+# TODO: Armenian encoding ARMSCII-8 +# TODO: Hebrew encoding ISO-8859-8-1 +# TODO: Thai encoding TCVN +# TODO: Korean encoding Johab +# TODO: Vietnamese encodings VPS +# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese +# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic +# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese +# Kannada Khmer Korean Laotian Malayalam Mongolian +# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese + +# Map white space and _ to '-' +define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); + +1; +__END__ +=head1 NAME + +Encode::Alias - alias definitions for encodings + +=head1 SYNOPSIS + + use Encode qw(define_alias); + define_alias( newName => ENCODING); + +=head1 DESCRIPTION + +Allows newName to be used as an alias for ENCODING. ENCODING may be +either the name of an encoding or an encoding object (as described in L<Encode>). + +Currently I<newName> can be specified in the following ways: + +=over 4 + +=item As a simple string. + +=item As a qr// compiled regular expression, e.g.: + + define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' ); + +In this case if I<ENCODING> is not a reference it is C<eval>-ed to +allow C<$1> etc. to be substituted. The example is one way to alias names as +used in X11 fonts to the MIME names for the iso-8859-* +family. Note the double quotes inside the single quotes. + +If you are using a regex here, you have to quote ENCODING this way or it won't work in +this case. Also note that regexes are tricky even for the experienced. Use them +with caution. + +=item As a code reference, e.g.: + + define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , ''); + +In this case C<$_> will be set to the name that is being looked up and +I<ENCODING> is passed to the sub as its first argument. The example +is another way to alias names as used in X11 fonts to the MIME +names for the iso-8859-* family. + +=item Alias overloading + +You can override predefined aliases by simply applying define_alias().
+A new alias is always evaluated first, and when necessary define_alias() +flushes the internal cache to make the new definition available. + + # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a + # superset of SHIFT_JIS + + Encode::define_alias( qr/shift.*jis$/i => '"cp932"' ); + Encode::define_alias( qr/sjis$/i => '"cp932"' ); + +=back + +=head1 SEE ALSO + +L<Encode>, L<Encode::Supported> + diff --git a/ext/Encode/lib/Encode/Description.pod b/ext/Encode/lib/Encode/Details.pod similarity index 100% rename from ext/Encode/lib/Encode/Description.pod rename to ext/Encode/lib/Encode/Details.pod diff --git a/ext/Encode/lib/Encode/Supported.pod b/ext/Encode/lib/Encode/Supported.pod new file mode 100644 index 0000000..2676db2 --- /dev/null +++ b/ext/Encode/lib/Encode/Supported.pod @@ -0,0 +1,213 @@ +=head1 NAME + +Encode::Supported -- Supported encodings by Encode + +=head1 DESCRIPTION + +=head2 Encoding Names + +Encoding names are case insensitive. White space in names +is ignored. In addition an encoding may have aliases. +Each encoding has one "canonical" name. The "canonical" +name is chosen from the names of the encoding by picking +the first in the following sequence: + + o The MIME name as defined in IETF RFCs. + o The name in the IANA registry. + o The name used by the organization that defined it. + +Because of all the alias issues, and because in the general +case encodings have state, "Encode" uses the encoding +object internally once an operation is in progress. + +=head2 Supported Encodings + +As of Perl 5.8.0, at least the following encodings are recognized. +Note that unless otherwise specified, they are all case insensitive +(via alias) and all occurrences of spaces are replaced with '-'. In +other words, "ISO 8859 1" and "iso-8859-1" are identical.
+ +=head3 ASCII + + Canonical Aliases + ----------------------- + ascii US-ascii + +=head3 The Unicode + + utf8 UTF-8 + utf16 UTF-16 + ucs2 UCS-2, iso-10646-1 + +=head3 The ISO 8859, KOI, and other 1-byte encodings + +The following encodings are based upon ASCII. In most cases they use +\x80-\xff (upper half) to map non-ASCII characters. + + iso-8859-1 latin1 + iso-8859-2 latin2 + iso-8859-3 latin3 + iso-8859-4 latin4 + iso-8859-5 cyrillic + iso-8859-6 arabic + iso-8859-7 + iso-8859-8 + iso-8859-9 latin5 + iso-8859-10 latin6 + iso-8859-11 + (iso-8859-12 is nonexistent) + iso-8859-13 latin7 + iso-8859-14 latin8 + iso-8859-15 latin9 + iso-8859-16 latin10 + + koi8-f + koi8-r + koi8-u + + viscii # ASCII + vietnamese + + cp1250 WinLatin2 + cp1251 WinCyrillic + cp1252 WinLatin1 + cp1253 WinGreek + cp1254 WinTurkish + cp1255 WinHebrew + cp1256 WinArabic + cp1257 WinBaltic + cp1258 WinVietnamese + # all cp* are also available as ibm-* and ms-* + + maccentraleuropean + maccroatian + macroman + maccyrillic + macromanian + macdingbats + macsami + macgreek + macthai + macicelandic + macturkish + macukraine + +=head3 The CJK: Chinese, Japanese, Korean (Multibyte) + +Note Vietnamese is listed above. Also read "Encoding vs Charset" +below. Also note these are implemented in distinct modules by +language, due to size concerns. See these perldocs also. + + cp936 gbk # Encode::CN + euc-cn # Encode::CN + gb12345 # Encode::CN + gb2312 # Encode::CN + gb2312 # Encode::CN + hz # Encode::CN + iso-ir-165 # Encode::CN + + 7bit-jis jis # Encode::JP + cp932 # Encode::JP + euc-jp ujis # Encode::JP + iso-2022-jp # Encode::JP + macjapan # Encode::JP + shiftjis Shift_JIS, sjis # Encode::JP + + euc-kr # Encode::KR + ksc5601 # Encode::KR + cp949 # Encode::KR + + big5 # Encode::TW + big5-hkscs # Encode::TW + cp950 # Encode::TW + +Due to size concerns, additional Chinese encodings including "GB +18030", "EUC-TW" and "BIG5PLUS" are distributed separately on CPAN, +under the name Encode::HanExtra.
+ +=head3 EBCDIC + +See perlebcdic for details. + + cp1047 + cp37 + posix-bc + +=head3 Symbols and dingbats + + symbol + dingbats + +=head1 Encoding vs. Charset + +Character encoding (or just "encoding") and Character Set (or just +"charset") are often used interchangeably but they are different +concepts. + +A charset determines which characters may be included in a given text. + +An encoding maps charset(s) to a stream of bits. + +Note that a given encoding may contain multiple charsets. For instance, +euc-jp contains ASCII, JIS X 0201 (Hankaku Kana), JIS X 0208 (Zenkaku +Kana and Kanji) and JIS X 0212 (Extended Kanji) in a single encoding. + +As the name suggests, the Encode module supports encodings, not +individual charsets. + +=head1 Encoding Classification (by Anton Tagunov) + +Encodings + + US-ASCII UTF-8 KOI8-R ISO-8859-* + ISO-2022-CN ISO-2022-JP Big5 + EUC-CN EUC-JP EUC-KR + +are IANA-registered as +preferred MIME names and may probably be used over the Internet. So is + + Shift_JIS + +but despite its widespread use it used to bear the label of being +Microsoft proprietary. Shift JIS is now official as of +JIS X 0208-1997. + + UTF-16 KOI8-U + +are IANA-registered preferred MIME names but probably +should be avoided as encodings for web pages due to lack of +browser support. + + ISO-2022 (http://www.ecma.ch/ecma1/STAND/ECMA-035.HTM) + ISO-2022-JP-1 (http://www.faqs.org/rfcs/rfc2237.html) + ISO-IR-165 (http://www.faqs.org/rfcs/rfc1345.html) + GBK + VISCII + GB 12345 (only planes 1 and 2 available) + GB 18030 + CNS 11643 + +are totally valid encodings but not registered at IANA.
+ + BIG5PLUS + EUC-JP-0212 (Encode::lib::Encode::Tcl::Extended) + +are a bit proprietary + +You may probably get some info on CJK encodings at + +brief description for most of the mentioned CJK encodings + +F + +several years old, but still useful + +F + +and some in-depth reading for the heroes :-) +F (eq ISO-2022) + +=head1 See Also + +L, L, L, L, L + +=cut diff --git a/ext/Encode/t/Aliases.t b/ext/Encode/t/Aliases.t new file mode 100644 index 0000000..3afaeae --- /dev/null +++ b/ext/Encode/t/Aliases.t @@ -0,0 +1,87 @@ +#!../perl + +use strict; +use Encode::CN; +use Encode::JP; +use Encode::KR; +use Encode::TW; + +my %a2c; + +BEGIN { + %a2c = ( + 'ascii' => 'US-ascii', + 'cyrillic' => 'iso-8859-5', + 'arabic' => 'iso-8859-6', + 'greek' => 'iso-8859-7', + 'hebrew' => 'iso-8859-8', + 'thai' => 'iso-8859-11', + 'tis620' => 'iso-8859-11', + 'ja_JP.euc' => 'euc-jp', + 'x-euc-jp' => 'euc-jp', + 'zh_CN.euc' => 'euc-cn', + 'x-euc-cn' => 'euc-cn', + 'ko_KR.euc' => 'euc-kr', + 'x-euc-kr' => 'euc-kr', + 'ujis' => 'euc-jp', + 'Shift_JIS' => 'shiftjis', + 'x-sjis' => 'shiftjis', + 'jis' => '7bit-jis', + 'big-5' => 'big5', + 'zh_TW.Big5' => 'big5', + 'big5-hk' => 'big5-hkscs', + 'WinLatin1' => 'cp1252', + 'WinLatin2' => 'cp1250', + 'WinCyrillic' => 'cp1251', + 'WinGreek' => 'cp1253', + 'WinTurkish' => 'cp1254', + 'WinHebrew' => 'cp1255', + 'WinArabic' => 'cp1256', + 'WinBaltic' => 'cp1257', + 'WinVietnamese' => 'cp1258', + ); + + for my $i (1..11,13..16){ + $a2c{"ISO 8859 $i"} = "iso-8859-$i"; + } + for my $i (1..10){ + $a2c{"ISO Latin $i"} = "iso-8859-$Encode::Alias::Latin2iso[$i]"; + } + for my $k (keys %Encode::Alias::Winlatin2cp){ + my $v = $Encode::Alias::Winlatin2cp{$k}; + $a2c{"Win" . ucfirst($k)} = "cp" . $v; + $a2c{"IBM-$v"} = "cp" . $v; + $a2c{"MS-$v"} = "cp" . 
$v; + } +} + +use Test::More tests => (scalar keys %a2c) * 2; + +print "# alias test\n"; + +foreach my $a (keys %a2c){ + my $e = Encode::find_encoding($a); + is((defined($e) and $e->name), $a2c{$a}); +} + +# now we override some of the aliases and see if it works fine + +Encode::define_alias( qr/shift.*jis$/i => '"macjapan"' ); +Encode::define_alias( qr/sjis$/i => '"cp932"' ); + +@a2c{qw(Shift_JIS x-sjis)} = qw(macjapan cp932); + +print "# alias test with alias overrides\n"; + +foreach my $a (keys %a2c){ + my $e = Encode::find_encoding($a); + is((defined($e) and $e->name), $a2c{$a}); +} + +__END__ +for (my $i = 0; $i < @Encode::Alias::Alias; $i+=2){ + my ($k, $v) = @Encode::Alias::Alias[$i, $i+1]; + print "$k => $v\n"; +} + + diff --git a/ext/Encode/t/CJKalias.t b/ext/Encode/t/CJKalias.t deleted file mode 100644 index cc1efe5..0000000 --- a/ext/Encode/t/CJKalias.t +++ /dev/null @@ -1,35 +0,0 @@ -use strict; -use Encode::CN; -use Encode::JP; -use Encode::KR; -use Encode::TW; - -print "# alias test\n"; - -my %a2c; - -BEGIN { - %a2c = qw( - ja_JP.euc euc-jp - x-euc-jp euc-jp - zh_CN.euc euc-cn - x-euc-cn euc-cn - ko_KR.euc euc-kr - x-euc-kr euc-kr - ujis euc-jp - Shift_JIS shiftjis - x-sjis shiftjis - jis 7bit-jis - big-5 big5 - zh_TW.Big5 big5 - big5-hk big5-hkscs - ); -} - -use Test::More tests => scalar keys %a2c; - -foreach my $a (keys %a2c){ - my $e = Encode::find_encoding($a); - is($e->name, $a2c{$a}); -} -