Upgrade to Encode 0.99, from Dan Kogai.

[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.pm
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm

index bdfd686..42bb24e 100644 (file)
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,6 +1,7 @@
 package Encode;
 use strict;
-our $VERSION = do { my @r = (q$Revision: 0.95 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 0.99 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $DEBUG = 0;
 
 require DynaLoader;
 require Exporter;
@@ -20,7 +21,6 @@ our @EXPORT = qw (
 our @EXPORT_OK =
     qw(
        define_encoding
-       define_alias
        from_to
        is_utf8
        is_8bit
@@ -37,52 +37,88 @@ bootstrap Encode ();
 
 use Carp;
 
+our $ON_EBCDIC = (ord("A") == 193);
 use Encode::Alias;
 
-# Make a %encoding package variable to allow a certain amount of cheating
-our %encoding;
+# Make a %Encoding package variable to allow a certain amount of cheating
+our %Encoding;
 
-our %external_tables =
+our %ExtModule =
     (
-       'euc-cn'        => 'Encode/CN.pm',
-       gb2312          => 'Encode/CN.pm',
-       gb12345         => 'Encode/CN.pm',
-       gbk             => 'Encode/CN.pm',
-       cp936           => 'Encode/CN.pm',
-       'iso-ir-165'    => 'Encode/CN.pm',
-       'euc-jp'        => 'Encode/JP.pm',
-       'iso-2022-jp'   => 'Encode/JP.pm',
-       '7bit-jis'      => 'Encode/JP.pm',
-       shiftjis        => 'Encode/JP.pm',
-       macjapan        => 'Encode/JP.pm',
-       cp932           => 'Encode/JP.pm',
-       'euc-kr'        => 'Encode/KR.pm',
-       ksc5601         => 'Encode/KR.pm',
-       cp949           => 'Encode/KR.pm',
-       big5            => 'Encode/TW.pm',
-       'big5-hkscs'    => 'Encode/TW.pm',
-       cp950           => 'Encode/TW.pm',
-       gb18030         => 'Encode/HanExtra.pm',
-       big5plus        => 'Encode/HanExtra.pm',
-       'euc-tw'        => 'Encode/HanExtra.pm',
+     viscii             => 'Encode/Byte.pm',
+     'koi8-r'           => 'Encode/Byte.pm',
+     cp1047             => 'Encode/EBCDIC.pm',
+     cp37               => 'Encode/EBCDIC.pm',
+     'posix-bc'         => 'Encode/EBCDIC.pm',
+     symbol             => 'Encode/Symbol.pm',
+     dingbats           => 'Encode/Symbol.pm',
     );
 
+for my $k (2..11,13..16){
+    $ExtModule{"iso-8859-$k"} = 'Encode/Byte.pm';
+}
+
+for my $k (1250..1258){
+    $ExtModule{"cp$k"} = 'Encode/Byte.pm';
+}
+
+unless ($ON_EBCDIC) { # CJK added to autoload unless EBCDIC env
+%ExtModule =(
+            %ExtModule,
+            'euc-cn'           => 'Encode/CN.pm',
+            gb2312             => 'Encode/CN.pm',
+            gb12345            => 'Encode/CN.pm',
+            gbk                => 'Encode/CN.pm',
+            cp936              => 'Encode/CN.pm',
+            'iso-ir-165'       => 'Encode/CN.pm',
+            'euc-jp'           => 'Encode/JP.pm',
+            'iso-2022-jp'      => 'Encode/JP.pm',
+            'iso-2022-jp-1'    => 'Encode/JP.pm',
+            '7bit-jis'         => 'Encode/JP.pm',
+            shiftjis           => 'Encode/JP.pm',
+            macjapan           => 'Encode/JP.pm',
+            cp932              => 'Encode/JP.pm',
+            'euc-kr'           => 'Encode/KR.pm',
+            ksc5601            => 'Encode/KR.pm',
+            cp949              => 'Encode/KR.pm',
+            big5               => 'Encode/TW.pm',
+            'big5-hkscs'       => 'Encode/TW.pm',
+            cp950              => 'Encode/TW.pm',
+            gb18030            => 'Encode/HanExtra.pm',
+            big5plus           => 'Encode/HanExtra.pm',
+            'euc-tw'           => 'Encode/HanExtra.pm',
+            );
+}
+
+for my $k (qw(centeuro croatian cyrillic dingbats greek
+             iceland roman rumanian sami 
+             thai turkish  ukraine))
+{
+    $ExtModule{"mac$k"} = 'Encode/Byte.pm';
+}
+
+
 sub encodings
 {
- my ($class) = @_;
- return
-     map { $_->[0] }
-         sort { $a->[1] cmp $b->[1] }
-               map { [$_, lc $_] }
-                   grep { $_ ne 'Internal' }
-                        keys %encoding;
+    my $class = shift;
+    my @modules = (@_ and $_[0] eq ":all") ? values %ExtModule : @_;
+    for my $m (@modules)
+    {
+       $DEBUG and warn "about to require $m;";
+       eval { require $m; };
+    }
+    return
+       map({$_->[0]} 
+           sort({$a->[1] cmp $b->[1]}
+                map({[$_, lc $_]} 
+                    grep({ $_ ne 'Internal' }  keys %Encoding))));
 }
 
 sub define_encoding
 {
     my $obj  = shift;
     my $name = shift;
-    $encoding{$name} = $obj;
+    $Encoding{$name} = $obj;
     my $lc = lc($name);
     define_alias($lc => $obj) unless $lc eq $name;
     while (@_)
@@ -102,25 +138,25 @@ sub getEncoding
        return $name;
     }
     my $lc = lc $name;
-    if (exists $encoding{$name})
+    if (exists $Encoding{$name})
     {
-       return $encoding{$name};
+       return $Encoding{$name};
     }
-    if (exists $encoding{$lc})
+    if (exists $Encoding{$lc})
     {
-       return $encoding{$lc};
+       return $Encoding{$lc};
     }
 
-    my $oc = $class->findAlias($name);
+    my $oc = $class->find_alias($name);
     return $oc if defined $oc;
 
-    $oc = $class->findAlias($lc) if $lc ne $name;
+    $oc = $class->find_alias($lc) if $lc ne $name;
     return $oc if defined $oc;
 
-    if (!$skip_external and exists $external_tables{$lc})
+    if (!$skip_external and exists $ExtModule{$lc})
     {
-       require $external_tables{$lc};
-       return $encoding{$name} if exists $encoding{$name};
+       eval{ require $ExtModule{$lc}; };
+       return $Encoding{$name} if exists $Encoding{$name};
     }
 
     return;
@@ -185,7 +221,7 @@ require Encode::XS;
 require Encode::Internal;
 require Encode::Unicode;
 require Encode::utf8;
-require Encode::iso10646_1;
+require Encode::10646_1;
 require Encode::ucs2_le;
 
 1;
@@ -206,7 +242,7 @@ The C<Encode> module provides the interfaces between Perl's strings
 and the rest of the system.  Perl strings are sequences of B<characters>.
 
 To find more about character encodings, please consult
-L<Encode::Details> . This document focuses on programming references.
+L<Encode::Details>. This document focuses on programming references.
 
 =head1 PERL ENCODING API
 
@@ -214,9 +250,7 @@ L<Encode::Details> . This document focuses on programming references.
 
 =over 4
 
-=item *
-
-        $bytes  = encode(ENCODING, $string[, CHECK])
+=item $bytes  = encode(ENCODING, $string[, CHECK])
 
 Encodes string from Perl's internal form into I<ENCODING> and returns
 a sequence of octets.  For CHECK see L</"Handling Malformed Data">.
@@ -226,9 +260,7 @@ to octets:
 
        $octets = encode("utf8", $unicode);
 
-=item *
-
-        $string = decode(ENCODING, $bytes[, CHECK])
+=item $string = decode(ENCODING, $bytes[, CHECK])
 
 Decode sequence of octets assumed to be in I<ENCODING> into Perl's
 internal form and returns the resulting string.  For CHECK see
@@ -238,9 +270,7 @@ For example to convert ISO-8859-1 data to UTF-8:
 
        $utf8 = decode("latin1", $latin1);
 
-=item *
-
-       from_to($string, FROM_ENCODING, TO_ENCODING[, CHECK])
+=item from_to($string, FROM_ENCODING, TO_ENCODING[, CHECK])
 
 Convert B<in-place> the data between two encodings.  How did the data
 in $string originally get to be in FROM_ENCODING?  Either using
@@ -314,32 +344,28 @@ Hybrids of above.
 
 Multiple return values rather than in-place modifications.
 
-Index into the string could be pos($str) allowing s/\G...//.
+Index into the string could be C<pos($str)> allowing C<s/\G...//>.
 
 =back
 
 =head2 UTF-8 / utf8
 
 The Unicode consortium defines the UTF-8 standard as a way of encoding
-the entire Unicode repertiore as sequences of octets.  This encoding is
-expected to become very widespread. Perl can use this form internaly
+the entire Unicode repertoire as sequences of octets.  This encoding is
+expected to become very widespread. Perl can use this form internally
 to represent strings, so conversions to and from this form are
 particularly efficient (as octets in memory do not have to change,
 just the meta-data that tells Perl how to treat them).
 
 =over 4
 
-=item *
-
-        $bytes = encode_utf8($string);
+=item $bytes = encode_utf8($string);
 
 The characters that comprise string are encoded in Perl's superset of UTF-8
 and the resulting octets returned as a sequence of bytes. All possible
 characters have a UTF-8 representation so this function cannot fail.
 
-=item *
-
-        $string = decode_utf8($bytes [,CHECK]);
+=item $string = decode_utf8($bytes [, CHECK]);
 
 The sequence of octets represented by $bytes is decoded from UTF-8
 into a sequence of logical characters. Not all sequences of octets
@@ -350,18 +376,30 @@ For CHECK see L</"Handling Malformed Data">.
 
 =head2 Listing available encodings
 
-  use Encode qw(encodings);
-  @list = encodings();
+  use Encode;
+  @list = Encode->encodings();
+
+Returns a list of the canonical names of the available encodings that
+are loaded.  To get a list of all available encodings including the
+ones that are not loaded yet, say
 
-Returns a list of the canonical names of the available encodings. 
+  @all_encodings = Encode->encodings(":all");
 
-To find which encodings are suppoted by this package in details, 
+Or you can give the name of a specific module.
+
+  @with_jp = Encode->encodings("Encode/JP.pm");
+
+Note in this case you have to say C<"Encode/JP.pm"> instead of
+C<"Encode::JP">.
+
+To find which encodings are supported by this package in detail,
 see L<Encode::Supported>.
 
 =head2 Defining Aliases
 
-  use Encode qw(define_alias);
-  define_alias( newName => ENCODING);
+  use Encode;
+  use Encode::Alias;
+  define_alias(newName => ENCODING);
 
 Allows newName to be used as an alias for ENCODING. ENCODING may be
 either the name of an encoding or an encoding object (as above).
@@ -371,7 +409,7 @@ See L<Encode::Alias> on details.
 =head1 Defining Encodings
 
     use Encode qw(define_encoding);
-    define_encoding( $object, 'canonicalName' [,alias...]);
+    define_encoding($object, 'canonicalName' [, alias...]);
 
 Causes I<canonicalName> to be associated with I<$object>.  The object
 should provide the interface described in L<Encode::Encoding>
@@ -451,15 +489,13 @@ implementation.  As such they are efficient, but may change.
 
 =over 4
 
-=item * is_utf8(STRING [, CHECK])
+=item is_utf8(STRING [, CHECK])
 
 [INTERNAL] Test whether the UTF-8 flag is turned on in the STRING.
 If CHECK is true, also checks the data in STRING for being well-formed
 UTF-8.  Returns true if successful, false otherwise.
 
-=item *
-
-        _utf8_on(STRING)
+=item _utf8_on(STRING)
 
 [INTERNAL] Turn on the UTF-8 flag in STRING.  The data in STRING is
 B<not> checked for being well-formed UTF-8.  Do not use unless you
@@ -467,9 +503,7 @@ B<know> that the STRING is well-formed UTF-8.  Returns the previous
 state of the UTF-8 flag (so please don't test the return value as
 I<not> success or failure), or C<undef> if STRING is not a string.
 
-=item *
-
-        _utf8_off(STRING)
+=item _utf8_off(STRING)
 
 [INTERNAL] Turn off the UTF-8 flag in STRING.  Do not use frivolously.
 Returns the previous state of the UTF-8 flag (so please don't test the