Attempt to fix core-specific logic in IPC::Open2 tests

[p5sagit/p5-mst-13.2.git] / ext / Encode / encoding.pm
diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm

index d0b083a..be20a49 100644 (file)
--- a/ext/Encode/encoding.pm
+++ b/ext/Encode/encoding.pm
@@ -1,86 +1,101 @@
-# $Id: encoding.pm,v 2.2 2005/09/08 14:17:17 dankogai Exp dankogai $
+# $Id: encoding.pm,v 2.8 2009/02/15 17:44:13 dankogai Exp $
 package encoding;
-our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = '2.6_01';
 
 use Encode;
 use strict;
+use warnings;
 
 sub DEBUG () { 0 }
 
 BEGIN {
-    if (ord("A") == 193) {
-       require Carp;
-       Carp::croak("encoding: pragma does not support EBCDIC platforms");
+    if ( ord("A") == 193 ) {
+        require Carp;
+        Carp::croak("encoding: pragma does not support EBCDIC platforms");
     }
 }
 
 our $HAS_PERLIO = 0;
 eval { require PerlIO::encoding };
-unless ($@){
-    $HAS_PERLIO = (PerlIO::encoding->VERSION >= 0.02);
+unless ($@) {
+    $HAS_PERLIO = ( PerlIO::encoding->VERSION >= 0.02 );
 }
 
-sub _exception{
+sub _exception {
     my $name = shift;
-    $] > 5.008 and return 0;               # 5.8.1 or higher then no
-    my %utfs = map {$_=>1}
-       qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
-          UTF-32 UTF-32BE UTF-32LE);
-    $utfs{$name} or return 0;               # UTFs or no
-    require Config; Config->import(); our %Config;
-    return $Config{perl_patchlevel} ? 0 : 1 # maintperl then no
+    $] > 5.008 and return 0;    # 5.8.1 or higher then no
+    my %utfs = map { $_ => 1 }
+      qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
+      UTF-32 UTF-32BE UTF-32LE);
+    $utfs{$name} or return 0;    # UTFs or no
+    require Config;
+    Config->import();
+    our %Config;
+    return $Config{perl_patchlevel} ? 0 : 1    # maintperl then no
 }
 
-sub in_locale { $^H & ($locale::hint_bits || 0)}
+sub in_locale { $^H & ( $locale::hint_bits || 0 ) }
 
 sub _get_locale_encoding {
     my $locale_encoding;
 
     # I18N::Langinfo isn't available everywhere
     eval {
-       require I18N::Langinfo;
-       I18N::Langinfo->import(qw(langinfo CODESET));
-       $locale_encoding = langinfo(CODESET());
+        require I18N::Langinfo;
+        I18N::Langinfo->import(qw(langinfo CODESET));
+        $locale_encoding = langinfo( CODESET() );
     };
-    
+
     my $country_language;
 
     no warnings 'uninitialized';
 
-    if (not $locale_encoding && in_locale()) {
-       if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
-           ($country_language, $locale_encoding) = ($1, $2);
-       } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
-           ($country_language, $locale_encoding) = ($1, $2);
-       }
-       # LANGUAGE affects only LC_MESSAGES only on glibc
-    } elsif (not $locale_encoding) {
-       if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
-           $ENV{LANG}   =~ /\butf-?8\b/i) {
-           $locale_encoding = 'utf8';
-       }
-       # Could do more heuristics based on the country and language
-       # parts of LC_ALL and LANG (the parts before the dot (if any)),
-       # since we have Locale::Country and Locale::Language available.
-       # TODO: get a database of Language -> Encoding mappings
-       # (the Estonian database at http://www.eki.ee/letter/
-       # would be excellent!) --jhi
+    if ( (not $locale_encoding) && in_locale() ) {
+        if ( $ENV{LC_ALL} =~ /^([^.]+)\.([^.@]+)(@.*)?$/ ) {
+            ( $country_language, $locale_encoding ) = ( $1, $2 );
+        }
+        elsif ( $ENV{LANG} =~ /^([^.]+)\.([^.@]+)(@.*)?$/ ) {
+            ( $country_language, $locale_encoding ) = ( $1, $2 );
+        }
+
+        # LANGUAGE affects only LC_MESSAGES only on glibc
+    }
+    elsif ( not $locale_encoding ) {
+        if (   $ENV{LC_ALL} =~ /\butf-?8\b/i
+            || $ENV{LANG} =~ /\butf-?8\b/i )
+        {
+            $locale_encoding = 'utf8';
+        }
+
+        # Could do more heuristics based on the country and language
+        # parts of LC_ALL and LANG (the parts before the dot (if any)),
+        # since we have Locale::Country and Locale::Language available.
+        # TODO: get a database of Language -> Encoding mappings
+        # (the Estonian database at http://www.eki.ee/letter/
+        # would be excellent!) --jhi
     }
-    if (defined $locale_encoding &&
-       lc($locale_encoding) eq 'euc' &&
-       defined $country_language) {
-       if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
-           $locale_encoding = 'euc-jp';
-       } elsif ($country_language =~ /^ko_KR|korean?$/i) {
-           $locale_encoding = 'euc-kr';
-       } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
-           $locale_encoding = 'euc-cn';
-       } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
-           $locale_encoding = 'euc-tw';
-       } else {
-           require Carp;
-           Carp::croak("encoding: Locale encoding '$locale_encoding' too ambiguous");
-       }
+    if (   defined $locale_encoding
+        && lc($locale_encoding) eq 'euc'
+        && defined $country_language )
+    {
+        if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) {
+            $locale_encoding = 'euc-jp';
+        }
+        elsif ( $country_language =~ /^ko_KR|korean?$/i ) {
+            $locale_encoding = 'euc-kr';
+        }
+        elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)$/i ) {
+            $locale_encoding = 'euc-cn';
+        }
+        elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) {
+            $locale_encoding = 'euc-tw';
+        }
+        else {
+            require Carp;
+            Carp::croak(
+                "encoding: Locale encoding '$locale_encoding' too ambiguous"
+            );
+        }
     }
 
     return $locale_encoding;
@@ -89,82 +104,89 @@ sub _get_locale_encoding {
 sub import {
     my $class = shift;
     my $name  = shift;
-    if ($name eq ':_get_locale_encoding') { # used by lib/open.pm
-       my $caller = caller();
+    if ( $name eq ':_get_locale_encoding' ) {    # used by lib/open.pm
+        my $caller = caller();
         {
-           no strict 'refs';
-           *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding;
-       }
-       return;
+            no strict 'refs';
+            *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding;
+        }
+        return;
     }
     $name = _get_locale_encoding() if $name eq ':locale';
     my %arg = @_;
     $name = $ENV{PERL_ENCODING} unless defined $name;
     my $enc = find_encoding($name);
-    unless (defined $enc) {
-       require Carp;
-       Carp::croak("encoding: Unknown encoding '$name'");
+    unless ( defined $enc ) {
+        require Carp;
+        Carp::croak("encoding: Unknown encoding '$name'");
+    }
+    $name = $enc->name;    # canonize
+    unless ( $arg{Filter} ) {
+        DEBUG and warn "_exception($name) = ", _exception($name);
+        _exception($name) or ${^ENCODING} = $enc;
+        $HAS_PERLIO or return 1;
     }
-    $name = $enc->name; # canonize
-    unless ($arg{Filter}) {
-       DEBUG and warn "_exception($name) = ", _exception($name);
-       _exception($name) or ${^ENCODING} = $enc;
-       $HAS_PERLIO or return 1;
-    }else{
-       defined(${^ENCODING}) and undef ${^ENCODING};
-       # implicitly 'use utf8'
-       require utf8; # to fetch $utf8::hint_bits;
-       $^H |= $utf8::hint_bits;
-       eval {
-           require Filter::Util::Call ;
-           Filter::Util::Call->import ;
-           filter_add(sub{
-                          my $status = filter_read();
-                           if ($status > 0){
-                              $_ = $enc->decode($_, 1);
-                              DEBUG and warn $_;
-                          }
-                          $status ;
-                      });
-       };
+    else {
+        defined( ${^ENCODING} ) and undef ${^ENCODING};
+
+        # implicitly 'use utf8'
+        require utf8;      # to fetch $utf8::hint_bits;
+        $^H |= $utf8::hint_bits;
+        eval {
+            require Filter::Util::Call;
+            Filter::Util::Call->import;
+            filter_add(
+                sub {
+                    my $status = filter_read();
+                    if ( $status > 0 ) {
+                        $_ = $enc->decode( $_, 1 );
+                        DEBUG and warn $_;
+                    }
+                    $status;
+                }
+            );
+        };
         $@ eq '' and DEBUG and warn "Filter installed";
     }
     defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
-    for my $h (qw(STDIN STDOUT)){
-       if ($arg{$h}){
-           unless (defined find_encoding($arg{$h})) {
-               require Carp;
-               Carp::croak("encoding: Unknown encoding for $h, '$arg{$h}'");
-           }
-           eval { binmode($h, ":raw :encoding($arg{$h})") };
-       }else{
-           unless (exists $arg{$h}){
-               eval { 
-                   no warnings 'uninitialized';
-                   binmode($h, ":raw :encoding($name)");
-               };
-           }
-       }
-       if ($@){
-           require Carp;
-           Carp::croak($@);
-       }
+    for my $h (qw(STDIN STDOUT)) {
+        if ( $arg{$h} ) {
+            unless ( defined find_encoding( $arg{$h} ) ) {
+                require Carp;
+                Carp::croak(
+                    "encoding: Unknown encoding for $h, '$arg{$h}'");
+            }
+            eval { binmode( $h, ":raw :encoding($arg{$h})" ) };
+        }
+        else {
+            unless ( exists $arg{$h} ) {
+                eval {
+                    no warnings 'uninitialized';
+                    binmode( $h, ":raw :encoding($name)" );
+                };
+            }
+        }
+        if ($@) {
+            require Carp;
+            Carp::croak($@);
+        }
     }
-    return 1; # I doubt if we need it, though
+    return 1;    # I doubt if we need it, though
 }
 
-sub unimport{
+sub unimport {
     no warnings;
     undef ${^ENCODING};
-    if ($HAS_PERLIO){
-       binmode(STDIN,  ":raw");
-       binmode(STDOUT, ":raw");
-    }else{
-       binmode(STDIN);
-       binmode(STDOUT);
+    if ($HAS_PERLIO) {
+        binmode( STDIN,  ":raw" );
+        binmode( STDOUT, ":raw" );
+    }
+    else {
+        binmode(STDIN);
+        binmode(STDOUT);
     }
-    if ($INC{"Filter/Util/Call.pm"}){
-       eval { filter_del() };
+    if ( $INC{"Filter/Util/Call.pm"} ) {
+        eval { filter_del() };
     }
 }
 
@@ -184,8 +206,8 @@ encoding - allows you to write your script in non-ascii or non-utf8
 
   # or you can even do this if your shell supports your native encoding
 
-  perl -Mencoding=latin2 -e '...' # Feeling centrally European?
-  perl -Mencoding=euc-kr -e '...' # Or Korean?
+  perl -Mencoding=latin2 -e'...' # Feeling centrally European?
+  perl -Mencoding=euc-kr -e'...' # Or Korean?
 
   # more control
 
@@ -285,6 +307,14 @@ Will print C<2>, because C<$string> is upgraded as UTF-8.  Without
 C<use encoding 'utf8';>, it will print C<4> instead, since C<$string>
 is three octets when interpreted as Latin-1.
 
+=head2 Side effects
+
+If the C<encoding> pragma is in scope then the lengths returned are
+calculated from the length of C<$/> in Unicode characters, which is not
+always the same as the length of C<$/> in the native encoding.
+
+This pragma affects utf8::upgrade, but not utf8::downgrade.
+
 =head1 FEATURES THAT REQUIRE 5.8.1
 
 Some of the features offered by this pragma requires perl 5.8.1.  Most
@@ -301,14 +331,14 @@ encodings as Shift_JIS and Big-5 that may contain '\' (BACKSLASH;
 accidentally escape the quoting character that follows.  Perl 5.8.1
 or later fixes this problem.
 
-=item tr// 
+=item tr//
 
 C<tr//> was overlooked by Perl 5 porters when they released perl 5.8.0
 See the section below for details.
 
 =item DATA pseudo-filehandle
 
-Another feature that was overlooked was C<DATA>. 
+Another feature that was overlooked was C<DATA>.
 
 =back
 
@@ -318,7 +348,7 @@ Another feature that was overlooked was C<DATA>.
 
 =item use encoding [I<ENCNAME>] ;
 
-Sets the script encoding to I<ENCNAME>.  And unless ${^UNICODE} 
+Sets the script encoding to I<ENCNAME>.  And unless ${^UNICODE}
 exists and non-zero, PerlIO layers of STDIN and STDOUT are set to
 ":encoding(I<ENCNAME>)".
 
@@ -396,13 +426,13 @@ utf8> to C<${"\x{4eba}"}++>.
 =head2 NOT SCOPED
 
 The pragma is a per script, not a per block lexical.  Only the last
-C<use encoding> or C<no encoding> matters, and it affects 
-B<the whole script>.  However, the <no encoding> pragma is supported and 
-B<use encoding> can appear as many times as you want in a given script. 
+C<use encoding> or C<no encoding> matters, and it affects
+B<the whole script>.  However, the <no encoding> pragma is supported and
+B<use encoding> can appear as many times as you want in a given script.
 The multiple use of this pragma is discouraged.
 
 By the same reason, the use this pragma inside modules is also
-discouraged (though not as strongly discouraged as the case above.  
+discouraged (though not as strongly discouraged as the case above.
 See below).
 
 If you still have to write a module with this pragma, be very careful
@@ -430,16 +460,16 @@ other modules are loaded.  i.e.
 Notice that only literals (string or regular expression) having only
 legacy code points are affected: if you mix data like this
 
-       \xDF\x{100}
+    \xDF\x{100}
 
 the data is assumed to be in (Latin 1 and) Unicode, not in your native
 encoding.  In other words, this will match in "greek":
 
-       "\xDF" =~ /\x{3af}/
+    "\xDF" =~ /\x{3af}/
 
 but this will not
 
-       "\xDF\x{100}" =~ /\x{3af}\x{100}/
+    "\xDF\x{100}" =~ /\x{3af}\x{100}/
 
 since the C<\xDF> (ISO 8859-7 GREEK SMALL LETTER IOTA WITH TONOS) on
 the left will B<not> be upgraded to C<\x{3af}> (Unicode GREEK SMALL
@@ -571,7 +601,7 @@ To understand it, try the code below.
   .
   $camel = "*non-ascii*";
   binmode(STDOUT=>':encoding(utf8)'); # bang!
-  write;              # funny 
+  write;              # funny
   print $camel, "\n"; # fine
 
 Without binmode this happens to work but without binmode, print()
@@ -582,6 +612,11 @@ unicode characters since you have to consider such things as character
 width (i.e. double-width for ideographs) and directions (i.e. BIDI for
 Arabic and Hebrew).
 
+=item Thread safety
+
+C<use encoding ...> is not thread-safe (i.e., do not use in threaded
+applications).
+
 =back
 
 =head2 The Logic of :locale
@@ -599,7 +634,7 @@ returned is used as the default encoding for the open pragma.
 
 If 1. didn't work but we are under the locale pragma, the environment
 variables LC_ALL and LANG (in that order) are matched for encodings
-(the part after C<.>, if any), and if any found, that is used 
+(the part after C<.>, if any), and if any found, that is used
 as the default encoding for the open pragma.
 
 =item 3.
@@ -618,7 +653,7 @@ B<any subsequent file open>, is UTF-8.
 
 =head1 HISTORY
 
-This pragma first appeared in Perl 5.8.0.  For features that require 
+This pragma first appeared in Perl 5.8.0.  For features that require
 5.8.1 and better, see above.
 
 The C<:locale> subpragma was implemented in 2.01, or Perl 5.8.6.