Upgrade to Encode 2.00.

[p5sagit/p5-mst-13.2.git] / ext / Encode / encoding.pm
diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm

index 3fc6e43..d1181ff 100644 (file)
--- a/ext/Encode/encoding.pm
+++ b/ext/Encode/encoding.pm
@@ -1,10 +1,10 @@
-# $Id: encoding.pm,v 1.45 2003/06/18 09:29:02 dankogai Exp $
+# $Id: encoding.pm,v 2.0 2004/05/16 20:55:16 dankogai Exp $
 package encoding;
-our $VERSION = do { my @r = (q$Revision: 1.45 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 use Encode;
 use strict;
-our $DEBUG = 0;
+sub DEBUG () { 0 }
 
 BEGIN {
     if (ord("A") == 193) {
@@ -42,7 +42,7 @@ sub import {
     }
     $name = $enc->name; # canonize
     unless ($arg{Filter}) {
-       $DEBUG and warn "_exception($name) = ", _exception($name);
+       DEBUG and warn "_exception($name) = ", _exception($name);
        _exception($name) or ${^ENCODING} = $enc;
        $HAS_PERLIO or return 1;
     }else{
@@ -56,14 +56,13 @@ sub import {
            filter_add(sub{
                           my $status = filter_read();
                            if ($status > 0){
-                              # $DEBUG and warn $_;
                               $_ = $enc->decode($_, 1);
-                              $DEBUG and warn $_;
+                              DEBUG and warn $_;
                           }
                           $status ;
                       });
        };
-    }  $DEBUG and warn "Filter installed";
+    }  DEBUG and warn "Filter installed";
     defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
     for my $h (qw(STDIN STDOUT)){
        if ($arg{$h}){
@@ -193,6 +192,25 @@ not "\x{99F1}\x{99DD} is the symbol of perl.\n".
 
 You can override this by giving extra arguments; see below.
 
+=head2 Implicit upgrading for byte strings
+
+By default, if strings operating under byte semantics and strings
+with Unicode character data are concatenated, the new string will
+be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>.
+
+The B<encoding> pragma changes this to use the specified encoding
+instead.  For example:
+
+    use encoding 'utf8';
+    my $string = chr(20000); # a Unicode string
+    utf8::encode($string);   # now it's a UTF-8 encoded byte string
+    # concatenate with another Unicode string
+    print length($string . chr(20000));
+
+This will print C<2>, because C<$string> is upgraded as UTF-8.  Without
+C<use encoding 'utf8';>, it will print C<4> instead, since C<$string>
+is three octets when interpreted as Latin-1.
+
 =head1 FEATURES THAT REQUIRE 5.8.1
 
 Some of the features offered by this pragma require perl 5.8.1.  Most
@@ -398,13 +416,13 @@ This counterintuitive behavior has been fixed in perl 5.8.1.
 
 =head3 workaround to tr///;
 
-In perl 5.8.0, you can work aroud as follows;
+In perl 5.8.0, you can work around this as follows:
 
   use encoding 'euc-jp';
   #  ....
   eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ };
 
-Note the C<tr//> expression is surronded by C<qq{}>.  The idea behind
+Note the C<tr//> expression is surrounded by C<qq{}>.  The idea behind
 it is the same as the classic idiom that makes C<tr///> 'interpolate'.
 
    tr/$from/$to/;            # wrong!