-# $Id: encoding.pm,v 1.45 2003/06/18 09:29:02 dankogai Exp $
+# $Id: encoding.pm,v 2.0 2004/05/16 20:55:16 dankogai Exp $
package encoding;
-our $VERSION = do { my @r = (q$Revision: 1.45 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode;
use strict;
-our $DEBUG = 0;
+sub DEBUG () { 0 }
BEGIN {
if (ord("A") == 193) {
}
$name = $enc->name; # canonize
unless ($arg{Filter}) {
- $DEBUG and warn "_exception($name) = ", _exception($name);
+ DEBUG and warn "_exception($name) = ", _exception($name);
_exception($name) or ${^ENCODING} = $enc;
$HAS_PERLIO or return 1;
}else{
filter_add(sub{
my $status = filter_read();
if ($status > 0){
- # $DEBUG and warn $_;
$_ = $enc->decode($_, 1);
- $DEBUG and warn $_;
+ DEBUG and warn $_;
}
$status ;
});
};
- } $DEBUG and warn "Filter installed";
+ } DEBUG and warn "Filter installed";
defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
for my $h (qw(STDIN STDOUT)){
if ($arg{$h}){
You can override this by giving extra arguments; see below.
+=head2 Implicit upgrading for byte strings
+
+By default, if strings operating under byte semantics and strings
+with Unicode character data are concatenated, the new string will
+be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>.
+
+The B<encoding> pragma changes this to use the specified encoding
+instead. For example:
+
+ use encoding 'utf8';
+ my $string = chr(20000); # a Unicode string
+ utf8::encode($string); # now it's a UTF-8 encoded byte string
+ # concatenate with another Unicode string
+ print length($string . chr(20000));
+
+This will print C<2>, because C<$string> is upgraded as UTF-8. Without
+C<use encoding 'utf8';>, it will print C<4> instead, since C<$string>
+is three characters long when its octets are interpreted as Latin-1.
+
=head1 FEATURES THAT REQUIRE 5.8.1
Some of the features offered by this pragma requires perl 5.8.1. Most
=head3 workaround to tr///;
-In perl 5.8.0, you can work aroud as follows;
+In perl 5.8.0, you can work around this as follows:
use encoding 'euc-jp';
# ....
eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ };
-Note the C<tr//> expression is surronded by C<qq{}>. The idea behind
+Note the C<tr///> expression is surrounded by C<qq{}>. The idea behind this
is the same as classic idiom that makes C<tr///> 'interpolate'.
tr/$from/$to/; # wrong!