#
-# $Id: Encode.pm,v 1.84 2003/01/10 12:00:16 dankogai Exp dankogai $
+# $Id: Encode.pm,v 2.0 2004/05/16 20:55:15 dankogai Exp $
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.84 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-our $DEBUG = 0;
+our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+sub DEBUG () { 0 }
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
our @EXPORT = qw(
decode decode_utf8 encode encode_utf8
- encodings find_encoding
+ encodings find_encoding clone_encoding
);
our @FB_FLAGS = qw(DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
}else{
%enc = %Encoding;
for my $mod (map {m/::/o ? $_ : "Encode::$_" } @_){
- $DEBUG and warn $mod;
+ DEBUG and warn $mod;
for my $enc (keys %ExtModule){
$ExtModule{$enc} eq $mod and $enc{$enc} = $mod;
}
{
my ($class, $name, $skip_external) = @_;
- ref($name) && $name->can('new_sequence') and return $name;
+ ref($name) && $name->can('renew') and return $name;
exists $Encoding{$name} and return $Encoding{$name};
my $lc = lc $name;
exists $Encoding{$lc} and return $Encoding{$lc};
return;
}
-sub find_encoding
+sub find_encoding($;$)
{
my ($name, $skip_external) = @_;
return __PACKAGE__->getEncoding($name,$skip_external);
}
-sub resolve_alias {
+sub resolve_alias($){
my $obj = find_encoding(shift);
defined $obj and return $obj->name;
return;
}
+sub clone_encoding($){
+ my $obj = find_encoding(shift);
+ ref $obj or return;
+ eval { require Storable };
+ $@ and return;
+ return Storable::dclone($obj);
+}
+
sub encode($$;$)
{
my ($name, $string, $check) = @_;
Carp::croak("Unknown encoding '$name'");
}
my $octets = $enc->encode($string,$check);
- return undef if ($check && length($string));
+ $_[1] = $string if $check;
return $octets;
}
return $str;
}
-sub decode_utf8($)
+sub decode_utf8($;$)
{
- my ($str) = @_;
- return undef unless utf8::decode($str);
- return $str;
+ my ($str, $check) = @_;
+ if ($check){
+ return decode("utf8", $str, $check);
+ }else{
+ return undef unless utf8::decode($str);
+ return $str;
+ }
}
predefine_encodings(1);
push @Encode::utf8::ISA, 'Encode::Encoding';
#
if ($use_xs){
- $DEBUG and warn __PACKAGE__, " XS on";
+ Encode::DEBUG and warn __PACKAGE__, " XS on";
*decode = \&decode_xs;
*encode = \&encode_xs;
}else{
- $DEBUG and warn __PACKAGE__, " XS off";
+ Encode::DEBUG and warn __PACKAGE__, " XS off";
*decode = sub{
my ($obj,$octets,$chk) = @_;
my $str = Encode::decode_utf8($octets);
return $octets;
};
}
+ *cat_decode = sub{ # ($obj, $dst, $src, $pos, $trm, $chk)
+ my ($obj, undef, undef, $pos, $trm) = @_; # currently ignores $chk
+ my ($rdst, $rsrc, $rpos) = \@_[1,2,3];
+ use bytes;
+ if ((my $npos = index($$rsrc, $trm, $pos)) >= 0) {
+ $$rdst .= substr($$rsrc, $pos, $npos - $pos + length($trm));
+ $$rpos = $npos + length($trm);
+ return 1;
+ }
+ $$rdst .= substr($$rsrc, $pos);
+ $$rpos = length($$rsrc);
+ return '';
+ };
$Encode::Encoding{utf8} =
bless {Name => "utf8"} => "Encode::utf8";
}
=head1 Handling Malformed Data
-=over 2
-
The I<CHECK> argument is used as follows. When you omit it,
the behaviour is the same as if you had passed a value of 0 for
I<CHECK>.
+=over 2
+
=item I<CHECK> = Encode::FB_DEFAULT ( == 0)
If I<CHECK> is 0, (en|de)code will put a I<substitution character>
HTMLCREF 0x0200
XMLCREF 0x0400
+=back
+
=head2 Unimplemented fallback schemes
In the future, you will be able to use a code reference to a callback
When you encode, the resulting utf8 flag is always off.
-=item
+=item *
When you decode, the resulting utf8 flag is on unless you can
unambiguously represent data. Here is the definition of
If CHECK is true, also checks the data in STRING for being well-formed
UTF-8. Returns true if successful, false otherwise.
+As of perl 5.8.1, L<utf8> also has utf8::is_utf8().
+
=item _utf8_on(STRING)
[INTERNAL] Turns on the UTF-8 flag in STRING. The data in STRING is