#
-# $Id: Encode.pm,v 1.74 2002/05/28 18:33:54 dankogai Exp dankogai $
+# $Id: Encode.pm,v 1.91 2003/03/09 20:07:20 dankogai Exp $
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.74 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.91 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
sub encode($$;$)
{
my ($name, $string, $check) = @_;
+ return undef unless defined $string;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
sub decode($$;$)
{
my ($name,$octets,$check) = @_;
+ return undef unless defined $octets;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
sub from_to($$$;$)
{
my ($string,$from,$to,$check) = @_;
+ return undef unless defined $string;
$check ||=0;
my $f = find_encoding($from);
unless (defined $f){
return $str;
}
-predefine_encodings();
+predefine_encodings(1);
#
# This is to restore %Encoding if really needed;
sub predefine_encodings{
use Encode::Encoding;
+ no warnings 'redefine';
+ my $use_xs = shift;
if ($ON_EBCDIC) {
# was in Encode::UTF_EBCDIC
package Encode::UTF_EBCDIC;
# was in Encode::utf8
package Encode::utf8;
push @Encode::utf8::ISA, 'Encode::Encoding';
- *decode = sub{
- my ($obj,$octets,$chk) = @_;
- my $str = Encode::decode_utf8($octets);
- if (defined $str) {
+ #
+ if ($use_xs){
+ $DEBUG and warn __PACKAGE__, " XS on";
+ *decode = \&decode_xs;
+ *encode = \&encode_xs;
+ }else{
+ $DEBUG and warn __PACKAGE__, " XS off";
+ *decode = sub{
+ my ($obj,$octets,$chk) = @_;
+ my $str = Encode::decode_utf8($octets);
+ if (defined $str) {
+ $_[1] = '' if $chk;
+ return $str;
+ }
+ return undef;
+ };
+ *encode = sub {
+ my ($obj,$string,$chk) = @_;
+ my $octets = Encode::encode_utf8($string);
$_[1] = '' if $chk;
- return $str;
+ return $octets;
+ };
+ }
+ *cat_decode = sub{ # ($obj, $dst, $src, $pos, $trm, $chk)
+ my ($obj, undef, undef, $pos, $trm) = @_; # currently ignores $chk
+ my ($rdst, $rsrc, $rpos) = \@_[1,2,3];
+ use bytes;
+ if ((my $npos = index($$rsrc, $trm, $pos)) >= 0) {
+ $$rdst .= substr($$rsrc, $pos, $npos - $pos + length($trm));
+ $$rpos = $npos + length($trm);
+ return 1;
}
- return undef;
- };
- *encode = sub {
- my ($obj,$string,$chk) = @_;
- my $octets = Encode::encode_utf8($string);
- $_[1] = '' if $chk;
- return $octets;
+ $$rdst .= substr($$rsrc, $pos);
+ $$rpos = length($$rsrc);
+ return '';
};
$Encode::Encoding{utf8} =
bless {Name => "utf8"} => "Encode::utf8";
=head1 Handling Malformed Data
-=over 2
-
The I<CHECK> argument is used as follows. When you omit it,
the behaviour is the same as if you had passed a value of 0 for
I<CHECK>.
+=over 2
+
=item I<CHECK> = Encode::FB_DEFAULT ( == 0)
If I<CHECK> is 0, (en|de)code will put a I<substitution character>
while(defined(read $fh, $buffer, 256)){
# buffer may end in a partial character so we append
$data .= $buffer;
- $utf8 .= decode($encoding, $data, ENCODE::FB_QUIET);
+ $utf8 .= decode($encoding, $data, Encode::FB_QUIET);
# $data now contains the unprocessed partial character
}
HTMLCREF 0x0200
XMLCREF 0x0400
+=back
+
=head2 Unimplemented fallback schemes
In the future, you will be able to use a code reference to a callback
When you encode, the resulting utf8 flag is always off.
-=item
+=item *
When you decode, the resulting utf8 flag is on unless you can
unambiguously represent data. Here is the definition of