# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.62 2002/04/27 11:17:39 dankogai Exp dankogai $
+# $Id: Changes,v 1.63 2002/04/27 18:59:50 dankogai Exp $
#
-$Revision: 1.62 $ $Date: 2002/04/27 11:17:39 $
+$Revision: 1.63 $ $Date: 2002/04/27 18:59:50 $
+! lib/Encode/Encoding.pm
+! Encoding.pm Unicode/Unicode.pm lib/Encode/Guess.pm lib/Encode/CN/HZ.pm
+! lib/Encode/JP/JIS7.pm lib/Encode/MIME/Header.pm lib/Encode/KR/2022_KR.pm
+ Make use of the Encode::Encoding base class!
+ And other cleanups in Encode.xs upon NI-XS suggestions
+ Message-Id: <20020427160718.1290.15@bactrian.ni-s.u-net.com>
+
+1.62 2002/04/27 11:17:39
! Encode.pm
encodings() now just check %ExtModule instead of eval{require}
all of them for ":all" to conserve more memory.
Typo fixes and improvements by jhi
Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al.
-1.11 $Date: 2002/04/27 11:17:39 $
+1.11 $Date: 2002/04/27 18:59:50 $
+ t/encoding.t
+ t/jperl.t
! MANIFEST
+#
+# $Id: Encode.pm,v 1.63 2002/04/27 18:59:50 dankogai Exp $
+#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.62 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.63 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
use XSLoader ();
-XSLoader::load 'Encode';
+XSLoader::load(__PACKAGE__, $VERSION);
require Exporter;
use base qw/Exporter/;
# Documentation moved after __END__ for speed - NI-S
-use Carp;
-
our $ON_EBCDIC = (ord("A") == 193);
use Encode::Alias;
$Encoding{$name} = $obj;
my $lc = lc($name);
define_alias($lc => $obj) unless $lc eq $name;
- while (@_)
- {
+ while (@_){
my $alias = shift;
- define_alias($alias,$obj);
+ define_alias($alias, $obj);
}
return $obj;
}
sub getEncoding
{
- my ($class,$name,$skip_external) = @_;
- my $enc;
- if (ref($name) && $name->can('new_sequence'))
- {
- return $name;
- }
+ my ($class, $name, $skip_external) = @_;
+
+ ref($name) && $name->can('new_sequence') and return $name;
+ exists $Encoding{$name} and return $Encoding{$name};
my $lc = lc $name;
- if (exists $Encoding{$name})
- {
- return $Encoding{$name};
- }
- if (exists $Encoding{$lc})
- {
- return $Encoding{$lc};
- }
+ exists $Encoding{$lc} and return $Encoding{$lc};
my $oc = $class->find_alias($name);
- return $oc if defined $oc;
-
- $oc = $class->find_alias($lc) if $lc ne $name;
- return $oc if defined $oc;
+ defined($oc) and return $oc;
+ $lc ne $name and $oc = $class->find_alias($lc);
+ defined($oc) and return $oc;
unless ($skip_external)
{
if (my $mod = $ExtModule{$name} || $ExtModule{$lc}){
$mod =~ s,::,/,g ; $mod .= '.pm';
eval{ require $mod; };
- return $Encoding{$name} if exists $Encoding{$name};
+ exists $Encoding{$name} and return $Encoding{$name};
}
}
return;
sub find_encoding
{
- my ($name,$skip_external) = @_;
+ my ($name, $skip_external) = @_;
return __PACKAGE__->getEncoding($name,$skip_external);
}
my ($name,$string,$check) = @_;
$check ||=0;
my $enc = find_encoding($name);
- croak("Unknown encoding '$name'") unless defined $enc;
+ unless(defined $enc){
+ require Carp;
+ Carp::croak("Unknown encoding '$name'");
+ }
my $octets = $enc->encode($string,$check);
return undef if ($check && length($string));
return $octets;
my ($name,$octets,$check) = @_;
$check ||=0;
my $enc = find_encoding($name);
- croak("Unknown encoding '$name'") unless defined $enc;
+ unless(defined $enc){
+ require Carp;
+ Carp::croak("Unknown encoding '$name'");
+ }
my $string = $enc->decode($octets,$check);
$_[1] = $octets if $check;
return $string;
my ($string,$from,$to,$check) = @_;
$check ||=0;
my $f = find_encoding($from);
- croak("Unknown encoding '$from'") unless defined $f;
+ unless (defined $f){
+ require Carp;
+ Carp::croak("Unknown encoding '$from'");
+ }
my $t = find_encoding($to);
- croak("Unknown encoding '$to'") unless defined $t;
+ unless (defined $t){
+ require Carp;
+ Carp::croak("Unknown encoding '$to'");
+ }
my $uni = $f->decode($string,$check);
return undef if ($check && length($string));
$string = $t->encode($uni,$check);
#
# This is to restore %Encoding if really needed;
#
+
sub predefine_encodings{
+ use Encode::Encoding;
if ($ON_EBCDIC) {
# was in Encode::UTF_EBCDIC
package Encode::UTF_EBCDIC;
- *name = sub{ shift->{'Name'} };
- *new_sequence = sub{ return $_[0] };
- *needs_lines = sub{ 0 };
- *perlio_ok = sub {
- eval{ require PerlIO::encoding };
- return $@ ? 0 : 1;
- };
+ push @Encode::UTF_EBCDIC::ISA, 'Encode::Encoding';
*decode = sub{
my ($obj,$str,$chk) = @_;
my $res = '';
$Encode::Encoding{Unicode} =
bless {Name => "UTF_EBCDIC"} => "Encode::UTF_EBCDIC";
} else {
- # was in Encode::UTF_EBCDIC
package Encode::Internal;
- *name = sub{ shift->{'Name'} };
- *new_sequence = sub{ return $_[0] };
- *needs_lines = sub{ 0 };
- *perlio_ok = sub {
- eval{ require PerlIO::encoding };
- return $@ ? 0 : 1;
- };
+ push @Encode::Internal::ISA, 'Encode::Encoding';
*decode = sub{
my ($obj,$str,$chk) = @_;
utf8::upgrade($str);
{
# was in Encode::utf8
package Encode::utf8;
- *name = sub{ shift->{'Name'} };
- *new_sequence = sub{ return $_[0] };
- *needs_lines = sub{ 0 };
- *perlio_ok = sub {
- eval{ require PerlIO::encoding };
- return $@ ? 0 : 1;
- };
+ push @Encode::utf8::ISA, 'Encode::Encoding';
*decode = sub{
my ($obj,$octets,$chk) = @_;
my $str = Encode::decode_utf8($octets);
/*
- $Id: Encode.xs,v 1.40 2002/04/27 11:17:39 dankogai Exp dankogai $
+ $Id: Encode.xs,v 1.41 2002/04/27 18:59:50 dankogai Exp $
*/
#define PERL_NO_GET_CONTEXT
#include "XSUB.h"
#define U8 U8
#include "encode.h"
+
+# define PERLIO_MODNAME "PerlIO::encoding"
# define PERLIO_FILENAME "PerlIO/encoding.pm"
/* set 1 or more to profile. t/encoding.t dumps core because of
SvCUR_set(src, sdone);
}
/* warn("check = 0x%X, code = 0x%d\n", check, code); */
- if (code && !(check & ENCODE_RETURN_ON_ERR)) {
- return &PL_sv_undef;
- }
SvCUR_set(dst, dlen+ddone);
SvPOK_only(dst);
CODE:
{
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- require_pv(PERLIO_FILENAME);
- if (hv_exists(get_hv("INC", 0),
- PERLIO_FILENAME, strlen(PERLIO_FILENAME)))
- {
- ST(0) = &PL_sv_yes;
- }else{
+ /* require_pv(PERLIO_FILENAME); */
+
+ eval_pv("require PerlIO::encoding", 0);
+
+ if (SvTRUE(get_sv("@", 0))) {
ST(0) = &PL_sv_no;
+ }else{
+ ST(0) = &PL_sv_yes;
}
XSRETURN(1);
}
use strict;
use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.35 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.36 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use XSLoader;
XSLoader::load(__PACKAGE__,$VERSION);
#
require Encode;
+
for my $name (qw(UTF-16 UTF-16BE UTF-16LE
UTF-32 UTF-32BE UTF-32LE
UCS-2BE UCS-2LE))
}
-sub name { shift->{'Name'} }
-sub new_sequence
-{
- my $self = shift;
- # Return the original if endian known
- return $self if ($self->{endian});
- # Return a clone
- return bless {%$self},ref($self);
-}
-
-sub needs_lines { 0 };
-
-sub perlio_ok {
- eval{ require PerlIO::encoding };
- if ($@){
- return 0;
- }else{
- return 1;
- }
-}
-
+use base qw(Encode::Encoding);
#
# three implementations of (en|de)code exist. The XS version is the
package encoding;
-our $VERSION = do { my @r = (q$Revision: 1.33 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.34 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode;
use strict;
BEGIN {
if (ord("A") == 193) {
require Carp;
- Carp::croak "encoding pragma does not support EBCDIC platforms";
+ Carp::croak("encoding pragma does not support EBCDIC platforms");
}
}
my $enc = find_encoding($name);
unless (defined $enc) {
require Carp;
- Carp::croak "Unknown encoding '$name'";
+ Carp::croak("Unknown encoding '$name'");
}
unless ($arg{Filter}){
${^ENCODING} = $enc; # this is all you need, actually.
if ($arg{$h}){
unless (defined find_encoding($arg{$h})) {
require Carp;
- Carp::croak "Unknown encoding for $h, '$arg{$h}'";
+ Carp::croak("Unknown encoding for $h, '$arg{$h}'");
}
eval { binmode($h, ":encoding($arg{$h})") };
}else{
package Encode::Alias;
use strict;
use Encode;
-our $VERSION = do { my @r = (q$Revision: 1.29 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
-require Exporter;
-our @ISA = qw(Exporter);
+use base qw(Exporter);
# Public, encouraged API is exported by default
use strict;
use vars qw($VERSION);
-$VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+$VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode ();
-use Encode::CN;
-use base 'Encode::Encoding';
+
+use base qw(Encode::Encoding);
+__PACKAGE__->Define('hz');
# HZ is only escaped GB, so we implement it with the
# GB2312(raw) encoding here. Cf. RFCs 1842 & 1843.
-my $canon = 'hz';
-my $obj = bless {name => $canon}, __PACKAGE__;
-$obj->Define($canon);
+
sub needs_lines { 1 }
package Encode::Encoding;
# Base class for classes which implement encodings
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.28 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.29 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
sub Define
{
Encode::define_encoding($obj, $canonical, @_);
}
-sub name { shift->{'Name'} }
+sub name { return shift->{'Name'} }
+sub new_sequence { return $_[0] }
+
+sub needs_lines { 0 };
+
+sub perlio_ok {
+ eval{ require PerlIO::encoding };
+ return $@ ? 0 : 1;
+}
# Temporary legacy methods
sub toUnicode { shift->decode(@_) }
sub fromUnicode { shift->encode(@_) }
-sub new_sequence { return $_[0] }
+#
+# Needs to be overloaded or just croak
+#
-sub perlio_ok { 0 }
+sub encode {
+ require Carp;
+ my $obj = shift;
+ my $class = ref($obj) ? ref($obj) : $obj;
+ Carp::croak $class, "->encode() not defined!";
+}
-sub needs_lines { 0 }
+sub decode{
+ require Carp;
+ my $obj = shift;
+ my $class = ref($obj) ? ref($obj) : $obj;
+ Carp::croak $class, "->encode() not defined!";
+}
sub DESTROY {}
=head1 DESCRIPTION
As mentioned in L<Encode>, encodings are (in the current
-implementation at least) defined by objects. The mapping of encoding
-name to object is via the C<%encodings> hash.
+implementation at least) defined as objects. The mapping of encoding
+name to object is via the C<%Encode::Encoding> hash. Though you can
+directly manipulate this hash, it is strongly encouraged to use this
+base class module and add encode() and decode() methods.
-The values of the hash can currently be either strings or objects.
-The string form may go away in the future. The string form occurs
-when C<encodings()> has scanned C<@INC> for loadable encodings but has
-not actually loaded the encoding in question. This is because the
-current "loading" process is all Perl and a bit slow.
+=head2 Methods you should implement
-Once an encoding is loaded, the value of the hash is the object which
-implements the encoding. The object should provide the following
-interface:
+You are strongly encouraged to implement methods below, at least
+either encode() or decode().
=over 4
-=item -E<gt>name
-
-MUST return the string representing the canonical name of the encoding.
-
-=item -E<gt>new_sequence
-
-This is a placeholder for encodings with state. It should return an
-object which implements this interface. All current implementations
-return the original object.
-
-=item -E<gt>encode($string,$check)
+=item -E<gt>encode($string [,$check])
MUST return the octet sequence representing I<$string>.
=back
-=item -E<gt>decode($octets,$check)
+=item -E<gt>decode($octets [,$check])
MUST return the string that I<$octets> represents.
=back
+=head2 Other methods defined in Encode::Encodings
+
+You do not have to override methods shown below unless you have to.
+
+=over 4
+
+=item -E<gt>name
+
+Predefined As:
+
+ sub name { return shift->{'Name'} }
+
+MUST return the string representing the canonical name of the encoding.
+
+=item -E<gt>new_sequence
+
+Predefined As:
+
+ sub new_sequence { return $_[0] }
+
+This is a placeholder for encodings with state. It should return an
+object which implements this interface. All current implementations
+return the original object.
+
=item -E<gt>perlio_ok()
-If you want your encoding to work with PerlIO, you MUST define this
-method so that it returns 1 when PerlIO is enabled. Here is an
-example;
-
- sub perlio_ok {
- eval { require PerlIO::encoding };
- if ($@){
- return 0;
- }else{
- return 1;
- }
- }
+Predefined As:
+ sub perlio_ok {
+ eval{ require PerlIO::encoding };
+ return $@ ? 0 : 1;
+ }
-By default, this method is defined as follows;
+If your encoding does not support PerlIO for some reasons, just;
sub perlio_ok { 0 }
=item -E<gt>needs_lines()
+Predefined As:
+
+ sub needs_lines { 0 };
+
If your encoding can work with PerlIO but needs line buffering, you
MUST define this method so it returns true. 7bit ISO-2022 encodings
are one example that needs this. When this method is missing, false
=back
+=head2 Example: Encode::ROT13
+
+ package Encode::ROT13;
+ use strict;
+ use base qw(Encode::Encoding);
+
+ __PACKAGE__->Define('rot13');
+
+ sub encode($$;$){
+ my ($obj, $str, $chk) = @_;
+ $str =~ tr/A-Za-z/N-ZA-Mn-za-m/;
+ $_[1] = '' if $chk; # this is what in-place edit means
+ return $str;
+ }
+
+ # Jr pna or ynml yvxr guvf;
+ *decode = \&encode;
+
+ 1;
+
+=head1 Why the heck Encode API is different?
+
It should be noted that the I<$check> behaviour is different from the
outer public API. The logic is that the "unchecked" case is useful
when the encoding is part of a stream which may be reporting errors
It is also highly desirable that encoding classes inherit from
C<Encode::Encoding> as a base class. This allows that class to define
-additional behaviour for all encoding objects. For example, built-in
-Unicode, UCS-2, and UTF-8 classes use
+additional behaviour for all encoding objects.
package Encode::MyEncoding;
use base qw(Encode::Encoding);
package Encode::Guess;
use strict;
-use Carp;
use Encode qw(:fallbacks find_encoding);
-our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
my $Canon = 'Guess';
our $DEBUG = 0;
Suspects => { %DEF_SUSPECTS },
} => __PACKAGE__;
-sub name { shift->{'Name'} }
-sub new_sequence { $_[0] }
+use base qw(Encode::Encoding);
sub needs_lines { 1 }
sub perlio_ok { 0 }
-sub DESTROY{}
our @EXPORT = qw(guess_encoding);
sub decode($$;$){
my ($obj, $octet, $chk) = @_;
my $guessed = guess($obj, $octet);
- ref($guessed) or croak $guessed;
+ unless (ref($guessed)){
+ require Carp;
+ Carp::croak($guessed);
+ }
my $utf8 = $guessed->decode($octet, $chk);
$_[1] = $octet if $chk;
return $utf8;
}
-sub encode{
- croak "Tsk, tsk, tsk. You can't be too lazy here!";
-}
-
sub guess_encoding{
guess($Encode::Encoding{$Canon}, @_);
}
#
-# $Id: H2Z.pm,v 1.1 2002/04/22 03:43:05 dankogai Exp $
+# $Id: H2Z.pm,v 1.2 2002/04/27 18:59:50 dankogai Exp $
#
package Encode::JP::H2Z;
use strict;
-our $RCSID = q$Id: H2Z.pm,v 1.1 2002/04/22 03:43:05 dankogai Exp $;
-our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-use Carp;
+our $RCSID = q$Id: H2Z.pm,v 1.2 2002/04/27 18:59:50 dankogai Exp $;
+our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode::CJKConstants qw(:all);
package Encode::JP::JIS7;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode qw(:fallbacks);
} => __PACKAGE__;
}
-sub name { shift->{'Name'} }
-
-sub new_sequence { $_[0] }
+use base qw(Encode::Encoding);
+# we override this to 1 so PerlIO works
sub needs_lines { 1 }
-sub perlio_ok {
- eval{ require PerlIO::encoding };
- if ($@){
- return 0;
- }else{
- return (PerlIO::encoding->VERSION >= 0.03);
- }
-}
-
use Encode::CJKConstants qw(:all);
our $DEBUG = 0;
package Encode::KR::2022_KR;
-use Encode qw(:fallbacks);
-use base 'Encode::Encoding';
-
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+use Encode qw(:fallbacks);
-my $canon = 'iso-2022-kr';
-my $obj = bless {name => $canon}, __PACKAGE__;
-$obj->Define($canon);
-
-sub name { return $_[0]->{name}; }
+use base qw(Encode::Encoding);
+__PACKAGE__->Define('iso-2022-kr');
-sub needs_lines { 1 }
+sub needs_lines { 1 }
sub perlio_ok {
return 0; # for the time being
package Encode::MIME::Header;
use strict;
# use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode qw(find_encoding encode_utf8);
use MIME::Base64;
Name => 'MIME-Q',
} => __PACKAGE__;
-sub name { shift->{'Name'} }
-sub new_sequence { $_[0] }
+use base qw(Encode::Encoding);
+
sub needs_lines { 1 }
sub perlio_ok{ 0 };
is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
eval{ encode('Guess', $utf8on) };
-like($@, qr/lazy/io, "no encode()");
+like($@, qr/not defined/io, "no encode()");
my %CJKT =
(