#
-# $Id: Encoder.pm,v 0.1 2002/04/08 02:35:10 dankogai Exp $
+# $Id: Encoder.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $
#
-package Encoder;
+package Encode::Encoder;
use strict;
-our $VERSION = do { my @r = (q$Revision: 0.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+use warnings;
+our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
require Exporter;
our @ISA = qw(Exporter);
-# Public, encouraged API is exported by default
-our @EXPORT = qw (
- encoder
-);
+our @EXPORT_OK = qw ( encoder );
our $AUTOLOAD;
-our $DEBUG = 0;
+sub DEBUG () { 0 }
use Encode qw(encode decode find_encoding from_to);
use Carp;
sub new{
my ($class, $data, $encname) = @_;
- $encname ||= 'utf8';
- my $obj = find_encoding($encname)
- or croak __PACKAGE__, ": unknown encoding: $encname";
+ unless($encname){
+ $encname = Encode::is_utf8($data) ? 'utf8' : '';
+ }else{
+ my $obj = find_encoding($encname)
+ or croak __PACKAGE__, ": unknown encoding: $encname";
+ $encname = $obj->name;
+ }
my $self = {
data => $data,
- encoding => $obj->name,
+ encoding => $encname,
};
bless $self => $class;
}
-sub encoder{ shift->new(@_) }
+sub encoder{ __PACKAGE__->new(@_) }
sub data{
- my ($self, $data) = shift;
- defined $data and $self->{data} = $data;
- return $self;
+ my ($self, $data) = @_;
+ if (defined $data){
+ $self->{data} = $data;
+ return $data;
+ }else{
+ return $self->{data};
+ }
}
sub encoding{
my $obj = find_encoding($encname)
or confess __PACKAGE__, ": unknown encoding: $encname";
$self->{encoding} = $obj->name;
+ return $self;
+ }else{
+ return $self->{encoding}
}
- $self;
+}
+
+sub bytes {
+ my ($self, $encname) = @_;
+ $encname ||= $self->{encoding};
+ my $obj = find_encoding($encname)
+ or confess __PACKAGE__, ": unknown encoding: $encname";
+ $self->{data} = $obj->decode($self->{data}, 1);
+ $self->{encoding} = '' ;
+ return $self;
+}
+
+sub DESTROY{ # defined so it won't autoload.
+ DEBUG and warn shift;
}
sub AUTOLOAD {
$myname =~ s/.*://; # strip fully-qualified portion
my $obj = find_encoding($myname)
or confess __PACKAGE__, ": unknown encoding: $myname";
- $DEBUG and warn $self->{encoding}, " => ", $obj->name;
- from_to($self->{data}, $self->{encoding}, $obj->name, 1);
+ DEBUG and warn $self->{encoding}, " => ", $obj->name;
+ if ($self->{encoding}){
+ from_to($self->{data}, $self->{encoding}, $obj->name, 1);
+ }else{
+ $self->{data} = $obj->encode($self->{data}, 1);
+ }
$self->{encoding} = $obj->name;
return $self;
}
Encode::Encoder -- Object Oriented Encoder
=head1 SYNOPSIS
-
+
use Encode::Encoder;
# Encode::encode("ISO-8859-1", $data);
- Encoder->new($data)->iso_8859_1; # OOP way
+ Encode::Encoder->new($data)->iso_8859_1; # OOP way
# shortcut
+ use Encode::Encoder qw(encoder);
encoder($data)->iso_8859_1;
# you can stack them!
- encoder($data)->iso_8859_1->base64; # provided base64() is defined
+ encoder($data)->iso_8859_1->base64; # provided base64() is defined
+ # you can use it as a decoder as well
+ encoder($base64)->bytes('base64')->latin1;
# stringified
- print encoder($utf8)->latin1 # prints the string in latin1
+ print encoder($data)->utf8->latin1; # prints the string in latin1
# numified
- encoder("\x{abcd}\x{ef}g") == 6; # true. bytes::length($data)
+ encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
=head1 ABSTRACT
-B<Encode::Encoder> allows you to use Encode via OOP style. This is
-not only more intuitive than functional approach, but also handier
-when you want to stack encodings. Suppose you want your UTF-8 string
-converted to Latin1 then Base64, you can simply say
+B<Encode::Encoder> allows you to use Encode in an object-oriented
+style. This is not only more intuitive than a functional approach,
+but also handier when you want to stack encodings. Suppose you want
+your UTF-8 string converted to Latin1 then Base64: you can simply say
my $base64 = encoder($utf8)->latin1->base64;
instead of
my $latin1 = encode("latin1", $utf8);
+ my $base64 = encode_base64($utf8);
-or lazier and convolted
+or the lazier and more convoluted
my $base64 = encode_base64(encode("latin1", $utf8));
=item *
-There are at least two instance variable stored in hash reference,
+There are at least two instance variables stored in a hash reference,
{data} and {encoding}.
=item *
-When there is no method, it takes the method name as the name of
-encoding and encode instance I<data> with I<encoding>. If successful,
-instance I<encoding> is set accordingly.
+When there is no method, it takes the method name as the name of the
+encoding and encodes the instance I<data> with I<encoding>. If successful,
+the instance I<encoding> is set accordingly.
-=item *
+=item *
-This module is desined to work with L<Encode::Encoding>.
-To make the Base64 transcorder example above really work, you should
-write a module like this.
+You can retrieve the result via -E<gt>data but usually you don't have to
+because the stringify operator ("") is overridden to do exactly that.
- package Encode::Base64;
- use base 'Encode::Encoding';
- __PACKAGE->Define('base64');
- use MIME::Base64;
- sub encode{
- my ($obj, $data) = @_;
- return encode_base64($data);
- }
- sub decode{
- my ($obj, $data) = @_;
- return decode_base64($data);
- }
- 1;
- __END__
+=back
-And your caller module should be like this;
+=head2 Predefined Methods
- use Encode::Encoder;
- use Encode::Base64;
- # and be creative.
+This module predefines the methods below:
+
+=over 4
+
+=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
-=head2 operator overloading
+returns an encoder object. Its data is initialized with $data if
+present, and its encoding is set to $encoding if present.
-This module overloads two operators, stringify ("") and numify (0+).
+When $encoding is omitted, it defaults to utf8 if $data is already in
+utf8 or "" (empty string) otherwise.
-Stringify dumps the data therein.
+=item encoder()
-Numify returns the number of bytes therein.
+is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand.
-They come in handy when you want to print or find the size of data.
+=item $e-E<gt>data([$data])
+
+When $data is present, sets the instance data to $data and returns the
+object itself. Otherwise, the current instance data is returned.
+
+=item $e-E<gt>encoding([$encoding])
+
+When $encoding is present, sets the instance encoding to $encoding and
+returns the object itself. Otherwise, the current instance encoding is
+returned.
+
+=item $e-E<gt>bytes([$encoding])
+
+decodes instance data from $encoding, or the instance encoding if
+omitted. If the conversion is successful, the instance encoding
+will be set to "".
+
+The name I<bytes> was deliberately picked to avoid namespace tainting
+-- this module may be used as a base class so method names that appear
+in Encode::Encoding are avoided.
=back
-=head2 Predefined Methods
+=head2 Example: base64 transcoder
-This module predefines the methods below;
+This module is designed to work with L<Encode::Encoding>.
+To make the Base64 transcoder example above really work, you could
+write a module like this:
-=over 4
+ package Encode::Base64;
+ use base 'Encode::Encoding';
+ __PACKAGE__->Define('base64');
+ use MIME::Base64;
+ sub encode{
+ my ($obj, $data) = @_;
+ return encode_base64($data);
+ }
+ sub decode{
+ my ($obj, $data) = @_;
+ return decode_base64($data);
+ }
+ 1;
+ __END__
-=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
+And your caller module would be something like this:
-returns the encoder object. Its data is initialized with $data if
-there, and its encoding is set to $encoding if there.
+ use Encode::Encoder;
+ use Encode::Base64;
-=item encoder()
+ # now you can really do the following
-is an alias of Encode::Encoder-E<gt>new(). This one is exported for
-convenience.
+ encoder($data)->iso_8859_1->base64;
+ encoder($base64)->bytes('base64')->latin1;
-=item $e-E<gt>data($data)
+=head2 Operator Overloading
-sets instance data to $data.
+This module overloads two operators, stringify ("") and numify (0+).
-=item $e-E<gt>encoding($encoding)
+Stringify dumps the data inside the object.
-sets instance encoding to $encoding
+Numify returns the number of bytes in the instance data.
-=back
+They come in handy when you want to print or find the size of data.
=head1 SEE ALSO
-L<Encode>
+L<Encode>,
L<Encode::Encoding>
=cut