2 # $Id: Encoder.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $
4 package Encode::Encoder;
7 our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
10 our @ISA = qw(Exporter);
11 our @EXPORT_OK = qw ( encoder );
15 use Encode qw(encode decode find_encoding from_to);
19 my ($class, $data, $encname) = @_;
21 $encname = Encode::is_utf8($data) ? 'utf8' : '';
23 my $obj = find_encoding($encname)
24 or croak __PACKAGE__, ": unknown encoding: $encname";
25 $encname = $obj->name;
31 bless $self => $class;
# Functional shortcut for the constructor: forwards every argument,
# untouched, to this package's new() and hands back whatever it returns.
sub encoder {
    return __PACKAGE__->new(@_);
}
37 my ($self, $data) = @_;
39 $self->{data} = $data;
47 my ($self, $encname) = @_;
49 my $obj = find_encoding($encname)
50 or confess __PACKAGE__, ": unknown encoding: $encname";
51 $self->{encoding} = $obj->name;
54 return $self->{encoding}
59 my ($self, $encname) = @_;
60 $encname ||= $self->{encoding};
61 my $obj = find_encoding($encname)
62 or confess __PACKAGE__, ": unknown encoding: $encname";
63 $self->{data} = $obj->decode($self->{data}, 1);
64 $self->{encoding} = '' ;
68 sub DESTROY{ # defined so it won't autoload.
75 or confess "$self is not an object";
76 my $myname = $AUTOLOAD;
77 $myname =~ s/.*://; # strip fully-qualified portion
78 my $obj = find_encoding($myname)
79 or confess __PACKAGE__, ": unknown encoding: $myname";
80 DEBUG and warn $self->{encoding}, " => ", $obj->name;
81 if ($self->{encoding}){
82 from_to($self->{data}, $self->{encoding}, $obj->name, 1);
84 $self->{data} = $obj->encode($self->{data}, 1);
86 $self->{encoding} = $obj->name;
91 q("") => sub { $_[0]->{data} },
92 q(0+) => sub { use bytes (); bytes::length($_[0]->{data}) },
101 Encode::Encoder -- Object Oriented Encoder
106 # Encode::encode("ISO-8859-1", $data);
107 Encode::Encoder->new($data)->iso_8859_1; # OOP way
109 use Encode::Encoder qw(encoder);
110 encoder($data)->iso_8859_1;
111 # you can stack them!
112 encoder($data)->iso_8859_1->base64; # provided base64() is defined
113 # you can use it as a decoder as well
114 encoder($base64)->bytes('base64')->latin1;
116 print encoder($data)->utf8->latin1; # prints the string in latin1
118 encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
122 B<Encode::Encoder> allows you to use Encode in an object-oriented
123 style. This is not only more intuitive than a functional approach,
124 but also handier when you want to stack encodings. Suppose you want
125 your UTF-8 string converted to Latin1 then Base64: you can simply say
127 my $base64 = encoder($utf8)->latin1->base64;
131 my $latin1 = encode("latin1", $utf8);
132 my $base64 = encode_base64($latin1);
134 or the lazier and more convoluted
136 my $base64 = encode_base64(encode("latin1", $utf8));
140 Here is how to use this module.
146 There are at least two instance variables stored in a hash reference,
147 {data} and {encoding}.
151 When an undefined method is called, its name is taken as the name of the
152 encoding and the instance I<data> is encoded with that I<encoding>. If successful,
153 the instance I<encoding> is set accordingly.
157 You can retrieve the result via -E<gt>data but usually you don't have to
158 because the stringify operator ("") is overridden to do exactly that.
162 =head2 Predefined Methods
164 This module predefines the methods below:
168 =item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
170 returns an encoder object. Its data is initialized with $data if
171 present, and its encoding is set to $encoding if present.
173 When $encoding is omitted, it defaults to utf8 if $data has its UTF8 flag
174 set (see Encode::is_utf8), or "" (empty string) otherwise.
178 is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand.
180 =item $e-E<gt>data([$data])
182 When $data is present, sets the instance data to $data and returns the
183 object itself. Otherwise, the current instance data is returned.
185 =item $e-E<gt>encoding([$encoding])
187 When $encoding is present, sets the instance encoding to $encoding and
188 returns the object itself. Otherwise, the current instance encoding is
191 =item $e-E<gt>bytes([$encoding])
193 decodes instance data from $encoding, or the instance encoding if
194 omitted. If the conversion is successful, the instance encoding
197 The name I<bytes> was deliberately picked to avoid namespace pollution
198 -- this module may be used as a base class so method names that appear
199 in Encode::Encoding are avoided.
203 =head2 Example: base64 transcoder
205 This module is designed to work with L<Encode::Encoding>.
206 To make the Base64 transcoder example above really work, you could
207 write a module like this:
209 package Encode::Base64;
210 use base 'Encode::Encoding';
211 __PACKAGE__->Define('base64');
214 my ($obj, $data) = @_;
215 return encode_base64($data);
218 my ($obj, $data) = @_;
219 return decode_base64($data);
224 And your caller module would be something like this:
229 # now you can really do the following
231 encoder($data)->iso_8859_1->base64;
232 encoder($base64)->bytes('base64')->latin1;
234 =head2 Operator Overloading
236 This module overloads two operators, stringify ("") and numify (0+).
238 Stringify dumps the data inside the object.
240 Numify returns the number of bytes in the instance data.
242 They come in handy when you want to print or find the size of data.