2 # $Id: Encoder.pm,v 2.1 2006/05/03 18:24:10 dankogai Exp $
4 package Encode::Encoder;
7 our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
10 our @ISA = qw(Exporter);
11 our @EXPORT_OK = qw ( encoder );
15 use Encode qw(encode decode find_encoding from_to);
19 my ( $class, $data, $encname ) = @_;
21 $encname = Encode::is_utf8($data) ? 'utf8' : '';
24 my $obj = find_encoding($encname)
25 or croak __PACKAGE__, ": unknown encoding: $encname";
26 $encname = $obj->name;
32 bless $self => $class;
35 sub encoder { __PACKAGE__->new(@_) }    # function-style shortcut: forwards all arguments to __PACKAGE__->new; always constructs via this package, even if called from a subclass
38 my ( $self, $data ) = @_;
39 if ( defined $data ) {
40 $self->{data} = $data;
49 my ( $self, $encname ) = @_;
51 my $obj = find_encoding($encname)
52 or confess __PACKAGE__, ": unknown encoding: $encname";
53 $self->{encoding} = $obj->name;
57 return $self->{encoding};
62 my ( $self, $encname ) = @_;
63 $encname ||= $self->{encoding};
64 my $obj = find_encoding($encname)
65 or confess __PACKAGE__, ": unknown encoding: $encname";
66 $self->{data} = $obj->decode( $self->{data}, 1 );
67 $self->{encoding} = '';
71 sub DESTROY { # defined so it won't autoload.
78 or confess "$self is not an object";
79 my $myname = $AUTOLOAD;
80 $myname =~ s/.*://; # strip fully-qualified portion
81 my $obj = find_encoding($myname)
82 or confess __PACKAGE__, ": unknown encoding: $myname";
83 DEBUG and warn $self->{encoding}, " => ", $obj->name;
84 if ( $self->{encoding} ) {
85 from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
88 $self->{data} = $obj->encode( $self->{data}, 1 );
90 $self->{encoding} = $obj->name;
95 q("") => sub { $_[0]->{data} },
96 q(0+) => sub { use bytes(); bytes::length( $_[0]->{data} ) },
105 Encode::Encoder -- Object Oriented Encoder
110 # Encode::encode("ISO-8859-1", $data);
111 Encode::Encoder->new($data)->iso_8859_1; # OOP way
113 use Encode::Encoder qw(encoder);
114 encoder($data)->iso_8859_1;
115 # you can stack them!
116 encoder($data)->iso_8859_1->base64; # provided base64() is defined
117 # you can use it as a decoder as well
118 encoder($base64)->bytes('base64')->latin1;
120 print encoder($data)->utf8->latin1; # prints the string in latin1
122 encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
126 B<Encode::Encoder> allows you to use Encode in an object-oriented
127 style. This is not only more intuitive than a functional approach,
128 but also handier when you want to stack encodings. Suppose you want
129 your UTF-8 string converted to Latin1 then Base64: you can simply say
131 my $base64 = encoder($utf8)->latin1->base64;
135 my $latin1 = encode("latin1", $utf8);
136 my $base64 = encode_base64($latin1);
138 or the lazier and more convoluted
140 my $base64 = encode_base64(encode("latin1", $utf8));
144 Here is how to use this module.
150 There are at least two instance variables stored in a hash reference,
151 {data} and {encoding}.
155 When there is no method, it takes the method name as the name of the
156 encoding and encodes the instance I<data> with I<encoding>. If successful,
157 the instance I<encoding> is set accordingly.
161 You can retrieve the result via -E<gt>data but usually you don't have to
162 because the stringify operator ("") is overridden to do exactly that.
166 =head2 Predefined Methods
168 This module predefines the methods below:
172 =item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
174 returns an encoder object. Its data is initialized with $data if
175 present, and its encoding is set to $encoding if present.
177 When $encoding is omitted, it defaults to utf8 if $data is already in
178 utf8 or "" (empty string) otherwise.
182 is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand.
184 =item $e-E<gt>data([$data])
186 When $data is present, sets the instance data to $data and returns the
187 object itself. Otherwise, the current instance data is returned.
189 =item $e-E<gt>encoding([$encoding])
191 When $encoding is present, sets the instance encoding to $encoding and
192 returns the object itself. Otherwise, the current instance encoding is
195 =item $e-E<gt>bytes([$encoding])
197 decodes instance data from $encoding, or the instance encoding if
198 omitted. If the conversion is successful, the instance encoding
201 The name I<bytes> was deliberately picked to avoid namespace tainting
202 -- this module may be used as a base class so method names that appear
203 in Encode::Encoding are avoided.
207 =head2 Example: base64 transcoder
209 This module is designed to work with L<Encode::Encoding>.
210 To make the Base64 transcoder example above really work, you could
211 write a module like this:
213 package Encode::Base64;
214 use base 'Encode::Encoding';
215 __PACKAGE__->Define('base64');
218 my ($obj, $data) = @_;
219 return encode_base64($data);
222 my ($obj, $data) = @_;
223 return decode_base64($data);
228 And your caller module would be something like this:
233 # now you can really do the following
235 encoder($data)->iso_8859_1->base64;
236 encoder($base64)->bytes('base64')->latin1;
238 =head2 Operator Overloading
240 This module overloads two operators, stringify ("") and numify (0+).
242 Stringify dumps the data inside the object.
244 Numify returns the number of bytes in the instance data.
246 They come in handy when you want to print or find the size of data.