threads::shared 1.24 (phase 2)
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / Encoder.pm
CommitLineData
c731e18e 1#
d1256cb1 2# $Id: Encoder.pm,v 2.1 2006/05/03 18:24:10 dankogai Exp $
c731e18e 3#
fdd579e2 4package Encode::Encoder;
c731e18e 5use strict;
fdd579e2 6use warnings;
d1256cb1 7our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
c731e18e 8
9require Exporter;
d1256cb1 10our @ISA = qw(Exporter);
448e90bb 11our @EXPORT_OK = qw ( encoder );
c731e18e 12
13our $AUTOLOAD;
# Compile-time debug switch.  When true, DESTROY and AUTOLOAD emit warn()
# diagnostics; it is a constant so the guarded statements can be folded away.
use constant DEBUG => 0;
c731e18e 15use Encode qw(encode decode find_encoding from_to);
16use Carp;
17
# Constructor: wraps $data in a new encoder object.
#   $class   - class to bless the new object into
#   $data    - the data to hold
#   $encname - optional encoding name; it is resolved with find_encoding()
#              and replaced by the name the encoding object reports.
#              Croaks when the name cannot be resolved.
# When $encname is false, the encoding becomes 'utf8' if
# Encode::is_utf8($data) is true, and '' otherwise.
# Returns the blessed hash reference { data => ..., encoding => ... }.
sub new {
    my ( $class, $data, $encname ) = @_;
    if ($encname) {
        my $enc = find_encoding($encname);
        croak __PACKAGE__, ": unknown encoding: $encname" unless $enc;
        $encname = $enc->name;
    }
    else {
        $encname = Encode::is_utf8($data) ? 'utf8' : '';
    }
    return bless { data => $data, encoding => $encname }, $class;
}
34
# Functional shortcut: passes its arguments straight to new(), invoked on
# this package.
sub encoder {
    return __PACKAGE__->new(@_);
}
c731e18e 36
# Accessor for the instance data.
#   $e->data          returns the current data
#   $e->data($data)   stores $data (when defined) and returns $data itself
sub data {
    my ( $self, $data ) = @_;
    return $self->{data} unless defined $data;
    $self->{data} = $data;
    return $data;
}
47
# Accessor for the instance encoding.
#   $e->encoding            returns the stored encoding name
#   $e->encoding($encname)  resolves $encname with find_encoding(), stores
#                           the name the encoding object reports, and
#                           returns the object itself.
# Confesses when $encname cannot be resolved.
sub encoding {
    my ( $self, $encname ) = @_;
    return $self->{encoding} unless $encname;
    my $enc = find_encoding($encname);
    confess __PACKAGE__, ": unknown encoding: $encname" unless $enc;
    $self->{encoding} = $enc->name;
    return $self;
}
60
# Decodes the instance data.
#   $encname - encoding to decode from; when false, the instance encoding
#              is used instead.
# The encoding is resolved with find_encoding() (confesses on failure), the
# data is replaced by the result of its decode() called with a check value
# of 1, and the instance encoding is reset to ''.
# Returns the object itself.
sub bytes {
    my ( $self, $encname ) = @_;
    $encname = $self->{encoding} unless $encname;
    my $enc = find_encoding($encname);
    confess __PACKAGE__, ": unknown encoding: $encname" unless $enc;
    $self->{data}     = $enc->decode( $self->{data}, 1 );
    $self->{encoding} = '';
    return $self;
}
70
# Explicit destructor, defined so that object destruction is not routed
# through AUTOLOAD.  Warns with the object only when DEBUG is true.
sub DESTROY {
    warn $_[0] if DEBUG;
}
74
# Catch-all for methods that are not defined: the method name is treated as
# an encoding name.
#   - Confesses when invoked on something that is not a reference, or when
#     find_encoding() cannot resolve the method name.
#   - If the instance already has an encoding, the data is converted in
#     place with from_to() from that encoding to the requested one.
#   - Otherwise the data is replaced by the target encoding's encode().
# Both conversions are called with a check value of 1.
# Afterwards the instance encoding is the target encoding's name.
# Returns the object itself, so calls can be chained.
sub AUTOLOAD {
    my $self = shift;
    ref($self) or confess "$self is not an object";

    ( my $method = $AUTOLOAD ) =~ s/.*://;    # strip fully-qualified portion
    my $target = find_encoding($method);
    confess __PACKAGE__, ": unknown encoding: $method" unless $target;

    my $to = $target->name;
    DEBUG and warn $self->{encoding}, " => ", $to;

    if ( !$self->{encoding} ) {
        $self->{data} = $target->encode( $self->{data}, 1 );
    }
    else {
        # from_to() rewrites its first argument in place.
        from_to( $self->{data}, $self->{encoding}, $to, 1 );
    }

    $self->{encoding} = $to;
    return $self;
}
93
# Operator overloading for encoder objects:
#   ""  (stringify) yields the instance data unchanged
#   0+  (numify)    yields bytes::length() of the instance data
# fallback => 1 is passed to the overload pragma for all other operators.
use overload
    '""' => sub {
        my ($self) = @_;
        return $self->{data};
    },
    '0+' => sub {
        my ($self) = @_;
        use bytes ();    # load bytes.pm without importing byte semantics
        return bytes::length( $self->{data} );
    },
    fallback => 1;
c731e18e 99
1001;
101__END__
102
103=head1 NAME
104
105Encode::Encoder -- Object Oriented Encoder
106
107=head1 SYNOPSIS
151b5d36 108
c731e18e 109 use Encode::Encoder;
110 # Encode::encode("ISO-8859-1", $data);
77ea6967 111 Encode::Encoder->new($data)->iso_8859_1; # OOP way
c731e18e 112 # shortcut
448e90bb 113 use Encode::Encoder qw(encoder);
c731e18e 114 encoder($data)->iso_8859_1;
115 # you can stack them!
fdd579e2 116 encoder($data)->iso_8859_1->base64; # provided base64() is defined
117 # you can use it as a decoder as well
118 encoder($base64)->bytes('base64')->latin1;
c731e18e 119 # stringified
fdd579e2 120 print encoder($data)->utf8->latin1; # prints the string in latin1
c731e18e 121 # numified
fdd579e2 122 encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
c731e18e 123
124=head1 ABSTRACT
125
0ab8f81e 126B<Encode::Encoder> allows you to use Encode in an object-oriented
127style. This is not only more intuitive than a functional approach,
128but also handier when you want to stack encodings. Suppose you want
129your UTF-8 string converted to Latin1 then Base64: you can simply say
c731e18e 130
131 my $base64 = encoder($utf8)->latin1->base64;
132
133instead of
134
135 my $latin1 = encode("latin1", $utf8);
fdd579e2 136 my $base64 = encode_base64($latin1);
c731e18e 137
0ab8f81e 138or the lazier and more convoluted
c731e18e 139
140 my $base64 = encode_base64(encode("latin1", $utf8));
141
142=head1 DESCRIPTION
143
144Here is how to use this module.
145
146=over 4
147
148=item *
149
0ab8f81e 150There are at least two instance variables stored in a hash reference,
c731e18e 151{data} and {encoding}.
152
153=item *
154
0ab8f81e 155When you call a method that is not predefined, the method name is taken as
156the name of an encoding and the instance I<data> is encoded with it. If
157successful, the instance I<encoding> is set accordingly.
c731e18e 158
fdd579e2 159=item *
160
161You can retrieve the result via -E<gt>data but usually you don't have to
162because the stringify operator ("") is overridden to do exactly that.
163
164=back
165
166=head2 Predefined Methods
167
0ab8f81e 168This module predefines the methods below:
fdd579e2 169
170=over 4
171
172=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
173
448e90bb 174returns an encoder object. Its data is initialized with $data if
0ab8f81e 175present, and its encoding is set to $encoding if present.
fdd579e2 176
177When $encoding is omitted, it defaults to utf8 if $data is already in
178utf8 or "" (empty string) otherwise.
179
180=item encoder()
181
448e90bb 182is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand.
fdd579e2 183
184=item $e-E<gt>data([$data])
185
0ab8f81e 186When $data is present (defined), sets the instance data to $data and
187returns $data. Otherwise, the current instance data is returned.
fdd579e2 188
189=item $e-E<gt>encoding([$encoding])
190
0ab8f81e 191When $encoding is present, sets the instance encoding to $encoding and
192returns the object itself. Otherwise, the current instance encoding is
fdd579e2 193returned.
194
195=item $e-E<gt>bytes([$encoding])
196
0ab8f81e 197decodes instance data from $encoding, or the instance encoding if
198omitted. If the conversion is successful, the instance encoding
199will be set to "".
fdd579e2 200
201The name I<bytes> was deliberately picked to avoid namespace tainting
202-- this module may be used as a base class so method names that appear
203in Encode::Encoding are avoided.
204
205=back
206
207=head2 Example: base64 transcoder
c731e18e 208
0ab8f81e 209This module is designed to work with L<Encode::Encoding>.
210To make the Base64 transcoder example above really work, you could
211write a module like this:
c731e18e 212
213 package Encode::Base64;
214 use base 'Encode::Encoding';
fdd579e2 215 __PACKAGE__->Define('base64');
c731e18e 216 use MIME::Base64;
217 sub encode{
fdd579e2 218 my ($obj, $data) = @_;
219 return encode_base64($data);
c731e18e 220 }
221 sub decode{
fdd579e2 222 my ($obj, $data) = @_;
223 return decode_base64($data);
c731e18e 224 }
225 1;
226 __END__
227
0ab8f81e 228And your caller module would be something like this:
c731e18e 229
230 use Encode::Encoder;
231 use Encode::Base64;
fdd579e2 232
233 # now you can really do the following
234
235 encoder($data)->iso_8859_1->base64;
236 encoder($base64)->bytes('base64')->latin1;
c731e18e 237
0ab8f81e 238=head2 Operator Overloading
c731e18e 239
240This module overloads two operators, stringify ("") and numify (0+).
241
0ab8f81e 242Stringify dumps the data inside the object.
c731e18e 243
0ab8f81e 244Numify returns the number of bytes in the instance data.
c731e18e 245
246They come in handy when you want to print or find the size of data.
247
c731e18e 248=head1 SEE ALSO
249
0ab8f81e 250L<Encode>,
c731e18e 251L<Encode::Encoding>
252
253=cut