Commit | Line | Data |
c731e18e |
1 | # |
d1256cb1 |
2 | # $Id: Encoder.pm,v 2.1 2006/05/03 18:24:10 dankogai Exp $ |
c731e18e |
3 | # |
fdd579e2 |
4 | package Encode::Encoder; |
c731e18e |
5 | use strict; |
fdd579e2 |
6 | use warnings; |
d1256cb1 |
7 | our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; |
c731e18e |
8 | |
9 | require Exporter; |
d1256cb1 |
10 | our @ISA = qw(Exporter); |
448e90bb |
11 | our @EXPORT_OK = qw ( encoder ); |
c731e18e |
12 | |
13 | our $AUTOLOAD; |
8f139f4c |
14 | sub DEBUG () { 0 } |
c731e18e |
15 | use Encode qw(encode decode find_encoding from_to); |
16 | use Carp; |
17 | |
d1256cb1 |
18 | sub new { |
19 | my ( $class, $data, $encname ) = @_; |
20 | unless ($encname) { |
21 | $encname = Encode::is_utf8($data) ? 'utf8' : ''; |
22 | } |
23 | else { |
24 | my $obj = find_encoding($encname) |
25 | or croak __PACKAGE__, ": unknown encoding: $encname"; |
26 | $encname = $obj->name; |
fdd579e2 |
27 | } |
c731e18e |
28 | my $self = { |
d1256cb1 |
29 | data => $data, |
30 | encoding => $encname, |
31 | }; |
c731e18e |
32 | bless $self => $class; |
33 | } |
34 | |
d1256cb1 |
35 | sub encoder { __PACKAGE__->new(@_) } |
c731e18e |
36 | |
d1256cb1 |
37 | sub data { |
38 | my ( $self, $data ) = @_; |
39 | if ( defined $data ) { |
40 | $self->{data} = $data; |
41 | return $data; |
42 | } |
43 | else { |
44 | return $self->{data}; |
fdd579e2 |
45 | } |
c731e18e |
46 | } |
47 | |
d1256cb1 |
48 | sub encoding { |
49 | my ( $self, $encname ) = @_; |
50 | if ($encname) { |
51 | my $obj = find_encoding($encname) |
52 | or confess __PACKAGE__, ": unknown encoding: $encname"; |
53 | $self->{encoding} = $obj->name; |
54 | return $self; |
55 | } |
56 | else { |
57 | return $self->{encoding}; |
c731e18e |
58 | } |
fdd579e2 |
59 | } |
60 | |
61 | sub bytes { |
d1256cb1 |
62 | my ( $self, $encname ) = @_; |
fdd579e2 |
63 | $encname ||= $self->{encoding}; |
d1256cb1 |
64 | my $obj = find_encoding($encname) |
65 | or confess __PACKAGE__, ": unknown encoding: $encname"; |
66 | $self->{data} = $obj->decode( $self->{data}, 1 ); |
67 | $self->{encoding} = ''; |
fdd579e2 |
68 | return $self; |
69 | } |
70 | |
d1256cb1 |
71 | sub DESTROY { # defined so it won't autoload. |
8f139f4c |
72 | DEBUG and warn shift; |
c731e18e |
73 | } |
74 | |
75 | sub AUTOLOAD { |
76 | my $self = shift; |
77 | my $type = ref($self) |
d1256cb1 |
78 | or confess "$self is not an object"; |
c731e18e |
79 | my $myname = $AUTOLOAD; |
d1256cb1 |
80 | $myname =~ s/.*://; # strip fully-qualified portion |
81 | my $obj = find_encoding($myname) |
82 | or confess __PACKAGE__, ": unknown encoding: $myname"; |
8f139f4c |
83 | DEBUG and warn $self->{encoding}, " => ", $obj->name; |
d1256cb1 |
84 | if ( $self->{encoding} ) { |
85 | from_to( $self->{data}, $self->{encoding}, $obj->name, 1 ); |
86 | } |
87 | else { |
88 | $self->{data} = $obj->encode( $self->{data}, 1 ); |
fdd579e2 |
89 | } |
c731e18e |
90 | $self->{encoding} = $obj->name; |
91 | return $self; |
92 | } |
93 | |
d1256cb1 |
94 | use overload |
95 | q("") => sub { $_[0]->{data} }, |
96 | q(0+) => sub { use bytes(); bytes::length( $_[0]->{data} ) }, |
97 | fallback => 1, |
98 | ; |
c731e18e |
99 | |
100 | 1; |
101 | __END__ |
102 | |
103 | =head1 NAME |
104 | |
105 | Encode::Encoder -- Object Oriented Encoder |
106 | |
107 | =head1 SYNOPSIS |
151b5d36 |
108 | |
c731e18e |
109 | use Encode::Encoder; |
110 | # Encode::encode("ISO-8859-1", $data); |
77ea6967 |
111 | Encode::Encoder->new($data)->iso_8859_1; # OOP way |
c731e18e |
112 | # shortcut |
448e90bb |
113 | use Encode::Encoder qw(encoder); |
c731e18e |
114 | encoder($data)->iso_8859_1; |
115 | # you can stack them! |
fdd579e2 |
116 | encoder($data)->iso_8859_1->base64; # provided base64() is defined |
117 | # you can use it as a decoder as well |
118 | encoder($base64)->bytes('base64')->latin1; |
c731e18e |
119 | # stringified |
fdd579e2 |
120 | print encoder($data)->utf8->latin1; # prints the string in latin1 |
c731e18e |
121 | # numified |
fdd579e2 |
122 | encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data) |
c731e18e |
123 | |
124 | =head1 ABSTRACT |
125 | |
0ab8f81e |
126 | B<Encode::Encoder> allows you to use Encode in an object-oriented |
127 | style. This is not only more intuitive than a functional approach, |
128 | but also handier when you want to stack encodings. Suppose you want |
129 | your UTF-8 string converted to Latin1 then Base64: you can simply say |
c731e18e |
130 | |
131 | my $base64 = encoder($utf8)->latin1->base64; |
132 | |
133 | instead of |
134 | |
135 | my $latin1 = encode("latin1", $utf8); |
fdd579e2 |
136 | my $base64 = encode_base64($utf8); |
c731e18e |
137 | |
0ab8f81e |
138 | or the lazier and more convoluted |
c731e18e |
139 | |
140 | my $base64 = encode_base64(encode("latin1", $utf8)); |
141 | |
142 | =head1 Description |
143 | |
144 | Here is how to use this module. |
145 | |
146 | =over 4 |
147 | |
148 | =item * |
149 | |
0ab8f81e |
150 | There are at least two instance variables stored in a hash reference, |
c731e18e |
151 | {data} and {encoding}. |
152 | |
153 | =item * |
154 | |
0ab8f81e |
155 | When there is no method, it takes the method name as the name of the |
156 | encoding and encodes the instance I<data> with I<encoding>. If successful, |
157 | the instance I<encoding> is set accordingly. |
c731e18e |
158 | |
fdd579e2 |
159 | =item * |
160 | |
161 | You can retrieve the result via -E<gt>data but usually you don't have to |
162 | because the stringify operator ("") is overridden to do exactly that. |
163 | |
164 | =back |
165 | |
166 | =head2 Predefined Methods |
167 | |
0ab8f81e |
168 | This module predefines the methods below: |
fdd579e2 |
169 | |
170 | =over 4 |
171 | |
172 | =item $e = Encode::Encoder-E<gt>new([$data, $encoding]); |
173 | |
448e90bb |
174 | returns an encoder object. Its data is initialized with $data if |
0ab8f81e |
175 | present, and its encoding is set to $encoding if present. |
fdd579e2 |
176 | |
177 | When $encoding is omitted, it defaults to utf8 if $data is already in |
178 | utf8 or "" (empty string) otherwise. |
179 | |
180 | =item encoder() |
181 | |
448e90bb |
182 | is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand. |
fdd579e2 |
183 | |
184 | =item $e-E<gt>data([$data]) |
185 | |
0ab8f81e |
186 | When $data is present, sets the instance data to $data and returns the |
187 | object itself. Otherwise, the current instance data is returned. |
fdd579e2 |
188 | |
189 | =item $e-E<gt>encoding([$encoding]) |
190 | |
0ab8f81e |
191 | When $encoding is present, sets the instance encoding to $encoding and |
192 | returns the object itself. Otherwise, the current instance encoding is |
fdd579e2 |
193 | returned. |
194 | |
195 | =item $e-E<gt>bytes([$encoding]) |
196 | |
0ab8f81e |
197 | decodes instance data from $encoding, or the instance encoding if |
198 | omitted. If the conversion is successful, the instance encoding |
199 | will be set to "". |
fdd579e2 |
200 | |
201 | The name I<bytes> was deliberately picked to avoid namespace tainting |
202 | -- this module may be used as a base class so method names that appear |
203 | in Encode::Encoding are avoided. |
204 | |
205 | =back |
206 | |
207 | =head2 Example: base64 transcoder |
c731e18e |
208 | |
0ab8f81e |
209 | This module is designed to work with L<Encode::Encoding>. |
210 | To make the Base64 transcoder example above really work, you could |
211 | write a module like this: |
c731e18e |
212 | |
213 | package Encode::Base64; |
214 | use base 'Encode::Encoding'; |
fdd579e2 |
215 | __PACKAGE__->Define('base64'); |
c731e18e |
216 | use MIME::Base64; |
217 | sub encode{ |
fdd579e2 |
218 | my ($obj, $data) = @_; |
219 | return encode_base64($data); |
c731e18e |
220 | } |
221 | sub decode{ |
fdd579e2 |
222 | my ($obj, $data) = @_; |
223 | return decode_base64($data); |
c731e18e |
224 | } |
225 | 1; |
226 | __END__ |
227 | |
0ab8f81e |
228 | And your caller module would be something like this: |
c731e18e |
229 | |
230 | use Encode::Encoder; |
231 | use Encode::Base64; |
fdd579e2 |
232 | |
233 | # now you can really do the following |
234 | |
235 | encoder($data)->iso_8859_1->base64; |
236 | encoder($base64)->bytes('base64')->latin1; |
c731e18e |
237 | |
0ab8f81e |
238 | =head2 Operator Overloading |
c731e18e |
239 | |
240 | This module overloads two operators, stringify ("") and numify (0+). |
241 | |
0ab8f81e |
242 | Stringify dumps the data inside the object. |
c731e18e |
243 | |
0ab8f81e |
244 | Numify returns the number of bytes in the instance data. |
c731e18e |
245 | |
246 | They come in handy when you want to print or find the size of data. |
247 | |
c731e18e |
248 | =head1 SEE ALSO |
249 | |
0ab8f81e |
250 | L<Encode>, |
c731e18e |
251 | L<Encode::Encoding> |
252 | |
253 | =cut |