Commit | Line | Data |
c731e18e |
1 | # |
7237418a |
2 | # $Id: Encoder.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $ |
c731e18e |
3 | # |
fdd579e2 |
4 | package Encode::Encoder; |
c731e18e |
5 | use strict; |
fdd579e2 |
6 | use warnings; |
7237418a |
7 | our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; |
c731e18e |
8 | |
9 | require Exporter; |
10 | our @ISA = qw(Exporter); |
448e90bb |
11 | our @EXPORT_OK = qw ( encoder ); |
c731e18e |
12 | |
13 | our $AUTOLOAD; |
8f139f4c |
14 | sub DEBUG () { 0 } |
c731e18e |
15 | use Encode qw(encode decode find_encoding from_to); |
16 | use Carp; |
17 | |
18 | sub new{ |
19 | my ($class, $data, $encname) = @_; |
fdd579e2 |
20 | unless($encname){ |
21 | $encname = Encode::is_utf8($data) ? 'utf8' : ''; |
22 | }else{ |
23 | my $obj = find_encoding($encname) |
24 | or croak __PACKAGE__, ": unknown encoding: $encname"; |
25 | $encname = $obj->name; |
26 | } |
c731e18e |
27 | my $self = { |
28 | data => $data, |
fdd579e2 |
29 | encoding => $encname, |
c731e18e |
30 | }; |
31 | bless $self => $class; |
32 | } |
33 | |
fdd579e2 |
34 | sub encoder{ __PACKAGE__->new(@_) } |
c731e18e |
35 | |
36 | sub data{ |
448e90bb |
37 | my ($self, $data) = @_; |
fdd579e2 |
38 | if (defined $data){ |
39 | $self->{data} = $data; |
40 | return $data; |
41 | }else{ |
42 | return $self->{data}; |
43 | } |
c731e18e |
44 | } |
45 | |
46 | sub encoding{ |
47 | my ($self, $encname) = @_; |
48 | if ($encname){ |
49 | my $obj = find_encoding($encname) |
50 | or confess __PACKAGE__, ": unknown encoding: $encname"; |
51 | $self->{encoding} = $obj->name; |
fdd579e2 |
52 | return $self; |
53 | }else{ |
54 | return $self->{encoding} |
c731e18e |
55 | } |
fdd579e2 |
56 | } |
57 | |
58 | sub bytes { |
59 | my ($self, $encname) = @_; |
60 | $encname ||= $self->{encoding}; |
61 | my $obj = find_encoding($encname) |
62 | or confess __PACKAGE__, ": unknown encoding: $encname"; |
63 | $self->{data} = $obj->decode($self->{data}, 1); |
64 | $self->{encoding} = '' ; |
65 | return $self; |
66 | } |
67 | |
68 | sub DESTROY{ # defined so it won't autoload. |
8f139f4c |
69 | DEBUG and warn shift; |
c731e18e |
70 | } |
71 | |
72 | sub AUTOLOAD { |
73 | my $self = shift; |
74 | my $type = ref($self) |
75 | or confess "$self is not an object"; |
76 | my $myname = $AUTOLOAD; |
77 | $myname =~ s/.*://; # strip fully-qualified portion |
78 | my $obj = find_encoding($myname) |
79 | or confess __PACKAGE__, ": unknown encoding: $myname"; |
8f139f4c |
80 | DEBUG and warn $self->{encoding}, " => ", $obj->name; |
fdd579e2 |
81 | if ($self->{encoding}){ |
82 | from_to($self->{data}, $self->{encoding}, $obj->name, 1); |
83 | }else{ |
84 | $self->{data} = $obj->encode($self->{data}, 1); |
85 | } |
c731e18e |
86 | $self->{encoding} = $obj->name; |
87 | return $self; |
88 | } |
89 | |
90 | use overload |
91 | q("") => sub { $_[0]->{data} }, |
92 | q(0+) => sub { use bytes (); bytes::length($_[0]->{data}) }, |
93 | fallback => 1, |
94 | ; |
95 | |
96 | 1; |
97 | __END__ |
98 | |
99 | =head1 NAME |
100 | |
101 | Encode::Encoder -- Object Oriented Encoder |
102 | |
103 | =head1 SYNOPSIS |
151b5d36 |
104 | |
c731e18e |
105 | use Encode::Encoder; |
106 | # Encode::encode("ISO-8859-1", $data); |
77ea6967 |
107 | Encode::Encoder->new($data)->iso_8859_1; # OOP way |
c731e18e |
108 | # shortcut |
448e90bb |
109 | use Encode::Encoder qw(encoder); |
c731e18e |
110 | encoder($data)->iso_8859_1; |
111 | # you can stack them! |
fdd579e2 |
112 | encoder($data)->iso_8859_1->base64; # provided base64() is defined |
113 | # you can use it as a decoder as well |
114 | encoder($base64)->bytes('base64')->latin1; |
c731e18e |
115 | # stringified |
fdd579e2 |
116 | print encoder($data)->utf8->latin1; # prints the string in latin1 |
c731e18e |
117 | # numified |
fdd579e2 |
118 | encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data) |
c731e18e |
119 | |
120 | =head1 ABSTRACT |
121 | |
0ab8f81e |
122 | B<Encode::Encoder> allows you to use Encode in an object-oriented |
123 | style. This is not only more intuitive than a functional approach, |
124 | but also handier when you want to stack encodings. Suppose you want |
125 | your UTF-8 string converted to Latin1 then Base64: you can simply say |
c731e18e |
126 | |
127 | my $base64 = encoder($utf8)->latin1->base64; |
128 | |
129 | instead of |
130 | |
131 | my $latin1 = encode("latin1", $utf8); |
fdd579e2 |
132 | my $base64 = encode_base64($latin1); |
c731e18e |
133 | |
0ab8f81e |
134 | or the lazier and more convoluted |
c731e18e |
135 | |
136 | my $base64 = encode_base64(encode("latin1", $utf8)); |
137 | |
138 | =head1 DESCRIPTION |
139 | |
140 | Here is how to use this module. |
141 | |
142 | =over 4 |
143 | |
144 | =item * |
145 | |
0ab8f81e |
146 | There are at least two instance variables stored in a hash reference, |
c731e18e |
147 | {data} and {encoding}. |
148 | |
149 | =item * |
150 | |
0ab8f81e |
151 | When there is no method, it takes the method name as the name of the |
152 | encoding and encodes the instance I<data> with I<encoding>. If successful, |
153 | the instance I<encoding> is set accordingly. |
c731e18e |
154 | |
fdd579e2 |
155 | =item * |
156 | |
157 | You can retrieve the result via -E<gt>data but usually you don't have to |
158 | because the stringify operator ("") is overridden to do exactly that. |
159 | |
160 | =back |
161 | |
162 | =head2 Predefined Methods |
163 | |
0ab8f81e |
164 | This module predefines the methods below: |
fdd579e2 |
165 | |
166 | =over 4 |
167 | |
168 | =item $e = Encode::Encoder-E<gt>new([$data, $encoding]); |
169 | |
448e90bb |
170 | returns an encoder object. Its data is initialized with $data if |
0ab8f81e |
171 | present, and its encoding is set to $encoding if present. |
fdd579e2 |
172 | |
173 | When $encoding is omitted, it defaults to utf8 if $data is already in |
174 | utf8 or "" (empty string) otherwise. |
175 | |
176 | =item encoder() |
177 | |
448e90bb |
178 | is an alias of Encode::Encoder-E<gt>new(). This one is exported on demand. |
fdd579e2 |
179 | |
180 | =item $e-E<gt>data([$data]) |
181 | |
0ab8f81e |
182 | When $data is present (defined), sets the instance data to $data and |
183 | returns $data. Otherwise, the current instance data is returned. |
fdd579e2 |
184 | |
185 | =item $e-E<gt>encoding([$encoding]) |
186 | |
0ab8f81e |
187 | When $encoding is present, sets the instance encoding to $encoding and |
188 | returns the object itself. Otherwise, the current instance encoding is |
fdd579e2 |
189 | returned. |
190 | |
191 | =item $e-E<gt>bytes([$encoding]) |
192 | |
0ab8f81e |
193 | decodes instance data from $encoding, or the instance encoding if |
194 | omitted. If the conversion is successful, the instance encoding |
195 | will be set to "". |
fdd579e2 |
196 | |
197 | The name I<bytes> was deliberately picked to avoid namespace tainting |
198 | -- this module may be used as a base class so method names that appear |
199 | in Encode::Encoding are avoided. |
200 | |
201 | =back |
202 | |
203 | =head2 Example: base64 transcoder |
c731e18e |
204 | |
0ab8f81e |
205 | This module is designed to work with L<Encode::Encoding>. |
206 | To make the Base64 transcoder example above really work, you could |
207 | write a module like this: |
c731e18e |
208 | |
209 | package Encode::Base64; |
210 | use base 'Encode::Encoding'; |
fdd579e2 |
211 | __PACKAGE__->Define('base64'); |
c731e18e |
212 | use MIME::Base64; |
213 | sub encode{ |
fdd579e2 |
214 | my ($obj, $data) = @_; |
215 | return encode_base64($data); |
c731e18e |
216 | } |
217 | sub decode{ |
fdd579e2 |
218 | my ($obj, $data) = @_; |
219 | return decode_base64($data); |
c731e18e |
220 | } |
221 | 1; |
222 | __END__ |
223 | |
0ab8f81e |
224 | And your caller module would be something like this: |
c731e18e |
225 | |
226 | use Encode::Encoder; |
227 | use Encode::Base64; |
fdd579e2 |
228 | |
229 | # now you can really do the following |
230 | |
231 | encoder($data)->iso_8859_1->base64; |
232 | encoder($base64)->bytes('base64')->latin1; |
c731e18e |
233 | |
0ab8f81e |
234 | =head2 Operator Overloading |
c731e18e |
235 | |
236 | This module overloads two operators, stringify ("") and numify (0+). |
237 | |
0ab8f81e |
238 | Stringify dumps the data inside the object. |
c731e18e |
239 | |
0ab8f81e |
240 | Numify returns the number of bytes in the instance data. |
c731e18e |
241 | |
242 | They come in handy when you want to print or find the size of data. |
243 | |
c731e18e |
244 | =head1 SEE ALSO |
245 | |
0ab8f81e |
246 | L<Encode>, |
c731e18e |
247 | L<Encode::Encoding> |
248 | |
249 | =cut |