Commit | Line | Data |
b4980992 |
1 | package Catalyst::Plugin::Unicode::Encoding; |
2 | |
3 | use strict; |
4 | use base 'Class::Data::Inheritable'; |
5 | |
6 | use Carp (); |
7 | use MRO::Compat; |
8 | use Try::Tiny; |
9 | |
10 | use Encode 2.21 (); |
11 | our $CHECK = Encode::FB_CROAK | Encode::LEAVE_SRC; |
12 | |
13 | our $VERSION = '2.1'; |
14 | |
15 | __PACKAGE__->mk_classdata('_encoding'); |
16 | |
17 | sub encoding { |
18 | my $c = shift; |
19 | my $encoding; |
20 | |
21 | if ( scalar @_ ) { |
22 | # Let it be set to undef |
23 | if (my $wanted = shift) { |
24 | $encoding = Encode::find_encoding($wanted) |
25 | or Carp::croak( qq/Unknown encoding '$wanted'/ ); |
26 | } |
27 | |
28 | $encoding = ref $c |
29 | ? $c->{encoding} = $encoding |
30 | : $c->_encoding($encoding); |
31 | } else { |
32 | $encoding = ref $c && exists $c->{encoding} |
33 | ? $c->{encoding} |
34 | : $c->_encoding; |
35 | } |
36 | |
37 | return $encoding; |
38 | } |
39 | |
40 | sub finalize_headers { |
41 | my $c = shift; |
42 | |
43 | my $body = $c->response->body; |
44 | |
45 | return $c->next::method(@_) |
46 | unless defined($body); |
47 | |
48 | my $enc = $c->encoding; |
49 | |
50 | return $c->next::method(@_) |
51 | unless $enc; |
52 | |
53 | my ($ct, $ct_enc) = $c->response->content_type; |
54 | |
55 | # Only touch 'text-like' contents |
56 | return $c->next::method(@_) |
57 | unless $c->response->content_type =~ /^text|xml$|javascript$/; |
58 | |
59 | if ($ct_enc && $ct_enc =~ /charset=([^;]*)/) { |
60 | if (uc($1) ne uc($enc->mime_name)) { |
61 | $c->log->debug("Unicode::Encoding is set to encode in '" . |
62 | $enc->mime_name . |
63 | "', content type is '$1', not encoding "); |
64 | return $c->next::method(@_); |
65 | } |
66 | } else { |
67 | $c->res->content_type($c->res->content_type . "; charset=" . $enc->mime_name); |
68 | } |
69 | |
70 | # Encode expects plain scalars (IV, NV or PV) and segfaults on ref's |
71 | $c->response->body( $c->encoding->encode( $body, $CHECK ) ) |
72 | if ref(\$body) eq 'SCALAR'; |
73 | |
74 | $c->next::method(@_); |
75 | } |
76 | |
77 | # Note we have to hook here as uploads also add to the request parameters |
78 | sub prepare_uploads { |
79 | my $c = shift; |
80 | |
81 | $c->next::method(@_); |
82 | |
83 | my $enc = $c->encoding; |
84 | |
85 | for my $key (qw/ parameters query_parameters body_parameters /) { |
86 | for my $value ( values %{ $c->request->{$key} } ) { |
87 | # N.B. Check if already a character string and if so do not try to double decode. |
88 | # http://www.mail-archive.com/catalyst@lists.scsys.co.uk/msg02350.html |
89 | # this avoids exception if we have already decoded content, and is _not_ the |
90 | # same as not encoding on output which is bad news (as it does the wrong thing |
91 | # for latin1 chars for example).. |
92 | $value = $c->_handle_unicode_decoding($value); |
93 | } |
94 | } |
95 | for my $value ( values %{ $c->request->uploads } ) { |
96 | # skip if it fails for uploads, as we don't usually want uploads touched |
97 | # in any way |
98 | $_->{filename} = try { |
99 | $enc->decode( $_->{filename}, $CHECK ) |
100 | } catch { |
101 | $c->handle_unicode_encoding_exception({ |
102 | param_value => $_->{filename}, |
103 | error_msg => $_, |
104 | encoding_step => 'uploads', |
105 | }); |
106 | } for ( ref($value) eq 'ARRAY' ? @{$value} : $value ); |
107 | } |
108 | } |
109 | |
110 | sub prepare_action { |
111 | my $c = shift; |
112 | |
113 | my $ret = $c->next::method(@_); |
114 | |
115 | foreach (@{$c->req->arguments}, @{$c->req->captures}) { |
116 | $_ = $c->_handle_param_unicode_decoding($_); |
117 | } |
118 | |
119 | return $ret; |
120 | } |
121 | |
122 | sub setup { |
123 | my $self = shift; |
124 | |
125 | my $conf = $self->config; |
126 | |
127 | # Allow an explict undef encoding to disable default of utf-8 |
128 | my $enc = exists $conf->{encoding} ? delete $conf->{encoding} : 'UTF-8'; |
129 | $self->encoding( $enc ); |
130 | |
131 | return $self->next::method(@_); |
132 | } |
133 | |
134 | sub _handle_unicode_decoding { |
135 | my ( $self, $value ) = @_; |
136 | |
137 | return unless defined $value; |
138 | |
139 | if ( ref $value eq 'ARRAY' ) { |
140 | foreach ( @$value ) { |
141 | $_ = $self->_handle_unicode_decoding($_); |
142 | } |
143 | return $value; |
144 | } |
145 | elsif ( ref $value eq 'HASH' ) { |
146 | foreach ( values %$value ) { |
147 | $_ = $self->_handle_unicode_decoding($_); |
148 | } |
149 | return $value; |
150 | } |
151 | else { |
152 | return $self->_handle_param_unicode_decoding($value); |
153 | } |
154 | } |
155 | |
156 | sub _handle_param_unicode_decoding { |
157 | my ( $self, $value ) = @_; |
158 | my $enc = $self->encoding; |
159 | return try { |
160 | Encode::is_utf8( $value ) ? |
161 | $value |
162 | : $enc->decode( $value, $CHECK ); |
163 | } |
164 | catch { |
165 | $self->handle_unicode_encoding_exception({ |
166 | param_value => $value, |
167 | error_msg => $_, |
168 | encoding_step => 'params', |
169 | }); |
170 | }; |
171 | } |
172 | |
173 | sub handle_unicode_encoding_exception { |
174 | my ( $self, $exception_ctx ) = @_; |
175 | die $exception_ctx->{error_msg}; |
176 | } |
177 | |
178 | 1; |
179 | |
180 | __END__ |
181 | |
182 | =head1 NAME |
183 | |
184 | Catalyst::Plugin::Unicode::Encoding - Unicode aware Catalyst |
185 | |
186 | =head1 SYNOPSIS |
187 | |
188 | use Catalyst qw[Unicode::Encoding]; |
189 | |
190 | MyApp->config( encoding => 'UTF-8' ); # A valid Encode encoding |
191 | |
192 | |
193 | =head1 DESCRIPTION |
194 | |
195 | On request, decodes all params from encoding into a sequence of |
196 | logical characters. On response, encodes body into encoding. |
197 | |
198 | =head1 METHODS |
199 | |
200 | =over 4 |
201 | |
202 | =item encoding |
203 | |
204 | Returns an instance of an C<Encode> encoding |
205 | |
206 | print $c->encoding->name |
207 | |
208 | =back |
209 | |
210 | =head1 OVERLOADED METHODS |
211 | |
212 | =over |
213 | |
214 | =item finalize_headers |
215 | |
216 | Encodes body into encoding. |
217 | |
218 | =item prepare_uploads |
219 | |
220 | Decodes parameters, query_parameters, body_parameters and filenames |
221 | in file uploads into a sequence of logical characters. |
222 | |
223 | =item prepare_action |
224 | |
225 | Decodes request arguments (i.e. C<< $c->request->arguments >>) and |
226 | captures (i.e. C<< $c->request->captures >>). |
227 | |
228 | =item setup |
229 | |
230 | Setups C<< $c->encoding >> with encoding specified in C<< $c->config->{encoding} >>. |
231 | |
232 | =item handle_unicode_encoding_exception ($exception_context) |
233 | |
234 | Method called when decoding process for a request fails. |
235 | |
236 | An C<$exception_context> hashref is provided to allow you to override the |
237 | behaviour of your application when given data with incorrect encodings. |
238 | |
239 | The default method throws exceptions in the case of invalid request parameters |
240 | (resulting in a 500 error), but ignores errors in upload filenames. |
241 | |
242 | The keys passed in the C<$exception_context> hash are: |
243 | |
244 | =over |
245 | |
246 | =item param_value |
247 | |
248 | The value which was not able to be decoded. |
249 | |
250 | =item error_msg |
251 | |
252 | The exception recieved from L<Encode>. |
253 | |
254 | =item encoding_step |
255 | |
256 | What type of data was being decoded. Valid values are (currently) |
257 | C<params> - for request parameters / arguments / captures |
258 | and C<uploads> - for request upload filenames. |
259 | |
260 | =back |
261 | |
262 | =back |
263 | |
264 | =head1 SEE ALSO |
265 | |
266 | L<Encode>, L<Encode::Encoding>, L<Catalyst::Plugin::Unicode>, L<Catalyst>. |
267 | |
268 | =head1 AUTHORS |
269 | |
270 | Christian Hansen, C<ch@ngmedia.com> |
271 | |
272 | Masahiro Chiba |
273 | |
274 | Tomas Doran, C<bobtfish@bobtfish.net> |
275 | |
276 | =head1 LICENSE |
277 | |
278 | This library is free software . You can redistribute it and/or modify |
279 | it under the same terms as perl itself. |
280 | |
281 | =cut |