Commit | Line | Data |
b4980992 |
1 | package Catalyst::Plugin::Unicode::Encoding; |
2 | |
3 | use strict; |
4 | use base 'Class::Data::Inheritable'; |
5 | |
6 | use Carp (); |
7 | use MRO::Compat; |
8 | use Try::Tiny; |
9 | |
10 | use Encode 2.21 (); |
11 | our $CHECK = Encode::FB_CROAK | Encode::LEAVE_SRC; |
12 | |
13 | our $VERSION = '2.1'; |
14 | |
15 | __PACKAGE__->mk_classdata('_encoding'); |
16 | |
17 | sub encoding { |
18 | my $c = shift; |
19 | my $encoding; |
20 | |
21 | if ( scalar @_ ) { |
22 | # Let it be set to undef |
23 | if (my $wanted = shift) { |
24 | $encoding = Encode::find_encoding($wanted) |
25 | or Carp::croak( qq/Unknown encoding '$wanted'/ ); |
26 | } |
27 | |
28 | $encoding = ref $c |
29 | ? $c->{encoding} = $encoding |
30 | : $c->_encoding($encoding); |
31 | } else { |
32 | $encoding = ref $c && exists $c->{encoding} |
33 | ? $c->{encoding} |
34 | : $c->_encoding; |
35 | } |
36 | |
37 | return $encoding; |
38 | } |
39 | |
40 | sub finalize_headers { |
41 | my $c = shift; |
42 | |
43 | my $body = $c->response->body; |
44 | |
45 | return $c->next::method(@_) |
46 | unless defined($body); |
47 | |
48 | my $enc = $c->encoding; |
49 | |
50 | return $c->next::method(@_) |
51 | unless $enc; |
52 | |
53 | my ($ct, $ct_enc) = $c->response->content_type; |
54 | |
55 | # Only touch 'text-like' contents |
56 | return $c->next::method(@_) |
57 | unless $c->response->content_type =~ /^text|xml$|javascript$/; |
58 | |
59 | if ($ct_enc && $ct_enc =~ /charset=([^;]*)/) { |
60 | if (uc($1) ne uc($enc->mime_name)) { |
61 | $c->log->debug("Unicode::Encoding is set to encode in '" . |
62 | $enc->mime_name . |
63 | "', content type is '$1', not encoding "); |
64 | return $c->next::method(@_); |
65 | } |
66 | } else { |
67 | $c->res->content_type($c->res->content_type . "; charset=" . $enc->mime_name); |
68 | } |
69 | |
70 | # Encode expects plain scalars (IV, NV or PV) and segfaults on ref's |
71 | $c->response->body( $c->encoding->encode( $body, $CHECK ) ) |
72 | if ref(\$body) eq 'SCALAR'; |
73 | |
74 | $c->next::method(@_); |
75 | } |
76 | |
77 | # Note we have to hook here as uploads also add to the request parameters |
78 | sub prepare_uploads { |
79 | my $c = shift; |
80 | |
81 | $c->next::method(@_); |
82 | |
83 | my $enc = $c->encoding; |
84 | |
85 | for my $key (qw/ parameters query_parameters body_parameters /) { |
86 | for my $value ( values %{ $c->request->{$key} } ) { |
87 | # N.B. Check if already a character string and if so do not try to double decode. |
88 | # http://www.mail-archive.com/catalyst@lists.scsys.co.uk/msg02350.html |
89 | # this avoids exception if we have already decoded content, and is _not_ the |
90 | # same as not encoding on output which is bad news (as it does the wrong thing |
91 | # for latin1 chars for example).. |
92 | $value = $c->_handle_unicode_decoding($value); |
93 | } |
94 | } |
95 | for my $value ( values %{ $c->request->uploads } ) { |
96 | # skip if it fails for uploads, as we don't usually want uploads touched |
97 | # in any way |
98 | $_->{filename} = try { |
99 | $enc->decode( $_->{filename}, $CHECK ) |
100 | } catch { |
101 | $c->handle_unicode_encoding_exception({ |
102 | param_value => $_->{filename}, |
103 | error_msg => $_, |
104 | encoding_step => 'uploads', |
105 | }); |
106 | } for ( ref($value) eq 'ARRAY' ? @{$value} : $value ); |
107 | } |
108 | } |
109 | |
110 | sub prepare_action { |
111 | my $c = shift; |
112 | |
113 | my $ret = $c->next::method(@_); |
114 | |
115 | foreach (@{$c->req->arguments}, @{$c->req->captures}) { |
116 | $_ = $c->_handle_param_unicode_decoding($_); |
117 | } |
118 | |
119 | return $ret; |
120 | } |
121 | |
122 | sub setup { |
123 | my $self = shift; |
124 | |
125 | my $conf = $self->config; |
126 | |
127 | # Allow an explict undef encoding to disable default of utf-8 |
1bef5f59 |
128 | my $enc = delete $conf->{encoding}; |
b4980992 |
129 | $self->encoding( $enc ); |
130 | |
131 | return $self->next::method(@_); |
132 | } |
133 | |
134 | sub _handle_unicode_decoding { |
135 | my ( $self, $value ) = @_; |
136 | |
137 | return unless defined $value; |
138 | |
139 | if ( ref $value eq 'ARRAY' ) { |
140 | foreach ( @$value ) { |
141 | $_ = $self->_handle_unicode_decoding($_); |
142 | } |
143 | return $value; |
144 | } |
145 | elsif ( ref $value eq 'HASH' ) { |
146 | foreach ( values %$value ) { |
147 | $_ = $self->_handle_unicode_decoding($_); |
148 | } |
149 | return $value; |
150 | } |
151 | else { |
152 | return $self->_handle_param_unicode_decoding($value); |
153 | } |
154 | } |
155 | |
156 | sub _handle_param_unicode_decoding { |
157 | my ( $self, $value ) = @_; |
158 | my $enc = $self->encoding; |
159 | return try { |
160 | Encode::is_utf8( $value ) ? |
161 | $value |
162 | : $enc->decode( $value, $CHECK ); |
163 | } |
164 | catch { |
165 | $self->handle_unicode_encoding_exception({ |
166 | param_value => $value, |
167 | error_msg => $_, |
168 | encoding_step => 'params', |
169 | }); |
170 | }; |
171 | } |
172 | |
173 | sub handle_unicode_encoding_exception { |
174 | my ( $self, $exception_ctx ) = @_; |
4fb27043 |
175 | $self->log->warn($exception_ctx->{error_msg}); |
176 | return $exception_ctx->{'param_value'}; |
b4980992 |
177 | } |
178 | |
179 | 1; |
180 | |
181 | __END__ |
182 | |
183 | =head1 NAME |
184 | |
185 | Catalyst::Plugin::Unicode::Encoding - Unicode aware Catalyst |
186 | |
187 | =head1 SYNOPSIS |
188 | |
1d4df70b |
189 | use Catalyst; |
b4980992 |
190 | |
191 | MyApp->config( encoding => 'UTF-8' ); # A valid Encode encoding |
192 | |
193 | |
194 | =head1 DESCRIPTION |
195 | |
1d4df70b |
196 | This plugin is automatically loaded by apps. Even though is not a core component |
197 | yet, it will vanish as soon as the code is fully integrated. For more |
198 | information, please refer to C<ENCODING> section at L<Catalyst>. |
b4980992 |
199 | |
200 | =head1 AUTHORS |
201 | |
1d4df70b |
202 | Catalyst Contributors, see Catalyst.pm |
b4980992 |
203 | |
1d4df70b |
204 | =head1 COPYRIGHT |
b4980992 |
205 | |
1d4df70b |
206 | This library is free software. You can redistribute it and/or modify |
207 | it under the same terms as Perl itself. |
b4980992 |
208 | |
209 | =cut |