Upgrade to Encode 2.14
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / MIME / Header.pm
CommitLineData
af1f55d9 1package Encode::MIME::Header;
2use strict;
3# use warnings;
41c240f5 4our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
bedba681 5use Encode qw(find_encoding encode_utf8 decode_utf8);
af1f55d9 6use MIME::Base64;
7use Carp;
8
9my %seed =
10 (
11 decode_b => '1', # decodes 'B' encoding ?
12 decode_q => '1', # decodes 'Q' encoding ?
13 encode => 'B', # encode with 'B' or 'Q' ?
14 bpl => 75, # bytes per line
15 );
16
17$Encode::Encoding{'MIME-Header'} =
18 bless {
19 %seed,
20 Name => 'MIME-Header',
21 } => __PACKAGE__;
22
23$Encode::Encoding{'MIME-B'} =
24 bless {
25 %seed,
26 decode_q => 0,
27 Name => 'MIME-B',
28 } => __PACKAGE__;
29
30$Encode::Encoding{'MIME-Q'} =
31 bless {
32 %seed,
33 decode_q => 1,
34 encode => 'Q',
35 Name => 'MIME-Q',
36 } => __PACKAGE__;
37
10c5ecbb 38use base qw(Encode::Encoding);
39
af1f55d9 40sub needs_lines { 1 }
41sub perlio_ok{ 0 };
42
43sub decode($$;$){
44 use utf8;
45 my ($obj, $str, $chk) = @_;
46 # zap spaces between encoded words
47 $str =~ s/\?=\s+=\?/\?==\?/gos;
48 # multi-line header to single line
49 $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
41c240f5 50
51 1 while ($str =~ s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/); # Concat consecutive QP encoded mime headers
52 # Fixes breaking inside multi-byte characters
53
af1f55d9 54 $str =~
55 s{
56 =\? # begin encoded word
f74b3917 57 ([0-9A-Za-z\-_]+) # charset (encoding)
41c240f5 58 (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
af1f55d9 59 \?([QqBb])\? # delimiter
60 (.*?) # Base64-encodede contents
61 \?= # end encoded word
62 }{
63 if (uc($2) eq 'B'){
64 $obj->{decode_b} or croak qq(MIME "B" unsupported);
65 decode_b($1, $3);
66 }elsif(uc($2) eq 'Q'){
67 $obj->{decode_q} or croak qq(MIME "Q" unsupported);
68 decode_q($1, $3);
69 }else{
70 croak qq(MIME "$2" encoding is nonexistent!);
71 }
72 }egox;
73 $_[1] = '' if $chk;
74 return $str;
75}
76
77sub decode_b{
78 my $enc = shift;
bedba681 79 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
af1f55d9 80 my $db64 = decode_base64(shift);
ab3374e4 81 return $d->name eq 'utf8' ?
82 Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
af1f55d9 83}
84
85sub decode_q{
86 my ($enc, $q) = @_;
bedba681 87 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
af1f55d9 88 $q =~ s/_/ /go;
89 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
ab3374e4 90 return $d->name eq 'utf8' ?
91 Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ);
af1f55d9 92}
93
94my $especials =
95 join('|' =>
96 map {quotemeta(chr($_))}
97 unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
98
bedba681 99my $re_encoded_word =
100 qr{
101 (?:
102 =\? # begin encoded word
103 (?:[0-9A-Za-z\-_]+) # charset (encoding)
41c240f5 104 (?:\*\w+(?:-\w+)*)? # language (RFC 2231)
bedba681 105 \?(?:[QqBb])\? # delimiter
106 (?:.*?) # Base64-encodede contents
107 \?= # end encoded word
108 )
109 }xo;
110
111my $re_especials = qr{$re_encoded_word|$especials}xo;
af1f55d9 112
113sub encode($$;$){
114 my ($obj, $str, $chk) = @_;
115 my @line = ();
116 for my $line (split /\r|\n|\r\n/o, $str){
117 my (@word, @subline);
118 for my $word (split /($re_especials)/o, $line){
bedba681 119 if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
af1f55d9 120 push @word, $obj->_encode($word);
121 }else{
122 push @word, $word;
123 }
124 }
125 my $subline = '';
126 for my $word (@word){
127 use bytes ();
128 if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
129 push @subline, $subline;
130 $subline = '';
131 }
132 $subline .= $word;
133 }
134 $subline and push @subline, $subline;
135 push @line, join("\n " => @subline);
136 }
137 $_[1] = '' if $chk;
138 return join("\n", @line);
139}
140
141use constant HEAD => '=?UTF-8?';
142use constant TAIL => '?=';
143use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
144
145sub _encode{
146 my ($o, $str) = @_;
147 my $enc = $o->{encode};
148 my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
11067275 149 # to coerce a floating-point arithmetics, the following contains
150 # .0 in numbers -- dankogai
151 $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
af1f55d9 152 my @result = ();
153 my $chunk = '';
b786ee6f 154 while(length(my $chr = substr($str, 0, 1, ''))){
af1f55d9 155 use bytes ();
156 if (bytes::length($chunk) + bytes::length($chr) > $llen){
157 push @result, SINGLE->{$enc}($chunk);
158 $chunk = '';
159 }
160 $chunk .= $chr;
161 }
162 $chunk and push @result, SINGLE->{$enc}($chunk);
163 return @result;
164}
165
166sub _encode_b{
167 HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
168}
169
170sub _encode_q{
171 my $chunk = shift;
172 $chunk =~ s{
173 ([^0-9A-Za-z])
174 }{
175 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
176 }egox;
bedba681 177 return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
af1f55d9 178}
179
1801;
181__END__
182
183=head1 NAME
184
185Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
186
187=head1 SYNOPSIS
188
189 use Encode qw/encode decode/;
190 $utf8 = decode('MIME-Header', $header);
191 $header = encode('MIME-Header', $utf8);
192
193=head1 ABSTRACT
194
195This module implements RFC 2047 Mime Header Encoding. There are 3
196variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
197difference is described below
198
199 decode() encode()
200 ----------------------------------------------
201 MIME-Header Both B and Q =?UTF-8?B?....?=
202 MIME-B B only; Q croaks =?UTF-8?B?....?=
203 MIME-Q Q only; B croaks =?UTF-8?Q?....?=
204
205=head1 DESCRIPTION
206
207When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
208is extracted and decoded for I<X> encoding (B for Base64, Q for
209Quoted-Printable). Then the decoded chunk is fed to
210decode(I<encoding>). So long as I<encoding> is supported by Encode,
211any source encoding is fine.
212
213When you encode, it just encodes UTF-8 string with I<X> encoding then
214quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
215encode are left as is and long lines are folded within 76 bytes per
216line.
217
218=head1 BUGS
219
7e19fb92 220It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
af1f55d9 221and =?ISO-8859-1?= but that makes the implementation too complicated.
222These days major mail agents all support =?UTF-8? so I think it is
223just good enough.
224
56ff7374 225Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
226Makamaka. Thre are still too many MUAs especially cellular phone
227handsets which does not grok UTF-8.
228
af1f55d9 229=head1 SEE ALSO
230
231L<Encode>
232
233RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
234locations.
235
236=cut