1 package Encode::MIME::Header;
4 our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
5 use Encode qw(find_encoding encode_utf8 decode_utf8);
11 decode_b => '1', # decodes 'B' encoding ?
12 decode_q => '1', # decodes 'Q' encoding ?
13 encode => 'B', # encode with 'B' or 'Q' ?
14 bpl => 75, # bytes per line
17 $Encode::Encoding{'MIME-Header'} =
20 Name => 'MIME-Header',
23 $Encode::Encoding{'MIME-B'} =
30 $Encode::Encoding{'MIME-Q'} =
38 use base qw(Encode::Encoding);
45 my ($obj, $str, $chk) = @_;
46 # zap spaces between encoded words
47 $str =~ s/\?=\s+=\?/\?==\?/gos;
48 # multi-line header to single line
49 $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
51 1 while ($str =~ s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/); # Concat consecutive QP encoded mime headers
52 # Fixes breaking inside multi-byte characters
56 =\? # begin encoded word
57 ([0-9A-Za-z\-_]+) # charset (encoding)
58 (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
59 \?([QqBb])\? # delimiter
60 (.*?) # Base64-encodede contents
61 \?= # end encoded word
64 $obj->{decode_b} or croak qq(MIME "B" unsupported);
66 }elsif(uc($2) eq 'Q'){
67 $obj->{decode_q} or croak qq(MIME "Q" unsupported);
70 croak qq(MIME "$2" encoding is nonexistent!);
79 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
80 my $db64 = decode_base64(shift);
81 return $d->name eq 'utf8' ?
82 Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
87 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
89 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
90 return $d->name eq 'utf8' ?
91 Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ);
96 map {quotemeta(chr($_))}
97 unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
102 =\? # begin encoded word
103 (?:[0-9A-Za-z\-_]+) # charset (encoding)
104 (?:\*\w+(?:-\w+)*)? # language (RFC 2231)
105 \?(?:[QqBb])\? # delimiter
106 (?:.*?) # Base64-encodede contents
107 \?= # end encoded word
111 my $re_especials = qr{$re_encoded_word|$especials}xo;
114 my ($obj, $str, $chk) = @_;
116 for my $line (split /\r|\n|\r\n/o, $str){
117 my (@word, @subline);
118 for my $word (split /($re_especials)/o, $line){
119 if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
120 push @word, $obj->_encode($word);
126 for my $word (@word){
128 if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
129 push @subline, $subline;
134 $subline and push @subline, $subline;
135 push @line, join("\n " => @subline);
138 return join("\n", @line);
141 use constant HEAD => '=?UTF-8?';
142 use constant TAIL => '?=';
143 use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
147 my $enc = $o->{encode};
148 my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
149 # to coerce a floating-point arithmetics, the following contains
150 # .0 in numbers -- dankogai
151 $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
154 while(length(my $chr = substr($str, 0, 1, ''))){
156 if (bytes::length($chunk) + bytes::length($chr) > $llen){
157 push @result, SINGLE->{$enc}($chunk);
162 $chunk and push @result, SINGLE->{$enc}($chunk);
167 HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
175 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
177 return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
185 Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
189 use Encode qw/encode decode/;
190 $utf8 = decode('MIME-Header', $header);
191 $header = encode('MIME-Header', $utf8);
195 This module implements RFC 2047 Mime Header Encoding. There are 3
196 variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
197 difference is described below
200 ----------------------------------------------
201 MIME-Header Both B and Q =?UTF-8?B?....?=
202 MIME-B B only; Q croaks =?UTF-8?B?....?=
203 MIME-Q Q only; B croaks =?UTF-8?Q?....?=
207 When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
208 is extracted and decoded for I<X> encoding (B for Base64, Q for
209 Quoted-Printable). Then the decoded chunk is fed to
210 decode(I<encoding>). So long as I<encoding> is supported by Encode,
211 any source encoding is fine.
213 When you encode, it just encodes UTF-8 string with I<X> encoding then
214 quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
215 encode are left as is and long lines are folded within 76 bytes per
220 It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
221 and =?ISO-8859-1?= but that makes the implementation too complicated.
222 These days major mail agents all support =?UTF-8? so I think it is
225 Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
226 Makamaka. Thre are still too many MUAs especially cellular phone
227 handsets which does not grok UTF-8.
233 RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other