1 package Encode::MIME::Header;
4 our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
5 use Encode qw(find_encoding encode_utf8 decode_utf8);
11 decode_b => '1', # decodes 'B' encoding ?
12 decode_q => '1', # decodes 'Q' encoding ?
13 encode => 'B', # encode with 'B' or 'Q' ?
14 bpl => 75, # bytes per line
17 $Encode::Encoding{'MIME-Header'} =
20 Name => 'MIME-Header',
23 $Encode::Encoding{'MIME-B'} =
30 $Encode::Encoding{'MIME-Q'} =
38 use base qw(Encode::Encoding);
45 my ($obj, $str, $chk) = @_;
46 # zap spaces between encoded words
47 $str =~ s/\?=\s+=\?/\?==\?/gos;
48 # multi-line header to single line
49 $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
52 =\? # begin encoded word
53 ([0-9A-Za-z\-_]+) # charset (encoding)
54 \?([QqBb])\? # delimiter
55 (.*?) # Base64-encodede contents
56 \?= # end encoded word
59 $obj->{decode_b} or croak qq(MIME "B" unsupported);
61 }elsif(uc($2) eq 'Q'){
62 $obj->{decode_q} or croak qq(MIME "Q" unsupported);
65 croak qq(MIME "$2" encoding is nonexistent!);
74 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
75 my $db64 = decode_base64(shift);
76 return $d->name eq 'utf8' ?
77 Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
82 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
84 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
85 return $d->name eq 'utf8' ?
86 Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ);
91 map {quotemeta(chr($_))}
92 unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
97 =\? # begin encoded word
98 (?:[0-9A-Za-z\-_]+) # charset (encoding)
99 \?(?:[QqBb])\? # delimiter
100 (?:.*?) # Base64-encodede contents
101 \?= # end encoded word
105 my $re_especials = qr{$re_encoded_word|$especials}xo;
108 my ($obj, $str, $chk) = @_;
110 for my $line (split /\r|\n|\r\n/o, $str){
111 my (@word, @subline);
112 for my $word (split /($re_especials)/o, $line){
113 if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
114 push @word, $obj->_encode($word);
120 for my $word (@word){
122 if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
123 push @subline, $subline;
128 $subline and push @subline, $subline;
129 push @line, join("\n " => @subline);
132 return join("\n", @line);
135 use constant HEAD => '=?UTF-8?';
136 use constant TAIL => '?=';
137 use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
141 my $enc = $o->{encode};
142 my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
143 # to coerce a floating-point arithmetics, the following contains
144 # .0 in numbers -- dankogai
145 $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
148 while(length(my $chr = substr($str, 0, 1, ''))){
150 if (bytes::length($chunk) + bytes::length($chr) > $llen){
151 push @result, SINGLE->{$enc}($chunk);
156 $chunk and push @result, SINGLE->{$enc}($chunk);
161 HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
169 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
171 return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
179 Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
183 use Encode qw/encode decode/;
184 $utf8 = decode('MIME-Header', $header);
185 $header = encode('MIME-Header', $utf8);
189 This module implements RFC 2047 Mime Header Encoding. There are 3
190 variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
191 difference is described below
194 ----------------------------------------------
195 MIME-Header Both B and Q =?UTF-8?B?....?=
196 MIME-B B only; Q croaks =?UTF-8?B?....?=
197 MIME-Q Q only; B croaks =?UTF-8?Q?....?=
201 When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
202 is extracted and decoded for I<X> encoding (B for Base64, Q for
203 Quoted-Printable). Then the decoded chunk is fed to
204 decode(I<encoding>). So long as I<encoding> is supported by Encode,
205 any source encoding is fine.
207 When you encode, it just encodes UTF-8 string with I<X> encoding then
208 quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
209 encode are left as is and long lines are folded within 76 bytes per
214 It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
215 and =?ISO-8859-1?= but that makes the implementation too complicated.
216 These days major mail agents all support =?UTF-8? so I think it is
223 RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other