1 package Encode::MIME::Header;
4 our $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
6 use Encode qw(find_encoding encode_utf8);
12 decode_b => '1', # decodes 'B' encoding ?
13 decode_q => '1', # decodes 'Q' encoding ?
14 encode => 'B', # encode with 'B' or 'Q' ?
15 bpl => 75, # bytes per line
18 $Encode::Encoding{'MIME-Header'} =
21 Name => 'MIME-Header',
24 $Encode::Encoding{'MIME-B'} =
31 $Encode::Encoding{'MIME-Q'} =
39 use base qw(Encode::Encoding);
46 my ($obj, $str, $chk) = @_;
47 # zap spaces between encoded words
48 $str =~ s/\?=\s+=\?/\?==\?/gos;
49 # multi-line header to single line
50 $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
53 =\? # begin encoded word
54 ([0-9A-Za-z\-_]+) # charset (encoding)
55 \?([QqBb])\? # delimiter
56 (.*?) # Base64-encodede contents
57 \?= # end encoded word
60 $obj->{decode_b} or croak qq(MIME "B" unsupported);
62 }elsif(uc($2) eq 'Q'){
63 $obj->{decode_q} or croak qq(MIME "Q" unsupported);
66 croak qq(MIME "$2" encoding is nonexistent!);
75 my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
76 my $db64 = decode_base64(shift);
77 return $d->decode($db64, Encode::FB_PERLQQ);
82 my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
84 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
85 return $d->decode($q, Encode::FB_PERLQQ);
90 map {quotemeta(chr($_))}
91 unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
93 my $re_especials = qr/$especials/o;
96 my ($obj, $str, $chk) = @_;
98 for my $line (split /\r|\n|\r\n/o, $str){
100 for my $word (split /($re_especials)/o, $line){
101 if ($word =~ /[^\x00-\x7f]/o){
102 push @word, $obj->_encode($word);
108 for my $word (@word){
110 if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
111 push @subline, $subline;
116 $subline and push @subline, $subline;
117 push @line, join("\n " => @subline);
120 return join("\n", @line);
123 use constant HEAD => '=?UTF-8?';
124 use constant TAIL => '?=';
125 use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
129 my $enc = $o->{encode};
130 my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
131 # to coerce a floating-point arithmetics, the following contains
132 # .0 in numbers -- dankogai
133 $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
136 while(my $chr = substr($str, 0, 1, '')){
138 if (bytes::length($chunk) + bytes::length($chr) > $llen){
139 push @result, SINGLE->{$enc}($chunk);
144 $chunk and push @result, SINGLE->{$enc}($chunk);
149 HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
157 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
159 return HEAD . 'Q?' . $chunk . TAIL;
167 Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
171 use Encode qw/encode decode/;
172 $utf8 = decode('MIME-Header', $header);
173 $header = encode('MIME-Header', $utf8);
177 This module implements RFC 2047 Mime Header Encoding. There are 3
178 variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
179 difference is described below
182 ----------------------------------------------
183 MIME-Header Both B and Q =?UTF-8?B?....?=
184 MIME-B B only; Q croaks =?UTF-8?B?....?=
185 MIME-Q Q only; B croaks =?UTF-8?Q?....?=
189 When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
190 is extracted and decoded for I<X> encoding (B for Base64, Q for
191 Quoted-Printable). Then the decoded chunk is fed to
192 decode(I<encoding>). So long as I<encoding> is supported by Encode,
193 any source encoding is fine.
195 When you encode, it just encodes UTF-8 string with I<X> encoding then
196 quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
197 encode are left as is and long lines are folded within 76 bytes per
202 It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
203 and =?ISO-8859-1?= but that makes the implementation too complicated.
204 These days major mail agents all support =?UTF-8? so I think it is
211 RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other