1 package Encode::MIME::Header;
4 our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
6 use Encode qw(find_encoding encode_utf8);
12 decode_b => '1', # decodes 'B' encoding ?
13 decode_q => '1', # decodes 'Q' encoding ?
14 encode => 'B', # encode with 'B' or 'Q' ?
15 bpl => 75, # bytes per line
18 $Encode::Encoding{'MIME-Header'} =
21 Name => 'MIME-Header',
24 $Encode::Encoding{'MIME-B'} =
31 $Encode::Encoding{'MIME-Q'} =
39 use base qw(Encode::Encoding);
46 my ($obj, $str, $chk) = @_;
47 # zap spaces between encoded words
48 $str =~ s/\?=\s+=\?/\?==\?/gos;
49 # multi-line header to single line
50 $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
53 =\? # begin encoded word
54 ([0-9A-Za-z\-_]+) # charset (encoding)
55 \?([QqBb])\? # delimiter
56 (.*?) # Base64-encodede contents
57 \?= # end encoded word
60 $obj->{decode_b} or croak qq(MIME "B" unsupported);
62 }elsif(uc($2) eq 'Q'){
63 $obj->{decode_q} or croak qq(MIME "Q" unsupported);
66 croak qq(MIME "$2" encoding is nonexistent!);
75 my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
76 my $db64 = decode_base64(shift);
77 return $d->name eq 'utf8' ?
78 Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
83 my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
85 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
86 return $d->name eq 'utf8' ?
87 Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ);
92 map {quotemeta(chr($_))}
93 unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
95 my $re_especials = qr/$especials/o;
98 my ($obj, $str, $chk) = @_;
100 for my $line (split /\r|\n|\r\n/o, $str){
101 my (@word, @subline);
102 for my $word (split /($re_especials)/o, $line){
103 if ($word =~ /[^\x00-\x7f]/o){
104 push @word, $obj->_encode($word);
110 for my $word (@word){
112 if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
113 push @subline, $subline;
118 $subline and push @subline, $subline;
119 push @line, join("\n " => @subline);
122 return join("\n", @line);
125 use constant HEAD => '=?UTF-8?';
126 use constant TAIL => '?=';
127 use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
131 my $enc = $o->{encode};
132 my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
133 # to coerce a floating-point arithmetics, the following contains
134 # .0 in numbers -- dankogai
135 $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
138 while(my $chr = substr($str, 0, 1, '')){
140 if (bytes::length($chunk) + bytes::length($chr) > $llen){
141 push @result, SINGLE->{$enc}($chunk);
146 $chunk and push @result, SINGLE->{$enc}($chunk);
151 HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
159 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
161 return HEAD . 'Q?' . $chunk . TAIL;
169 Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
173 use Encode qw/encode decode/;
174 $utf8 = decode('MIME-Header', $header);
175 $header = encode('MIME-Header', $utf8);
179 This module implements RFC 2047 Mime Header Encoding. There are 3
180 variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
181 difference is described below
184 ----------------------------------------------
185 MIME-Header Both B and Q =?UTF-8?B?....?=
186 MIME-B B only; Q croaks =?UTF-8?B?....?=
187 MIME-Q Q only; B croaks =?UTF-8?Q?....?=
191 When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
192 is extracted and decoded for I<X> encoding (B for Base64, Q for
193 Quoted-Printable). Then the decoded chunk is fed to
194 decode(I<encoding>). So long as I<encoding> is supported by Encode,
195 any source encoding is fine.
197 When you encode, it just encodes UTF-8 string with I<X> encoding then
198 quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
199 encode are left as is and long lines are folded within 76 bytes per
204 It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
205 and =?ISO-8859-1?= but that makes the implementation too complicated.
206 These days major mail agents all support =?UTF-8? so I think it is
213 RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other