1 package Encode::MIME::Header;
4 no warnings 'redefine';
6 our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
7 use Encode qw(find_encoding encode_utf8 decode_utf8);
12 decode_b => '1', # decodes 'B' encoding ?
13 decode_q => '1', # decodes 'Q' encoding ?
14 encode => 'B', # encode with 'B' or 'Q' ?
15 bpl => 75, # bytes per line
18 $Encode::Encoding{'MIME-Header'} =
19 bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;
21 $Encode::Encoding{'MIME-B'} = bless {
27 $Encode::Encoding{'MIME-Q'} = bless {
34 use base qw(Encode::Encoding);
41 my ( $obj, $str, $chk ) = @_;
43 # zap spaces between encoded words
44 $str =~ s/\?=\s+=\?/\?==\?/gos;
46 # multi-line header to single line
47 $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
50 s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/ )
51 ; # Concat consecutive QP encoded mime headers
52 # Fixes breaking inside multi-byte characters
55 =\? # begin encoded word
56 ([0-9A-Za-z\-_]+) # charset (encoding)
57 (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
58 \?([QqBb])\? # delimiter
59 (.*?) # Base64-encodede contents
60 \?= # end encoded word
63 $obj->{decode_b} or croak qq(MIME "B" unsupported);
65 }elsif(uc($2) eq 'Q'){
66 $obj->{decode_q} or croak qq(MIME "Q" unsupported);
69 croak qq(MIME "$2" encoding is nonexistent!);
78 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
79 my $db64 = decode_base64(shift);
80 return $d->name eq 'utf8'
81 ? Encode::decode_utf8($db64)
82 : $d->decode( $db64, Encode::FB_PERLQQ );
87 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
89 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
90 return $d->name eq 'utf8'
91 ? Encode::decode_utf8($q)
92 : $d->decode( $q, Encode::FB_PERLQQ );
96 join( '|' => map { quotemeta( chr($_) ) }
97 unpack( "C*", qq{()<>@,;:\"\'/[]?.=} ) );
99 my $re_encoded_word = qr{
101 =\? # begin encoded word
102 (?:[0-9A-Za-z\-_]+) # charset (encoding)
103 (?:\*\w+(?:-\w+)*)? # language (RFC 2231)
104 \?(?:[QqBb])\? # delimiter
105 (?:.*?) # Base64-encodede contents
106 \?= # end encoded word
110 my $re_especials = qr{$re_encoded_word|$especials}xo;
113 my ( $obj, $str, $chk ) = @_;
115 for my $line ( split /\r|\n|\r\n/o, $str ) {
116 my ( @word, @subline );
117 for my $word ( split /($re_especials)/o, $line ) {
118 if ( $word =~ /[^\x00-\x7f]/o
119 or $word =~ /^$re_encoded_word$/o )
121 push @word, $obj->_encode($word);
128 for my $word (@word) {
130 if ( bytes::length($subline) + bytes::length($word) >
133 push @subline, $subline;
138 $subline and push @subline, $subline;
139 push @line, join( "\n " => @subline );
142 return join( "\n", @line );
145 use constant HEAD => '=?UTF-8?';
146 use constant TAIL => '?=';
147 use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
150 my ( $o, $str ) = @_;
151 my $enc = $o->{encode};
152 my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
154 # to coerce a floating-point arithmetics, the following contains
155 # .0 in numbers -- dankogai
156 $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
159 while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
161 if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
162 push @result, SINGLE->{$enc}($chunk);
167 $chunk and push @result, SINGLE->{$enc}($chunk);
172 HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
177 $chunk = encode_utf8($chunk);
181 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
183 return HEAD . 'Q?' . $chunk . TAIL;
191 Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
195 use Encode qw/encode decode/;
196 $utf8 = decode('MIME-Header', $header);
197 $header = encode('MIME-Header', $utf8);
201 This module implements RFC 2047 Mime Header Encoding. There are 3
202 variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
203 difference is described below
206 ----------------------------------------------
207 MIME-Header Both B and Q =?UTF-8?B?....?=
208 MIME-B B only; Q croaks =?UTF-8?B?....?=
209 MIME-Q Q only; B croaks =?UTF-8?Q?....?=
213 When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
214 is extracted and decoded for I<X> encoding (B for Base64, Q for
215 Quoted-Printable). Then the decoded chunk is fed to
216 decode(I<encoding>). So long as I<encoding> is supported by Encode,
217 any source encoding is fine.
219 When you encode, it just encodes UTF-8 string with I<X> encoding then
220 quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
221 encode are left as is and long lines are folded within 76 bytes per
226 It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
227 and =?ISO-8859-1?= but that makes the implementation too complicated.
228 These days major mail agents all support =?UTF-8? so I think it is
231 Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
232 Makamaka. Thre are still too many MUAs especially cellular phone
233 handsets which does not grok UTF-8.
239 RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other