1 package Encode::MIME::Header;
4 no warnings 'redefine';
6 our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
7 use Encode qw(find_encoding encode_utf8 decode_utf8);
12 decode_b => '1', # decodes 'B' encoding ?
13 decode_q => '1', # decodes 'Q' encoding ?
14 encode => 'B', # encode with 'B' or 'Q' ?
15 bpl => 75, # bytes per line
18 $Encode::Encoding{'MIME-Header'} =
19 bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;
21 $Encode::Encoding{'MIME-B'} = bless {
27 $Encode::Encoding{'MIME-Q'} = bless {
34 use base qw(Encode::Encoding);
41 my ( $obj, $str, $chk ) = @_;
43 # zap spaces between encoded words
44 $str =~ s/\?=\s+=\?/\?==\?/gos;
46 # multi-line header to single line
47 $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
50 s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/ )
51 ; # Concat consecutive QP encoded mime headers
52 # Fixes breaking inside multi-byte characters
55 =\? # begin encoded word
56 ([0-9A-Za-z\-_]+) # charset (encoding)
57 (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
58 \?([QqBb])\? # delimiter
59 (.*?) # Base64-encodede contents
60 \?= # end encoded word
63 $obj->{decode_b} or croak qq(MIME "B" unsupported);
65 }elsif(uc($2) eq 'Q'){
66 $obj->{decode_q} or croak qq(MIME "Q" unsupported);
69 croak qq(MIME "$2" encoding is nonexistent!);
78 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
79 my $db64 = decode_base64(shift);
80 return $d->name eq 'utf8'
81 ? Encode::decode_utf8($db64)
82 : $d->decode( $db64, Encode::FB_PERLQQ );
87 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
89 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
90 return $d->name eq 'utf8'
91 ? Encode::decode_utf8($q)
92 : $d->decode( $q, Encode::FB_PERLQQ );
96 join( '|' => map { quotemeta( chr($_) ) }
97 unpack( "C*", qq{()<>@,;:\"\'/[]?.=} ) );
99 my $re_encoded_word = qr{
101 =\? # begin encoded word
102 (?:[0-9A-Za-z\-_]+) # charset (encoding)
103 (?:\*\w+(?:-\w+)*)? # language (RFC 2231)
104 \?(?:[QqBb])\? # delimiter
105 (?:.*?) # Base64-encodede contents
106 \?= # end encoded word
110 my $re_especials = qr{$re_encoded_word|$especials}xo;
113 my ( $obj, $str, $chk ) = @_;
115 for my $line ( split /\r|\n|\r\n/o, $str ) {
116 my ( @word, @subline );
117 for my $word ( split /($re_especials)/o, $line ) {
118 if ( $word =~ /[^\x00-\x7f]/o
119 or $word =~ /^$re_encoded_word$/o )
121 push @word, $obj->_encode($word);
128 for my $word (@word) {
130 if ( bytes::length($subline) + bytes::length($word) >
133 push @subline, $subline;
138 $subline and push @subline, $subline;
139 push @line, join( "\n " => @subline );
142 return join( "\n", @line );
145 use constant HEAD => '=?UTF-8?';
146 use constant TAIL => '?=';
147 use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
150 my ( $o, $str ) = @_;
151 my $enc = $o->{encode};
152 my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
154 # to coerce a floating-point arithmetics, the following contains
155 # .0 in numbers -- dankogai
156 $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
159 while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
161 if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
162 push @result, SINGLE->{$enc}($chunk);
167 $chunk and push @result, SINGLE->{$enc}($chunk);
172 HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
180 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
182 return decode_utf8( HEAD . 'Q?' . $chunk . TAIL );
190 Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
194 use Encode qw/encode decode/;
195 $utf8 = decode('MIME-Header', $header);
196 $header = encode('MIME-Header', $utf8);
200 This module implements RFC 2047 Mime Header Encoding. There are 3
201 variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
202 difference is described below
205 ----------------------------------------------
206 MIME-Header Both B and Q =?UTF-8?B?....?=
207 MIME-B B only; Q croaks =?UTF-8?B?....?=
208 MIME-Q Q only; B croaks =?UTF-8?Q?....?=
212 When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
213 is extracted and decoded for I<X> encoding (B for Base64, Q for
214 Quoted-Printable). Then the decoded chunk is fed to
215 decode(I<encoding>). So long as I<encoding> is supported by Encode,
216 any source encoding is fine.
218 When you encode, it just encodes UTF-8 string with I<X> encoding then
219 quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
220 encode are left as is and long lines are folded within 76 bytes per
225 It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
226 and =?ISO-8859-1?= but that makes the implementation too complicated.
227 These days major mail agents all support =?UTF-8? so I think it is
230 Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
231 Makamaka. Thre are still too many MUAs especially cellular phone
232 handsets which does not grok UTF-8.
238 RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other