Upgrade to Encode-2.29.
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / MIME / Header.pm
CommitLineData
af1f55d9 1package Encode::MIME::Header;
2use strict;
656ebd29 3use warnings;
4no warnings 'redefine';
d1256cb1 5
a37eaad4 6our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
bedba681 7use Encode qw(find_encoding encode_utf8 decode_utf8);
af1f55d9 8use MIME::Base64;
9use Carp;
10
d1256cb1 11my %seed = (
12 decode_b => '1', # decodes 'B' encoding ?
13 decode_q => '1', # decodes 'Q' encoding ?
14 encode => 'B', # encode with 'B' or 'Q' ?
15 bpl => 75, # bytes per line
16);
af1f55d9 17
18$Encode::Encoding{'MIME-Header'} =
d1256cb1 19 bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;
20
21$Encode::Encoding{'MIME-B'} = bless {
22 %seed,
23 decode_q => 0,
24 Name => 'MIME-B',
25} => __PACKAGE__;
26
27$Encode::Encoding{'MIME-Q'} = bless {
28 %seed,
29 decode_q => 1,
30 encode => 'Q',
31 Name => 'MIME-Q',
32} => __PACKAGE__;
af1f55d9 33
10c5ecbb 34use base qw(Encode::Encoding);
35
af1f55d9 36sub needs_lines { 1 }
d1256cb1 37sub perlio_ok { 0 }
af1f55d9 38
d1256cb1 39sub decode($$;$) {
af1f55d9 40 use utf8;
d1256cb1 41 my ( $obj, $str, $chk ) = @_;
42
af1f55d9 43 # zap spaces between encoded words
44 $str =~ s/\?=\s+=\?/\?==\?/gos;
d1256cb1 45
af1f55d9 46 # multi-line header to single line
a37eaad4 47 $str =~ s/(?:\r|\n|\r\n)[ \t]+//gos;
41c240f5 48
d1256cb1 49 1 while ( $str =~
50 s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/ )
51 ; # Concat consecutive QP encoded mime headers
52 # Fixes breaking inside multi-byte characters
41c240f5 53
d1256cb1 54 $str =~ s{
55 =\? # begin encoded word
56 ([0-9A-Za-z\-_]+) # charset (encoding)
41c240f5 57 (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
d1256cb1 58 \?([QqBb])\? # delimiter
59 (.*?) # Base64-encodede contents
60 \?= # end encoded word
61 }{
62 if (uc($2) eq 'B'){
63 $obj->{decode_b} or croak qq(MIME "B" unsupported);
64 decode_b($1, $3);
65 }elsif(uc($2) eq 'Q'){
66 $obj->{decode_q} or croak qq(MIME "Q" unsupported);
67 decode_q($1, $3);
68 }else{
69 croak qq(MIME "$2" encoding is nonexistent!);
70 }
71 }egox;
af1f55d9 72 $_[1] = '' if $chk;
73 return $str;
74}
75
d1256cb1 76sub decode_b {
77 my $enc = shift;
78 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
af1f55d9 79 my $db64 = decode_base64(shift);
d1256cb1 80 return $d->name eq 'utf8'
81 ? Encode::decode_utf8($db64)
82 : $d->decode( $db64, Encode::FB_PERLQQ );
af1f55d9 83}
84
d1256cb1 85sub decode_q {
86 my ( $enc, $q ) = @_;
bedba681 87 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
af1f55d9 88 $q =~ s/_/ /go;
89 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
d1256cb1 90 return $d->name eq 'utf8'
91 ? Encode::decode_utf8($q)
92 : $d->decode( $q, Encode::FB_PERLQQ );
af1f55d9 93}
94
d1256cb1 95my $especials =
96 join( '|' => map { quotemeta( chr($_) ) }
97 unpack( "C*", qq{()<>@,;:\"\'/[]?.=} ) );
af1f55d9 98
d1256cb1 99my $re_encoded_word = qr{
bedba681 100 (?:
d1256cb1 101 =\? # begin encoded word
102 (?:[0-9A-Za-z\-_]+) # charset (encoding)
41c240f5 103 (?:\*\w+(?:-\w+)*)? # language (RFC 2231)
d1256cb1 104 \?(?:[QqBb])\? # delimiter
105 (?:.*?) # Base64-encodede contents
106 \?= # end encoded word
bedba681 107 )
108 }xo;
109
110my $re_especials = qr{$re_encoded_word|$especials}xo;
af1f55d9 111
d1256cb1 112sub encode($$;$) {
113 my ( $obj, $str, $chk ) = @_;
af1f55d9 114 my @line = ();
d1256cb1 115 for my $line ( split /\r|\n|\r\n/o, $str ) {
116 my ( @word, @subline );
117 for my $word ( split /($re_especials)/o, $line ) {
118 if ( $word =~ /[^\x00-\x7f]/o
119 or $word =~ /^$re_encoded_word$/o )
120 {
121 push @word, $obj->_encode($word);
122 }
123 else {
124 push @word, $word;
125 }
126 }
127 my $subline = '';
128 for my $word (@word) {
129 use bytes ();
130 if ( bytes::length($subline) + bytes::length($word) >
131 $obj->{bpl} )
132 {
133 push @subline, $subline;
134 $subline = '';
135 }
136 $subline .= $word;
137 }
138 $subline and push @subline, $subline;
139 push @line, join( "\n " => @subline );
af1f55d9 140 }
141 $_[1] = '' if $chk;
d1256cb1 142 return join( "\n", @line );
af1f55d9 143}
144
d1256cb1 145use constant HEAD => '=?UTF-8?';
146use constant TAIL => '?=';
af1f55d9 147use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
148
d1256cb1 149sub _encode {
150 my ( $o, $str ) = @_;
151 my $enc = $o->{encode};
152 my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
153
11067275 154 # to coerce a floating-point arithmetics, the following contains
155 # .0 in numbers -- dankogai
d1256cb1 156 $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
af1f55d9 157 my @result = ();
d1256cb1 158 my $chunk = '';
159 while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
160 use bytes ();
161 if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
162 push @result, SINGLE->{$enc}($chunk);
163 $chunk = '';
164 }
165 $chunk .= $chr;
af1f55d9 166 }
64bc6d54 167 length($chunk) and push @result, SINGLE->{$enc}($chunk);
af1f55d9 168 return @result;
169}
170
d1256cb1 171sub _encode_b {
172 HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
af1f55d9 173}
174
d1256cb1 175sub _encode_q {
af1f55d9 176 my $chunk = shift;
51e4e64d 177 $chunk = encode_utf8($chunk);
af1f55d9 178 $chunk =~ s{
d1256cb1 179 ([^0-9A-Za-z])
180 }{
181 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
182 }egox;
51e4e64d 183 return HEAD . 'Q?' . $chunk . TAIL;
af1f55d9 184}
185
1861;
187__END__
188
189=head1 NAME
190
191Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
192
193=head1 SYNOPSIS
194
195 use Encode qw/encode decode/;
196 $utf8 = decode('MIME-Header', $header);
197 $header = encode('MIME-Header', $utf8);
198
199=head1 ABSTRACT
200
201This module implements RFC 2047 Mime Header Encoding. There are 3
202variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
203difference is described below
204
205 decode() encode()
206 ----------------------------------------------
207 MIME-Header Both B and Q =?UTF-8?B?....?=
208 MIME-B B only; Q croaks =?UTF-8?B?....?=
209 MIME-Q Q only; B croaks =?UTF-8?Q?....?=
210
211=head1 DESCRIPTION
212
213When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
214is extracted and decoded for I<X> encoding (B for Base64, Q for
215Quoted-Printable). Then the decoded chunk is fed to
216decode(I<encoding>). So long as I<encoding> is supported by Encode,
217any source encoding is fine.
218
219When you encode, it just encodes UTF-8 string with I<X> encoding then
220quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
221encode are left as is and long lines are folded within 76 bytes per
222line.
223
224=head1 BUGS
225
7e19fb92 226It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
af1f55d9 227and =?ISO-8859-1?= but that makes the implementation too complicated.
228These days major mail agents all support =?UTF-8? so I think it is
229just good enough.
230
56ff7374 231Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
232Makamaka. Thre are still too many MUAs especially cellular phone
233handsets which does not grok UTF-8.
234
af1f55d9 235=head1 SEE ALSO
236
237L<Encode>
238
239RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
240locations.
241
242=cut