Upgrade to Encode 2.18
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / MIME / Header.pm
CommitLineData
af1f55d9 1package Encode::MIME::Header;
2use strict;
656ebd29 3use warnings;
4no warnings 'redefine';
d1256cb1 5
656ebd29 6our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
bedba681 7use Encode qw(find_encoding encode_utf8 decode_utf8);
af1f55d9 8use MIME::Base64;
9use Carp;
10
d1256cb1 11my %seed = (
12 decode_b => '1', # decodes 'B' encoding ?
13 decode_q => '1', # decodes 'Q' encoding ?
14 encode => 'B', # encode with 'B' or 'Q' ?
15 bpl => 75, # bytes per line
16);
af1f55d9 17
18$Encode::Encoding{'MIME-Header'} =
d1256cb1 19 bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;
20
21$Encode::Encoding{'MIME-B'} = bless {
22 %seed,
23 decode_q => 0,
24 Name => 'MIME-B',
25} => __PACKAGE__;
26
27$Encode::Encoding{'MIME-Q'} = bless {
28 %seed,
29 decode_q => 1,
30 encode => 'Q',
31 Name => 'MIME-Q',
32} => __PACKAGE__;
af1f55d9 33
10c5ecbb 34use base qw(Encode::Encoding);
35
af1f55d9 36sub needs_lines { 1 }
d1256cb1 37sub perlio_ok { 0 }
af1f55d9 38
d1256cb1 39sub decode($$;$) {
af1f55d9 40 use utf8;
d1256cb1 41 my ( $obj, $str, $chk ) = @_;
42
af1f55d9 43 # zap spaces between encoded words
44 $str =~ s/\?=\s+=\?/\?==\?/gos;
d1256cb1 45
af1f55d9 46 # multi-line header to single line
47 $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
41c240f5 48
d1256cb1 49 1 while ( $str =~
50 s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/ )
51 ; # Concat consecutive QP encoded mime headers
52 # Fixes breaking inside multi-byte characters
41c240f5 53
d1256cb1 54 $str =~ s{
55 =\? # begin encoded word
56 ([0-9A-Za-z\-_]+) # charset (encoding)
41c240f5 57 (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
d1256cb1 58 \?([QqBb])\? # delimiter
59 (.*?) # Base64-encodede contents
60 \?= # end encoded word
61 }{
62 if (uc($2) eq 'B'){
63 $obj->{decode_b} or croak qq(MIME "B" unsupported);
64 decode_b($1, $3);
65 }elsif(uc($2) eq 'Q'){
66 $obj->{decode_q} or croak qq(MIME "Q" unsupported);
67 decode_q($1, $3);
68 }else{
69 croak qq(MIME "$2" encoding is nonexistent!);
70 }
71 }egox;
af1f55d9 72 $_[1] = '' if $chk;
73 return $str;
74}
75
d1256cb1 76sub decode_b {
77 my $enc = shift;
78 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
af1f55d9 79 my $db64 = decode_base64(shift);
d1256cb1 80 return $d->name eq 'utf8'
81 ? Encode::decode_utf8($db64)
82 : $d->decode( $db64, Encode::FB_PERLQQ );
af1f55d9 83}
84
d1256cb1 85sub decode_q {
86 my ( $enc, $q ) = @_;
bedba681 87 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
af1f55d9 88 $q =~ s/_/ /go;
89 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
d1256cb1 90 return $d->name eq 'utf8'
91 ? Encode::decode_utf8($q)
92 : $d->decode( $q, Encode::FB_PERLQQ );
af1f55d9 93}
94
d1256cb1 95my $especials =
96 join( '|' => map { quotemeta( chr($_) ) }
97 unpack( "C*", qq{()<>@,;:\"\'/[]?.=} ) );
af1f55d9 98
d1256cb1 99my $re_encoded_word = qr{
bedba681 100 (?:
d1256cb1 101 =\? # begin encoded word
102 (?:[0-9A-Za-z\-_]+) # charset (encoding)
41c240f5 103 (?:\*\w+(?:-\w+)*)? # language (RFC 2231)
d1256cb1 104 \?(?:[QqBb])\? # delimiter
105 (?:.*?) # Base64-encodede contents
106 \?= # end encoded word
bedba681 107 )
108 }xo;
109
110my $re_especials = qr{$re_encoded_word|$especials}xo;
af1f55d9 111
d1256cb1 112sub encode($$;$) {
113 my ( $obj, $str, $chk ) = @_;
af1f55d9 114 my @line = ();
d1256cb1 115 for my $line ( split /\r|\n|\r\n/o, $str ) {
116 my ( @word, @subline );
117 for my $word ( split /($re_especials)/o, $line ) {
118 if ( $word =~ /[^\x00-\x7f]/o
119 or $word =~ /^$re_encoded_word$/o )
120 {
121 push @word, $obj->_encode($word);
122 }
123 else {
124 push @word, $word;
125 }
126 }
127 my $subline = '';
128 for my $word (@word) {
129 use bytes ();
130 if ( bytes::length($subline) + bytes::length($word) >
131 $obj->{bpl} )
132 {
133 push @subline, $subline;
134 $subline = '';
135 }
136 $subline .= $word;
137 }
138 $subline and push @subline, $subline;
139 push @line, join( "\n " => @subline );
af1f55d9 140 }
141 $_[1] = '' if $chk;
d1256cb1 142 return join( "\n", @line );
af1f55d9 143}
144
d1256cb1 145use constant HEAD => '=?UTF-8?';
146use constant TAIL => '?=';
af1f55d9 147use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
148
d1256cb1 149sub _encode {
150 my ( $o, $str ) = @_;
151 my $enc = $o->{encode};
152 my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
153
11067275 154 # to coerce a floating-point arithmetics, the following contains
155 # .0 in numbers -- dankogai
d1256cb1 156 $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
af1f55d9 157 my @result = ();
d1256cb1 158 my $chunk = '';
159 while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
160 use bytes ();
161 if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
162 push @result, SINGLE->{$enc}($chunk);
163 $chunk = '';
164 }
165 $chunk .= $chr;
af1f55d9 166 }
167 $chunk and push @result, SINGLE->{$enc}($chunk);
168 return @result;
169}
170
d1256cb1 171sub _encode_b {
172 HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
af1f55d9 173}
174
d1256cb1 175sub _encode_q {
af1f55d9 176 my $chunk = shift;
177 $chunk =~ s{
d1256cb1 178 ([^0-9A-Za-z])
179 }{
180 join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
181 }egox;
182 return decode_utf8( HEAD . 'Q?' . $chunk . TAIL );
af1f55d9 183}
184
1851;
186__END__
187
188=head1 NAME
189
190Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
191
192=head1 SYNOPSIS
193
194 use Encode qw/encode decode/;
195 $utf8 = decode('MIME-Header', $header);
196 $header = encode('MIME-Header', $utf8);
197
198=head1 ABSTRACT
199
200This module implements RFC 2047 Mime Header Encoding. There are 3
201variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
202difference is described below
203
204 decode() encode()
205 ----------------------------------------------
206 MIME-Header Both B and Q =?UTF-8?B?....?=
207 MIME-B B only; Q croaks =?UTF-8?B?....?=
208 MIME-Q Q only; B croaks =?UTF-8?Q?....?=
209
210=head1 DESCRIPTION
211
212When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
213is extracted and decoded for I<X> encoding (B for Base64, Q for
214Quoted-Printable). Then the decoded chunk is fed to
215decode(I<encoding>). So long as I<encoding> is supported by Encode,
216any source encoding is fine.
217
218When you encode, it just encodes UTF-8 string with I<X> encoding then
219quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
220encode are left as is and long lines are folded within 76 bytes per
221line.
222
223=head1 BUGS
224
7e19fb92 225It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
af1f55d9 226and =?ISO-8859-1?= but that makes the implementation too complicated.
227These days major mail agents all support =?UTF-8? so I think it is
228just good enough.
229
56ff7374 230Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
231Makamaka. Thre are still too many MUAs especially cellular phone
232handsets which does not grok UTF-8.
233
af1f55d9 234=head1 SEE ALSO
235
236L<Encode>
237
238RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
239locations.
240
241=cut