Commit | Line | Data |
af1f55d9 |
1 | package Encode::MIME::Header; |
2 | use strict; |
3 | # use warnings; |
f74b3917 |
4 | our $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; |
af1f55d9 |
5 | |
6 | use Encode qw(find_encoding encode_utf8); |
7 | use MIME::Base64; |
8 | use Carp; |
9 | |
10 | my %seed = |
11 | ( |
12 | decode_b => '1', # decodes 'B' encoding ? |
13 | decode_q => '1', # decodes 'Q' encoding ? |
14 | encode => 'B', # encode with 'B' or 'Q' ? |
15 | bpl => 75, # bytes per line |
16 | ); |
17 | |
18 | $Encode::Encoding{'MIME-Header'} = |
19 | bless { |
20 | %seed, |
21 | Name => 'MIME-Header', |
22 | } => __PACKAGE__; |
23 | |
24 | $Encode::Encoding{'MIME-B'} = |
25 | bless { |
26 | %seed, |
27 | decode_q => 0, |
28 | Name => 'MIME-B', |
29 | } => __PACKAGE__; |
30 | |
31 | $Encode::Encoding{'MIME-Q'} = |
32 | bless { |
33 | %seed, |
34 | decode_q => 1, |
35 | encode => 'Q', |
36 | Name => 'MIME-Q', |
37 | } => __PACKAGE__; |
38 | |
10c5ecbb |
39 | use base qw(Encode::Encoding); |
40 | |
af1f55d9 |
41 | sub needs_lines { 1 } |
42 | sub perlio_ok{ 0 }; |
43 | |
44 | sub decode($$;$){ |
45 | use utf8; |
46 | my ($obj, $str, $chk) = @_; |
47 | # zap spaces between encoded words |
48 | $str =~ s/\?=\s+=\?/\?==\?/gos; |
49 | # multi-line header to single line |
50 | $str =~ s/(:?\r|\n|\r\n)[ \t]//gos; |
51 | $str =~ |
52 | s{ |
53 | =\? # begin encoded word |
f74b3917 |
54 | ([0-9A-Za-z\-_]+) # charset (encoding) |
af1f55d9 |
55 | \?([QqBb])\? # delimiter |
56 | (.*?) # Base64-encodede contents |
57 | \?= # end encoded word |
58 | }{ |
59 | if (uc($2) eq 'B'){ |
60 | $obj->{decode_b} or croak qq(MIME "B" unsupported); |
61 | decode_b($1, $3); |
62 | }elsif(uc($2) eq 'Q'){ |
63 | $obj->{decode_q} or croak qq(MIME "Q" unsupported); |
64 | decode_q($1, $3); |
65 | }else{ |
66 | croak qq(MIME "$2" encoding is nonexistent!); |
67 | } |
68 | }egox; |
69 | $_[1] = '' if $chk; |
70 | return $str; |
71 | } |
72 | |
73 | sub decode_b{ |
74 | my $enc = shift; |
75 | my $d = find_encoding($enc) or croak(Unknown encoding "$enc"); |
76 | my $db64 = decode_base64(shift); |
77 | return $d->decode($db64, Encode::FB_PERLQQ); |
78 | } |
79 | |
80 | sub decode_q{ |
81 | my ($enc, $q) = @_; |
82 | my $d = find_encoding($enc) or croak(Unknown encoding "$enc"); |
83 | $q =~ s/_/ /go; |
84 | $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego; |
85 | return $d->decode($q, Encode::FB_PERLQQ); |
86 | } |
87 | |
88 | my $especials = |
89 | join('|' => |
90 | map {quotemeta(chr($_))} |
91 | unpack("C*", qq{()<>@,;:\"\'/[]?.=})); |
92 | |
93 | my $re_especials = qr/$especials/o; |
94 | |
95 | sub encode($$;$){ |
96 | my ($obj, $str, $chk) = @_; |
97 | my @line = (); |
98 | for my $line (split /\r|\n|\r\n/o, $str){ |
99 | my (@word, @subline); |
100 | for my $word (split /($re_especials)/o, $line){ |
101 | if ($word =~ /[^\x00-\x7f]/o){ |
102 | push @word, $obj->_encode($word); |
103 | }else{ |
104 | push @word, $word; |
105 | } |
106 | } |
107 | my $subline = ''; |
108 | for my $word (@word){ |
109 | use bytes (); |
110 | if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){ |
111 | push @subline, $subline; |
112 | $subline = ''; |
113 | } |
114 | $subline .= $word; |
115 | } |
116 | $subline and push @subline, $subline; |
117 | push @line, join("\n " => @subline); |
118 | } |
119 | $_[1] = '' if $chk; |
120 | return join("\n", @line); |
121 | } |
122 | |
123 | use constant HEAD => '=?UTF-8?'; |
124 | use constant TAIL => '?='; |
125 | use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, }; |
126 | |
127 | sub _encode{ |
128 | my ($o, $str) = @_; |
129 | my $enc = $o->{encode}; |
130 | my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL)); |
11067275 |
131 | # to coerce a floating-point arithmetics, the following contains |
132 | # .0 in numbers -- dankogai |
133 | $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0; |
af1f55d9 |
134 | my @result = (); |
135 | my $chunk = ''; |
136 | while(my $chr = substr($str, 0, 1, '')){ |
137 | use bytes (); |
138 | if (bytes::length($chunk) + bytes::length($chr) > $llen){ |
139 | push @result, SINGLE->{$enc}($chunk); |
140 | $chunk = ''; |
141 | } |
142 | $chunk .= $chr; |
143 | } |
144 | $chunk and push @result, SINGLE->{$enc}($chunk); |
145 | return @result; |
146 | } |
147 | |
148 | sub _encode_b{ |
149 | HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL; |
150 | } |
151 | |
152 | sub _encode_q{ |
153 | my $chunk = shift; |
154 | $chunk =~ s{ |
155 | ([^0-9A-Za-z]) |
156 | }{ |
157 | join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) |
158 | }egox; |
159 | return HEAD . 'Q?' . $chunk . TAIL; |
160 | } |
161 | |
162 | 1; |
163 | __END__ |
164 | |
165 | =head1 NAME |
166 | |
167 | Encode::MIME::Header -- MIME 'B' and 'Q' header encoding |
168 | |
169 | =head1 SYNOPSIS |
170 | |
171 | use Encode qw/encode decode/; |
172 | $utf8 = decode('MIME-Header', $header); |
173 | $header = encode('MIME-Header', $utf8); |
174 | |
175 | =head1 ABSTRACT |
176 | |
177 | This module implements RFC 2047 Mime Header Encoding. There are 3 |
178 | variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The |
179 | difference is described below |
180 | |
181 | decode() encode() |
182 | ---------------------------------------------- |
183 | MIME-Header Both B and Q =?UTF-8?B?....?= |
184 | MIME-B B only; Q croaks =?UTF-8?B?....?= |
185 | MIME-Q Q only; B croaks =?UTF-8?Q?....?= |
186 | |
187 | =head1 DESCRIPTION |
188 | |
189 | When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD> |
190 | is extracted and decoded for I<X> encoding (B for Base64, Q for |
191 | Quoted-Printable). Then the decoded chunk is fed to |
192 | decode(I<encoding>). So long as I<encoding> is supported by Encode, |
193 | any source encoding is fine. |
194 | |
195 | When you encode, it just encodes UTF-8 string with I<X> encoding then |
196 | quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to |
197 | encode are left as is and long lines are folded within 76 bytes per |
198 | line. |
199 | |
200 | =head1 BUGS |
201 | |
7e19fb92 |
202 | It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP? |
af1f55d9 |
203 | and =?ISO-8859-1?= but that makes the implementation too complicated. |
204 | These days major mail agents all support =?UTF-8? so I think it is |
205 | just good enough. |
206 | |
207 | =head1 SEE ALSO |
208 | |
209 | L<Encode> |
210 | |
211 | RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other |
212 | locations. |
213 | |
214 | =cut |