X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2Flib%2FEncode%2FMIME%2FHeader.pm;h=29fc858a2308f9a49df5c65606d241d8af7981b0;hb=41c240f59398510e3a736bd441215c051e190e68;hp=fb4fdd95856f592576943ec9bbbcfbc9e50fce7b;hpb=ab3374e4b7f0adca0bcf6e7b71aaacbfd7df7b07;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm index fb4fdd9..29fc858 100644 --- a/ext/Encode/lib/Encode/MIME/Header.pm +++ b/ext/Encode/lib/Encode/MIME/Header.pm @@ -1,9 +1,8 @@ package Encode::MIME::Header; use strict; # use warnings; -our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; - -use Encode qw(find_encoding encode_utf8); +our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +use Encode qw(find_encoding encode_utf8 decode_utf8); use MIME::Base64; use Carp; @@ -48,10 +47,15 @@ sub decode($$;$){ $str =~ s/\?=\s+=\?/\?==\?/gos; # multi-line header to single line $str =~ s/(:?\r|\n|\r\n)[ \t]//gos; + + 1 while ($str =~ s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/); # Concat consecutive QP encoded mime headers + # Fixes breaking inside multi-byte characters + $str =~ s{ =\? # begin encoded word ([0-9A-Za-z\-_]+) # charset (encoding) + (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) \?([QqBb])\? # delimiter (.*?) # Base64-encodede contents \?= # end encoded word @@ -72,7 +76,7 @@ sub decode($$;$){ sub decode_b{ my $enc = shift; - my $d = find_encoding($enc) or croak(Unknown encoding "$enc"); + my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); my $db64 = decode_base64(shift); return $d->name eq 'utf8' ? Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ); @@ -80,7 +84,7 @@ sub decode_b{ sub decode_q{ my ($enc, $q) = @_; - my $d = find_encoding($enc) or croak(Unknown encoding "$enc"); + my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); $q =~ s/_/ /go; $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego; return $d->name eq 'utf8' ? @@ -92,7 +96,19 @@ my $especials = map {quotemeta(chr($_))} unpack("C*", qq{()<>@,;:\"\'/[]?.=})); -my $re_especials = qr/$especials/o; +my $re_encoded_word = + qr{ + (?: + =\? # begin encoded word + (?:[0-9A-Za-z\-_]+) # charset (encoding) + (?:\*\w+(?:-\w+)*)? # language (RFC 2231) + \?(?:[QqBb])\? # delimiter + (?:.*?) # Base64-encodede contents + \?= # end encoded word + ) + }xo; + +my $re_especials = qr{$re_encoded_word|$especials}xo; sub encode($$;$){ my ($obj, $str, $chk) = @_; @@ -100,7 +116,7 @@ sub encode($$;$){ for my $line (split /\r|\n|\r\n/o, $str){ my (@word, @subline); for my $word (split /($re_especials)/o, $line){ - if ($word =~ /[^\x00-\x7f]/o){ + if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){ push @word, $obj->_encode($word); }else{ push @word, $word; @@ -135,7 +151,7 @@ sub _encode{ $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0; my @result = (); my $chunk = ''; - while(my $chr = substr($str, 0, 1, '')){ + while(length(my $chr = substr($str, 0, 1, ''))){ use bytes (); if (bytes::length($chunk) + bytes::length($chr) > $llen){ push @result, SINGLE->{$enc}($chunk); @@ -158,7 +174,7 @@ sub _encode_q{ }{ join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) }egox; - return HEAD . 'Q?' . $chunk . TAIL; + return decode_utf8(HEAD . 'Q?' . $chunk . TAIL); } 1; @@ -206,6 +222,10 @@ and =?ISO-8859-1?= but that makes the implementation too complicated. These days major mail agents all support =?UTF-8? so I think it is just good enough. +Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by +Makamaka. Thre are still too many MUAs especially cellular phone +handsets which does not grok UTF-8. + =head1 SEE ALSO L