X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2Flib%2FEncode%2FMIME%2FHeader.pm;h=29fc858a2308f9a49df5c65606d241d8af7981b0;hb=41c240f59398510e3a736bd441215c051e190e68;hp=fb4fdd95856f592576943ec9bbbcfbc9e50fce7b;hpb=ab3374e4b7f0adca0bcf6e7b71aaacbfd7df7b07;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm
index fb4fdd9..29fc858 100644
--- a/ext/Encode/lib/Encode/MIME/Header.pm
+++ b/ext/Encode/lib/Encode/MIME/Header.pm
@@ -1,9 +1,8 @@
 package Encode::MIME::Header;
 use strict;
 # use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-use Encode qw(find_encoding encode_utf8);
+our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+use Encode qw(find_encoding encode_utf8 decode_utf8);
 use MIME::Base64;
 use Carp;
 
@@ -48,10 +47,15 @@ sub decode($$;$){
     $str =~ s/\?=\s+=\?/\?==\?/gos;
     # multi-line header to single line
     $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
+
+    1 while ($str =~ s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/);  # Concat consecutive QP encoded mime headers
+                                                                                        # Fixes breaking inside multi-byte characters
+
     $str =~
 	s{
 	    =\?                  # begin encoded word
 		([0-9A-Za-z\-_]+) # charset (encoding)
+                (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
 		\?([QqBb])\?     # delimiter
 		(.*?)            # Base64-encodede contents
 		\?=              # end encoded word      
@@ -72,7 +76,7 @@ sub decode($$;$){
 
 sub decode_b{
     my $enc = shift;
-    my $d = find_encoding($enc)	or croak(Unknown encoding "$enc");
+    my $d = find_encoding($enc)        or croak qq(Unknown encoding "$enc");
     my $db64 = decode_base64(shift);
     return $d->name eq 'utf8' ?
 	Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
@@ -80,7 +84,7 @@ sub decode_b{
 
 sub decode_q{
     my ($enc, $q) = @_;
-    my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
+    my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
     $q =~ s/_/ /go;
     $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
     return $d->name eq 'utf8' ? 
@@ -92,7 +96,19 @@ my $especials =
 	 map {quotemeta(chr($_))} 
 	 unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
 
-my $re_especials = qr/$especials/o;
+my $re_encoded_word =
+    qr{
+       (?:
+	=\?               # begin encoded word
+	(?:[0-9A-Za-z\-_]+) # charset (encoding)
+        (?:\*\w+(?:-\w+)*)? # language (RFC 2231)
+	\?(?:[QqBb])\?      # delimiter
+	(?:.*?)             # Base64-encodede contents
+	\?=                 # end encoded word
+       )
+      }xo;
+
+my $re_especials = qr{$re_encoded_word|$especials}xo;
 
 sub encode($$;$){
     my ($obj, $str, $chk) = @_;
@@ -100,7 +116,7 @@ sub encode($$;$){
     for my $line (split /\r|\n|\r\n/o, $str){
 	my (@word, @subline);
         for my $word (split /($re_especials)/o, $line){
-	    if ($word =~ /[^\x00-\x7f]/o){ 
+	    if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
 		push @word, $obj->_encode($word);
 	    }else{
 		push @word, $word;
@@ -135,7 +151,7 @@ sub _encode{
     $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
     my @result = ();
     my $chunk = '';
-    while(my $chr = substr($str, 0, 1, '')){
+    while(length(my $chr = substr($str, 0, 1, ''))){
 	use bytes ();
 	if (bytes::length($chunk) + bytes::length($chr) > $llen){
 	    push @result, SINGLE->{$enc}($chunk);
@@ -158,7 +174,7 @@ sub _encode_q{
 	       }{
 		   join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
 	       }egox;
-    return HEAD . 'Q?' . $chunk . TAIL;
+    return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
 }
 
 1;
@@ -206,6 +222,10 @@ and =?ISO-8859-1?= but that makes the implementation too complicated.
 These days major mail agents all support =?UTF-8? so I think it is
 just good enough.
 
+Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
+Makamaka.  Thre are still too many MUAs especially cellular phone
+handsets which does not grok UTF-8.
+
 =head1 SEE ALSO
 
 L<Encode>