Upgrade to Encode-2.27

diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS

index 647b356..022a3c4 100644 (file)
--- a/ext/Encode/AUTHORS
+++ b/ext/Encode/AUTHORS
@@ -9,7 +9,8 @@
 #
 # This list is in alphabetical order.
 --
-Andreas J. Koenig              <andreas.koenig@anima.de>
+Alex Davies                     <alex.davies@talktalk.net>
+Andreas J. Koenig               <andreas.koenig@anima.de>
 Anton Tagunov                  <tagunov@motor.ru>
 Autrijus Tang                  <autrijus@autrijus.org>
 Benjamin Goldberg              <goldbb2@earthlink.net>
diff --git a/ext/Encode/Changes b/ext/Encode/Changes

index 3039058..a65c47e 100644 (file)
--- a/ext/Encode/Changes
+++ b/ext/Encode/Changes
@@ -1,13 +1,36 @@
 # Revision history for Perl extension Encode.
 #
-# $Id: Changes,v 2.26 2008/07/01 20:56:17 dankogai Exp dankogai $
+# $Id: Changes,v 2.27 2009/01/21 22:55:07 dankogai Exp dankogai $
 #
-$Revision: 2.26 $ $Date: 2008/07/01 20:56:17 $
+$Revision: 2.27 $ $Date: 2009/01/21 22:55:07 $
+! lib/Encode/MIME/Header.pm t/mime-header.t
+  Addressed: Encode::MIME::Header MIME-Q encoding truncates 
+    trailing zeros in some circumstances
+  http://rt.cpan.org/Ticket/Display.html?id=342627
+! lib/Encode/Alias.pm
+  Added alias: unicode-1-1-utf-7
+  http://rt.cpan.org/Ticket/Display.html?id=38558  
+! Encode.pm
+  Documented: _utf8_on() does not work for tainted values
+  http://rt.cpan.org/Ticket/Display.html?id=41163
+! bin/enc2xs
+  s[oss.software.ibm.com/icu][www.icu-project.org]g
+  http://rt.cpan.org/Ticket/Display.html?id=40245
+! lib/Encode/Guess.pm t/guess.t
+  Addressed:Empty file should produce an error message
+  http://rt.cpan.org/Ticket/Display.html?id=38652
+
+
+2.26 2008/07/01 20:56:17
+! Unicode/Unicode.xs AUTHORS
+  Refactored by Alex Davies
+  http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2007-10/msg00745.html
+  Message-Id: <7637669B2E3D46B187591747DA27F4C8@Amelie>
 ! Encode.pm
   Absense of Encode::ConfigLocal no longer carps no matter what.
-  https://bugzilla.redhat.com/show_bug.cgi?id=435505#c2
-  https://rt.cpan.org/Ticket/Display.html?id=28638
-  https://rt.cpan.org/Ticket/Display.html?id=11511
+  http://bugzilla.redhat.com/show_bug.cgi?id=435505#c2
+  http://rt.cpan.org/Ticket/Display.html?id=28638
+  http://rt.cpan.org/Ticket/Display.html?id=11511
 ! lib/Encode/JIS7.pm
   use encoding 'utf8' and 'iso-2022-jp' glitches on perl 5.10
   Thanks, MIYAGAWA
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm

index 0209257..878536b 100644 (file)
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,10 +1,10 @@
 #
-# $Id: Encode.pm,v 2.26 2008/07/01 20:56:17 dankogai Exp dankogai $
+# $Id: Encode.pm,v 2.27 2009/01/21 22:55:07 dankogai Exp dankogai $
 #
 package Encode;
 use strict;
 use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.26 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.27 $ =~ /(\d+)/g;
 sub DEBUG () { 0 }
 use XSLoader ();
 XSLoader::load( __PACKAGE__, $VERSION );
@@ -863,6 +863,8 @@ B<know> that the STRING is well-formed UTF-8.  Returns the previous
 state of the UTF8 flag (so please don't treat the return value as
 indicating success or failure), or C<undef> if STRING is not a string.
 
+This function does not work on tainted values.
+
 =item _utf8_off(STRING)
 
 [INTERNAL] Turns off the UTF8 flag in STRING.  Do not use frivolously.
@@ -870,6 +872,8 @@ Returns the previous state of the UTF8 flag (so please don't treat the
 return value as indicating success or failure), or C<undef> if STRING is
 not a string.
 
+This function does not work on tainted values.
+
 =back
 
 =head1 UTF-8 vs. utf8 vs. UTF8
diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL

index 7a78d11..5b8f832 100644 (file)
--- a/ext/Encode/Makefile.PL
+++ b/ext/Encode/Makefile.PL
@@ -1,5 +1,5 @@
 #
-# $Id: Makefile.PL,v 2.7 2008/07/01 20:56:17 dankogai Exp dankogai $
+# $Id: Makefile.PL,v 2.7 2008/07/01 20:56:17 dankogai Exp $
 #
 use 5.007003;
 use strict;
diff --git a/ext/Encode/Unicode/Unicode.xs b/ext/Encode/Unicode/Unicode.xs

index 9efead6..da069c1 100644 (file)
--- a/ext/Encode/Unicode/Unicode.xs
+++ b/ext/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
 /*
- $Id: Unicode.xs,v 2.3 2006/05/03 18:24:10 dankogai Exp $
+ $Id: Unicode.xs,v 2.4 2009/01/21 22:55:07 dankogai Exp dankogai $
  */
 
 #define PERL_NO_GET_CONTEXT
@@ -18,65 +18,84 @@
 #define isLoSurrogate(x)       (0xDC00 <= (x)  && (x) <= 0xDFFF )
 #define invalid_ucs2(x)         ( issurrogate(x) || 0xFFFF < (x) )
 
+#define PERLIO_BUFSIZ 1024 /* XXX value comes from PerlIOEncode_get_base */
+
+/* Avoid wasting too much space in the result buffer */
+static void
+shrink_buffer(SV *result)
+{
+    if (SvLEN(result) > 42 + SvCUR(result)) {
+       char *buf;
+       STRLEN datalen = 1 + SvCUR(result); /* include the NUL byte */
+       STRLEN buflen = PERL_STRLEN_ROUNDUP(datalen);
+       Newx(buf, buflen, char);
+       Copy(SvPVX(result), buf, datalen, char);
+       Safefree(SvPVX(result));
+       SvPV_set(result, buf);
+       SvLEN_set(result, buflen);
+    }
+}
+
 static UV
-enc_unpack(pTHX_ U8 **sp,U8 *e,STRLEN size,U8 endian)
+enc_unpack(pTHX_ U8 **sp, U8 *e, STRLEN size, U8 endian)
 {
     U8 *s = *sp;
     UV v = 0;
     if (s+size > e) {
-    croak("Partial character %c",(char) endian);
+       croak("Partial character %c",(char) endian);
     }
     switch(endian) {
     case 'N':
-    v = *s++;
-    v = (v << 8) | *s++;
+       v = *s++;
+       v = (v << 8) | *s++;
     case 'n':
-    v = (v << 8) | *s++;
-    v = (v << 8) | *s++;
-    break;
+       v = (v << 8) | *s++;
+       v = (v << 8) | *s++;
+       break;
     case 'V':
     case 'v':
-    v |= *s++;
-    v |= (*s++ << 8);
-    if (endian == 'v')
-        break;
-    v |= (*s++ << 16);
-    v |= (*s++ << 24);
-    break;
+       v |= *s++;
+       v |= (*s++ << 8);
+       if (endian == 'v')
+           break;
+       v |= (*s++ << 16);
+       v |= (*s++ << 24);
+       break;
     default:
-    croak("Unknown endian %c",(char) endian);
-    break;
+       croak("Unknown endian %c",(char) endian);
+       break;
     }
     *sp = s;
     return v;
 }
 
 void
-enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value)
+enc_pack(pTHX_ SV *result, STRLEN size, U8 endian, UV value)
 {
-    U8 *d = (U8 *)SvGROW(result,SvCUR(result)+size+1);
+    U8 *d = (U8 *) SvPV_nolen(result);
+
     switch(endian) {
     case 'v':
     case 'V':
-    d += SvCUR(result);
-    SvCUR_set(result,SvCUR(result)+size);
-    while (size--) {
-        *d++ = (U8)(value & 0xFF);
-        value >>= 8;
-    }
-    break;
+       d += SvCUR(result);
+       SvCUR_set(result,SvCUR(result)+size);
+       while (size--) {
+           *d++ = (U8)(value & 0xFF);
+           value >>= 8;
+       }
+       break;
     case 'n':
     case 'N':
-    SvCUR_set(result,SvCUR(result)+size);
-    d += SvCUR(result);
-    while (size--) {
-        *--d = (U8)(value & 0xFF);
-        value >>= 8;
-    }
-    break;
+       SvCUR_set(result,SvCUR(result)+size);
+       d += SvCUR(result);
+       while (size--) {
+           *--d = (U8)(value & 0xFF);
+           value >>= 8;
+       }
+       break;
     default:
-    croak("Unknown endian %c",(char) endian);
-    break;
+       croak("Unknown endian %c",(char) endian);
+       break;
     }
 }
 
@@ -94,124 +113,162 @@ SV *     str
 IV     check
 CODE:
 {
-    U8 endian   = *((U8 *)SvPV_nolen(attr("endian", 6)));
-    int size    =   SvIV(attr("size",   4));
-    int ucs2    = SvTRUE(attr("ucs2",   4));
-    int renewed = SvTRUE(attr("renewed",  7));
-    SV *result  = newSVpvn("",0);
+    U8 endian    = *((U8 *)SvPV_nolen(attr("endian", 6)));
+    int size     = SvIV(attr("size", 4));
+    int ucs2     = -1; /* only needed in the event of surrogate pairs */
+    SV *result   = newSVpvn("",0);
+    STRLEN usize = (size > 0 ? size : 1); /* protect against rogue size<=0 */
     STRLEN ulen;
+    STRLEN resultbuflen;
+    U8 *resultbuf;
     U8 *s = (U8 *)SvPVbyte(str,ulen);
     U8 *e = (U8 *)SvEND(str);
+    /* Optimise for the common case of being called from PerlIOEncode_fill()
+       with a standard length buffer. In this case the result SV's buffer is
+       only used temporarily, so we can afford to allocate the maximum needed
+       and not care about unused space. */
+    const bool temp_result = (ulen == PERLIO_BUFSIZ);
+
     ST(0) = sv_2mortal(result);
     SvUTF8_on(result);
 
     if (!endian && s+size <= e) {
-    UV bom;
-    endian = (size == 4) ? 'N' : 'n';
-    bom = enc_unpack(aTHX_ &s,e,size,endian);
-        if (bom != BOM_BE) {
-        if (bom == BOM16LE) {
-        endian = 'v';
-        }
-        else if (bom == BOM32LE) {
-        endian = 'V';
-        }
-        else {
-        croak("%"SVf":Unrecognised BOM %"UVxf,
-                      *hv_fetch((HV *)SvRV(obj),"Name",4,0),
-              bom);
-        }
-    }
+       UV bom;
+       endian = (size == 4) ? 'N' : 'n';
+       bom = enc_unpack(aTHX_ &s,e,size,endian);
+       if (bom != BOM_BE) {
+           if (bom == BOM16LE) {
+               endian = 'v';
+           }
+           else if (bom == BOM32LE) {
+               endian = 'V';
+           }
+           else {
+               croak("%"SVf":Unrecognised BOM %"UVxf,
+                     *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+                     bom);
+           }
+       }
 #if 1
-    /* Update endian for next sequence */
-    if (renewed) {
-        hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
-    }
+       /* Update endian for next sequence */
+       if (SvTRUE(attr("renewed", 7))) {
+           hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+       }
 #endif
     }
-    while (s < e && s+size <= e) {
-    UV ord = enc_unpack(aTHX_ &s,e,size,endian);
-    U8 *d;
-    if (issurrogate(ord)) {
-        if (ucs2 || size == 4) {
-        if (check) {
-            croak("%"SVf":no surrogates allowed %"UVxf,
-              *hv_fetch((HV *)SvRV(obj),"Name",4,0),
-              ord);
-        }
-        if (s+size <= e) {
-                    /* skip the next one as well */
-            enc_unpack(aTHX_ &s,e,size,endian);
-        }
-        ord = FBCHAR;
-        }
-        else {
-        UV lo;
-        if (!isHiSurrogate(ord)) {
-            if (check) {
-            croak("%"SVf":Malformed HI surrogate %"UVxf,
-                  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
-                  ord);
-            }
-            else {
-            ord = FBCHAR;
-            }
-        }
-            else {
-            if (s+size > e) {
-            /* Partial character */
-            s -= size;   /* back up to 1st half */
-            break;       /* And exit loop */
-            }
-            lo = enc_unpack(aTHX_ &s,e,size,endian);
-            if (!isLoSurrogate(lo)){
-            if (check) {
-                croak("%"SVf":Malformed LO surrogate %"UVxf,
-                  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
-                  ord);
-            }
-            else {
-                ord = FBCHAR;
-            }
-            }
-            else {
-            ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
-            }
-        }
-        }
-    }
 
-    if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) {
-        if (check) {
-        croak("%"SVf":Unicode character %"UVxf" is illegal",
-              *hv_fetch((HV *)SvRV(obj),"Name",4,0),
-              ord);
-        } else {
-        ord = FBCHAR;
-        }
+    if (temp_result) {
+       resultbuflen = 1 + ulen/usize * UTF8_MAXLEN;
+    } else {
+       /* Preallocate the buffer to the minimum possible space required. */
+       resultbuflen = ulen/usize + UTF8_MAXLEN + 1;
     }
+    resultbuf = (U8 *) SvGROW(result, resultbuflen);
 
-    d = (U8 *) SvGROW(result,SvCUR(result)+UTF8_MAXLEN+1);
-    d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
-    SvCUR_set(result,d - (U8 *)SvPVX(result));
-    }
-    if (s < e) {
-    /* unlikely to happen because it's fixed-length -- dankogai */
-    if (check & ENCODE_WARN_ON_ERR){
-        Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
-            *hv_fetch((HV *)SvRV(obj),"Name",4,0));
-    }
+    while (s < e && s+size <= e) {
+       UV ord = enc_unpack(aTHX_ &s,e,size,endian);
+       U8 *d;
+       if (issurrogate(ord)) {
+           if (ucs2 == -1) {
+               ucs2 = SvTRUE(attr("ucs2", 4));
+           }
+           if (ucs2 || size == 4) {
+               if (check) {
+                   croak("%"SVf":no surrogates allowed %"UVxf,
+                         *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+                         ord);
+               }
+               if (s+size <= e) {
+                   /* skip the next one as well */
+                   enc_unpack(aTHX_ &s,e,size,endian);
+               }
+               ord = FBCHAR;
+           }
+           else {
+               UV lo;
+               if (!isHiSurrogate(ord)) {
+                   if (check) {
+                       croak("%"SVf":Malformed HI surrogate %"UVxf,
+                             *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+                             ord);
+                   }
+                   else {
+                       ord = FBCHAR;
+                   }
+               }
+               else {
+                   if (s+size > e) {
+                       /* Partial character */
+                       s -= size;   /* back up to 1st half */
+                       break;       /* And exit loop */
+                   }
+                   lo = enc_unpack(aTHX_ &s,e,size,endian);
+                   if (!isLoSurrogate(lo)) {
+                       if (check) {
+                           croak("%"SVf":Malformed LO surrogate %"UVxf,
+                                 *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+                                 ord);
+                       }
+                       else {
+                           ord = FBCHAR;
+                       }
+                   }
+                   else {
+                       ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
+                   }
+               }
+           }
+       }
+
+       if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) {
+           if (check) {
+               croak("%"SVf":Unicode character %"UVxf" is illegal",
+                     *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+                     ord);
+           } else {
+               ord = FBCHAR;
+           }
+       }
+
+       if (resultbuflen < SvCUR(result) + UTF8_MAXLEN + 1) {
+           /* Do not allocate >8Mb more than the minimum needed.
+              This prevents allocating too much in the rogue case of a large
+              input consisting initially of long sequence uft8-byte unicode
+              chars followed by single utf8-byte chars. */
+           STRLEN remaining = (e - s)/usize;
+           STRLEN max_alloc = remaining + (8*1024*1024);
+           STRLEN est_alloc = remaining * UTF8_MAXLEN;
+           STRLEN newlen = SvLEN(result) + /* min(max_alloc, est_alloc) */
+               (est_alloc > max_alloc ? max_alloc : est_alloc);
+           resultbuf = (U8 *) SvGROW(result, newlen);
+           resultbuflen = SvLEN(result);
+       }
+
+       d = uvuni_to_utf8_flags(resultbuf+SvCUR(result), ord, 0);
+       SvCUR_set(result, d - (U8 *)SvPVX(result));
     }
-    if (check && !(check & ENCODE_LEAVE_SRC)){
+
     if (s < e) {
-        Move(s,SvPVX(str),e-s,U8);
-        SvCUR_set(str,(e-s));
+       /* unlikely to happen because it's fixed-length -- dankogai */
+       if (check & ENCODE_WARN_ON_ERR) {
+           Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
+                       *hv_fetch((HV *)SvRV(obj),"Name",4,0));
+       }
     }
-    else {
-        SvCUR_set(str,0);
-    }
-    *SvEND(str) = '\0';
+    if (check && !(check & ENCODE_LEAVE_SRC)) {
+       if (s < e) {
+           Move(s,SvPVX(str),e-s,U8);
+           SvCUR_set(str,(e-s));
+       }
+       else {
+           SvCUR_set(str,0);
+       }
+       *SvEND(str) = '\0';
     }
+
+    if (!temp_result)
+       shrink_buffer(result);
+
     XSRETURN(1);
 }
 
@@ -222,75 +279,92 @@ SV *      utf8
 IV     check
 CODE:
 {
-    U8 endian   = *((U8 *)SvPV_nolen(attr("endian", 6)));
-    int size    =   SvIV(attr("size",   4));
-    int ucs2    = SvTRUE(attr("ucs2",   4));
-    int renewed = SvTRUE(attr("renewed",  7));
-    SV *result  = newSVpvn("",0);
+    U8 endian = *((U8 *)SvPV_nolen(attr("endian", 6)));
+    const int size = SvIV(attr("size", 4));
+    int ucs2 = -1; /* only needed if there is invalid_ucs2 input */
+    const STRLEN usize = (size > 0 ? size : 1);
+    SV *result = newSVpvn("", 0);
     STRLEN ulen;
-    U8 *s = (U8 *)SvPVutf8(utf8,ulen);
-    U8 *e = (U8 *)SvEND(utf8);
+    U8 *s = (U8 *) SvPVutf8(utf8, ulen);
+    const U8 *e = (U8 *) SvEND(utf8);
+    /* Optimise for the common case of being called from PerlIOEncode_flush()
+       with a standard length buffer. In this case the result SV's buffer is
+       only used temporarily, so we can afford to allocate the maximum needed
+       and not care about unused space. */
+    const bool temp_result = (ulen == PERLIO_BUFSIZ);
+
     ST(0) = sv_2mortal(result);
+
+    /* Preallocate the result buffer to the maximum possible size.
+       ie. assume each UTF8 byte is 1 character.
+       Then shrink the result's buffer if necesary at the end. */
+    SvGROW(result, ((ulen+1) * usize));
+
     if (!endian) {
-    endian = (size == 4) ? 'N' : 'n';
-    enc_pack(aTHX_ result,size,endian,BOM_BE);
+       endian = (size == 4) ? 'N' : 'n';
+       enc_pack(aTHX_ result,size,endian,BOM_BE);
 #if 1
-    /* Update endian for next sequence */
-    if (renewed){
-        hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
-    }
+       /* Update endian for next sequence */
+       if (SvTRUE(attr("renewed", 7))) {
+           hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+       }
 #endif
     }
     while (s < e && s+UTF8SKIP(s) <= e) {
-    STRLEN len;
-    UV ord = utf8n_to_uvuni(s, e-s, &len, 0);
-        s += len;
-    if (size != 4 && invalid_ucs2(ord)) {
-        if (!issurrogate(ord)){
-        if (ucs2) {
-            if (check) {
-            croak("%"SVf":code point \"\\x{%"UVxf"}\" too high",
-                  *hv_fetch((HV *)SvRV(obj),"Name",4,0),ord);
-            }
-            enc_pack(aTHX_ result,size,endian,FBCHAR);
-        }else{
-            UV hi = ((ord - 0x10000) >> 10)   + 0xD800;
-            UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00;
-            enc_pack(aTHX_ result,size,endian,hi);
-            enc_pack(aTHX_ result,size,endian,lo);
-        }
-        }
-        else {
-        /* not supposed to happen */
-        enc_pack(aTHX_ result,size,endian,FBCHAR);
-        }
-    }
-    else {
-        enc_pack(aTHX_ result,size,endian,ord);
-    }
+       STRLEN len;
+       UV ord = utf8n_to_uvuni(s, e-s, &len, 0);
+       s += len;
+       if (size != 4 && invalid_ucs2(ord)) {
+           if (!issurrogate(ord)) {
+               if (ucs2 == -1) {
+                   ucs2 = SvTRUE(attr("ucs2", 4));
+               }
+               if (ucs2) {
+                   if (check) {
+                       croak("%"SVf":code point \"\\x{%"UVxf"}\" too high",
+                                 *hv_fetch((HV *)SvRV(obj),"Name",4,0),ord);
+                   }
+                   enc_pack(aTHX_ result,size,endian,FBCHAR);
+               } else {
+                   UV hi = ((ord - 0x10000) >> 10)   + 0xD800;
+                   UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00;
+                   enc_pack(aTHX_ result,size,endian,hi);
+                   enc_pack(aTHX_ result,size,endian,lo);
+               }
+           }
+           else {
+               /* not supposed to happen */
+               enc_pack(aTHX_ result,size,endian,FBCHAR);
+           }
+       }
+       else {
+           enc_pack(aTHX_ result,size,endian,ord);
+       }
     }
     if (s < e) {
-    /* UTF-8 partial char happens often on PerlIO.
-       Since this is okay and normal, we do not warn.
-       But this is critical when you choose to LEAVE_SRC
-       in which case we die */
-    if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)){
-        Perl_croak(aTHX_ "%"SVf":partial character is not allowed "
-               "when CHECK = 0x%" UVuf,
-               *hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
+       /* UTF-8 partial char happens often on PerlIO.
+          Since this is okay and normal, we do not warn.
+          But this is critical when you choose to LEAVE_SRC
+          in which case we die */
+       if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)) {
+           Perl_croak(aTHX_ "%"SVf":partial character is not allowed "
+                      "when CHECK = 0x%" UVuf,
+                      *hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
+       }
     }
-    
-    }
-    if (check && !(check & ENCODE_LEAVE_SRC)){
-    if (s < e) {
-        Move(s,SvPVX(utf8),e-s,U8);
-        SvCUR_set(utf8,(e-s));
+    if (check && !(check & ENCODE_LEAVE_SRC)) {
+       if (s < e) {
+           Move(s,SvPVX(utf8),e-s,U8);
+           SvCUR_set(utf8,(e-s));
+       }
+       else {
+           SvCUR_set(utf8,0);
+       }
+       *SvEND(utf8) = '\0';
     }
-    else {
-        SvCUR_set(utf8,0);
-    }
-    *SvEND(utf8) = '\0';
-    } 
+
+    if (!temp_result)
+       shrink_buffer(result);
+
     XSRETURN(1);
 }
-
diff --git a/ext/Encode/bin/enc2xs b/ext/Encode/bin/enc2xs

index c5cf8ee..233ca54 100644 (file)
--- a/ext/Encode/bin/enc2xs
+++ b/ext/Encode/bin/enc2xs
@@ -10,7 +10,7 @@ use warnings;
 use Getopt::Std;
 use Config;
 my @orig_ARGV = @ARGV;
-our $VERSION  = do { my @r = (q$Revision: 2.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION  = do { my @r = (q$Revision: 2.6 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 # These may get re-ordered.
 # RAW is a do_now as inserted by &enter
@@ -1351,17 +1351,17 @@ Encode/bin directory.
 =item *
 
 ICU Home Page 
-L<http://oss.software.ibm.com/icu/>
+L<http://www.icu-project.org/>
 
 =item *
 
 ICU Character Mapping Tables
-L<http://oss.software.ibm.com/icu/charset/>
+L<http://www.icu-project.org/charset/>
 
 =item *
 
 ICU:Conversion Data
-L<http://oss.software.ibm.com/icu/userguide/conversion-data.html>
+L<http://www.icu-project.org/userguide/conversion-data.html>
 
 =back
 
diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm

index d02ca39..5fb12e4 100644 (file)
--- a/ext/Encode/lib/Encode/Alias.pm
+++ b/ext/Encode/lib/Encode/Alias.pm
@@ -2,7 +2,7 @@ package Encode::Alias;
 use strict;
 use warnings;
 no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.11 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 sub DEBUG () { 0 }
 
 use base qw(Exporter);
@@ -135,7 +135,7 @@ sub init_aliases {
     define_alias( qr/^(.*)$/ => '"\L$1"' );
 
     # UTF/UCS stuff
-    define_alias( qr/^UTF-?7$/i     => '"UTF-7"' );
+    define_alias( qr/^(unicode-1-1-)?UTF-?7$/i     => '"UTF-7"' );
     define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
     define_alias(
         qr/^UCS-?2-?(BE)?$/i    => '"UCS-2BE"',
diff --git a/ext/Encode/lib/Encode/Guess.pm b/ext/Encode/lib/Encode/Guess.pm

index 1bc4df7..1ad7147 100644 (file)
--- a/ext/Encode/lib/Encode/Guess.pm
+++ b/ext/Encode/lib/Encode/Guess.pm
@@ -2,7 +2,7 @@ package Encode::Guess;
 use strict;
 use warnings;
 use Encode qw(:fallbacks find_encoding);
-our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 
 my $Canon = 'Guess';
 sub DEBUG () { 0 }
@@ -68,7 +68,7 @@ sub guess {
     my $octet = shift;
 
     # sanity check
-    return unless defined $octet and length $octet;
+    return "Empty string, empty guess" unless defined $octet and length $octet;
 
     # cheat 0: utf8 flag;
     if ( Encode::is_utf8($octet) ) {
diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm

index b664d88..4742a72 100644 (file)
--- a/ext/Encode/lib/Encode/MIME/Header.pm
+++ b/ext/Encode/lib/Encode/MIME/Header.pm
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 no warnings 'redefine';
 
-our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 use Encode qw(find_encoding encode_utf8 decode_utf8);
 use MIME::Base64;
 use Carp;
@@ -164,7 +164,7 @@ sub _encode {
         }
         $chunk .= $chr;
     }
-    $chunk and push @result, SINGLE->{$enc}($chunk);
+    length($chunk) and push @result, SINGLE->{$enc}($chunk);
     return @result;
 }
 
diff --git a/ext/Encode/t/guess.t b/ext/Encode/t/guess.t

index 5bfbf4e..707ca85 100644 (file)
--- a/ext/Encode/t/guess.t
+++ b/ext/Encode/t/guess.t
@@ -21,7 +21,7 @@ use File::Spec;
 use Encode qw(decode encode find_encoding _utf8_off);
 
 #use Test::More qw(no_plan);
-use Test::More tests => 29;
+use Test::More tests => 30;
 use_ok("Encode::Guess");
 {
     no warnings;
@@ -35,6 +35,7 @@ my $utf8off = $utf8on; _utf8_off($utf8off);
 my $utf16 = encode('UTF-16', $utf8on);
 my $utf32 = encode('UTF-32', $utf8on);
 
+like(guess_encoding(''), qr/empty string/io, 'empty string');
 is(guess_encoding($ascii)->name, 'ascii', 'ascii');
 like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii');
 is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1');
diff --git a/ext/Encode/t/mime-header.t b/ext/Encode/t/mime-header.t

index e36e0ba..a69e176 100644 (file)
--- a/ext/Encode/t/mime-header.t
+++ b/ext/Encode/t/mime-header.t
@@ -1,5 +1,5 @@
 #
-# $Id: mime-header.t,v 2.3 2007/04/06 12:53:41 dankogai Exp $
+# $Id: mime-header.t,v 2.4 2009/01/21 22:55:07 dankogai Exp dankogai $
 # This script is written in utf8
 #
 BEGIN {
@@ -23,7 +23,7 @@ no utf8;
 
 use strict;
 #use Test::More qw(no_plan);
-use Test::More tests => 12;
+use Test::More tests => 13;
 use_ok("Encode::MIME::Header");
 
 my $eheader =<<'EOS';
@@ -119,4 +119,10 @@ is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q");
 
 is(Encode::encode('MIME-Q', "\x{fc}"), '=?UTF-8?Q?=C3=BC?=', 'Encode latin1 characters');
 
+# RT42627
+
+my $rt42627 = Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0");
+is(Encode::encode('MIME-Q', $rt42627), 
+   '=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx?==?UTF-8?Q?0?=',
+   'MIME-Q encoding does not truncate trailing zeros');
 __END__;
ext/Encode/AUTHORS		patch \| blob \| blame \| history
ext/Encode/Changes		patch \| blob \| blame \| history
ext/Encode/Encode.pm		patch \| blob \| blame \| history
ext/Encode/Makefile.PL		patch \| blob \| blame \| history
ext/Encode/Unicode/Unicode.xs		patch \| blob \| blame \| history
ext/Encode/bin/enc2xs		patch \| blob \| blame \| history
ext/Encode/lib/Encode/Alias.pm		patch \| blob \| blame \| history
ext/Encode/lib/Encode/Guess.pm		patch \| blob \| blame \| history
ext/Encode/lib/Encode/MIME/Header.pm		patch \| blob \| blame \| history
ext/Encode/t/guess.t		patch \| blob \| blame \| history
ext/Encode/t/mime-header.t		patch \| blob \| blame \| history