From: Steve Peters <steve@fisharerojo.org>
Date: Thu, 27 Apr 2006 17:53:11 +0000 (+0000)
Subject: Upgrade to Encode-2.15
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0a8c69ed98534e4cb5df7fb14bb378b3457ef4ef;p=p5sagit%2Fp5-mst-13.2.git

Upgrade to Encode-2.15

p4raw-id: //depot/perl@27982
---

diff --git a/ext/Encode/Changes b/ext/Encode/Changes
index 6cd82b7..50a7c3f 100644
--- a/ext/Encode/Changes
+++ b/ext/Encode/Changes
@@ -1,10 +1,24 @@
 # Revision history for Perl extension Encode.
 #
-# $Id: Changes,v 2.14 2006/01/15 15:43:36 dankogai Exp dankogai $
+# $Id: Changes,v 2.15 2006/04/06 15:44:11 dankogai Exp dankogai $
 #
 
-$Revision: 2.14 $ $Date: 2006/01/15 15:43:36 $
-2.14  2006/01/15 15:06:36 $
+$Revision: 2.15 $ $Date: 2006/04/06 15:44:11 $
+! Unicode/Unicode.xs
+  Addressed: UTF-16, UTF-32, UCS, UTF-7 decoders mishandle illegal characters
+  http://rt.cpan.org/NoAuth/Bug.html?id=#18556
+! Encode.pm
+  added str2bytes() as an alias to encode() and  bytes2str() as an alias 
+  to decode()
+  http://rt.cpan.org/NoAuth/Bug.html?id=#17103
+! Encode.xs
+  Change 26922: Avoid warning with MS Visual C compiler.
+  Message-Id: <200601231245.k0NCj2dw009484@smtp3.ActiveState.com>
+! t/perlio.t
+  Change 26067: As using -C to turn on utf8 IO is equivalent to the open pragma
+  Message-Id: <200511092227.jA9MRcYD009025@smtp3.ActiveState.com>
+
+2.14 2006/01/15 15:43:36
 ! Makefile.PL 
   Change 26295: Don't build manpages for Encode and Unicode::Normalize
   Message-Id: <200512071540.jB7Fe4Gt017960@smtp3.ActiveState.com>
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index 7785f5a..75d0e51 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,9 +1,9 @@
 #
-# $Id: Encode.pm,v 2.14 2006/01/15 15:43:36 dankogai Exp dankogai $
+# $Id: Encode.pm,v 2.15 2006/04/06 15:44:11 dankogai Exp dankogai $
 #
 package Encode;
 use strict;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.14 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.15 $ =~ /(\d+)/g;
 sub DEBUG () { 0 }
 use XSLoader ();
 XSLoader::load(__PACKAGE__, $VERSION);
@@ -14,7 +14,7 @@ use base qw/Exporter/;
 # Public, encouraged API is exported by default
 
 our @EXPORT = qw(
-  decode  decode_utf8  encode  encode_utf8
+  decode  decode_utf8  encode  encode_utf8 str2bytes bytes2str
   encodings  find_encoding clone_encoding
 );
 
@@ -151,6 +151,7 @@ sub encode($$;$)
     $_[1] = $string if $check and !($check & LEAVE_SRC());
     return $octets;
 }
+*str2bytes = \&encode;
 
 sub decode($$;$)
 {
@@ -167,6 +168,7 @@ sub decode($$;$)
     $_[1] = $octets if $check and !($check & LEAVE_SRC());
     return $string;
 }
+*bytes2str = \&decode;
 
 sub from_to($$$;$)
 {
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs
index 47087da..3c8d681 100644
--- a/ext/Encode/Encode.xs
+++ b/ext/Encode/Encode.xs
@@ -1,5 +1,5 @@
 /*
- $Id: Encode.xs,v 2.7 2006/01/15 15:43:36 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.8 2006/04/06 15:44:11 dankogai Exp dankogai $
  */
 
 #define PERL_NO_GET_CONTEXT
diff --git a/ext/Encode/Unicode/Unicode.xs b/ext/Encode/Unicode/Unicode.xs
index b17be85..94404c6 100644
--- a/ext/Encode/Unicode/Unicode.xs
+++ b/ext/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
 /*
- $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp $
+ $Id: Unicode.xs,v 2.2 2006/04/06 15:44:11 dankogai Exp dankogai $
  */
 
 #define PERL_NO_GET_CONTEXT
@@ -132,8 +132,8 @@ CODE:
     while (s < e && s+size <= e) {
 	UV ord = enc_unpack(aTHX_ &s,e,size,endian);
 	U8 *d;
-	if (size != 4 && invalid_ucs2(ord)) {
-	    if (ucs2) {
+	if (issurrogate(ord)) {
+	    if (ucs2 || size == 4) {
 		if (check) {
 		    croak("%"SVf":no surrogates allowed %"UVxf,
 			  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
@@ -148,24 +148,49 @@ CODE:
 	    else {
 		UV lo;
 		if (!isHiSurrogate(ord)) {
-		    croak("%"SVf":Malformed HI surrogate %"UVxf,
-			  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
-			  ord);
-		}
-		if (s+size > e) {
-		    /* Partial character */
-		    s -= size;   /* back up to 1st half */
-		    break;       /* And exit loop */
+		    if (check) {
+			croak("%"SVf":Malformed HI surrogate %"UVxf,
+			      *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+			      ord);
+		    }
+		    else {
+			ord = FBCHAR;
+		    }
 		}
-		lo = enc_unpack(aTHX_ &s,e,size,endian);
-		if (!isLoSurrogate(lo)){
-		    croak("%"SVf":Malformed LO surrogate %"UVxf,
-			  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
-			  ord);
+	        else {
+		    if (s+size > e) {
+			/* Partial character */
+			s -= size;   /* back up to 1st half */
+			break;       /* And exit loop */
+		    }
+		    lo = enc_unpack(aTHX_ &s,e,size,endian);
+		    if (!isLoSurrogate(lo)){
+			if (check) {
+			    croak("%"SVf":Malformed LO surrogate %"UVxf,
+				  *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+				  ord);
+			}
+			else {
+			    ord = FBCHAR;
+			}
+		    }
+		    else {
+			ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
+		    }
 		}
-		ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
 	    }
 	}
+
+	if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) {
+	    if (check) {
+		croak("%"SVf":Unicode character %"UVxf" is illegal",
+		      *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+		      ord);
+	    } else {
+		ord = FBCHAR;
+	    }
+	}
+
 	d = (U8 *) SvGROW(result,SvCUR(result)+UTF8_MAXLEN+1);
 	d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
 	SvCUR_set(result,d - (U8 *)SvPVX(result));
diff --git a/ext/Encode/lib/Encode/Unicode/UTF7.pm b/ext/Encode/lib/Encode/Unicode/UTF7.pm
index dc75ce3..a2a789b 100644
--- a/ext/Encode/lib/Encode/Unicode/UTF7.pm
+++ b/ext/Encode/lib/Encode/Unicode/UTF7.pm
@@ -51,10 +51,11 @@ sub encode($$;$){
     return $bytes;
 }
 	   
-sub decode{
+sub decode($$;$){
     my ($obj, $bytes, $chk) = @_;
     my $len = length($bytes);
     my $str = "";
+    no warnings 'uninitialized';
     while (pos($bytes) < $len) {
 	if    ($bytes =~ /\G([^+]+)/ogc) {
 	    $str .= $1;