From: Steve Peters Date: Thu, 27 Apr 2006 17:53:11 +0000 (+0000) Subject: Upgrade to Encode-2.15 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0a8c69ed98534e4cb5df7fb14bb378b3457ef4ef;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode-2.15 p4raw-id: //depot/perl@27982 --- diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 6cd82b7..50a7c3f 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,10 +1,24 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.14 2006/01/15 15:43:36 dankogai Exp dankogai $ +# $Id: Changes,v 2.15 2006/04/06 15:44:11 dankogai Exp dankogai $ # -$Revision: 2.14 $ $Date: 2006/01/15 15:43:36 $ -2.14 2006/01/15 15:06:36 $ +$Revision: 2.15 $ $Date: 2006/04/06 15:44:11 $ +! Unicode/Unicode.xs + Addressed: UTF-16, UTF-32, UCS, UTF-7 decoders mishandle illegal characters + http://rt.cpan.org/NoAuth/Bug.html?id=#18556 +! Encode.pm + added str2bytes() as an alias to encode() and bytes2str() as an alias + to decode() + http://rt.cpan.org/NoAuth/Bug.html?id=#17103 +! Encode.xs + Change 26922: Avoid warning with MS Visual C compiler. + Message-Id: <200601231245.k0NCj2dw009484@smtp3.ActiveState.com> +! t/perlio.t + Change 26067: As using -C to turn on utf8 IO is equivalent to the open pragma + Message-Id: <200511092227.jA9MRcYD009025@smtp3.ActiveState.com> + +2.14 2006/01/15 15:43:36 ! Makefile.PL Change 26295: Don't build manpages for Encode and Unicode::Normalize Message-Id: <200512071540.jB7Fe4Gt017960@smtp3.ActiveState.com> diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 7785f5a..75d0e51 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,9 +1,9 @@ # -# $Id: Encode.pm,v 2.14 2006/01/15 15:43:36 dankogai Exp dankogai $ +# $Id: Encode.pm,v 2.15 2006/04/06 15:44:11 dankogai Exp dankogai $ # package Encode; use strict; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.14 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.15 $ =~ /(\d+)/g; sub DEBUG () { 0 } use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); @@ -14,7 +14,7 @@ use base qw/Exporter/; # Public, encouraged API is exported by default our @EXPORT = qw( - decode decode_utf8 encode encode_utf8 + decode decode_utf8 encode encode_utf8 str2bytes bytes2str encodings find_encoding clone_encoding ); @@ -151,6 +151,7 @@ sub encode($$;$) $_[1] = $string if $check and !($check & LEAVE_SRC()); return $octets; } +*str2bytes = \&encode; sub decode($$;$) { @@ -167,6 +168,7 @@ sub decode($$;$) $_[1] = $octets if $check and !($check & LEAVE_SRC()); return $string; } +*bytes2str = \&decode; sub from_to($$$;$) { diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 47087da..3c8d681 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.7 2006/01/15 15:43:36 dankogai Exp dankogai $ + $Id: Encode.xs,v 2.8 2006/04/06 15:44:11 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT diff --git a/ext/Encode/Unicode/Unicode.xs b/ext/Encode/Unicode/Unicode.xs index b17be85..94404c6 100644 --- a/ext/Encode/Unicode/Unicode.xs +++ b/ext/Encode/Unicode/Unicode.xs @@ -1,5 +1,5 @@ /* - $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp $ + $Id: Unicode.xs,v 2.2 2006/04/06 15:44:11 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -132,8 +132,8 @@ CODE: while (s < e && s+size <= e) { UV ord = enc_unpack(aTHX_ &s,e,size,endian); U8 *d; - if (size != 4 && invalid_ucs2(ord)) { - if (ucs2) { + if (issurrogate(ord)) { + if (ucs2 || size == 4) { if (check) { croak("%"SVf":no surrogates allowed %"UVxf, *hv_fetch((HV *)SvRV(obj),"Name",4,0), @@ -148,24 +148,49 @@ CODE: else { UV lo; if (!isHiSurrogate(ord)) { - croak("%"SVf":Malformed HI surrogate %"UVxf, - *hv_fetch((HV *)SvRV(obj),"Name",4,0), - ord); - } - if (s+size > e) { - /* Partial character */ - s -= size; /* back up to 1st half */ - break; /* And exit loop */ + if (check) { + croak("%"SVf":Malformed HI surrogate %"UVxf, + *hv_fetch((HV *)SvRV(obj),"Name",4,0), + ord); + } + else { + ord = FBCHAR; + } } - lo = enc_unpack(aTHX_ &s,e,size,endian); - if (!isLoSurrogate(lo)){ - croak("%"SVf":Malformed LO surrogate %"UVxf, - *hv_fetch((HV *)SvRV(obj),"Name",4,0), - ord); + else { + if (s+size > e) { + /* Partial character */ + s -= size; /* back up to 1st half */ + break; /* And exit loop */ + } + lo = enc_unpack(aTHX_ &s,e,size,endian); + if (!isLoSurrogate(lo)){ + if (check) { + croak("%"SVf":Malformed LO surrogate %"UVxf, + *hv_fetch((HV *)SvRV(obj),"Name",4,0), + ord); + } + else { + ord = FBCHAR; + } + } + else { + ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00); + } } - ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00); } } + + if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) { + if (check) { + croak("%"SVf":Unicode character %"UVxf" is illegal", + *hv_fetch((HV *)SvRV(obj),"Name",4,0), + ord); + } else { + ord = FBCHAR; + } + } + d = (U8 *) SvGROW(result,SvCUR(result)+UTF8_MAXLEN+1); d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0); SvCUR_set(result,d - (U8 *)SvPVX(result)); diff --git a/ext/Encode/lib/Encode/Unicode/UTF7.pm b/ext/Encode/lib/Encode/Unicode/UTF7.pm index dc75ce3..a2a789b 100644 --- a/ext/Encode/lib/Encode/Unicode/UTF7.pm +++ b/ext/Encode/lib/Encode/Unicode/UTF7.pm @@ -51,10 +51,11 @@ sub encode($$;$){ return $bytes; } -sub decode{ +sub decode($$;$){ my ($obj, $bytes, $chk) = @_; my $len = length($bytes); my $str = ""; + no warnings 'uninitialized'; while (pos($bytes) < $len) { if ($bytes =~ /\G([^+]+)/ogc) { $str .= $1;