# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 2.14 2006/01/15 15:43:36 dankogai Exp dankogai $
+# $Id: Changes,v 2.15 2006/04/06 15:44:11 dankogai Exp dankogai $
#
-$Revision: 2.14 $ $Date: 2006/01/15 15:43:36 $
-2.14 2006/01/15 15:06:36 $
+$Revision: 2.15 $ $Date: 2006/04/06 15:44:11 $
+! Unicode/Unicode.xs
+ Addressed: UTF-16, UTF-32, UCS, UTF-7 decoders mishandle illegal characters
+ http://rt.cpan.org/NoAuth/Bug.html?id=#18556
+! Encode.pm
+ added str2bytes() as an alias to encode() and bytes2str() as an alias
+ to decode()
+ http://rt.cpan.org/NoAuth/Bug.html?id=#17103
+! Encode.xs
+ Change 26922: Avoid warning with MS Visual C compiler.
+ Message-Id: <200601231245.k0NCj2dw009484@smtp3.ActiveState.com>
+! t/perlio.t
+ Change 26067: As using -C to turn on utf8 IO is equivalent to the open pragma
+ Message-Id: <200511092227.jA9MRcYD009025@smtp3.ActiveState.com>
+
+2.14 2006/01/15 15:43:36
! Makefile.PL
Change 26295: Don't build manpages for Encode and Unicode::Normalize
Message-Id: <200512071540.jB7Fe4Gt017960@smtp3.ActiveState.com>
#
-# $Id: Encode.pm,v 2.14 2006/01/15 15:43:36 dankogai Exp dankogai $
+# $Id: Encode.pm,v 2.15 2006/04/06 15:44:11 dankogai Exp dankogai $
#
package Encode;
use strict;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.14 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.15 $ =~ /(\d+)/g;
sub DEBUG () { 0 }
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
# Public, encouraged API is exported by default
our @EXPORT = qw(
- decode decode_utf8 encode encode_utf8
+ decode decode_utf8 encode encode_utf8 str2bytes bytes2str
encodings find_encoding clone_encoding
);
$_[1] = $string if $check and !($check & LEAVE_SRC());
return $octets;
}
+*str2bytes = \&encode;
sub decode($$;$)
{
$_[1] = $octets if $check and !($check & LEAVE_SRC());
return $string;
}
+*bytes2str = \&decode;
sub from_to($$$;$)
{
/*
- $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp $
+ $Id: Unicode.xs,v 2.2 2006/04/06 15:44:11 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
while (s < e && s+size <= e) {
UV ord = enc_unpack(aTHX_ &s,e,size,endian);
U8 *d;
- if (size != 4 && invalid_ucs2(ord)) {
- if (ucs2) {
+ if (issurrogate(ord)) {
+ if (ucs2 || size == 4) {
if (check) {
croak("%"SVf":no surrogates allowed %"UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
else {
UV lo;
if (!isHiSurrogate(ord)) {
- croak("%"SVf":Malformed HI surrogate %"UVxf,
- *hv_fetch((HV *)SvRV(obj),"Name",4,0),
- ord);
- }
- if (s+size > e) {
- /* Partial character */
- s -= size; /* back up to 1st half */
- break; /* And exit loop */
+ if (check) {
+ croak("%"SVf":Malformed HI surrogate %"UVxf,
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+ ord);
+ }
+ else {
+ ord = FBCHAR;
+ }
}
- lo = enc_unpack(aTHX_ &s,e,size,endian);
- if (!isLoSurrogate(lo)){
- croak("%"SVf":Malformed LO surrogate %"UVxf,
- *hv_fetch((HV *)SvRV(obj),"Name",4,0),
- ord);
+ else {
+ if (s+size > e) {
+ /* Partial character */
+ s -= size; /* back up to 1st half */
+ break; /* And exit loop */
+ }
+ lo = enc_unpack(aTHX_ &s,e,size,endian);
+ if (!isLoSurrogate(lo)){
+ if (check) {
+ croak("%"SVf":Malformed LO surrogate %"UVxf,
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+ ord);
+ }
+ else {
+ ord = FBCHAR;
+ }
+ }
+ else {
+ ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
+ }
}
- ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
}
}
+
+ if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) {
+ if (check) {
+ croak("%"SVf":Unicode character %"UVxf" is illegal",
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+ ord);
+ } else {
+ ord = FBCHAR;
+ }
+ }
+
d = (U8 *) SvGROW(result,SvCUR(result)+UTF8_MAXLEN+1);
d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
SvCUR_set(result,d - (U8 *)SvPVX(result));