# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 2.38 2009/11/16 14:08:13 dankogai Exp dankogai $
-$Revision: 2.38 $ $Date: 2009/11/16 14:08:13 $
+# $Id: Changes,v 2.39 2009/11/26 09:23:59 dankogai Exp dankogai $
+! Encode.xs t/fallback.t
+ $utf8 = decode('utf8', $malformed, sub{ ... }) # now works!
+ http://rt.cpan.org/Ticket/Display.html?id=51204
+! t/CJKT.t t/guess.t t/perlio.t
+ $ENV{'PERL_CORE'} tricks removed since they are no longer necessary.
+ Message-Id: <20091116161513.GA25556@bestpractical.com>
+
+$Revision: 2.39 $ $Date: 2009/11/26 09:23:59 $
! Encode.xs
Addressed: Encode memory corruption [perl #70528]
Message-Id: <alpine.LFD.2.00.0911152328070.9483@ein.m-l.org>
/*
- $Id: Encode.xs,v 2.17 2009/11/16 14:08:13 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.18 2009/11/26 09:23:59 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
}
static U8*
-process_utf8(pTHX_ SV* dst, U8* s, U8* e, int check,
+process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
bool encode, bool strict, bool stop_at_partial)
{
UV uv;
STRLEN ulen;
+ SV *fallback_cb;
+ int check;
+
+ if (SvROK(check_sv)) {
+ /* croak("UTF-8 decoder doesn't support callback CHECK"); */
+ fallback_cb = check_sv;
+ check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */
+ }
+ else {
+ fallback_cb = &PL_sv_undef;
+ check = SvIV(check_sv);
+ }
SvPOK_only(dst);
SvCUR_set(dst,0);
break;
}
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* subchar = newSVpvf(check & ENCODE_PERLQQ ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}"):
- check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
- "&#x%" UVxf ";", uv);
+ SV* subchar =
+ (fallback_cb != &PL_sv_undef)
+ ? do_fallback_cb(aTHX_ uv, fallback_cb)
+ : newSVpvf(check & ENCODE_PERLQQ
+ ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}")
+ : check & ENCODE_HTMLCREF ? "&#%" UVuf ";"
+ : "&#x%" UVxf ";", uv);
+ if (encode){
+ SvUTF8_off(subchar); /* make sure no decoded string gets in */
+ }
sv_catsv(dst, subchar);
SvREFCNT_dec(subchar);
} else {
CODE:
{
dSP; ENTER; SAVETMPS;
- if (SvROK(check_sv)) {
- croak("UTF-8 decoder doesn't support callback CHECK");
- }
- else {
- check = SvIV(check_sv);
- }
if (src == &PL_sv_undef) src = newSV(0);
s = (U8 *) SvPV(src, slen);
e = (U8 *) SvEND(src);
dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */
-
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
/*
* PerlIO check -- we assume the object is of PerlIO if renewed
*/
}
}
- s = process_utf8(aTHX_ dst, s, e, check, 0, strict_utf8(aTHX_ obj), renewed);
+ s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed);
/* Clear out translated part of source unless asked not to */
if (check && !(check & ENCODE_LEAVE_SRC)){
int check;
CODE:
{
- if (SvROK(check_sv)) {
- croak("UTF-8 encoder doesn't support callback CHECK");
- }
- else {
- check = SvIV(check_sv);
- }
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
if (src == &PL_sv_undef) src = newSV(0);
s = (U8 *) SvPV(src, slen);
e = (U8 *) SvEND(src);
if (SvUTF8(src)) {
/* Already encoded */
if (strict_utf8(aTHX_ obj)) {
- s = process_utf8(aTHX_ dst, s, e, check, 1, 1, 0);
+ s = process_utf8(aTHX_ dst, s, e, check_sv, 1, 1, 0);
}
else {
/* trust it and just copy the octets */
use strict;
#use Test::More qw(no_plan);
-use Test::More tests => 48;
+use Test::More tests => 50;
use Encode q(:all);
my $uo = '';
is $dst, 0xFF."", qq{$ascii->encode(\$src, sub{ \$_[0] } )};
$dst = decode("ascii", (pack "C*", 0xFF), sub{ $_[0] });
is $dst, 0xFF."", qq{decode("ascii", (pack "C*", 0xFF), sub{ \$_[0] })};
+
+
+$src = pack "C*", 0x80;
+$dst = $utf8->decode($src, sub{ $_[0] });
+is $dst, 0x80."", qq{$utf8->encode(\$src, sub{ \$_[0] } )};
+$dst = decode("utf8", $src, sub{ $_[0] });
+is $dst, 0x80."", qq{decode("utf8", (pack "C*", 0x80), sub{ \$_[0] })};