From: Rafael Garcia-Suarez Date: Wed, 30 May 2007 10:45:08 +0000 (+0000) Subject: Upgrade to Encode 2.23 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=370462a2c1be6d2588dcfa0aeab1b13e61e59d0c;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 2.23 p4raw-id: //depot/perl@31310 --- diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 1611254..704b96c 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,15 +1,32 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.22 2007/05/29 07:35:27 dankogai Exp dankogai $ +# $Id: Changes,v 2.23 2007/05/29 18:15:32 dankogai Exp dankogai $ # -$Revision: 2.22 $ $Date: 2007/05/29 07:35:27 $ +$Revision: 2.23 $ $Date: 2007/05/29 18:15:32 $ +! Encode.xs + got rid of global fallback_cb; encode_method() now takes one more + argument which is a coderef to fallback. This should make + encode_method() thread-safe. +! Encode.pm + Added perluniintro, perlunifaq, and perlunitut to POD +! Encode.xs + Plug a memory leak in Encode -- by rgs + Message-Id: +! Unicode/Unicode.pm + POD fixes on UTF-16LE + http://aspn.activestate.com/ASPN/Mail/Message/perl5-porters/3486118 +! Makefile.PL + man page generation is now conditional; yes by default but no if $PERL_CORE + Message-Id: + +2.22 2007/05/29 07:35:27 ! Encode.pm from_to() does not honor the check while decoding. That's a feature. To make sure it is a feature it is mentioned in the POD. - http://rt.cpan.org/NoAuth/Bug.html?id=#27277 + http://rt.cpan.org/NoAuth/Bug.html?id=27277 ! Makefile.pl Encode used to suppress man page generation. Now it does. - http://rt.cpan.org/NoAuth/Bug.html?id=#27200 + http://rt.cpan.org/NoAuth/Bug.html?id=27200 ! Encode.pm Encode.xs t/fallback.t Addressed: (de|en)code("ascii", "\x{3000}", sub{ $_[0] }) segfaults Reported by MIYAGAWA @@ -53,19 +70,19 @@ $Revision: 2.22 $ $Date: 2007/05/29 07:35:27 $ Message-Id: <693254b90704060526s6d850320h71cdda50dfbf7eba@mail.gmail.com> ! Encode.pm #25216 ([PATCH] Encode.pm: postpone the load of Encode::Encoding) - http://rt.cpan.org/NoAuth/Bug.html?id=#25216 + http://rt.cpan.org/NoAuth/Bug.html?id=25216 ! lib/Encode/MIME/Header.pm t/mime-header.t #24418 (Encode::MIME::Header: wrong encoding with latin1 characters) - http://rt.cpan.org/NoAuth/Bug.html?id=#24418 + http://rt.cpan.org/NoAuth/Bug.html?id=24418 ! Encode.pm #23876 (Add documentation for LEAVE_SRC) - http://rt.cpan.org/NoAuth/Bug.html?id=#23876 + http://rt.cpan.org/NoAuth/Bug.html?id=23876 ! lib/Encode/Alias.pm t/Aliases.t #20781: Thai encoding needs alias for tis-620 - http://rt.cpan.org/NoAuth/Bug.html?id=#20781 + http://rt.cpan.org/NoAuth/Bug.html?id=20781 ! bin/piconv AUTHORS #20344: piconv: wrong conversion of utf-16le encoded files (with PATCH) - http://rt.cpan.org/NoAuth/Bug.html?id=#20344 + http://rt.cpan.org/NoAuth/Bug.html?id=20344 ! Encode.pm Encode.xs bin/enc2xs encoding.pm t/Aliases.t t/utf8strict.t Imported from bleedperl's 2.18_01 @@ -97,7 +114,7 @@ $Revision: 2.22 $ $Date: 2007/05/29 07:35:27 $ --xmlcref and --htmlcref added. ! Encode.pm Copyright Notice Added. - http://rt.cpan.org/NoAuth/Bug.html?id=#19056 + http://rt.cpan.org/NoAuth/Bug.html?id=19056 ! * Replaced remaining ^\t with q( ) x 4. -- Perl Best Practice pp. 20 And all .pm's are now perltidy-ed. @@ -105,11 +122,11 @@ $Revision: 2.22 $ $Date: 2007/05/29 07:35:27 $ 2.15 2006/04/06 15:44:11 ! Unicode/Unicode.xs Addressed: UTF-16, UTF-32, UCS, UTF-7 decoders mishandle illegal characters - http://rt.cpan.org/NoAuth/Bug.html?id=#18556 + http://rt.cpan.org/NoAuth/Bug.html?id=18556 ! Encode.pm added str2bytes() as an alias to encode() and bytes2str() as an alias to decode() - http://rt.cpan.org/NoAuth/Bug.html?id=#17103 + http://rt.cpan.org/NoAuth/Bug.html?id=17103 ! Encode.xs Change 26922: Avoid warning with MS Visual C compiler. Message-Id: <200601231245.k0NCj2dw009484@smtp3.ActiveState.com> diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 953bb56..97a5a6f 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,10 +1,10 @@ # -# $Id: Encode.pm,v 2.22 2007/05/29 07:35:27 dankogai Exp dankogai $ +# $Id: Encode.pm,v 2.23 2007/05/29 18:15:32 dankogai Exp dankogai $ # package Encode; use strict; use warnings; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.22 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.23 $ =~ /(\d+)/g; sub DEBUG () { 0 } use XSLoader (); XSLoader::load( __PACKAGE__, $VERSION ); @@ -922,7 +922,7 @@ L, L, L, L, -L, +L, L, L, L L, the Perl Unicode Mailing List Eperl-unicode@perl.orgE diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 265aec0..5acdc75 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.13 2007/05/29 07:35:27 dankogai Exp dankogai $ + $Id: Encode.xs,v 2.14 2007/05/29 18:15:32 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -35,8 +35,6 @@ UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) UTF8_ALLOW_NON_CONTINUATION | \ UTF8_ALLOW_LONG)) -static SV* fallback_cb = (SV*)NULL ; - void Encode_XSEncoding(pTHX_ encode_t * enc) { @@ -66,7 +64,7 @@ call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" static SV * -do_fallback_cb(pTHX_ UV ch) +do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) { dSP; int argc; @@ -79,7 +77,7 @@ do_fallback_cb(pTHX_ UV ch) argc = call_sv(fallback_cb, G_SCALAR); SPAGAIN; if (argc != 1){ - croak("fallback sub must return scalar!"); + croak("fallback sub must return scalar!"); } temp = newSVsv(POPs); PUTBACK; @@ -93,7 +91,8 @@ do_fallback_cb(pTHX_ UV ch) static SV * encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, - int check, STRLEN * offset, SV * term, int * retcode) + int check, STRLEN * offset, SV * term, int * retcode, + SV *fallback_cb) { STRLEN slen; U8 *s = (U8 *) SvPV(src, slen); @@ -195,8 +194,9 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ SV* subchar = - (fallback_cb != (SV*)NULL) ? do_fallback_cb(aTHX_ ch) : - newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" : + (fallback_cb != &PL_sv_undef) + ? do_fallback_cb(aTHX_ ch, fallback_cb) + : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : "&#x%" UVxf ";", (UV)ch); sdone += slen + clen; @@ -229,9 +229,9 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ SV* subchar = - (fallback_cb != (SV*)NULL) ? - do_fallback_cb(aTHX_ (UV)s[slen]) : - newSVpvf("\\x%02" UVXf, (UV)s[slen]); + (fallback_cb != &PL_sv_undef) + ? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb) + : newSVpvf("\\x%02" UVXf, (UV)s[slen]); sdone += slen + 1; ddone += dlen + SvCUR(subchar); sv_catsv(dst, subchar); @@ -542,23 +542,31 @@ CODE: } void -Method_cat_decode(obj, dst, src, off, term, check = 0) +Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no) SV * obj SV * dst SV * src SV * off SV * term -int check +SV * check_sv CODE: { + int check; + SV *fallback_cb = &PL_sv_undef; encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); STRLEN offset = (STRLEN)SvIV(off); int code = 0; if (SvUTF8(src)) { sv_utf8_downgrade(src, FALSE); } + if (SvROK(check_sv)){ + fallback_cb = check_sv; + check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ + }else{ + check = SvIV(check_sv); + } sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, - &offset, term, &code)); + &offset, term, &code, fallback_cb)); SvIV_set(off, (IV)offset); if (code == ENCODE_FOUND_TERM) { ST(0) = &PL_sv_yes; @@ -576,29 +584,23 @@ SV * check_sv CODE: { int check; + SV *fallback_cb = &PL_sv_undef; encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); if (SvUTF8(src)) { sv_utf8_downgrade(src, FALSE); } if (SvROK(check_sv)){ - if (fallback_cb == (SV*)NULL){ - fallback_cb = newSVsv(check_sv); /* First time */ - }else{ - SvSetSV(fallback_cb, check_sv); /* Been here before */ - } - check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ + fallback_cb = check_sv; + check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ }else{ - fallback_cb = (SV*)NULL; - check = SvIV(check_sv); + check = SvIV(check_sv); } ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, - NULL, Nullsv, NULL); + NULL, Nullsv, NULL, fallback_cb); SvUTF8_on(ST(0)); XSRETURN(1); } - - void Method_encode(obj,src,check_sv = &PL_sv_no) SV * obj @@ -607,21 +609,17 @@ SV * check_sv CODE: { int check; + SV *fallback_cb = &PL_sv_undef; encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); sv_utf8_upgrade(src); if (SvROK(check_sv)){ - if (fallback_cb == (SV*)NULL){ - fallback_cb = newSVsv(check_sv); /* First time */ - }else{ - SvSetSV(fallback_cb, check_sv); /* Been here before */ - } - check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ + fallback_cb = check_sv; + check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ }else{ - fallback_cb = (SV*)NULL; - check = SvIV(check_sv); + check = SvIV(check_sv); } ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, - NULL, Nullsv, NULL); + NULL, Nullsv, NULL, fallback_cb); XSRETURN(1); } diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL index 981dba6..d02bc41 100644 --- a/ext/Encode/Makefile.PL +++ b/ext/Encode/Makefile.PL @@ -1,10 +1,15 @@ +# +# $Id: Makefile.PL,v 2.5 2007/05/29 18:15:32 dankogai Exp dankogai $ +# use 5.007003; +use strict; +use warnings; use ExtUtils::MakeMaker; # Just for sure :) -my %ARGV = map { split /=/; defined $_[1] or $_[1]=1; @_ } @ARGV; +my %ARGV = map { my @r = split /=/,$_; defined $r[1] or $r[1]=1; @r } @ARGV; $ARGV{DEBUG} and warn "$_ => $ARGV{$_}\n" for keys %ARGV; -$ENV{PERL_CORE} ||= $ARGV{PERL_CORE}; +$ENV{PERL_CORE} ||= $ARGV{PERL_CORE} if $ARGV{PERL_CORE}; my %tables = ( @@ -25,6 +30,8 @@ my @pmlibdirs = qw(lib Encode); $ARGV{MORE_SCRIOPTS} and push @exe_files, @more_exe_files; $ARGV{INSTALL_UCM} and push @pmlibdirs, "ucm"; +my @man = (); +@man = ( MAN1PODS => {}, MAN3PODS => {} ) if $ENV{PERL_CORE}; WriteMakefile( NAME => "Encode", @@ -36,8 +43,7 @@ WriteMakefile( SUFFIX => 'gz', DIST_DEFAULT => 'all tardist', }, - MAN1PODS => {}, - MAN3PODS => {}, + @man, INC => "-I./Encode", PMLIBDIRS => \@pmlibdirs, INSTALLDIRS => 'perl', diff --git a/ext/Encode/Unicode/Unicode.pm b/ext/Encode/Unicode/Unicode.pm index 6e10941..cdfe02d 100644 --- a/ext/Encode/Unicode/Unicode.pm +++ b/ext/Encode/Unicode/Unicode.pm @@ -4,7 +4,7 @@ use strict; use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); @@ -97,14 +97,14 @@ Encode::Unicode::UTF7. For details see L. Decodes from ord(N) Encodes chr(N) to... octet/char BOM S.P d800-dfff ord > 0xffff \x{1abcd} == ---------------+-----------------+------------------------------ - UCS-2BE 2 N N is bogus Not Available + UCS-2BE 2 N N is bogus Not Available UCS-2LE 2 N N bogus Not Available UTF-16 2/4 Y Y is S.P S.P BE/LE UTF-16BE 2/4 N Y S.P S.P 0xd82a,0xdfcd - UTF-16LE 2 N Y S.P S.P 0x2ad8,0xcddf - UTF-32 4 Y - is bogus As is BE/LE - UTF-32BE 4 N - bogus As is 0x0001abcd - UTF-32LE 4 N - bogus As is 0xcdab0100 + UTF-16LE 2/4 N Y S.P S.P 0x2ad8,0xcddf + UTF-32 4 Y - is bogus As is BE/LE + UTF-32BE 4 N - bogus As is 0x0001abcd + UTF-32LE 4 N - bogus As is 0xcdab0100 UTF-8 1-4 - - bogus >= 4 octets \xf0\x9a\af\8d ---------------+-----------------+------------------------------