From: Rafael Garcia-Suarez Date: Sat, 6 Aug 2005 22:15:14 +0000 (+0000) Subject: Upgrade to Encode 2.11, plus a patch to PerlIO::encoding X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=56ff73747e9362a483df0d73a430e0cb83ff0e6b;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 2.11, plus a patch to PerlIO::encoding by Dan Kogai to fix encoding(utf-8-strict) with partial characters. p4raw-id: //depot/perl@25271 --- diff --git a/MANIFEST b/MANIFEST index 288e149..7f46fb9 100644 --- a/MANIFEST +++ b/MANIFEST @@ -428,6 +428,7 @@ ext/Encode/lib/Encode/JP/H2Z.pm Encode extension ext/Encode/lib/Encode/JP/JIS7.pm Encode extension ext/Encode/lib/Encode/KR/2022_KR.pm Encode extension ext/Encode/lib/Encode/MIME/Header.pm Encode extension +ext/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm Encode extension ext/Encode/lib/Encode/PerlIO.pod Documents for Encode & PerlIO ext/Encode/lib/Encode/Supported.pod Documents for supported encodings ext/Encode/lib/Encode/Unicode/UTF7.pm Encode extension @@ -469,6 +470,7 @@ ext/Encode/t/jperl.t test script ext/Encode/t/ksc5601.enc test data ext/Encode/t/ksc5601.utf test data ext/Encode/t/mime-header.t test script +ext/Encode/t/mime_header_iso2022jp.t test script ext/Encode/t/Mod_EUCJP.pm module that t/enc_module.enc uses ext/Encode/t/perlio.t test script ext/Encode/t/rt.pl test script diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index 42e2feb..a42e4d6 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -12,43 +12,47 @@ Andreas J. Koenig Anton Tagunov Autrijus Tang -Benjamin Goldberg +Benjamin Goldberg Bjoern Hoehrmann Bjoern Jacke -Chris Nandor +Chris Nandor Craig A. Berry Dan Kogai Dave Evans Deng Liu Dominic Dunlop -Elizabeth Mattijsen +Elizabeth Mattijsen Gerrit P. 
Haase -Graham Barr +Graham Barr Gurusamy Sarathy -H.Merijn Brand +H.Merijn Brand Hugo van der Sanden Inaba Hiroto Jarkko Hietaniemi -Jungshik Shin +Jungshik Shin +KONNO Hiroharu Laszlo Molnar MORIYAMA Masayuki -Mark-Jason Dominus +Makamaka +Mark-Jason Dominus Mattia Barbon -Michael G Schwern +Michael G Schwern +Miron Cuperman Nicholas Clark Nick Ing-Simmons -Paul Marquess +Paul Marquess Peter Prymmer Philip Newton -Robin Barker +Piotr Fusik +Robin Barker SADAHIRO Tomoyuki SUGAWARA Hajime -SUZUKI Norio +SUZUKI Norio Simon Cozens Spider Boardman Steve Hay Steve Peters Tatsuhiko Miyagawa -Tels +Tels Vadim Konovalov Yitzchak Scott-Thoennes diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 16d6ed2..dd9a6b5 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,8 +1,28 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.10 2005/05/16 18:46:36 dankogai Exp dankogai $ +# $Id: Changes,v 2.11 2005/08/05 10:58:25 dankogai Exp dankogai $ # -$Revision: 2.10 $ $Date: 2005/05/16 18:46:36 $ +$Revision: 2.11 $ $Date: 2005/08/05 10:58:25 $ +! AUTHORS CHANGES + To reflect changes below +! Encode.pm encoding.pm + lib/Encode/Alias.pm lib/Encode/PerlIO.pod lib/Encode/Supported.pod + Typo fixed by Piotr Fusik in Change 25261 & 25266 + Message-ID: <001401c595bd$dccb5d80$0bd34dd5@piec> +! Encode.xs + Addresses "BUG REPORT: panic in Encode.xs". + Message-Id: <42EDDA97.2010608@hyper.to> ++ lib/Encode/MIME/Header/ISO_2022_JP.pm mime_header_iso2022jp.t +! lib/Encode/MIME/Header.pm lib/Encode/Config.pm + Encoding 'MIME-Header-ISO_2022_JP' is introduced by Makamaka + Message-Id: <200507311557.j6VFvE2K034605@www231.sakura.ne.jp> +! Encode/encode.h Encode.pm Encode.xs + PerlIO's "encoding(utf-8-strict)" got a problem w/ partial character. + Found and addressed by KONNO Hiroharu + See also ext/PerlIO/encoding/encoding.pm + Message-Id: + +2.10 2005/05/16 18:46:36 ! 
Encode.pm fixed decode_utf8() accordingly to RT#8872 http://rt.cpan.org/NoAuth/Bug.html?id=8872 diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 672ab4b..9b45b7b 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,9 +1,9 @@ # -# $Id: Encode.pm,v 2.10 2005/05/16 18:46:36 dankogai Exp dankogai $ +# $Id: Encode.pm,v 2.11 2005/08/05 10:58:25 dankogai Exp dankogai $ # package Encode; use strict; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.10 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.11 $ =~ /(\d+)/g; sub DEBUG () { 0 } use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); @@ -19,7 +19,7 @@ our @EXPORT = qw( ); our @FB_FLAGS = qw(DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC - PERLQQ HTMLCREF XMLCREF); + PERLQQ HTMLCREF XMLCREF STOP_AT_PARTIAL); our @FB_CONSTS = qw(FB_DEFAULT FB_CROAK FB_QUIET FB_WARN FB_PERLQQ FB_HTMLCREF FB_XMLCREF); diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 689100b..cc5fe3b 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.4 2005/05/16 18:46:36 dankogai Exp dankogai $ + $Id: Encode.xs,v 2.5 2005/08/05 10:58:25 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -151,6 +151,8 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, UV ch = utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); + /* if non-representable multibyte prefix at end of current buffer - break*/ + if (clen > tlen - sdone) break; if (check & ENCODE_DIE_ON_ERR) { Perl_croak(aTHX_ ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); @@ -290,7 +292,7 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, int check, if ((s + skip) > e) { /* Partial character */ /* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */ - if (stop_at_partial) + if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) break; goto malformed_byte; @@ -791,6 +793,13 @@ OUTPUT: RETVAL int +STOP_AT_PARTIAL() +CODE: + RETVAL = 
ENCODE_STOP_AT_PARTIAL; +OUTPUT: + RETVAL + +int FB_DEFAULT() CODE: RETVAL = ENCODE_FB_DEFAULT; diff --git a/ext/Encode/Encode/encode.h b/ext/Encode/Encode/encode.h index d7a57a4..94764a6 100644 --- a/ext/Encode/Encode/encode.h +++ b/ext/Encode/Encode/encode.h @@ -98,6 +98,7 @@ extern void Encode_DefineEncoding(encode_t *enc); #define ENCODE_PERLQQ 0x0100 /* perlqq fallback string */ #define ENCODE_HTMLCREF 0x0200 /* HTML character ref. fb mode */ #define ENCODE_XMLCREF 0x0400 /* XML character ref. fb mode */ +#define ENCODE_STOP_AT_PARTIAL 0x0800 /* stop at partial explicitly */ #define ENCODE_FB_DEFAULT 0x0000 #define ENCODE_FB_CROAK 0x0001 diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index 7f31c3c..35aefdd 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -49,6 +49,7 @@ lib/Encode/JP/H2Z.pm Encode extension lib/Encode/JP/JIS7.pm Encode extension lib/Encode/KR/2022_KR.pm Encode extension lib/Encode/MIME/Header.pm Encode extension +lib/Encode/MIME/Header/ISO_2022_JP.pm Encode extension lib/Encode/PerlIO.pod Documents for Encode & PerlIO lib/Encode/Supported.pod Documents for supported encodings lib/Encode/Unicode/UTF7.pm Encode Extension @@ -86,6 +87,7 @@ t/jperl.t test script t/ksc5601.enc test data t/ksc5601.utf test data t/mime-header.t test script +t/mime_header_iso2022jp.t test script t/perlio.t test script t/rt.pl even more test script t/unibench.pl benchmark script diff --git a/ext/Encode/META.yml b/ext/Encode/META.yml index d1a3260..e17c3de 100644 --- a/ext/Encode/META.yml +++ b/ext/Encode/META.yml @@ -1,7 +1,7 @@ # http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! 
XXXXX# name: Encode -version: 2.10 +version: 2.11 version_from: Encode.pm installdirs: perl requires: diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm index 2ec8f9f..c0bbf69 100644 --- a/ext/Encode/lib/Encode/Alias.pm +++ b/ext/Encode/lib/Encode/Alias.pm @@ -2,7 +2,7 @@ package Encode::Alias; use strict; no warnings 'redefine'; use Encode; -our $VERSION = do { my @r = (q$Revision: 2.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; sub DEBUG () { 0 } use base qw(Exporter); diff --git a/ext/Encode/lib/Encode/Config.pm b/ext/Encode/lib/Encode/Config.pm index 0c752cf8..d69b92d 100644 --- a/ext/Encode/lib/Encode/Config.pm +++ b/ext/Encode/lib/Encode/Config.pm @@ -2,7 +2,7 @@ # Demand-load module list # package Encode::Config; -our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use strict; @@ -145,6 +145,7 @@ unless (ord("A") == 193){ 'MIME-B' => 'Encode::MIME::Header', 'MIME-Q' => 'Encode::MIME::Header', + 'MIME-Header-ISO_2022_JP' => 'Encode::MIME::Header::ISO_2022_JP', ); } diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm index f000776..f4e2ad6 100644 --- a/ext/Encode/lib/Encode/MIME/Header.pm +++ b/ext/Encode/lib/Encode/MIME/Header.pm @@ -1,7 +1,7 @@ package Encode::MIME::Header; use strict; # use warnings; -our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode qw(find_encoding encode_utf8 decode_utf8); use MIME::Base64; use Carp; @@ -216,6 +216,10 @@ and =?ISO-8859-1?= but that makes the implementation too complicated. These days major mail agents all support =?UTF-8? so I think it is just good enough. 
+Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by +Makamaka. There are still too many MUAs especially cellular phone +handsets which do not grok UTF-8. + =head1 SEE ALSO L diff --git a/ext/PerlIO/encoding/encoding.pm b/ext/PerlIO/encoding/encoding.pm index 3eb7dd5..f0d419b 100644 --- a/ext/PerlIO/encoding/encoding.pm +++ b/ext/PerlIO/encoding/encoding.pm @@ -1,6 +1,6 @@ package PerlIO::encoding; use strict; -our $VERSION = '0.08'; +our $VERSION = '0.09'; our $DEBUG = 0; $DEBUG and warn __PACKAGE__, " called by ", join(", ", caller), "\n"; @@ -12,7 +12,8 @@ $DEBUG and warn __PACKAGE__, " called by ", join(", ", caller), "\n"; use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); -our $fallback = Encode::PERLQQ()|Encode::WARN_ON_ERR(); +our $fallback = + Encode::PERLQQ()|Encode::WARN_ON_ERR()|Encode::STOP_AT_PARTIAL(); 1; __END__