From: Steve Peters Date: Sun, 13 May 2007 16:47:40 +0000 (+0000) Subject: Upgrade to Encode-2.21 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=742555bdc91466b79b2d8e3bb1e4cfd8a1dc6c3c;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode-2.21 p4raw-id: //depot/perl@31212 --- diff --git a/MANIFEST b/MANIFEST index cc8d430..f04de0b 100644 --- a/MANIFEST +++ b/MANIFEST @@ -483,6 +483,7 @@ ext/Encode/lib/Encode/JP/JIS7.pm Encode extension ext/Encode/lib/Encode/KR/2022_KR.pm Encode extension ext/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm Encode extension ext/Encode/lib/Encode/MIME/Header.pm Encode extension +ext/Encode/lib/Encode/MIME/Name.pm Encode extension ext/Encode/lib/Encode/PerlIO.pod Documents for Encode & PerlIO ext/Encode/lib/Encode/Supported.pod Documents for supported encodings ext/Encode/lib/Encode/Unicode/UTF7.pm Encode extension @@ -524,6 +525,7 @@ ext/Encode/t/ksc5601.enc test data ext/Encode/t/ksc5601.utf test data ext/Encode/t/mime_header_iso2022jp.t test script ext/Encode/t/mime-header.t test script +ext/Encode/t/mime-name.t test script ext/Encode/t/Mod_EUCJP.pm module that t/enc_module.enc uses ext/Encode/t/perlio.t test script ext/Encode/t/rt.pl test script diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index c52e7c4..74c7e3f 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,10 +1,10 @@ # -# $Id: Encode.pm,v 2.20 2007/04/22 14:56:12 dankogai Exp dankogai $ +# $Id: Encode.pm,v 2.21 2007/05/12 06:42:19 dankogai Exp dankogai $ # package Encode; use strict; use warnings; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.20 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.21 $ =~ /(\d+)/g; sub DEBUG () { 0 } use XSLoader (); XSLoader::load( __PACKAGE__, $VERSION ); @@ -565,6 +565,22 @@ exported via C. See L for details. +=head2 Finding IANA Character Set Registry names + +The canonical name of a given encoding does not necessarily agree with +IANA IANA Character Set Registry, commonly seen as C<< Content-Type: +text/plain; charset=I >>. For most cases canonical names +work but sometimes it does not (notably 'utf-8-strict'). + +Therefore as of Encode version 2.21, a new method C is added. + + use Encode; + my $enc = find_encoding('UTF-8'); + warn $enc->name; # utf-8-strict + warn $enc->mime_name; # UTF-8 + +See also: L + =head1 Encoding via PerlIO If your perl supports I (which is the default), you can use a diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index fe645b6..9de0a64 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.11 2007/04/06 12:53:41 dankogai Exp $ + $Id: Encode.xs,v 2.12 2007/05/12 06:42:19 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -650,6 +650,35 @@ CODE: XSRETURN(1); } +void +Method_mime_name(obj) +SV * obj +CODE: +{ + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + SV *retval; + eval_pv("require Encode::MIME::Name", 0); + + if (SvTRUE(get_sv("@", 0))) { + ST(0) = &PL_sv_undef; + }else{ + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); + PUTBACK; + call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); + SPAGAIN; + retval = newSVsv(POPs); + PUTBACK; + FREETMPS; + LEAVE; + /* enc->name[0] */ + ST(0) = retval; + } + XSRETURN(1); +} + MODULE = Encode PACKAGE = Encode PROTOTYPES: ENABLE diff --git a/ext/Encode/Unicode/Unicode.pm b/ext/Encode/Unicode/Unicode.pm index 9a11d81..6e10941 100644 --- a/ext/Encode/Unicode/Unicode.pm +++ b/ext/Encode/Unicode/Unicode.pm @@ -4,7 +4,7 @@ use strict; use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); @@ -158,7 +158,7 @@ and as of this writing Encode suite just leave it as is (\x{FeFF}). 16 32 bits/char ------------------------- BE 0xFeFF 0x0000FeFF - LE 0xFFeF 0xFFFe0000 + LE 0xFFFe 0xFFFe0000 ------------------------- =back diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm index 7b8eee4..5b9bdf8 100644 --- a/ext/Encode/encoding.pm +++ b/ext/Encode/encoding.pm @@ -1,4 +1,4 @@ -# $Id: encoding.pm,v 2.6 2007/04/22 14:56:12 dankogai Exp dankogai $ +# $Id: encoding.pm,v 2.6 2007/04/22 14:56:12 dankogai Exp $ package encoding; our $VERSION = do { my @r = ( q$Revision: 2.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; diff --git a/ext/Encode/lib/Encode/Encoding.pm b/ext/Encode/lib/Encode/Encoding.pm index 9ebf95b..768d6d1 100644 --- a/ext/Encode/lib/Encode/Encoding.pm +++ b/ext/Encode/lib/Encode/Encoding.pm @@ -3,7 +3,7 @@ package Encode::Encoding; # Base class for classes which implement encodings use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; require Encode; @@ -20,6 +20,11 @@ sub Define { sub name { return shift->{'Name'} } +sub mime_name{ + require Encode::MIME::Name; + return Encode::MIME::Name::get_mime_name(shift->name); +} + # sub renew { return $_[0] } sub renew { @@ -178,6 +183,17 @@ Predefined As: MUST return the string representing the canonical name of the encoding. +=item -Emime_name + +Predefined As: + + sub mime_name{ + require Encode::MIME::Name; + return Encode::MIME::Name::get_mime_name(shift->name); + } + +MUST return the string representing the IANA charset name of the encoding. + =item -Erenew Predefined As: diff --git a/ext/Encode/lib/Encode/MIME/Name.pm b/ext/Encode/lib/Encode/MIME/Name.pm new file mode 100644 index 0000000..10d86a7 --- /dev/null +++ b/ext/Encode/lib/Encode/MIME/Name.pm @@ -0,0 +1,94 @@ +package Encode::MIME::Name; +use strict; +use warnings; +our $VERSION = do { my @r = ( q$Revision: 1.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; + +our %MIME_NAME_OF = ( + 'AdobeStandardEncoding' => 'Adobe-Standard-Encoding', + 'AdobeSymbol' => 'Adobe-Symbol-Encoding', + 'ascii' => 'US-ASCII', + 'big5-hkscs' => 'Big5-HKSCS', + 'cp1026' => 'IBM1026', + 'cp1047' => 'IBM1047', + 'cp1250' => 'windows-1250', + 'cp1251' => 'windows-1251', + 'cp1252' => 'windows-1252', + 'cp1253' => 'windows-1253', + 'cp1254' => 'windows-1254', + 'cp1255' => 'windows-1255', + 'cp1256' => 'windows-1256', + 'cp1257' => 'windows-1257', + 'cp1258' => 'windows-1258', + 'cp37' => 'IBM037', + 'cp424' => 'IBM424', + 'cp437' => 'IBM437', + 'cp500' => 'IBM500', + 'cp775' => 'IBM775', + 'cp850' => 'IBM850', + 'cp852' => 'IBM852', + 'cp855' => 'IBM855', + 'cp857' => 'IBM857', + 'cp860' => 'IBM860', + 'cp861' => 'IBM861', + 'cp862' => 'IBM862', + 'cp863' => 'IBM863', + 'cp864' => 'IBM864', + 'cp865' => 'IBM865', + 'cp866' => 'IBM866', + 'cp869' => 'IBM869', + 'cp936' => 'GBK', + 'euc-jp' => 'EUC-JP', + 'euc-kr' => 'EUC-KR', + #'gb2312-raw' => 'GB2312', # no, you're wrong, I18N::Charset + 'hp-roman8' => 'hp-roman8', + 'hz' => 'HZ-GB-2312', + 'iso-2022-jp' => 'ISO-2022-JP', + 'iso-2022-jp-1' => 'ISO-2022-JP', + 'iso-2022-kr' => 'ISO-2022-KR', + 'iso-8859-1' => 'ISO-8859-1', + 'iso-8859-10' => 'ISO-8859-10', + 'iso-8859-13' => 'ISO-8859-13', + 'iso-8859-14' => 'ISO-8859-14', + 'iso-8859-15' => 'ISO-8859-15', + 'iso-8859-16' => 'ISO-8859-16', + 'iso-8859-2' => 'ISO-8859-2', + 'iso-8859-3' => 'ISO-8859-3', + 'iso-8859-4' => 'ISO-8859-4', + 'iso-8859-5' => 'ISO-8859-5', + 'iso-8859-6' => 'ISO-8859-6', + 'iso-8859-7' => 'ISO-8859-7', + 'iso-8859-8' => 'ISO-8859-8', + 'iso-8859-9' => 'ISO-8859-9', + #'jis0201-raw' => 'JIS_X0201', + #'jis0208-raw' => 'JIS_C6226-1983', + #'jis0212-raw' => 'JIS_X0212-1990', + 'koi8-r' => 'KOI8-R', + 'koi8-u' => 'KOI8-U', + #'ksc5601-raw' => 'KS_C_5601-1987', + 'shiftjis' => 'Shift_JIS', + 'UTF-16' => 'UTF-16', + 'UTF-16BE' => 'UTF-16BE', + 'UTF-16LE' => 'UTF-16LE', + 'UTF-32' => 'UTF-32', + 'UTF-32BE' => 'UTF-32BE', + 'UTF-32LE' => 'UTF-32LE', + 'UTF-7' => 'UTF-7', + 'utf8' => 'UTF-8', + 'utf-8-strict' => 'UTF-8', + 'viscii' => 'VISCII', +); + +sub get_mime_name($) { $MIME_NAME_OF{$_[0]} }; + +1; +__END__ + +=head1 NAME + +Encode::MIME::NAME -- internally used by Encode + +=head1 SEE ALSO + +L + +=cut diff --git a/ext/Encode/t/mime-name.t b/ext/Encode/t/mime-name.t new file mode 100644 index 0000000..5346497 --- /dev/null +++ b/ext/Encode/t/mime-name.t @@ -0,0 +1,36 @@ +# +# $Id: mime-name.t,v 1.1 2007/05/12 06:42:19 dankogai Exp dankogai $ +# This script is written in utf8 +# +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +use warnings; +use Encode; +#use Test::More qw(no_plan); +use Test::More tests => 68; + +use_ok("Encode::MIME::Name"); +for my $canon ( sort keys %Encode::MIME::Name::MIME_NAME_OF ) { + my $enc = find_encoding($canon); + my $mime_name = $Encode::MIME::Name::MIME_NAME_OF{$canon}; + is $enc->mime_name, $mime_name, + qq(\$enc->mime_name("$canon") eq $mime_name); +} + +__END__;