From: Jarkko Hietaniemi Date: Sat, 10 May 2003 18:59:29 +0000 (+0000) Subject: Upgrade to Encode 1.94. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=bedba6814834d84c03e3c8711e154e5c1e84209c;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 1.94. p4raw-id: //depot/perl@19477 --- diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index f921fd5..b565a0f 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -13,11 +13,11 @@ Andreas J. Koenig Anton Tagunov Autrijus Tang Benjamin Goldberg +Bjoern Jacke Chris Nandor Craig A. Berry Dan Kogai Elizabeth Mattijsen -Enache Adrian Gerrit P. Haase Graham Barr Gurusamy Sarathy @@ -39,6 +39,7 @@ Robin Barker SADAHIRO Tomoyuki SUGAWARA Hajime SUZUKI Norio +Simon Cozens Spider Boardman Tatsuhiko Miyagawa Vadim Konovalov diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 8d7a054..1e68f43 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,8 +1,42 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 1.93 2003/04/24 17:43:16 dankogai Exp $ +# $Id: Changes,v 1.94 2003/05/10 18:13:59 dankogai Exp $ # -$Revision: 1.93 $ $Date: 2003/04/24 17:43:16 $ +$Revision: 1.94 $ $Date: 2003/05/10 18:13:59 $ +! lib/Encode/MIME/Header.pm + A more sophisticated solution for double-encoding by dankogai +! lib/Encode/MIME/Header.pm AUTHORS + Two bugs fixed by Bjoern Jacke + * "Double Encoding" was not possible + i.e. encode("MIME-B" => "=?UTF-8?B?w4RwZmVs?=") + * encode("MIME-Q") had UTF-8 flag on + Message-Id: +! lib/Encode/MIME/Header.pm AUTHORS + Two occurances of "croak ()" fixed as "croak qq()". + Simon Cozens is added to AUTHORS as a result. + Message-Id: <20030509103708.GA30664@deep-dark-truthful-mirror.pad> +! bin/piconv + POD fixes that reflect enhancements by jhi +! bin/piconv + Two enhancements by jhi. + + Now uses Getopt::Long so it accepts long name options + (--from for -f, for example) + + New option: -r,--resolve + Message-Id: <20030505114149.GA227075@kosh.hut.fi> +! MANIFEST META.yml + META.yml added upon request of Schwern + Message-Id: +! AUTHORS + Enache Adrian removed upon request -- to live longer than Encode + and/or FreeBSD (toy-)?thread :) + Message-Id: <20030425015701.GA2069@ratsnest.hole> +! t/enc_module.t + "close STDOUT unless $^O eq 'freebsd';" once again relocated + to keep VMS happy in which case "$^O eq 'freebsd'" is required + to keep FreeBSD+thread happy. Sigh. + Message-Id: <3EA88ADC.3000300@mac.com> + +1.93 2003/04/24 17:43:16 ! t/enc_eucjp.t added "no warnings 'pack'" in for loop to keep bleedperl from complaining "Character in 'C' format wrapped in pack". diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 45d134b..e9dead4 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,9 +1,9 @@ # -# $Id: Encode.pm,v 1.93 2003/04/24 17:44:00 dankogai Exp $ +# $Id: Encode.pm,v 1.94 2003/05/10 18:14:36 dankogai Exp $ # package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 1.93 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index d46dea1..86aaea7 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -20,6 +20,7 @@ JP/Makefile.PL Encode extension KR/KR.pm Encode extension KR/Makefile.PL Encode extension MANIFEST Encode extension +META.yml Module meta-data in YAML Makefile.PL Encode extension makefile writer README Encode extension Symbol/Makefile.PL Encode extension diff --git a/ext/Encode/bin/piconv b/ext/Encode/bin/piconv index fb1d7d6..b25b0b5 100644 --- a/ext/Encode/bin/piconv +++ b/ext/Encode/bin/piconv @@ -1,5 +1,5 @@ #!./perl -# $Id: piconv,v 1.25 2002/06/01 18:07:49 dankogai Exp $ +# $Id: piconv,v 1.26 2003/05/10 18:13:59 dankogai Exp $ # use 5.8.0; use strict; @@ -7,21 +7,42 @@ use Encode ; use Encode::Alias; my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio); -use Getopt::Std; - -my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt); -$Opt{h} and help(); -$Opt{l} and list_encodings(); +use File::Basename; +my $name = basename($0); + +use Getopt::Long; + +my %Opt; + +help() + unless + GetOptions(\%Opt, + 'from|f=s', + 'to|t=s', + 'list|l', + 'string|s=s', + 'check|C=i', + 'c', + 'perlqq|p', + 'debug|D', + 'scheme|S=s', + 'resolve|r=s', + 'help', + ); + +$Opt{help} and help(); +$Opt{list} and list_encodings(); my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG}; -$Opt{f} || $Opt{t} || help(); -my $from = $Opt{f} || $locale or help("from_encoding unspecified"); -my $to = $Opt{t} || $locale or help("to_encoding unspecified"); -$Opt{s} and Encode::from_to($Opt{s}, $from, $to) and print $Opt{s} and exit; -my $scheme = exists $Scheme{$Opt{S}} ? $Opt{S} : 'from_to'; -$Opt{C} ||= $Opt{c}; -$Opt{p} and $Opt{C} = Encode::FB_PERLQQ; - -if ($Opt{D}){ +defined $Opt{resolve} and resolve_encoding($Opt{resolve}); +$Opt{from} || $Opt{to} || help(); +my $from = $Opt{from} || $locale or help("from_encoding unspecified"); +my $to = $Opt{to} || $locale or help("to_encoding unspecified"); +$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit; +my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to'; +$Opt{check} ||= $Opt{c}; +$Opt{p} and $Opt{check} = Encode::FB_PERLQQ; + +if ($Opt{debug}){ my $cfrom = Encode->getEncoding($from)->name; my $cto = Encode->getEncoding($to)->name; print <<"EOT"; @@ -34,12 +55,12 @@ EOT # default if ($scheme eq 'from_to'){ while(<>){ - Encode::from_to($_, $from, $to, $Opt{C}); print; + Encode::from_to($_, $from, $to, $Opt{check}); print; }; # step-by-step }elsif ($scheme eq 'decode_encode'){ while(<>){ - my $decoded = decode($from, $_, $Opt{C}); + my $decoded = decode($from, $_, $Opt{check}); my $encoded = encode($to, $decoded); print $encoded; }; @@ -48,27 +69,46 @@ if ($scheme eq 'from_to'){ binmode(STDIN, ":encoding($from)"); binmode(STDOUT, ":encoding($to)"); while(<>){ print; } -}else{ # won't reach - die "unknown scheme: $scheme"; +} else { # won't reach + die "$name: unknown scheme: $scheme"; } sub list_encodings{ print join("\n", Encode->encodings(":all")), "\n"; - exit; + exit 0; +} + +sub resolve_encoding { + if (my $alias = Encode::resolve_alias($_[0])) { + print $alias, "\n"; + exit 0; + } else { + warn "$name: $_[0] is not known to Encode\n"; + exit 1; + } } sub help{ my $message = shift; - use File::Basename; - my $name = basename($0); $message and print STDERR "$name error: $message\n"; print STDERR <<"EOT"; $name [-f from_encoding] [-t to_encoding] [-s string] [files...] $name -l - -l lists all available encodings (the canonical names, many aliases exist) - -f from_encoding When omitted, the current locale will be used. - -t to_encoding When omitted, the current locale will be used. - -s string "string" will be converted instead of STDIN. +$name -r encoding_alias + -l,--list + lists all available encodings + -r,--resolve encoding_alias + resolve encoding to its (Encode) canonical name + -f,--from from_encoding + when omitted, the current locale will be used + -t,--to to_encoding + when omitted, the current locale will be used + -s,--string string + "string" will be the input instead of STDIN or files +The following are mainly of interest to Encode hackers: + -D,--debug show debug information + -C N | -c | -p check the validity of the input + -S,--scheme scheme use the scheme for conversion EOT exit; } @@ -83,6 +123,11 @@ piconv -- iconv(1), reinvented in perl piconv [-f from_encoding] [-t to_encoding] [-s string] [files...] piconv -l + piconv [-C N|-c|-p] + piconv -S scheme ... + piconv -r encoding + piconv -D ... + piconv -h =head1 DESCRIPTION @@ -94,16 +139,17 @@ place of iconv for virtually any case. piconv converts the character encoding of either STDIN or files specified in the argument and prints out to STDOUT. -Here is the list of options. +Here is the list of options. Each option can be in short format (-f) +or long (--from). =over 4 -=item -f from_encoding +=item -f,--from from_encoding Specifies the encoding you are converting from. Unlike B, this option can be omitted. In such cases, the current locale is used. -=item -t to_encoding +=item -t,--to to_encoding Specifies the encoding you are converting to. Unlike B, this option can be omitted. In such cases, the current locale is used. @@ -111,11 +157,11 @@ this option can be omitted. In such cases, the current locale is used. Therefore, when both -f and -t are omitted, B just acts like B. -=item -s I +=item -s,--string I -uses I instead of file for the source of text. Same as B. +uses I instead of file for the source of text. -=item -l +=item -l,--list Lists all available encodings, one per line, in case-insensitive order. Note that only the canonical names are listed; many aliases @@ -124,7 +170,7 @@ and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850" instead of "cp850", or "winlatin1" for "cp1252". See L for a full discussion. -=item -C I +=item -C,--check I Check the validity of the stream if I = 1. When I = -1, something interesting happens when it encounters an invalid character. @@ -133,19 +179,19 @@ interesting happens when it encounters an invalid character. Same as C<-C 1>. -=item -p +=item -p,--perlqq Same as C<-C -1>. -=item -h +=item -h,--help Show usage. -=item -D +=item -D,--debug Invokes debugging mode. Primarily for Encode hackers. -=item -S scheme +=item -S,--scheme scheme Selects which scheme is to be used for conversion. Available schemes are as follows: @@ -173,8 +219,8 @@ Like the I<-D> option, this is also for Encode hackers. =head1 SEE ALSO -L -L +L +L L L L diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm index fb4fdd9..447951b 100644 --- a/ext/Encode/lib/Encode/MIME/Header.pm +++ b/ext/Encode/lib/Encode/MIME/Header.pm @@ -1,9 +1,8 @@ package Encode::MIME::Header; use strict; # use warnings; -our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; - -use Encode qw(find_encoding encode_utf8); +our $VERSION = do { my @r = (q$Revision: 1.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +use Encode qw(find_encoding encode_utf8 decode_utf8); use MIME::Base64; use Carp; @@ -72,7 +71,7 @@ sub decode($$;$){ sub decode_b{ my $enc = shift; - my $d = find_encoding($enc) or croak(Unknown encoding "$enc"); + my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); my $db64 = decode_base64(shift); return $d->name eq 'utf8' ? Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ); @@ -80,7 +79,7 @@ sub decode_b{ sub decode_q{ my ($enc, $q) = @_; - my $d = find_encoding($enc) or croak(Unknown encoding "$enc"); + my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); $q =~ s/_/ /go; $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego; return $d->name eq 'utf8' ? @@ -92,7 +91,18 @@ my $especials = map {quotemeta(chr($_))} unpack("C*", qq{()<>@,;:\"\'/[]?.=})); -my $re_especials = qr/$especials/o; +my $re_encoded_word = + qr{ + (?: + =\? # begin encoded word + (?:[0-9A-Za-z\-_]+) # charset (encoding) + \?(?:[QqBb])\? # delimiter + (?:.*?) # Base64-encodede contents + \?= # end encoded word + ) + }xo; + +my $re_especials = qr{$re_encoded_word|$especials}xo; sub encode($$;$){ my ($obj, $str, $chk) = @_; @@ -100,7 +110,7 @@ sub encode($$;$){ for my $line (split /\r|\n|\r\n/o, $str){ my (@word, @subline); for my $word (split /($re_especials)/o, $line){ - if ($word =~ /[^\x00-\x7f]/o){ + if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){ push @word, $obj->_encode($word); }else{ push @word, $word; @@ -158,7 +168,7 @@ sub _encode_q{ }{ join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) }egox; - return HEAD . 'Q?' . $chunk . TAIL; + return decode_utf8(HEAD . 'Q?' . $chunk . TAIL); } 1; diff --git a/ext/Encode/t/enc_module.t b/ext/Encode/t/enc_module.t index d444f40..d6d9e7e 100644 --- a/ext/Encode/t/enc_module.t +++ b/ext/Encode/t/enc_module.t @@ -1,4 +1,4 @@ -# $Id: enc_module.t,v 1.5 2003/04/24 17:43:16 dankogai Exp $ +# $Id: enc_module.t,v 1.6 2003/05/10 18:13:59 dankogai Exp $ # This file is in euc-jp BEGIN { require Config; import Config; @@ -41,9 +41,9 @@ print $obj->str, "\n"; $obj->set("¥Æ¥¹¥Èʸ»úÎó"); print $obj->str, "\n"; -# I have tested and found "unless $^O eq 'freebsd'" is not -# necessary but I will leave it for the sake of Enache -- dankogai # Please do not move this to a point after the comparison -- Craig Berry +# and "unless $^O eq 'freebsd'" is needed for FreeBSD (toy-)?thread +# -- dankogai close STDOUT unless $^O eq 'freebsd'; my $cmp = compare_text($file0, $file1); diff --git a/ext/Encode/t/mime-header.t b/ext/Encode/t/mime-header.t index 4c84c4f..81d6ec8 100644 --- a/ext/Encode/t/mime-header.t +++ b/ext/Encode/t/mime-header.t @@ -1,5 +1,5 @@ # -# $Id: mime-header.t,v 1.6 2002/10/21 19:47:47 dankogai Exp $ +# $Id: mime-header.t,v 1.7 2003/05/10 18:13:59 dankogai Exp $ # This script is written in utf8 # BEGIN { @@ -23,7 +23,7 @@ no utf8; use strict; #use Test::More qw(no_plan); -use Test::More tests => 7; +use Test::More tests => 9; use_ok("Encode::MIME::Header"); my $eheader =<<'EOS'; @@ -91,4 +91,10 @@ is(Encode::decode('MIME-Header', $bheader), $dheader, "decode B"); is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q"); is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B"); is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q"); + +$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?"; +$bheader = "What is =?UTF-8?B?PT9VVEYtOD9CP3c0UndabVZzPz0=?= ?"; +$qheader = "What is =?UTF-8?Q?=3D=3FUTF=2D8=3FB=3Fw4RwZmVs=3F=3D?= ?"; +is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B"); +is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q"); __END__;