Anton Tagunov <tagunov@motor.ru>
Autrijus Tang <autrijus@autrijus.org>
Benjamin Goldberg <goldbb2@earthlink.net>
+Bjoern Jacke <debianbugs@j3e.de>
Chris Nandor <pudge@pobox.com>
Craig A. Berry <craigberry@mac.com>
Dan Kogai <dankogai@dan.co.jp>
Elizabeth Mattijsen <liz@dijkmat.nl>
-Enache Adrian <enache@rdslink.ro>
Gerrit P. Haase <gp@familiehaase.de>
Graham Barr <gbarr@pobox.com>
Gurusamy Sarathy <gsar@activestate.com>
SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
SUGAWARA Hajime <sugawara@hdt.co.jp>
SUZUKI Norio <ZAP00217@nifty.com>
+Simon Cozens <simon@netthink.co.uk>
Spider Boardman <spider@web.zk3.dec.com>
Tatsuhiko Miyagawa <miyagawa@edge.co.jp>
Vadim Konovalov <vkonovalov@peterstar.ru>
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.93 2003/04/24 17:43:16 dankogai Exp $
+# $Id: Changes,v 1.94 2003/05/10 18:13:59 dankogai Exp $
#
-$Revision: 1.93 $ $Date: 2003/04/24 17:43:16 $
+$Revision: 1.94 $ $Date: 2003/05/10 18:13:59 $
+! lib/Encode/MIME/Header.pm
+ A more sophisticated solution for double-encoding by dankogai
+! lib/Encode/MIME/Header.pm AUTHORS
+ Two bugs fixed by Bjoern Jacke
+ * "Double Encoding" was not possible
+ i.e. encode("MIME-B" => "=?UTF-8?B?w4RwZmVs?=")
+ * encode("MIME-Q") had UTF-8 flag on
+ Message-Id: <rt-22166-57077.2.12980078979811@bugs6.perl.org>
+! lib/Encode/MIME/Header.pm AUTHORS
+ Two occurances of "croak ()" fixed as "croak qq()".
+ Simon Cozens is added to AUTHORS as a result.
+ Message-Id: <20030509103708.GA30664@deep-dark-truthful-mirror.pad>
+! bin/piconv
+ POD fixes that reflect enhancements by jhi
+! bin/piconv
+ Two enhancements by jhi.
+ + Now uses Getopt::Long so it accepts long name options
+ (--from for -f, for example)
+ + New option: -r,--resolve
+ Message-Id: <20030505114149.GA227075@kosh.hut.fi>
+! MANIFEST META.yml
+ META.yml added upon request of Schwern
+ Message-Id: <F3B0BD2C-7BCB-11D7-A488-000393AE4244@dan.co.jp>
+! AUTHORS
+ Enache Adrian removed upon request -- to live longer than Encode
+ and/or FreeBSD (toy-)?thread :)
+ Message-Id: <20030425015701.GA2069@ratsnest.hole>
+! t/enc_module.t
+ "close STDOUT unless $^O eq 'freebsd';" once again relocated
+ to keep VMS happy in which case "$^O eq 'freebsd'" is required
+ to keep FreeBSD+thread happy. Sigh.
+ Message-Id: <3EA88ADC.3000300@mac.com>
+
+1.93 2003/04/24 17:43:16
! t/enc_eucjp.t
added "no warnings 'pack'" in for loop to keep bleedperl from
complaining "Character in 'C' format wrapped in pack".
#
-# $Id: Encode.pm,v 1.93 2003/04/24 17:44:00 dankogai Exp $
+# $Id: Encode.pm,v 1.94 2003/05/10 18:14:36 dankogai Exp $
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.93 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
KR/KR.pm Encode extension
KR/Makefile.PL Encode extension
MANIFEST Encode extension
+META.yml Module meta-data in YAML
Makefile.PL Encode extension makefile writer
README Encode extension
Symbol/Makefile.PL Encode extension
#!./perl
-# $Id: piconv,v 1.25 2002/06/01 18:07:49 dankogai Exp $
+# $Id: piconv,v 1.26 2003/05/10 18:13:59 dankogai Exp $
#
use 5.8.0;
use strict;
use Encode::Alias;
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
-use Getopt::Std;
-
-my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt);
-$Opt{h} and help();
-$Opt{l} and list_encodings();
+use File::Basename;
+my $name = basename($0);
+
+use Getopt::Long;
+
+my %Opt;
+
+help()
+ unless
+ GetOptions(\%Opt,
+ 'from|f=s',
+ 'to|t=s',
+ 'list|l',
+ 'string|s=s',
+ 'check|C=i',
+ 'c',
+ 'perlqq|p',
+ 'debug|D',
+ 'scheme|S=s',
+ 'resolve|r=s',
+ 'help',
+ );
+
+$Opt{help} and help();
+$Opt{list} and list_encodings();
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
-$Opt{f} || $Opt{t} || help();
-my $from = $Opt{f} || $locale or help("from_encoding unspecified");
-my $to = $Opt{t} || $locale or help("to_encoding unspecified");
-$Opt{s} and Encode::from_to($Opt{s}, $from, $to) and print $Opt{s} and exit;
-my $scheme = exists $Scheme{$Opt{S}} ? $Opt{S} : 'from_to';
-$Opt{C} ||= $Opt{c};
-$Opt{p} and $Opt{C} = Encode::FB_PERLQQ;
-
-if ($Opt{D}){
+defined $Opt{resolve} and resolve_encoding($Opt{resolve});
+$Opt{from} || $Opt{to} || help();
+my $from = $Opt{from} || $locale or help("from_encoding unspecified");
+my $to = $Opt{to} || $locale or help("to_encoding unspecified");
+$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
+my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';
+$Opt{check} ||= $Opt{c};
+$Opt{p} and $Opt{check} = Encode::FB_PERLQQ;
+
+if ($Opt{debug}){
my $cfrom = Encode->getEncoding($from)->name;
my $cto = Encode->getEncoding($to)->name;
print <<"EOT";
# default
if ($scheme eq 'from_to'){
while(<>){
- Encode::from_to($_, $from, $to, $Opt{C}); print;
+ Encode::from_to($_, $from, $to, $Opt{check}); print;
};
# step-by-step
}elsif ($scheme eq 'decode_encode'){
while(<>){
- my $decoded = decode($from, $_, $Opt{C});
+ my $decoded = decode($from, $_, $Opt{check});
my $encoded = encode($to, $decoded);
print $encoded;
};
binmode(STDIN, ":encoding($from)");
binmode(STDOUT, ":encoding($to)");
while(<>){ print; }
-}else{ # won't reach
- die "unknown scheme: $scheme";
+} else { # won't reach
+ die "$name: unknown scheme: $scheme";
}
sub list_encodings{
print join("\n", Encode->encodings(":all")), "\n";
- exit;
+ exit 0;
+}
+
+sub resolve_encoding {
+ if (my $alias = Encode::resolve_alias($_[0])) {
+ print $alias, "\n";
+ exit 0;
+ } else {
+ warn "$name: $_[0] is not known to Encode\n";
+ exit 1;
+ }
}
sub help{
my $message = shift;
- use File::Basename;
- my $name = basename($0);
$message and print STDERR "$name error: $message\n";
print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
- -l lists all available encodings (the canonical names, many aliases exist)
- -f from_encoding When omitted, the current locale will be used.
- -t to_encoding When omitted, the current locale will be used.
- -s string "string" will be converted instead of STDIN.
+$name -r encoding_alias
+ -l,--list
+ lists all available encodings
+ -r,--resolve encoding_alias
+ resolve encoding to its (Encode) canonical name
+ -f,--from from_encoding
+ when omitted, the current locale will be used
+ -t,--to to_encoding
+ when omitted, the current locale will be used
+ -s,--string string
+ "string" will be the input instead of STDIN or files
+The following are mainly of interest to Encode hackers:
+ -D,--debug show debug information
+ -C N | -c | -p check the validity of the input
+ -S,--scheme scheme use the scheme for conversion
EOT
exit;
}
piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
piconv -l
+ piconv [-C N|-c|-p]
+ piconv -S scheme ...
+ piconv -r encoding
+ piconv -D ...
+ piconv -h
=head1 DESCRIPTION
piconv converts the character encoding of either STDIN or files
specified in the argument and prints out to STDOUT.
-Here is the list of options.
+Here is the list of options. Each option can be in short format (-f)
+or long (--from).
=over 4
-=item -f from_encoding
+=item -f,--from from_encoding
Specifies the encoding you are converting from. Unlike B<iconv>,
this option can be omitted. In such cases, the current locale is used.
-=item -t to_encoding
+=item -t,--to to_encoding
Specifies the encoding you are converting to. Unlike B<iconv>,
this option can be omitted. In such cases, the current locale is used.
Therefore, when both -f and -t are omitted, B<piconv> just acts
like B<cat>.
-=item -s I<string>
+=item -s,--string I<string>
-uses I<string> instead of file for the source of text. Same as B<iconv>.
+uses I<string> instead of file for the source of text.
-=item -l
+=item -l,--list
Lists all available encodings, one per line, in case-insensitive
order. Note that only the canonical names are listed; many aliases
instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
for a full discussion.
-=item -C I<N>
+=item -C,--check I<N>
Check the validity of the stream if I<N> = 1. When I<N> = -1, something
interesting happens when it encounters an invalid character.
Same as C<-C 1>.
-=item -p
+=item -p,--perlqq
Same as C<-C -1>.
-=item -h
+=item -h,--help
Show usage.
-=item -D
+=item -D,--debug
Invokes debugging mode. Primarily for Encode hackers.
-=item -S scheme
+=item -S,--scheme scheme
Selects which scheme is to be used for conversion. Available schemes
are as follows:
=head1 SEE ALSO
-L<iconv(1)>
-L<locale(3)>
+L<iconv/1>
+L<locale/3>
L<Encode>
L<Encode::Supported>
L<Encode::Alias>
package Encode::MIME::Header;
use strict;
# use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-use Encode qw(find_encoding encode_utf8);
+our $VERSION = do { my @r = (q$Revision: 1.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+use Encode qw(find_encoding encode_utf8 decode_utf8);
use MIME::Base64;
use Carp;
sub decode_b{
my $enc = shift;
- my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
+ my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
my $db64 = decode_base64(shift);
return $d->name eq 'utf8' ?
Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
sub decode_q{
my ($enc, $q) = @_;
- my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
+ my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
$q =~ s/_/ /go;
$q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
return $d->name eq 'utf8' ?
map {quotemeta(chr($_))}
unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
-my $re_especials = qr/$especials/o;
+my $re_encoded_word =
+ qr{
+ (?:
+ =\? # begin encoded word
+ (?:[0-9A-Za-z\-_]+) # charset (encoding)
+ \?(?:[QqBb])\? # delimiter
+ (?:.*?) # Base64-encodede contents
+ \?= # end encoded word
+ )
+ }xo;
+
+my $re_especials = qr{$re_encoded_word|$especials}xo;
sub encode($$;$){
my ($obj, $str, $chk) = @_;
for my $line (split /\r|\n|\r\n/o, $str){
my (@word, @subline);
for my $word (split /($re_especials)/o, $line){
- if ($word =~ /[^\x00-\x7f]/o){
+ if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
push @word, $obj->_encode($word);
}else{
push @word, $word;
}{
join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
}egox;
- return HEAD . 'Q?' . $chunk . TAIL;
+ return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
}
1;
-# $Id: enc_module.t,v 1.5 2003/04/24 17:43:16 dankogai Exp $
+# $Id: enc_module.t,v 1.6 2003/05/10 18:13:59 dankogai Exp $
# This file is in euc-jp
BEGIN {
require Config; import Config;
$obj->set("¥Æ¥¹¥Èʸ»úÎó");
print $obj->str, "\n";
-# I have tested and found "unless $^O eq 'freebsd'" is not
-# necessary but I will leave it for the sake of Enache -- dankogai
# Please do not move this to a point after the comparison -- Craig Berry
+# and "unless $^O eq 'freebsd'" is needed for FreeBSD (toy-)?thread
+# -- dankogai
close STDOUT unless $^O eq 'freebsd';
my $cmp = compare_text($file0, $file1);
#
-# $Id: mime-header.t,v 1.6 2002/10/21 19:47:47 dankogai Exp $
+# $Id: mime-header.t,v 1.7 2003/05/10 18:13:59 dankogai Exp $
# This script is written in utf8
#
BEGIN {
use strict;
#use Test::More qw(no_plan);
-use Test::More tests => 7;
+use Test::More tests => 9;
use_ok("Encode::MIME::Header");
my $eheader =<<'EOS';
is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q");
is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B");
is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q");
+
+$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?";
+$bheader = "What is =?UTF-8?B?PT9VVEYtOD9CP3c0UndabVZzPz0=?= ?";
+$qheader = "What is =?UTF-8?Q?=3D=3FUTF=2D8=3FB=3Fw4RwZmVs=3F=3D?= ?";
+is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B");
+is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q");
__END__;