#!./perl
-# $Id: piconv,v 1.20 2002/04/04 19:50:52 dankogai Exp $
+# $Id: piconv,v 2.2 2006/05/03 18:24:10 dankogai Exp $
#
-use 5.7.3;
+use 5.8.0;
use strict;
use Encode ;
use Encode::Alias;
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
-use Getopt::Std;
-
-my %Opt; getopts("hDS:lf:t:s:", \%Opt);
-$Opt{h} and help();
-$Opt{l} and list_encodings();
+use File::Basename;
+my $name = basename($0);
+
+use Getopt::Long qw(:config no_ignore_case);
+
+my %Opt;
+
+help()
+ unless
+ GetOptions(\%Opt,
+ 'from|f=s',
+ 'to|t=s',
+ 'list|l',
+ 'string|s=s',
+ 'check|C=i',
+ 'c',
+ 'perlqq|p',
+ 'htmlcref',
+ 'xmlcref',
+ 'debug|D',
+ 'scheme|S=s',
+ 'resolve|r=s',
+ 'help',
+ );
+
+$Opt{help} and help();
+$Opt{list} and list_encodings();
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
-$Opt{f} || $Opt{t} || help();
-my $from = $Opt{f} || $locale or help("from_encoding unspecified");
-my $to = $Opt{t} || $locale or help("to_encoding unspecified");
-$Opt{s} and Encode::from_to($Opt{s}, $from, $to) and print $Opt{s} and exit;
-my $scheme = exists $Scheme{$Opt{S}} ? $Opt{S} : 'from_to';
-
-if ($Opt{D}){
+defined $Opt{resolve} and resolve_encoding($Opt{resolve});
+$Opt{from} || $Opt{to} || help();
+my $from = $Opt{from} || $locale or help("from_encoding unspecified");
+my $to = $Opt{to} || $locale or help("to_encoding unspecified");
+$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
+my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';
+$Opt{check} ||= $Opt{c};
+$Opt{perlqq} and $Opt{check} = Encode::PERLQQ;
+$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
+$Opt{xmlcref} and $Opt{check} = Encode::XMLCREF;
+
+if ($Opt{debug}){
my $cfrom = Encode->getEncoding($from)->name;
my $cto = Encode->getEncoding($to)->name;
- print STDERR <<"EOT";
+ print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
To: $to => $cto
EOT
}
-# default
-if ($scheme eq 'from_to'){
- while(<>){
- Encode::from_to($_, $from, $to); print;
- };
-# step-by-step
-}elsif ($scheme eq 'decode_encode'){
- while(<>){
- my $decoded = decode($from, $_);
- my $encoded = encode($to, $decoded);
- print $encoded;
- };
-# NI-S favorite
-}elsif ($scheme eq 'perlio'){
- binmode(STDIN, ":encoding($from)");
- binmode(STDOUT, ":encoding($to)");
- while(<>){ print; }
-}else{ # won't reach
- die "unknown scheme: $scheme";
+# we do not use <> (or ARGV) for the sake of binmode()
+@ARGV or push @ARGV, \*STDIN;
+
+unless ( $scheme eq 'perlio' ) {
+ binmode STDOUT;
+ for my $argv (@ARGV) {
+ my $ifh = ref $argv ? $argv : undef;
+ $ifh or open $ifh, "<", $argv or next;
+ binmode $ifh;
+ if ( $scheme eq 'from_to' ) { # default
+ while (<$ifh>) {
+ Encode::from_to( $_, $from, $to, $Opt{check} );
+ print;
+ }
+ }
+ elsif ( $scheme eq 'decode_encode' ) { # step-by-step
+ while (<$ifh>) {
+ my $decoded = decode( $from, $_, $Opt{check} );
+ my $encoded = encode( $to, $decoded );
+ print $encoded;
+ }
+ }
+ else { # won't reach
+ die "$name: unknown scheme: $scheme";
+ }
+ }
+}
+else {
+
+ # NI-S favorite
+ binmode STDOUT => "raw:encoding($to)";
+ for my $argv (@ARGV) {
+ my $ifh = ref $argv ? $argv : undef;
+ $ifh or open $ifh, "<", $argv or next;
+ binmode $ifh => "raw:encoding($from)";
+ print while (<$ifh>);
+ }
}
-sub list_encodings{
- print STDERR join("\n", Encode->encodings(":all")), "\n";
- exit;
+sub list_encodings {
+ print join( "\n", Encode->encodings(":all") ), "\n";
+ exit 0;
}
-sub help{
+sub resolve_encoding {
+ if ( my $alias = Encode::resolve_alias( $_[0] ) ) {
+ print $alias, "\n";
+ exit 0;
+ }
+ else {
+ warn "$name: $_[0] is not known to Encode\n";
+ exit 1;
+ }
+}
+
+sub help {
my $message = shift;
- use File::Basename;
- my $name = basename($0);
$message and print STDERR "$name error: $message\n";
print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
- -l lists all available encodings.
- -f from_encoding When omitted, the current locale will be used.
- -t to_encoding When omitted, the current locale will be used.
- -s string "string" will be converted instead of STDIN.
+$name -r encoding_alias
+ -l,--list
+ lists all available encodings
+ -r,--resolve encoding_alias
+ resolve encoding to its (Encode) canonical name
+ -f,--from from_encoding
+ when omitted, the current locale will be used
+ -t,--to to_encoding
+ when omitted, the current locale will be used
+ -s,--string string
+ "string" will be the input instead of STDIN or files
+The following are mainly of interest to Encode hackers:
+ -D,--debug show debug information
+ -C N | -c check the validity of the input
+ -S,--scheme scheme use the scheme for conversion
+Those are handy when you can only see ascii characters:
+ -p,--perlqq
+ --htmlcref
+ --xmlcref
EOT
- exit;
+ exit;
}
__END__
piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
piconv -l
+ piconv [-C N|-c|-p]
+ piconv -S scheme ...
+ piconv -r encoding
+ piconv -D ...
+ piconv -h
=head1 DESCRIPTION
-B<piconv> is perl version of F<iconv>, a character encoding converter
-widely available for various Unixen today. This script was primarily
-a technology demonstrator for Perl 5.8.0, you can use piconv in the
-place of iconv for virtually any cases.
+B<piconv> is perl version of B<iconv>, a character encoding converter
+widely available for various Unixen today. This script was primarily
+a technology demonstrator for Perl 5.8.0, but you can use piconv in the
+place of iconv for virtually any case.
-piconv converts character encoding of either STDIN or files specified
-in the argument and prints out to STDOUT.
+piconv converts the character encoding of either STDIN or files
+specified in the argument and prints out to STDOUT.
-Here are list of options.
+Here is the list of options. Each option can be in short format (-f)
+or long (--from).
=over 4
-=item -f from_encoding
+=item -f,--from from_encoding
+
+Specifies the encoding you are converting from. Unlike B<iconv>,
+this option can be omitted. In such cases, the current locale is used.
+
+=item -t,--to to_encoding
+
+Specifies the encoding you are converting to. Unlike B<iconv>,
+this option can be omitted. In such cases, the current locale is used.
+
+Therefore, when both -f and -t are omitted, B<piconv> just acts
+like B<cat>.
+
+=item -s,--string I<string>
+
+uses I<string> instead of file for the source of text.
+
+=item -l,--list
+
+Lists all available encodings, one per line, in case-insensitive
+order. Note that only the canonical names are listed; many aliases
+exist. For example, the names are case-insensitive, and many standard
+and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
+instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
+for a full discussion.
+
+=item -C,--check I<N>
+
+Check the validity of the stream if I<N> = 1. When I<N> = -1, something
+interesting happens when it encounters an invalid character.
-Specifies the encoding you are converting from. Unlike F<iconv>,
-this option can be omitted. In such cases the current locale is used.
+=item -c
-=item -t to_encoding
+Same as C<-C 1>.
-Specifies the encoding you are converting to. Unlike F<iconv>,
-this option can be omitted. In such cases the current locale is used.
+=item -p,--perlqq
-Therefore when both -f and -t are omitted, F<piconv> just acts like F<cat>.
+=item --htmlcref
-=item -s I<string>
+=item --xmlcref
-uses I<string> instead of file for the source of text. Same as F<iconv>.
+Applies PERLQQ, HTMLCREF, XMLCREF, respectively. Try
-=item -l
+ piconv -f utf8 -t ascii --perlqq
-Lists all available encodings to STDERR.
+To see what it does.
-=item -h
+=item -h,--help
Show usage.
-=item -D
+=item -D,--debug
Invokes debugging mode. Primarily for Encode hackers.
-=item -S scheme
+=item -S,--scheme scheme
Selects which scheme is to be used for conversion. Available schemes
-are as follows;
+are as follows:
=over 4
=back
-Like I<-D> option, this is also for Encode hackers.
+Like the I<-D> option, this is also for Encode hackers.
=back
=head1 SEE ALSO
-L<iconv(1)>
-L<locale(3)>
+L<iconv/1>
+L<locale/3>
L<Encode>
+L<Encode::Supported>
+L<Encode::Alias>
L<PerlIO>
=cut