#!./perl
-# $Id: piconv,v 1.20 2002/04/04 19:50:52 dankogai Exp $
+# $Id: piconv,v 1.24 2002/04/22 02:45:50 dankogai Exp $
#
use 5.7.3;
use strict;
use Getopt::Std;
-my %Opt; getopts("hDS:lf:t:s:", \%Opt);
+my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt);
$Opt{h} and help();
$Opt{l} and list_encodings();
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
my $to = $Opt{t} || $locale or help("to_encoding unspecified");
$Opt{s} and Encode::from_to($Opt{s}, $from, $to) and print $Opt{s} and exit;
my $scheme = exists $Scheme{$Opt{S}} ? $Opt{S} : 'from_to';
+$Opt{C} ||= $Opt{c};
+$Opt{p} and $Opt{C} = Encode::FB_PERLQQ;
if ($Opt{D}){
my $cfrom = Encode->getEncoding($from)->name;
my $cto = Encode->getEncoding($to)->name;
- print STDERR <<"EOT";
+ print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
To: $to => $cto
# default
if ($scheme eq 'from_to'){
while(<>){
- Encode::from_to($_, $from, $to); print;
+ Encode::from_to($_, $from, $to, $Opt{C}); print;
};
# step-by-step
}elsif ($scheme eq 'decode_encode'){
while(<>){
- my $decoded = decode($from, $_);
+ my $decoded = decode($from, $_, $Opt{C});
my $encoded = encode($to, $decoded);
print $encoded;
};
}
sub list_encodings{
- print STDERR join("\n", Encode->encodings(":all")), "\n";
+ print join("\n", Encode->encodings(":all")), "\n";
exit;
}
print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
- -l lists all available encodings.
+ -l lists all available encodings (the canonical names, many aliases exist)
-f from_encoding When omitted, the current locale will be used.
-t to_encoding When omitted, the current locale will be used.
-s string "string" will be converted instead of STDIN.
=head1 DESCRIPTION
-B<piconv> is perl version of F<iconv>, a character encoding converter
-widely available for various Unixen today. This script was primarily
-a technology demonstrator for Perl 5.8.0, you can use piconv in the
-place of iconv for virtually any cases.
+B<piconv> is a Perl version of B<iconv>, a character encoding converter
+widely available for various Unixen today. This script was primarily
+a technology demonstrator for Perl 5.8.0, but you can use piconv in
+place of iconv in virtually any case.
-piconv converts character encoding of either STDIN or files specified
-in the argument and prints out to STDOUT.
+piconv converts the character encoding of either STDIN or files
+specified in the argument and prints out to STDOUT.
-Here are list of options.
+Here is the list of options.
=over 4
=item -f from_encoding
-Specifies the encoding you are converting from. Unlike F<iconv>,
-this option can be omitted. In such cases the current locale is used.
+Specifies the encoding you are converting from. Unlike B<iconv>,
+this option can be omitted. In such cases, the current locale is used.
=item -t to_encoding
-Specifies the encoding you are converting to. Unlike F<iconv>,
-this option can be omitted. In such cases the current locale is used.
+Specifies the encoding you are converting to. Unlike B<iconv>,
+this option can be omitted. In such cases, the current locale is used.
-Therefore when both -f and -t are omitted, F<piconv> just acts like F<cat>.
+Therefore, when both -f and -t are omitted, B<piconv> just acts
+like B<cat>.
=item -s I<string>
-uses I<string> instead of file for the source of text. Same as F<iconv>.
+Uses I<string> instead of a file as the source of text. Same as B<iconv>.
=item -l
-Lists all available encodings to STDERR.
+Lists all available encodings, one per line, in case-insensitive
+order. Note that only the canonical names are listed; many aliases
+exist. For example, the names are case-insensitive, and many standard
+and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
+instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
+for a full discussion.
+
+=item -C I<N>
+
+Check the validity of the stream if I<N> = 1. When I<N> = -1, an invalid
+character is handled with Encode's C<FB_PERLQQ> fallback, i.e. it is
+replaced by a Perl-style escape sequence (see L<Encode> for details).
+
+=item -c
+
+Same as C<-C 1>.
+
+=item -p
+
+Same as C<-C -1>.
=item -h
=item -S scheme
Selects which scheme is to be used for conversion. Available schemes
-are as follows;
+are as follows:
=over 4
=back
-Like I<-D> option, this is also for Encode hackers.
+Like the I<-D> option, this is also for Encode hackers.
=back
L<iconv(1)>
L<locale(3)>
L<Encode>
+L<Encode::Supported>
+L<Encode::Alias>
L<PerlIO>
=cut