2 # $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $
8 my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
11 my $name = basename($0);
13 use Getopt::Long qw(:config no_ignore_case);
33 $Opt{help} and help();  # help option set: hand off to help()
34 $Opt{list} and list_encodings();  # list option set: hand off to list_encodings()
35 my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};  # first true value wins; used below as the default encoding name
36 defined $Opt{resolve} and resolve_encoding($Opt{resolve});  # tested with defined(), so a false-but-defined alias is still passed on
37 $Opt{from} || $Opt{to} || help();  # neither from nor to supplied: call help()
38 my $from = $Opt{from} || $locale or help("from_encoding unspecified");  # fall back to $locale; help() with a message if both are false
39 my $to = $Opt{to} || $locale or help("to_encoding unspecified");  # same fallback for the target encoding
40 $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;  # converts in place; print/exit run only if from_to returns true
41 my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';  # NOTE(review): key is 'Scheme' (capital S) but the usage text documents --scheme; confirm against the GetOptions spec
42 $Opt{check} ||= $Opt{c};  # $Opt{c} is used only when $Opt{check} is unset or false
43 $Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ;  # perlqq option overrides any check value set above
46 my $cfrom = Encode->getEncoding($from)->name;
47 my $cto = Encode->getEncoding($to)->name;
55 # we do not use <> (or ARGV) for the sake of binmode()
56 @ARGV or push @ARGV, \*STDIN;
58 unless ($scheme eq 'perlio'){
61 my $ifh = ref $argv ? $argv : undef;
62 $ifh or open $ifh, "<", $argv or next;
64 if ($scheme eq 'from_to'){ # default
66 Encode::from_to($_, $from, $to, $Opt{check});
69 }elsif ($scheme eq 'decode_encode'){ # step-by-step
71 my $decoded = decode($from, $_, $Opt{check});
72 my $encoded = encode($to, $decoded);
75 } else { # won't reach
76 die "$name: unknown scheme: $scheme";
81 binmode STDOUT => "raw:encoding($to)";
83 my $ifh = ref $argv ? $argv : undef;
84 $ifh or open $ifh, "<", $argv or next;
85 binmode $ifh => "raw:encoding($from)";
91 print join("\n", Encode->encodings(":all")), "\n";
95 sub resolve_encoding {
96 if (my $alias = Encode::resolve_alias($_[0])) {
100 warn "$name: $_[0] is not known to Encode\n";
107 $message and print STDERR "$name error: $message\n";
108 print STDERR <<"EOT";
109 $name [-f from_encoding] [-t to_encoding] [-s string] [files...]
111 $name -r encoding_alias
113 lists all available encodings
114 -r,--resolve encoding_alias
115 resolve encoding to its (Encode) canonical name
116 -f,--from from_encoding
117 when omitted, the current locale will be used
119 when omitted, the current locale will be used
121 "string" will be the input instead of STDIN or files
122 The following are mainly of interest to Encode hackers:
123 -D,--debug show debug information
124 -C N | -c | -p check the validity of the input
125 -S,--scheme scheme use the scheme for conversion
134 piconv -- iconv(1), reinvented in perl
138 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
148 B<piconv> is a Perl version of B<iconv>, a character encoding converter
149 widely available for various Unixen today. This script was primarily
150 a technology demonstrator for Perl 5.8.0, but you can use piconv in
151 place of iconv in virtually any case.
153 piconv converts the character encoding of either STDIN or the files
154 specified as arguments and prints the result to STDOUT.
156 Here is the list of options. Each option can be in short format (-f)
161 =item -f,--from from_encoding
163 Specifies the encoding you are converting from. Unlike B<iconv>,
164 this option can be omitted. In such cases, the current locale is used.
166 =item -t,--to to_encoding
168 Specifies the encoding you are converting to. Unlike B<iconv>,
169 this option can be omitted. In such cases, the current locale is used.
171 Therefore, when both -f and -t are omitted, B<piconv> just acts
174 =item -s,--string I<string>
176 Uses I<string> instead of a file as the source of text.
180 Lists all available encodings, one per line, in case-insensitive
181 order. Note that only the canonical names are listed; many aliases
182 exist. For example, the names are case-insensitive, and many standard
183 and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
184 instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
185 for a full discussion.
187 =item -C,--check I<N>
189 Checks the validity of the stream if I<N> = 1. When I<N> = -1, something
190 interesting happens when it encounters an invalid character.
206 Invokes debugging mode. Primarily for Encode hackers.
208 =item -S,--scheme scheme
210 Selects which scheme is to be used for conversion. Available schemes
217 Uses Encode::from_to for conversion. This is the default.
221 Input strings are decode()d then encode()d. A straight two-step
226 The new PerlIO layer is used. NI-S' favorite.
230 Like the I<-D> option, this is also for Encode hackers.