2 # $Id: piconv,v 2.2 2006/05/03 18:24:10 dankogai Exp $
8 my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
11 my $name = basename($0);
13 use Getopt::Long qw(:config no_ignore_case);
35 $Opt{help} and help();
36 $Opt{list} and list_encodings();
37 my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
38 defined $Opt{resolve} and resolve_encoding($Opt{resolve});
39 $Opt{from} || $Opt{to} || help();
40 my $from = $Opt{from} || $locale or help("from_encoding unspecified");
41 my $to = $Opt{to} || $locale or help("to_encoding unspecified");
42 $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
43 my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';
44 $Opt{check} ||= $Opt{c};
45 $Opt{perlqq} and $Opt{check} = Encode::PERLQQ;
46 $Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
47 $Opt{xmlcref} and $Opt{check} = Encode::XMLCREF;
50 my $cfrom = Encode->getEncoding($from)->name;
51 my $cto = Encode->getEncoding($to)->name;
59 # we do not use <> (or ARGV) for the sake of binmode()
60 @ARGV or push @ARGV, \*STDIN;
62 unless ( $scheme eq 'perlio' ) {
64 for my $argv (@ARGV) {
65 my $ifh = ref $argv ? $argv : undef;
66 $ifh or open $ifh, "<", $argv or next;
68 if ( $scheme eq 'from_to' ) { # default
70 Encode::from_to( $_, $from, $to, $Opt{check} );
74 elsif ( $scheme eq 'decode_encode' ) { # step-by-step
76 my $decoded = decode( $from, $_, $Opt{check} );
77 my $encoded = encode( $to, $decoded );
82 die "$name: unknown scheme: $scheme";
89 binmode STDOUT => "raw:encoding($to)";
90 for my $argv (@ARGV) {
91 my $ifh = ref $argv ? $argv : undef;
92 $ifh or open $ifh, "<", $argv or next;
93 binmode $ifh => "raw:encoding($from)";
99 print join( "\n", Encode->encodings(":all") ), "\n";
103 sub resolve_encoding {
104 if ( my $alias = Encode::resolve_alias( $_[0] ) ) {
109 warn "$name: $_[0] is not known to Encode\n";
116 $message and print STDERR "$name error: $message\n";
117 print STDERR <<"EOT";
118 $name [-f from_encoding] [-t to_encoding] [-s string] [files...]
120 $name -r encoding_alias
122 lists all available encodings
123 -r,--resolve encoding_alias
124 resolve encoding to its (Encode) canonical name
125 -f,--from from_encoding
126 when omitted, the current locale will be used
128 when omitted, the current locale will be used
130 "string" will be the input instead of STDIN or files
131 The following are mainly of interest to Encode hackers:
132 -D,--debug show debug information
133 -C N | -c check the validity of the input
134 -S,--scheme scheme use the scheme for conversion
135 Those are handy when you can only see ascii characters:
147 piconv -- iconv(1), reinvented in perl
151 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
161 B<piconv> is a Perl version of B<iconv>, a character encoding converter
162 widely available for various Unixen today. This script was primarily
163 a technology demonstrator for Perl 5.8.0, but you can use piconv in
164 place of iconv in virtually any case.
166 piconv converts the character encoding of either STDIN or the files
167 given as arguments and prints the result to STDOUT.
169 Here is the list of options. Each option can be in short format (-f)
174 =item -f,--from from_encoding
176 Specifies the encoding you are converting from. Unlike B<iconv>,
177 this option can be omitted. In such cases, the current locale is used.
179 =item -t,--to to_encoding
181 Specifies the encoding you are converting to. Unlike B<iconv>,
182 this option can be omitted. In such cases, the current locale is used.
184 Therefore, when both -f and -t are omitted, B<piconv> just acts
187 =item -s,--string I<string>
189 Uses I<string> instead of a file as the source of text.
193 Lists all available encodings, one per line, in case-insensitive
194 order. Note that only the canonical names are listed; many aliases
195 exist. For example, the names are case-insensitive, and many standard
196 and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
197 instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
198 for a full discussion.
200 =item -C,--check I<N>
202 Check the validity of the stream if I<N> = 1. When I<N> = -1, something
203 interesting happens when it encounters an invalid character.
215 Applies PERLQQ, HTMLCREF, XMLCREF, respectively. Try
217 piconv -f utf8 -t ascii --perlqq
227 Invokes debugging mode. Primarily for Encode hackers.
229 =item -S,--scheme scheme
231 Selects which scheme is to be used for conversion. Available schemes
238 Uses Encode::from_to for conversion. This is the default.
242 Input strings are decode()d then encode()d. A straight two-step
247 The new perlIO layer is used. NI-S' favorite.
251 Like the I<-D> option, this is also for Encode hackers.