2 # $Id: piconv,v 2.4 2009/07/08 13:34:15 dankogai Exp $
8 my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
11 my $name = basename($0);
13 use Getopt::Long qw(:config no_ignore_case);
# Informational options come first: help() / list_encodings() is called only
# when the corresponding option is true.
35 $Opt{help} and help();
36 $Opt{list} and list_encodings();
# Default encoding taken from the environment: the first true value among
# LC_CTYPE, LC_ALL and LANG (false if none of them is set).
37 my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
# -r is tested with defined() rather than truth, so a false-looking alias
# such as "0" is still handed to resolve_encoding().
38 defined $Opt{resolve} and resolve_encoding($Opt{resolve});
# Neither -f nor -t was given: call help().
39 $Opt{from} || $Opt{to} || help();
# Each side falls back to the locale. `or` binds looser than `=`, so the
# assignment happens first and help() runs only when the assigned value is
# false.
40 my $from = $Opt{from} || $locale or help("from_encoding unspecified");
41 my $to = $Opt{to} || $locale or help("to_encoding unspecified");
# -s mode: Encode::from_to() converts $Opt{string} in place. The string is
# printed and the script exits only if every link of the `and` chain is true;
# otherwise execution continues below.
42 $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
44 if (defined $Opt{scheme}) {
45 if (!exists $Scheme{$Opt{scheme}}) {
46 warn "Unknown scheme '$Opt{scheme}', fallback to 'from_to'.\n";
# Settle the CHECK value that is later passed to the Encode calls: keep -C if
# it is true, otherwise fall back to -c.
56 $Opt{check} ||= $Opt{c};
# The escape-style options override it. They are applied in this order, so
# when several are given the last one listed here (xmlcref) wins.
57 $Opt{perlqq} and $Opt{check} = Encode::PERLQQ;
58 $Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
59 $Opt{xmlcref} and $Opt{check} = Encode::XMLCREF;
62 my $cfrom = Encode->getEncoding($from)->name;
63 my $cto = Encode->getEncoding($to)->name;
# Set of encoding names (as hash keys). It is consulted with the target
# encoding's name to set $need2slurp.
# NOTE(review): presumably these are the encodings that emit a BOM -- confirm.
71 my %use_bom = map { $_ => 1 } qw/UTF-16 UTF-32/;
73 # we do not use <> (or ARGV) for the sake of binmode()
# With no file arguments, queue a reference to the STDIN glob instead; the
# input loops recognise it via ref() and use it directly as the handle.
74 @ARGV or push @ARGV, \*STDIN;
76 unless ( $scheme eq 'perlio' ) {
78 my $need2slurp = $use_bom{ find_encoding($to)->name };
# Walk the inputs; each entry is either a file name or a handle reference
# (such as \*STDIN).
79 for my $argv (@ARGV) {
# An entry that is already a reference is used as the input handle as-is.
80 my $ifh = ref $argv ? $argv : undef;
# Otherwise open the named file for reading; on failure warn and move on to
# the next entry.
81 $ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
# NOTE(review): this second attempt looks like dead code -- after the line
# above either $ifh is true or `next` has already run. Confirm and consider
# removing it.
82 $ifh or open $ifh, "<", $argv or next;
84 if ( $scheme eq 'from_to' ) { # default
88 Encode::from_to( $_, $from, $to, $Opt{check} );
92 Encode::from_to( $_, $from, $to, $Opt{check} );
97 elsif ( $scheme eq 'decode_encode' ) { # step-by-step
101 my $decoded = decode( $from, $_, $Opt{check} );
102 my $encoded = encode( $to, $decoded );
106 my $decoded = decode( $from, $_, $Opt{check} );
107 my $encoded = encode( $to, $decoded );
113 die "$name: unknown scheme: $scheme";
# Conversion through I/O layers (presumably the 'perlio' scheme branch --
# the enclosing condition is not visible here): STDOUT gets an encoding
# layer for the target encoding.
120 binmode STDOUT => "raw:encoding($to)";
# Walk the inputs; each entry is either a file name or a handle reference.
121 for my $argv (@ARGV) {
# An entry that is already a reference is used as the input handle as-is.
122 my $ifh = ref $argv ? $argv : undef;
# Otherwise open the named file for reading; on failure warn and move on to
# the next entry.
123 $ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
# NOTE(review): this second attempt looks like dead code -- after the line
# above either $ifh is true or `next` has already run. Confirm and consider
# removing it.
124 $ifh or open $ifh, "<", $argv or next;
# The input handle gets an encoding layer for the source encoding.
125 binmode $ifh => "raw:encoding($from)";
# Copy line by line; no explicit Encode call is made here, the layers set
# on the two handles are relied on for the conversion.
126 print while (<$ifh>);
131 print join( "\n", Encode->encodings(":all") ), "\n";
135 sub resolve_encoding {
136 if ( my $alias = Encode::resolve_alias( $_[0] ) ) {
141 warn "$name: $_[0] is not known to Encode\n";
148 $message and print STDERR "$name error: $message\n";
149 print STDERR <<"EOT";
150 $name [-f from_encoding] [-t to_encoding] [-s string] [files...]
152 $name -r encoding_alias
154 lists all available encodings
155 -r,--resolve encoding_alias
156 resolve encoding to its (Encode) canonical name
157 -f,--from from_encoding
158 when omitted, the current locale will be used
160 when omitted, the current locale will be used
162 "string" will be the input instead of STDIN or files
163 The following are mainly of interest to Encode hackers:
164 -D,--debug show debug information
165 -C N | -c check the validity of the input
166 -S,--scheme scheme use the scheme for conversion
167 Those are handy when you can only see ascii characters:
179 piconv -- iconv(1), reinvented in perl
183 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
193 B<piconv> is a Perl version of B<iconv>, a character encoding converter
194 widely available for various Unixen today. This script was primarily
195 a technology demonstrator for Perl 5.8.0, but you can use piconv in
196 place of iconv in virtually any case.
198 piconv converts the character encoding of either STDIN or files
199 specified in the argument and prints out to STDOUT.
201 Here is the list of options. Each option can be in short format (-f)
206 =item -f,--from from_encoding
208 Specifies the encoding you are converting from. Unlike B<iconv>,
209 this option can be omitted. In such cases, the current locale is used.
211 =item -t,--to to_encoding
213 Specifies the encoding you are converting to. Unlike B<iconv>,
214 this option can be omitted. In such cases, the current locale is used.
216 Therefore, when both -f and -t are omitted, B<piconv> just acts
219 =item -s,--string I<string>
221 Uses I<string> instead of a file as the source of text.
225 Lists all available encodings, one per line, in case-insensitive
226 order. Note that only the canonical names are listed; many aliases
227 exist. For example, the names are case-insensitive, and many standard
228 and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
229 instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
230 for a full discussion.
232 =item -C,--check I<N>
234 Check the validity of the stream if I<N> = 1. When I<N> = -1, something
235 interesting happens when it encounters an invalid character.
247 Applies PERLQQ, HTMLCREF, XMLCREF, respectively. Try
249 piconv -f utf8 -t ascii --perlqq
259 Invokes debugging mode. Primarily for Encode hackers.
261 =item -S,--scheme scheme
263 Selects which scheme is to be used for conversion. Available schemes
270 Uses Encode::from_to for conversion. This is the default.
274 Input strings are decode()d then encode()d. A straight two-step
279 The new perlIO layer is used. NI-S' favorite.
281 You should use this option if you are using UTF-16 and others which
286 Like the I<-D> option, this is also for Encode hackers.