Actually submit previous change.
[p5sagit/p5-mst-13.2.git] / ext / Encode / bin / piconv
CommitLineData
3ef515df 1#!./perl
d1256cb1 2# $Id: piconv,v 2.2 2006/05/03 18:24:10 dankogai Exp $
67d7b5ef 3#
use 5.8.0;
use strict;
use Encode ;
use Encode::Alias;

# Lookup set of the conversion schemes accepted by -S/--scheme.
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);

use File::Basename;

# Program name used as the prefix of usage and diagnostic messages.
my $name = basename($0);

use Getopt::Long qw(:config no_ignore_case);

# Parsed command-line options.  Getopt::Long stores each value under the
# primary (first) name of its spec, e.g. 'scheme' for 'scheme|S=s'.
my %Opt;

# Show the usage text (help() exits) when the command line cannot be parsed.
help()
    unless GetOptions(
        \%Opt,
        'from|f=s',
        'to|t=s',
        'list|l',
        'string|s=s',
        'check|C=i',
        'c',
        'perlqq|p',
        'htmlcref',
        'xmlcref',
        'debug|D',
        'scheme|S=s',
        'resolve|r=s',
        'help',
    );

# Modes that print something and exit without converting anything.
$Opt{help} and help();
$Opt{list} and list_encodings();
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
defined $Opt{resolve} and resolve_encoding( $Opt{resolve} );

# At least one of -f / -t is required; the other one falls back to the
# locale taken from the environment.
$Opt{from} || $Opt{to} || help();
my $from = $Opt{from} || $locale or help("from_encoding unspecified");
my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");

# -s: convert the given string instead of reading files or STDIN.
# Test definedness rather than truth so that "0" and "" are honoured
# instead of silently falling through to reading STDIN.
if ( defined $Opt{string} ) {
    Encode::from_to( $Opt{string}, $from, $to );
    print $Opt{string};
    exit;
}

# Pick the conversion scheme.  The option is stored under the lower-case
# key 'scheme'; an unknown scheme falls back to the default with a warning.
my $scheme = 'from_to';
if ( defined $Opt{scheme} ) {
    if ( exists $Scheme{ $Opt{scheme} } ) {
        $scheme = $Opt{scheme};
    }
    else {
        warn "$name: unknown scheme '$Opt{scheme}', using $scheme\n";
    }
}

# -c is shorthand for a check value; the fallback flags below override
# whatever -C / -c supplied.
$Opt{check} ||= $Opt{c};
$Opt{perlqq}   and $Opt{check} = Encode::PERLQQ;
$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
$Opt{xmlcref}  and $Opt{check} = Encode::XMLCREF;
bedba681 48
# With -D/--debug, report the chosen scheme and the canonical names that
# Encode resolves the requested source and target encodings to.
if ( $Opt{debug} ) {
    my ( $cfrom, $cto ) =
        map { Encode->getEncoding($_)->name } $from, $to;
    print "Scheme: $scheme\n",
          "From: $from => $cfrom\n",
          "To: $to => $cto\n";
}
58
# we do not use <> (or ARGV) for the sake of binmode()
@ARGV or push @ARGV, \*STDIN;

# The perlio scheme converts through PerlIO encoding layers on the handles;
# the other schemes read raw bytes and convert each line explicitly.
my $use_perlio = $scheme eq 'perlio';

if ($use_perlio) {
    # NI-S favorite
    binmode STDOUT => "raw:encoding($to)";
}
else {
    binmode STDOUT;
}

for my $argv (@ARGV) {

    # An entry is either an already-open handle (the \*STDIN pushed above)
    # or a file name that still has to be opened.
    my $ifh = ref $argv ? $argv : undef;
    unless ($ifh) {
        unless ( open $ifh, "<", $argv ) {

            # Report the failure instead of silently skipping the file,
            # then carry on with the remaining inputs.
            warn "$name: cannot open $argv: $!\n";
            next;
        }
    }

    if ($use_perlio) {
        binmode $ifh => "raw:encoding($from)";
        print while (<$ifh>);
        next;
    }

    binmode $ifh;
    if ( $scheme eq 'from_to' ) {    # default
        while (<$ifh>) {
            Encode::from_to( $_, $from, $to, $Opt{check} );
            print;
        }
    }
    elsif ( $scheme eq 'decode_encode' ) {    # step-by-step
        while (<$ifh>) {
            my $decoded = decode( $from, $_, $Opt{check} );
            my $encoded = encode( $to, $decoded );
            print $encoded;
        }
    }
    else {                                    # won't reach
        die "$name: unknown scheme: $scheme";
    }
}
97
# Print the name of every encoding Encode reports for ":all", one per
# line, then exit successfully.  Never returns.
sub list_encodings {
    my @encoding_names = Encode->encodings(":all");
    my $listing        = join "\n", @encoding_names;
    print $listing, "\n";
    exit 0;
}
102
# Resolve the encoding alias given as the first argument.  Prints the name
# returned by Encode::resolve_alias and exits 0, or warns and exits 1 when
# Encode does not know the alias.  Never returns.
sub resolve_encoding {
    my $canonical = Encode::resolve_alias( $_[0] );
    unless ($canonical) {
        warn "$name: $_[0] is not known to Encode\n";
        exit 1;
    }
    print $canonical, "\n";
    exit 0;
}
113
# Print an optional error message followed by the usage summary to STDERR,
# then exit.  Never returns.
#   $message - optional text; when true it is printed as "<name> error: ..."
sub help {
    my ($message) = @_;
    if ($message) {
        print STDERR "$name error: $message\n";
    }
    my $usage = <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
$name -r encoding_alias
 -l,--list
 lists all available encodings
 -r,--resolve encoding_alias
 resolve encoding to its (Encode) canonical name
 -f,--from from_encoding
 when omitted, the current locale will be used
 -t,--to to_encoding
 when omitted, the current locale will be used
 -s,--string string
 "string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
 -D,--debug show debug information
 -C N | -c check the validity of the input
 -S,--scheme scheme use the scheme for conversion
Those are handy when you can only see ascii characters:
 -p,--perlqq
 --htmlcref
 --xmlcref
EOT
    print STDERR $usage;
    exit;
}
142
143__END__
144
145=head1 NAME
146
147piconv -- iconv(1), reinvented in perl
148
149=head1 SYNOPSIS
150
151 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
152 piconv -l
bedba681 153 piconv [-C N|-c|-p]
154 piconv -S scheme ...
155 piconv -r encoding
156 piconv -D ...
157 piconv -h
67d7b5ef 158
159=head1 DESCRIPTION
160
B<piconv> is a Perl version of B<iconv>, a character encoding converter
widely available for various Unixen today. This script was primarily
a technology demonstrator for Perl 5.8.0, but you can use piconv in
place of iconv for virtually any case.
67d7b5ef 165
0ab8f81e 166piconv converts the character encoding of either STDIN or files
167specified in the argument and prints out to STDOUT.
67d7b5ef 168
bedba681 169Here is the list of options. Each option can be in short format (-f)
170or long (--from).
67d7b5ef 171
172=over 4
173
bedba681 174=item -f,--from from_encoding
67d7b5ef 175
0ab8f81e 176Specifies the encoding you are converting from. Unlike B<iconv>,
177this option can be omitted. In such cases, the current locale is used.
67d7b5ef 178
bedba681 179=item -t,--to to_encoding
67d7b5ef 180
0ab8f81e 181Specifies the encoding you are converting to. Unlike B<iconv>,
182this option can be omitted. In such cases, the current locale is used.
67d7b5ef 183
0ab8f81e 184Therefore, when both -f and -t are omitted, B<piconv> just acts
185like B<cat>.
67d7b5ef 186
bedba681 187=item -s,--string I<string>
67d7b5ef 188
Uses I<string> instead of a file as the source of text.
67d7b5ef 190
bedba681 191=item -l,--list
67d7b5ef 192
ce912cd4 193Lists all available encodings, one per line, in case-insensitive
0ab8f81e 194order. Note that only the canonical names are listed; many aliases
ce912cd4 195exist. For example, the names are case-insensitive, and many standard
0ab8f81e 196and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
ce912cd4 197instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
0ab8f81e 198for a full discussion.
67d7b5ef 199
bedba681 200=item -C,--check I<N>
b2704119 201
202Check the validity of the stream if I<N> = 1. When I<N> = -1, something
203interesting happens when it encounters an invalid character.
204
205=item -c
206
207Same as C<-C 1>.
208
bedba681 209=item -p,--perlqq
b2704119 210
d1256cb1 211=item --htmlcref
212
213=item --xmlcref
214
215Applies PERLQQ, HTMLCREF, XMLCREF, respectively. Try
216
217 piconv -f utf8 -t ascii --perlqq
218
219To see what it does.
b2704119 220
bedba681 221=item -h,--help
d31fa6c4 222
223Show usage.
224
bedba681 225=item -D,--debug
67d7b5ef 226
7748829a 227Invokes debugging mode. Primarily for Encode hackers.
67d7b5ef 228
bedba681 229=item -S,--scheme scheme
67d7b5ef 230
231Selects which scheme is to be used for conversion. Available schemes
0ab8f81e 232are as follows:
67d7b5ef 233
234=over 4
235
236=item from_to
237
238Uses Encode::from_to for conversion. This is the default.
239
240=item decode_encode
241
7748829a 242Input strings are decode()d then encode()d. A straight two-step
67d7b5ef 243implementation.
244
245=item perlio
246
7748829a 247The new perlIO layer is used. NI-S' favorite.
67d7b5ef 248
249=back
250
0ab8f81e 251Like the I<-D> option, this is also for Encode hackers.
67d7b5ef 252
253=back
254
255=head1 SEE ALSO
256
L<iconv(1)>
L<locale(3)>
67d7b5ef 259L<Encode>
ce912cd4 260L<Encode::Supported>
261L<Encode::Alias>
67d7b5ef 262L<PerlIO>
263
264=cut