Upgrade to Encode 2.00.
[p5sagit/p5-mst-13.2.git] / ext / Encode / bin / piconv
CommitLineData
3ef515df 1#!./perl
7237418a 2# $Id: piconv,v 2.0 2004/05/16 20:55:16 dankogai Exp $
67d7b5ef 3#
9160fdbd 4use 5.8.0;
67d7b5ef 5use strict;
6use Encode ;
7use Encode::Alias;
8my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
9
bedba681 10use File::Basename;
11my $name = basename($0);
12
c14d784c 13use Getopt::Long qw(:config no_ignore_case);
bedba681 14
# Parsed command-line options.  Getopt::Long stores each value under the
# primary (first) name of its spec, e.g. -S/--scheme lands in $Opt{scheme}.
my %Opt;

help()
    unless GetOptions(
        \%Opt,
        'from|f=s',
        'to|t=s',
        'list|l',
        'string|s=s',
        'check|C=i',
        'c',
        'perlqq|p',
        'debug|D',
        'scheme|S=s',
        'resolve|r=s',
        'help',
    );

# --help, --list and --resolve are terminal actions: each sub exits.
$Opt{help} and help();
$Opt{list} and list_encodings();

# Fallback encoding name taken from the environment.  POSIX precedence is
# LC_ALL, then LC_CTYPE, then LANG.
my $locale = $ENV{LC_ALL} || $ENV{LC_CTYPE} || $ENV{LANG};
defined $Opt{resolve} and resolve_encoding($Opt{resolve});

# At least one of -f / -t is required; the missing one defaults to the locale.
$Opt{from} || $Opt{to} || help();
my $from = $Opt{from} || $locale or help("from_encoding unspecified");
my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");

# -s: convert the given string instead of reading any input, then stop.
# Tested with defined() so that "0" and "" are honoured, and so that an
# empty conversion result does not fall through to reading STDIN.
if (defined $Opt{string}) {
    defined Encode::from_to($Opt{string}, $from, $to)
        or die "$name: cannot convert string from $from to $to\n";
    print $Opt{string};
    exit;
}

# Conversion scheme: the option key is 'scheme' (lower case).  Unknown
# names fall back to the default, with a warning so typos are visible.
my $scheme = 'from_to';
if (defined $Opt{scheme}) {
    if (exists $Scheme{ $Opt{scheme} }) {
        $scheme = $Opt{scheme};
    }
    else {
        warn "$name: unknown scheme '$Opt{scheme}', falling back to 'from_to'\n";
    }
}

# -c is shorthand for a true check value; -p selects the perlqq fallback.
$Opt{check} ||= $Opt{c};
$Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ();
bedba681 44
# With -D/--debug, print the selected scheme and how each requested
# encoding name maps to the name reported by its Encode object.
if ($Opt{debug}) {
    # getEncoding returns nothing for an unknown name; fail with a clear
    # message instead of calling ->name on an undefined value.
    my $efrom = Encode->getEncoding($from)
        or die "$name: unknown encoding '$from'\n";
    my $eto = Encode->getEncoding($to)
        or die "$name: unknown encoding '$to'\n";
    my $cfrom = $efrom->name;
    my $cto   = $eto->name;
    print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
To: $to => $cto
EOT
}
54
# Convert the input with the selected scheme and write the result to STDOUT.
# NOTE(review): the first two schemes convert one line at a time, which
# presumably breaks for encodings where "\n" is not a single byte (e.g.
# UTF-16) -- confirm before relying on them for such input.
if ($scheme eq 'from_to') {
    # Default: convert each input line in place with Encode::from_to.
    while (<>) {
        Encode::from_to($_, $from, $to, $Opt{check});
        print;
    }
}
elsif ($scheme eq 'decode_encode') {
    # Step-by-step: decode to Perl's internal form, then encode.
    while (<>) {
        my $decoded = decode($from, $_, $Opt{check});
        my $encoded = encode($to, $decoded);
        print $encoded;
    }
}
elsif ($scheme eq 'perlio') {
    # Let PerlIO layers do the conversion.  The input layer has to be put
    # on every handle actually read: a layer on STDIN alone does not apply
    # to files named on the command line, so open each one explicitly.
    binmode(STDOUT, ":encoding($to)");
    my $stdin_layered;
    for my $file (@ARGV ? @ARGV : '-') {
        my $in;
        if ($file eq '-') {
            # '-' (or no file arguments at all) means standard input.
            $stdin_layered++ or binmode(STDIN, ":encoding($from)");
            $in = \*STDIN;
        }
        else {
            unless (open($in, "<:encoding($from)", $file)) {
                # Warn and move on to the next file.
                warn "$name: cannot open $file: $!\n";
                next;
            }
        }
        while (<$in>) { print }
        close $in unless $file eq '-';
    }
}
else {    # won't reach
    die "$name: unknown scheme: $scheme";
}
75
# Print the name of every encoding Encode reports for ":all", one per line,
# then exit with status 0.  Never returns to the caller.
sub list_encodings {
    my @names   = Encode->encodings(":all");
    my $listing = join "\n", @names;
    print $listing, "\n";
    exit 0;
}
80
# Resolve the encoding name or alias given as the first argument.
# Prints the resolved name and exits 0 on success; warns and exits 1 when
# Encode cannot resolve it.  Never returns to the caller.
sub resolve_encoding {
    my $canonical = Encode::resolve_alias($_[0]);
    if (!$canonical) {
        warn "$name: $_[0] is not known to Encode\n";
        exit 1;
    }
    print $canonical, "\n";
    exit 0;
}
90
# Print the usage summary to STDERR and exit.  Never returns.
#
#   $message  optional error text; when given (and true) it is printed
#             before the usage text and the exit status is 1 so callers
#             of the script can detect the failure.  Without a message
#             the exit status is 0.
sub help {
    my $message = shift;
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
$name -r encoding_alias
 -l,--list
 lists all available encodings
 -r,--resolve encoding_alias
 resolve encoding to its (Encode) canonical name
 -f,--from from_encoding
 when omitted, the current locale will be used
 -t,--to to_encoding
 when omitted, the current locale will be used
 -s,--string string
 "string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
 -D,--debug show debug information
 -C N | -c | -p check the validity of the input
 -S,--scheme scheme use the scheme for conversion
EOT
    # An error message means the invocation failed: report that in the
    # exit status instead of always exiting 0.
    exit($message ? 1 : 0);
}
115
116__END__
117
118=head1 NAME
119
120piconv -- iconv(1), reinvented in perl
121
122=head1 SYNOPSIS
123
124 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
125 piconv -l
bedba681 126 piconv [-C N|-c|-p]
127 piconv -S scheme ...
128 piconv -r encoding
129 piconv -D ...
130 piconv -h
67d7b5ef 131
132=head1 DESCRIPTION
133
0ab8f81e 134B<piconv> is a Perl version of B<iconv>, a character encoding converter
135widely available for various Unixen today. This script was primarily
136a technology demonstrator for Perl 5.8.0, but you can use piconv in the
137place of iconv for virtually any case.
67d7b5ef 138
0ab8f81e 139piconv converts the character encoding of either STDIN or files
140specified in the argument and prints out to STDOUT.
67d7b5ef 141
bedba681 142Here is the list of options. Each option can be in short format (-f)
143or long (--from).
67d7b5ef 144
145=over 4
146
bedba681 147=item -f,--from from_encoding
67d7b5ef 148
0ab8f81e 149Specifies the encoding you are converting from. Unlike B<iconv>,
150this option can be omitted. In such cases, the current locale is used.
67d7b5ef 151
bedba681 152=item -t,--to to_encoding
67d7b5ef 153
0ab8f81e 154Specifies the encoding you are converting to. Unlike B<iconv>,
155this option can be omitted. In such cases, the current locale is used.
67d7b5ef 156
0ab8f81e 157At least one of -f and -t must be given, however: when both are omitted,
158B<piconv> prints its usage message and exits.
67d7b5ef 159
bedba681 160=item -s,--string I<string>
67d7b5ef 161
bedba681 162Uses I<string> instead of a file as the source of text.
67d7b5ef 163
bedba681 164=item -l,--list
67d7b5ef 165
ce912cd4 166Lists all available encodings, one per line, in case-insensitive
0ab8f81e 167order. Note that only the canonical names are listed; many aliases
ce912cd4 168exist. For example, the names are case-insensitive, and many standard
0ab8f81e 169and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
ce912cd4 170instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
0ab8f81e 171for a full discussion.
67d7b5ef 172
bedba681 173=item -C,--check I<N>
b2704119 174
175Check the validity of the stream if I<N> = 1. When I<N> = -1, something
176interesting happens when it encounters an invalid character.
177
178=item -c
179
180Same as C<-C 1>.
181
bedba681 182=item -p,--perlqq
b2704119 183
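184Uses C<Encode::FB_PERLQQ> as the check value: data that cannot be converted is output as a Perl-style escape (C<\xHH> or C<\x{HHHH}>).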
185
bedba681 186=item -h,--help
d31fa6c4 187
188Show usage.
189
bedba681 190=item -D,--debug
67d7b5ef 191
7748829a 192Invokes debugging mode. Primarily for Encode hackers.
67d7b5ef 193
bedba681 194=item -S,--scheme scheme
67d7b5ef 195
196Selects which scheme is to be used for conversion. Available schemes
0ab8f81e 197are as follows:
67d7b5ef 198
199=over 4
200
201=item from_to
202
203Uses Encode::from_to for conversion. This is the default.
204
205=item decode_encode
206
7748829a 207Input strings are decode()d then encode()d. A straight two-step
67d7b5ef 208implementation.
209
210=item perlio
211
7748829a 212The new perlIO layer is used. NI-S' favorite.
67d7b5ef 213
214=back
215
0ab8f81e 216Like the I<-D> option, this is also for Encode hackers.
67d7b5ef 217
218=back
219
220=head1 SEE ALSO
221
bedba681 222L<iconv(1)>
223L<locale(3)>
67d7b5ef 224L<Encode>
ce912cd4 225L<Encode::Supported>
226L<Encode::Alias>
67d7b5ef 227L<PerlIO>
228
229=cut