Upgrade to Encode 1.42, from Dan Kogai.
[p5sagit/p5-mst-13.2.git] / ext / Encode / bin / piconv
CommitLineData
3ef515df 1#!./perl
b2704119 2# $Id: piconv,v 1.22 2002/04/16 23:35:00 dankogai Exp $
67d7b5ef 3#
4use 5.7.3;
5use strict;
6use Encode ;
7use Encode::Alias;
# Conversion schemes selectable with -S.  Any other value (or no -S at
# all) silently falls back to 'from_to'.
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);

use Getopt::Std;

my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt);
$Opt{h} and help();
$Opt{l} and list_encodings();

# Locale fallback for an omitted -f or -t.  LC_ALL takes precedence over
# LC_CTYPE, which takes precedence over LANG.
my $locale = $ENV{LC_ALL} || $ENV{LC_CTYPE} || $ENV{LANG};
# A locale name looks like "ja_JP.eucJP@modifier"; only the codeset part
# between "." and "@" can serve as an encoding name.
if (defined $locale and $locale =~ /\.([^@]+)/) {
    $locale = $1;
}

# At least one of -f / -t must be given; otherwise show the usage.
$Opt{f} || $Opt{t} || help();
my $from = $Opt{f} || $locale or help("from_encoding unspecified");
my $to   = $Opt{t} || $locale or help("to_encoding unspecified");

# -s string: convert the argument itself and stop.  Test for definedness,
# not truth, so that "-s 0" is converted too, and do not make the output
# depend on from_to's return value (the resulting length, which may be 0).
if (defined $Opt{s}) {
    Encode::from_to($Opt{s}, $from, $to);
    print $Opt{s};
    exit;
}

my $scheme = (defined $Opt{S} and exists $Scheme{$Opt{S}})
    ? $Opt{S} : 'from_to';

# -c is shorthand for -C 1, -p for -C -1.
$Opt{C} ||= $Opt{c};
$Opt{p} and $Opt{C} = -1;

# -D: show the scheme and how each encoding name resolves.  An unknown
# name is reported here instead of dying on an undefined object.
if ($Opt{D}){
    my ($cfrom, $cto) = map {
        my $enc = Encode->getEncoding($_);
        $enc ? $enc->name : '(unknown encoding)';
    } ($from, $to);
    print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
To: $to => $cto
EOT
}

# default: convert each input line in place
if ($scheme eq 'from_to'){
    while(<>){
        Encode::from_to($_, $from, $to, $Opt{C});
        print;
    }
# step-by-step: decode to Perl's internal form, then encode
}elsif ($scheme eq 'decode_encode'){
    while(<>){
        my $decoded = decode($from, $_, $Opt{C});
        my $encoded = encode($to, $decoded);
        print $encoded;
    }
# NI-S favorite: let PerlIO layers do the conversion
}elsif ($scheme eq 'perlio'){
    binmode(STDOUT, ":encoding($to)");
    # The decoding layer has to sit on the handle that is actually read.
    # <> reads file arguments through ARGV, not STDIN, so each input is
    # opened explicitly; "-" or no argument at all means STDIN.
    for my $file (@ARGV ? @ARGV : '-'){
        my $in;
        if ($file eq '-'){
            $in = \*STDIN;
            binmode($in, ":encoding($from)");
        }elsif (!open($in, "<:encoding($from)", $file)){
            # Same policy as <>: complain and go on to the next file.
            warn "Can't open $file: $!\n";
            next;
        }
        print while <$in>;
        close $in unless $file eq '-';
    }
}else{ # won't reach
    die "unknown scheme: $scheme";
}
54
# list_encodings()
#
# Handler for -l: print the name of every encoding reported by
# Encode->encodings(":all"), one per line, then terminate the program.
sub list_encodings {
    my @canonical_names = Encode->encodings(":all");
    my $listing = join("\n", @canonical_names);
    print $listing . "\n";
    exit;
}
59
# help([$message])
#
# Print the usage summary to STDERR and terminate the program.
#
#   $message  optional error text; when given it is printed first as
#             "<script name> error: <message>".
#
# Never returns.  The exit status is 1 when an error message was given,
# so that callers can detect the failure, and 0 for a plain usage request.
sub help{
    my $message = shift;
    use File::Basename;
    my $name = basename($0);
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
  -l                lists all available encodings (the canonical names, many aliases exist)
  -f from_encoding  When omitted, the current locale will be used.
  -t to_encoding    When omitted, the current locale will be used.
  -s string         "string" will be converted instead of STDIN.
  -C N              Check the validity of the stream (see the manual).
  -c                Same as -C 1.
  -p                Same as -C -1.
  -h                Show this usage.
  -D                Debugging mode.
  -S scheme         Conversion scheme: from_to (default), decode_encode or perlio.
EOT
    exit($message ? 1 : 0);
}
75
76__END__
77
78=head1 NAME
79
80piconv -- iconv(1), reinvented in perl
81
82=head1 SYNOPSIS
83
84 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
85 piconv -l
86
87=head1 DESCRIPTION
88
B<piconv> is a Perl version of F<iconv>, a character encoding converter
widely available for various Unixen today. This script was primarily
a technology demonstrator for Perl 5.8.0, but you can use piconv in
place of iconv in virtually any case.
93
94piconv converts character encoding of either STDIN or files specified
95in the argument and prints out to STDOUT.
96
Here is the list of options.
98
99=over 4
100
101=item -f from_encoding
102
103Specifies the encoding you are converting from. Unlike F<iconv>,
7748829a 104this option can be omitted. In such cases the current locale is used.
67d7b5ef 105
106=item -t to_encoding
107
108Specifies the encoding you are converting to. Unlike F<iconv>,
7748829a 109this option can be omitted. In such cases the current locale is used.
67d7b5ef 110
111Therefore when both -f and -t are omitted, F<piconv> just acts like F<cat>.
112
113=item -s I<string>
114
Uses I<string> instead of a file as the source of text. Same as F<iconv>.
116
117=item -l
118
Lists all available encodings, one per line, in case-insensitive
order. Note that only the canonical names are listed; many aliases
exist. For example, the names are case-insensitive, and many standard
and common aliases work, like "latin1" for "ISO 8859-1", or "ibm850"
instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
for the full discussion.
67d7b5ef 125
b2704119 126=item -C I<N>
127
128Check the validity of the stream if I<N> = 1. When I<N> = -1, something
129interesting happens when it encounters an invalid character.
130
131=item -c
132
133Same as C<-C 1>.
134
135=item -p
136
137Same as C<-C -1>.
138
d31fa6c4 139=item -h
140
141Show usage.
142
67d7b5ef 143=item -D
144
7748829a 145Invokes debugging mode. Primarily for Encode hackers.
67d7b5ef 146
147=item -S scheme
148
Selects which scheme is to be used for conversion. The available schemes
are as follows:
151
152=over 4
153
154=item from_to
155
156Uses Encode::from_to for conversion. This is the default.
157
158=item decode_encode
159
7748829a 160Input strings are decode()d then encode()d. A straight two-step
67d7b5ef 161implementation.
162
163=item perlio
164
7748829a 165The new perlIO layer is used. NI-S' favorite.
67d7b5ef 166
167=back
168
Like the I<-D> option, this is also for Encode hackers.
170
171=back
172
173=head1 SEE ALSO
174
175L<iconv(1)>
176L<locale(3)>
177L<Encode>
ce912cd4 178L<Encode::Supported>
179L<Encode::Alias>
67d7b5ef 180L<PerlIO>
181
182=cut