Encode 2.34
[p5sagit/p5-mst-13.2.git] / ext / Encode / bin / piconv
CommitLineData
3ef515df 1#!./perl
1a1e8f7d 2# $Id: piconv,v 2.4 2009/07/08 13:34:15 dankogai Exp $
67d7b5ef 3#
9160fdbd 4use 5.8.0;
67d7b5ef 5use strict;
6use Encode ;
7use Encode::Alias;
8my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
9
bedba681 10use File::Basename;
11my $name = basename($0);
12
c14d784c 13use Getopt::Long qw(:config no_ignore_case);
bedba681 14
15my %Opt;
16
17help()
18 unless
19 GetOptions(\%Opt,
d1256cb1 20 'from|f=s',
21 'to|t=s',
22 'list|l',
23 'string|s=s',
24 'check|C=i',
25 'c',
26 'perlqq|p',
27 'htmlcref',
28 'xmlcref',
29 'debug|D',
30 'scheme|S=s',
31 'resolve|r=s',
32 'help',
33 );
bedba681 34
35$Opt{help} and help();
36$Opt{list} and list_encodings();
67d7b5ef 37my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
bedba681 38defined $Opt{resolve} and resolve_encoding($Opt{resolve});
39$Opt{from} || $Opt{to} || help();
40my $from = $Opt{from} || $locale or help("from_encoding unspecified");
41my $to = $Opt{to} || $locale or help("to_encoding unspecified");
42$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
1a1e8f7d 43my $scheme = do {
44 if (defined $Opt{scheme}) {
45 if (!exists $Scheme{$Opt{scheme}}) {
46 warn "Unknown scheme '$Opt{scheme}', fallback to 'from_to'.\n";
47 'from_to';
48 } else {
49 $Opt{scheme};
50 }
51 } else {
52 'from_to';
53 }
54};
55
bedba681 56$Opt{check} ||= $Opt{c};
d1256cb1 57$Opt{perlqq} and $Opt{check} = Encode::PERLQQ;
58$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
59$Opt{xmlcref} and $Opt{check} = Encode::XMLCREF;
bedba681 60
61if ($Opt{debug}){
67d7b5ef 62 my $cfrom = Encode->getEncoding($from)->name;
63 my $cto = Encode->getEncoding($to)->name;
ce912cd4 64 print <<"EOT";
67d7b5ef 65Scheme: $scheme
66From: $from => $cfrom
67To: $to => $cto
68EOT
69}
70
1a1e8f7d 71my %use_bom = map { $_ => 1 } qw/UTF-16 UTF-32/;
72
8f1ed24a 73# we do not use <> (or ARGV) for the sake of binmode()
d1256cb1 74@ARGV or push @ARGV, \*STDIN;
8f1ed24a 75
d1256cb1 76unless ( $scheme eq 'perlio' ) {
8f1ed24a 77 binmode STDOUT;
1a1e8f7d 78 my $need2slurp = $use_bom{ find_encoding($to)->name };
d1256cb1 79 for my $argv (@ARGV) {
80 my $ifh = ref $argv ? $argv : undef;
1a1e8f7d 81 $ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
d1256cb1 82 $ifh or open $ifh, "<", $argv or next;
83 binmode $ifh;
84 if ( $scheme eq 'from_to' ) { # default
1a1e8f7d 85 if ($need2slurp){
86 local $/;
87 $_ = <$ifh>;
88 Encode::from_to( $_, $from, $to, $Opt{check} );
89 print;
90 }else{
91 while (<$ifh>) {
92 Encode::from_to( $_, $from, $to, $Opt{check} );
93 print;
94 }
95 }
d1256cb1 96 }
97 elsif ( $scheme eq 'decode_encode' ) { # step-by-step
1a1e8f7d 98 if ($need2slurp){
99 local $/;
100 $_ = <$ifh>;
d1256cb1 101 my $decoded = decode( $from, $_, $Opt{check} );
102 my $encoded = encode( $to, $decoded );
103 print $encoded;
1a1e8f7d 104 }else{
105 while (<$ifh>) {
106 my $decoded = decode( $from, $_, $Opt{check} );
107 my $encoded = encode( $to, $decoded );
108 print $encoded;
109 }
110 }
111 }
112 else { # won't reach
d1256cb1 113 die "$name: unknown scheme: $scheme";
114 }
8f1ed24a 115 }
d1256cb1 116}
117else {
118
8f1ed24a 119 # NI-S favorite
120 binmode STDOUT => "raw:encoding($to)";
d1256cb1 121 for my $argv (@ARGV) {
122 my $ifh = ref $argv ? $argv : undef;
1a1e8f7d 123 $ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
d1256cb1 124 $ifh or open $ifh, "<", $argv or next;
125 binmode $ifh => "raw:encoding($from)";
126 print while (<$ifh>);
8f1ed24a 127 }
67d7b5ef 128}
129
d1256cb1 130sub list_encodings {
131 print join( "\n", Encode->encodings(":all") ), "\n";
bedba681 132 exit 0;
133}
134
135sub resolve_encoding {
d1256cb1 136 if ( my $alias = Encode::resolve_alias( $_[0] ) ) {
137 print $alias, "\n";
138 exit 0;
139 }
140 else {
141 warn "$name: $_[0] is not known to Encode\n";
142 exit 1;
bedba681 143 }
67d7b5ef 144}
145
d1256cb1 146sub help {
67d7b5ef 147 my $message = shift;
67d7b5ef 148 $message and print STDERR "$name error: $message\n";
149 print STDERR <<"EOT";
150$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
151$name -l
bedba681 152$name -r encoding_alias
153 -l,--list
154 lists all available encodings
155 -r,--resolve encoding_alias
156 resolve encoding to its (Encode) canonical name
157 -f,--from from_encoding
158 when omitted, the current locale will be used
159 -t,--to to_encoding
160 when omitted, the current locale will be used
161 -s,--string string
162 "string" will be the input instead of STDIN or files
163The following are mainly of interest to Encode hackers:
164 -D,--debug show debug information
d1256cb1 165 -C N | -c check the validity of the input
bedba681 166 -S,--scheme scheme use the scheme for conversion
d1256cb1 167Those are handy when you can only see ascii characters:
168 -p,--perlqq
169 --htmlcref
170 --xmlcref
67d7b5ef 171EOT
d1256cb1 172 exit;
67d7b5ef 173}
174
175__END__
176
177=head1 NAME
178
179piconv -- iconv(1), reinvented in perl
180
181=head1 SYNOPSIS
182
183 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
184 piconv -l
bedba681 185 piconv [-C N|-c|-p]
186 piconv -S scheme ...
187 piconv -r encoding
188 piconv -D ...
189 piconv -h
67d7b5ef 190
191=head1 DESCRIPTION
192
0ab8f81e 193B<piconv> is perl version of B<iconv>, a character encoding converter
194widely available for various Unixen today. This script was primarily
195a technology demonstrator for Perl 5.8.0, but you can use piconv in the
196place of iconv for virtually any case.
67d7b5ef 197
0ab8f81e 198piconv converts the character encoding of either STDIN or files
199specified in the argument and prints out to STDOUT.
67d7b5ef 200
bedba681 201Here is the list of options. Each option can be in short format (-f)
202or long (--from).
67d7b5ef 203
204=over 4
205
bedba681 206=item -f,--from from_encoding
67d7b5ef 207
0ab8f81e 208Specifies the encoding you are converting from. Unlike B<iconv>,
209this option can be omitted. In such cases, the current locale is used.
67d7b5ef 210
bedba681 211=item -t,--to to_encoding
67d7b5ef 212
0ab8f81e 213Specifies the encoding you are converting to. Unlike B<iconv>,
214this option can be omitted. In such cases, the current locale is used.
67d7b5ef 215
0ab8f81e 216Therefore, when both -f and -t are omitted, B<piconv> just acts
217like B<cat>.
67d7b5ef 218
bedba681 219=item -s,--string I<string>
67d7b5ef 220
bedba681 221uses I<string> instead of file for the source of text.
67d7b5ef 222
bedba681 223=item -l,--list
67d7b5ef 224
ce912cd4 225Lists all available encodings, one per line, in case-insensitive
0ab8f81e 226order. Note that only the canonical names are listed; many aliases
ce912cd4 227exist. For example, the names are case-insensitive, and many standard
0ab8f81e 228and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
ce912cd4 229instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
0ab8f81e 230for a full discussion.
67d7b5ef 231
bedba681 232=item -C,--check I<N>
b2704119 233
234Check the validity of the stream if I<N> = 1. When I<N> = -1, something
235interesting happens when it encounters an invalid character.
236
237=item -c
238
239Same as C<-C 1>.
240
bedba681 241=item -p,--perlqq
b2704119 242
d1256cb1 243=item --htmlcref
244
245=item --xmlcref
246
247Applies PERLQQ, HTMLCREF, XMLCREF, respectively. Try
248
249 piconv -f utf8 -t ascii --perlqq
250
251To see what it does.
b2704119 252
bedba681 253=item -h,--help
d31fa6c4 254
255Show usage.
256
bedba681 257=item -D,--debug
67d7b5ef 258
7748829a 259Invokes debugging mode. Primarily for Encode hackers.
67d7b5ef 260
bedba681 261=item -S,--scheme scheme
67d7b5ef 262
263Selects which scheme is to be used for conversion. Available schemes
0ab8f81e 264are as follows:
67d7b5ef 265
266=over 4
267
268=item from_to
269
270Uses Encode::from_to for conversion. This is the default.
271
272=item decode_encode
273
7748829a 274Input strings are decode()d then encode()d. A straight two-step
67d7b5ef 275implementation.
276
277=item perlio
278
7748829a 279The new perlIO layer is used. NI-S' favorite.
67d7b5ef 280
51e4e64d 281You should use this option if you are using UTF-16 and others which
282linefeed is not $/.
283
67d7b5ef 284=back
285
0ab8f81e 286Like the I<-D> option, this is also for Encode hackers.
67d7b5ef 287
288=back
289
290=head1 SEE ALSO
291
1a1e8f7d 292L<iconv(1)>
293L<locale(3)>
67d7b5ef 294L<Encode>
ce912cd4 295L<Encode::Supported>
296L<Encode::Alias>
67d7b5ef 297L<PerlIO>
298
299=cut