Commit | Line | Data |
3ef515df |
1 | #!./perl |
7237418a |
2 | # $Id: piconv,v 2.0 2004/05/16 20:55:16 dankogai Exp $ |
67d7b5ef |
3 | # |
9160fdbd |
4 | use 5.8.0; |
67d7b5ef |
5 | use strict; |
6 | use Encode ; |
7 | use Encode::Alias; |
8 | my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio); |
9 | |
bedba681 |
10 | use File::Basename; |
11 | my $name = basename($0); |
12 | |
c14d784c |
13 | use Getopt::Long qw(:config no_ignore_case); |
bedba681 |
14 | |
15 | my %Opt; |
16 | |
17 | help() |
18 | unless |
19 | GetOptions(\%Opt, |
20 | 'from|f=s', |
21 | 'to|t=s', |
22 | 'list|l', |
23 | 'string|s=s', |
24 | 'check|C=i', |
25 | 'c', |
26 | 'perlqq|p', |
27 | 'debug|D', |
28 | 'scheme|S=s', |
29 | 'resolve|r=s', |
30 | 'help', |
31 | ); |
32 | |
33 | $Opt{help} and help(); |
34 | $Opt{list} and list_encodings(); |
67d7b5ef |
35 | my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG}; |
bedba681 |
36 | defined $Opt{resolve} and resolve_encoding($Opt{resolve}); |
37 | $Opt{from} || $Opt{to} || help(); |
38 | my $from = $Opt{from} || $locale or help("from_encoding unspecified"); |
39 | my $to = $Opt{to} || $locale or help("to_encoding unspecified"); |
40 | $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit; |
41 | my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to'; |
42 | $Opt{check} ||= $Opt{c}; |
a3be20bf |
43 | $Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ; |
bedba681 |
44 | |
45 | if ($Opt{debug}){ |
67d7b5ef |
46 | my $cfrom = Encode->getEncoding($from)->name; |
47 | my $cto = Encode->getEncoding($to)->name; |
ce912cd4 |
48 | print <<"EOT"; |
67d7b5ef |
49 | Scheme: $scheme |
50 | From: $from => $cfrom |
51 | To: $to => $cto |
52 | EOT |
53 | } |
54 | |
55 | # default |
56 | if ($scheme eq 'from_to'){ |
57 | while(<>){ |
bedba681 |
58 | Encode::from_to($_, $from, $to, $Opt{check}); print; |
67d7b5ef |
59 | }; |
60 | # step-by-step |
61 | }elsif ($scheme eq 'decode_encode'){ |
62 | while(<>){ |
bedba681 |
63 | my $decoded = decode($from, $_, $Opt{check}); |
67d7b5ef |
64 | my $encoded = encode($to, $decoded); |
65 | print $encoded; |
66 | }; |
67 | # NI-S favorite |
68 | }elsif ($scheme eq 'perlio'){ |
69 | binmode(STDIN, ":encoding($from)"); |
70 | binmode(STDOUT, ":encoding($to)"); |
71 | while(<>){ print; } |
bedba681 |
72 | } else { # won't reach |
73 | die "$name: unknown scheme: $scheme"; |
67d7b5ef |
74 | } |
75 | |
76 | sub list_encodings{ |
ce912cd4 |
77 | print join("\n", Encode->encodings(":all")), "\n"; |
bedba681 |
78 | exit 0; |
79 | } |
80 | |
81 | sub resolve_encoding { |
82 | if (my $alias = Encode::resolve_alias($_[0])) { |
83 | print $alias, "\n"; |
84 | exit 0; |
85 | } else { |
86 | warn "$name: $_[0] is not known to Encode\n"; |
87 | exit 1; |
88 | } |
67d7b5ef |
89 | } |
90 | |
91 | sub help{ |
92 | my $message = shift; |
67d7b5ef |
93 | $message and print STDERR "$name error: $message\n"; |
94 | print STDERR <<"EOT"; |
95 | $name [-f from_encoding] [-t to_encoding] [-s string] [files...] |
96 | $name -l |
bedba681 |
97 | $name -r encoding_alias |
98 | -l,--list |
99 | lists all available encodings |
100 | -r,--resolve encoding_alias |
101 | resolve encoding to its (Encode) canonical name |
102 | -f,--from from_encoding |
103 | when omitted, the current locale will be used |
104 | -t,--to to_encoding |
105 | when omitted, the current locale will be used |
106 | -s,--string string |
107 | "string" will be the input instead of STDIN or files |
108 | The following are mainly of interest to Encode hackers: |
109 | -D,--debug show debug information |
110 | -C N | -c | -p check the validity of the input |
111 | -S,--scheme scheme use the scheme for conversion |
67d7b5ef |
112 | EOT |
113 | exit; |
114 | } |
115 | |
116 | __END__ |
117 | |
118 | =head1 NAME |
119 | |
120 | piconv -- iconv(1), reinvented in perl |
121 | |
122 | =head1 SYNOPSIS |
123 | |
124 | piconv [-f from_encoding] [-t to_encoding] [-s string] [files...] |
125 | piconv -l |
bedba681 |
126 | piconv [-C N|-c|-p] |
127 | piconv -S scheme ... |
128 | piconv -r encoding |
129 | piconv -D ... |
130 | piconv -h |
67d7b5ef |
131 | |
132 | =head1 DESCRIPTION |
133 | |
0ab8f81e |
134 | B<piconv> is perl version of B<iconv>, a character encoding converter |
135 | widely available for various Unixen today. This script was primarily |
136 | a technology demonstrator for Perl 5.8.0, but you can use piconv in the |
137 | place of iconv for virtually any case. |
67d7b5ef |
138 | |
0ab8f81e |
139 | piconv converts the character encoding of either STDIN or files |
140 | specified in the argument and prints out to STDOUT. |
67d7b5ef |
141 | |
bedba681 |
142 | Here is the list of options. Each option can be in short format (-f) |
143 | or long (--from). |
67d7b5ef |
144 | |
145 | =over 4 |
146 | |
bedba681 |
147 | =item -f,--from from_encoding |
67d7b5ef |
148 | |
0ab8f81e |
149 | Specifies the encoding you are converting from. Unlike B<iconv>, |
150 | this option can be omitted. In such cases, the current locale is used. |
67d7b5ef |
151 | |
bedba681 |
152 | =item -t,--to to_encoding |
67d7b5ef |
153 | |
0ab8f81e |
154 | Specifies the encoding you are converting to. Unlike B<iconv>, |
155 | this option can be omitted. In such cases, the current locale is used. |
67d7b5ef |
156 | |
0ab8f81e |
157 | Therefore, when both -f and -t are omitted, B<piconv> just acts |
158 | like B<cat>. |
67d7b5ef |
159 | |
bedba681 |
160 | =item -s,--string I<string> |
67d7b5ef |
161 | |
bedba681 |
162 | uses I<string> instead of file for the source of text. |
67d7b5ef |
163 | |
bedba681 |
164 | =item -l,--list |
67d7b5ef |
165 | |
ce912cd4 |
166 | Lists all available encodings, one per line, in case-insensitive |
0ab8f81e |
167 | order. Note that only the canonical names are listed; many aliases |
ce912cd4 |
168 | exist. For example, the names are case-insensitive, and many standard |
0ab8f81e |
169 | and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850" |
ce912cd4 |
170 | instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported> |
0ab8f81e |
171 | for a full discussion. |
67d7b5ef |
172 | |
bedba681 |
173 | =item -C,--check I<N> |
b2704119 |
174 | |
175 | Check the validity of the stream if I<N> = 1. When I<N> = -1, something |
176 | interesting happens when it encounters an invalid character. |
177 | |
178 | =item -c |
179 | |
180 | Same as C<-C 1>. |
181 | |
bedba681 |
182 | =item -p,--perlqq |
b2704119 |
183 | |
184 | Same as C<-C -1>. |
185 | |
bedba681 |
186 | =item -h,--help |
d31fa6c4 |
187 | |
188 | Show usage. |
189 | |
bedba681 |
190 | =item -D,--debug |
67d7b5ef |
191 | |
7748829a |
192 | Invokes debugging mode. Primarily for Encode hackers. |
67d7b5ef |
193 | |
bedba681 |
194 | =item -S,--scheme scheme |
67d7b5ef |
195 | |
196 | Selects which scheme is to be used for conversion. Available schemes |
0ab8f81e |
197 | are as follows: |
67d7b5ef |
198 | |
199 | =over 4 |
200 | |
201 | =item from_to |
202 | |
203 | Uses Encode::from_to for conversion. This is the default. |
204 | |
205 | =item decode_encode |
206 | |
7748829a |
207 | Input strings are decode()d then encode()d. A straight two-step |
67d7b5ef |
208 | implementation. |
209 | |
210 | =item perlio |
211 | |
7748829a |
212 | The new perlIO layer is used. NI-S' favorite. |
67d7b5ef |
213 | |
214 | =back |
215 | |
0ab8f81e |
216 | Like the I<-D> option, this is also for Encode hackers. |
67d7b5ef |
217 | |
218 | =back |
219 | |
220 | =head1 SEE ALSO |
221 | |
bedba681 |
222 | L<iconv/1> |
223 | L<locale/3> |
67d7b5ef |
224 | L<Encode> |
ce912cd4 |
225 | L<Encode::Supported> |
226 | L<Encode::Alias> |
67d7b5ef |
227 | L<PerlIO> |
228 | |
229 | =cut |