Commit | Line | Data |
3ef515df |
#!./perl
# $Id: piconv,v 1.25 2002/06/01 18:07:49 dankogai Exp $
#
use 5.8.0;
use strict;
use Encode ;
use Encode::Alias;
# Conversion schemes that may be selected with -S.
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);

use Getopt::Std;

# Parse the command line; options followed by ':' take an argument.
my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt);
$Opt{h} and help();            # -h: usage, then exit
$Opt{l} and list_encodings();  # -l: list encodings, then exit

# Whichever of -f / -t is missing falls back to the locale environment.
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
$Opt{f} || $Opt{t} || help();  # at least one of -f / -t is required
my $from = $Opt{f} || $locale or help("from_encoding unspecified");
my $to = $Opt{t} || $locale or help("to_encoding unspecified");

# -s string: convert the given string instead of reading any input.
# Test with defined() so that false strings such as "0" or "" are still
# treated as the input; Encode::from_to returns the converted length, which
# is 0 (false) for an empty result, so its return value must not be used as
# a plain truth test either.
if (defined $Opt{s}){
    defined Encode::from_to($Opt{s}, $from, $to)
        or die "conversion from $from to $to failed\n";
    print $Opt{s};
    exit;
}

# Unknown or missing -S values silently select the default scheme.
my $scheme = (defined $Opt{S} && exists $Scheme{$Opt{S}})
    ? $Opt{S} : 'from_to';

# Check value handed to Encode: -C N, else -c; -p overrides both.
$Opt{C} ||= $Opt{c};
$Opt{p} and $Opt{C} = Encode::FB_PERLQQ;
67d7b5ef |
23 | |
# Debugging aid (-D): print the selected scheme and the names that the
# requested encodings resolve to.
if ($Opt{D}){
    # getEncoding yields nothing for a name it cannot resolve; report that
    # by name instead of failing on a method call on an undefined value.
    my $enc_from = Encode->getEncoding($from)
        or die "unknown encoding: $from\n";
    my $enc_to = Encode->getEncoding($to)
        or die "unknown encoding: $to\n";
    my $cfrom = $enc_from->name;
    my $cto = $enc_to->name;
    print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
To: $to => $cto
EOT
}
33 | |
# Convert the input with the selected scheme and write the result to STDOUT.
# default
if ($scheme eq 'from_to'){
    # Each line is converted in place, then printed.
    while(<>){
        Encode::from_to($_, $from, $to, $Opt{C}); print;
    };
# step-by-step
}elsif ($scheme eq 'decode_encode'){
    while(<>){
        my $decoded = decode($from, $_, $Opt{C});
        my $encoded = encode($to, $decoded);
        print $encoded;
    };
# NI-S favorite
}elsif ($scheme eq 'perlio'){
    binmode(STDOUT, ":encoding($to)");
    # A layer pushed onto STDIN only affects STDIN itself.  Files named on
    # the command line are read by <> through the ARGV handle, which would
    # bypass the layer, so every input is opened explicitly with the
    # decoding layer instead.  "-" (or no argument at all) means STDIN.
    my @inputs = @ARGV ? @ARGV : ('-');
    my $stdin_layered = 0;
    for my $file (@inputs){
        my $in;
        if ($file eq '-'){
            # Push the layer only once even if "-" is given repeatedly.
            binmode(STDIN, ":encoding($from)") unless $stdin_layered++;
            $in = \*STDIN;
        }elsif (!open($in, "<:encoding($from)", $file)){
            # Like <>, complain about an unreadable file and carry on.
            warn "Can't open $file: $!\n";
            next;
        }
        print while <$in>;
        close($in) unless $file eq '-';
    }
}else{ # won't reach
    die "unknown scheme: $scheme";
}
54 | |
# Print every encoding name reported by Encode->encodings(":all"), one per
# line, to STDOUT and terminate the program.  Does not return.
sub list_encodings{
    my @names = Encode->encodings(":all");
    my $listing = join("\n", @names) . "\n";
    print $listing;
    exit;
}
59 | |
# Print an optional error message followed by the usage summary to STDERR,
# then terminate the program.
#   $message - optional text; when true it is reported as "<name> error: ..."
#              ahead of the usage summary.
# Does not return.  The exit status is 1 when an error message was given, so
# that callers of the script can detect the failure, and 0 otherwise.
sub help{
    my $message = shift;
    use File::Basename;
    my $name = basename($0);
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
-l lists all available encodings (the canonical names, many aliases exist)
-f from_encoding When omitted, the current locale will be used.
-t to_encoding When omitted, the current locale will be used.
-s string "string" will be converted instead of STDIN.
EOT
    exit($message ? 1 : 0);
}
75 | |
76 | __END__ |
77 | |
78 | =head1 NAME |
79 | |
80 | piconv -- iconv(1), reinvented in perl |
81 | |
82 | =head1 SYNOPSIS |
83 | |
84 | piconv [-f from_encoding] [-t to_encoding] [-s string] [files...] |
85 | piconv -l |
86 | |
87 | =head1 DESCRIPTION |
88 | |
0ab8f81e |
89 | B<piconv> is a Perl version of B<iconv>, a character encoding converter |
90 | widely available for various Unixen today. This script was primarily |
91 | a technology demonstrator for Perl 5.8.0, but you can use piconv in the |
92 | place of iconv for virtually any case. |
67d7b5ef |
93 | |
0ab8f81e |
94 | piconv converts the character encoding of either STDIN or files |
95 | specified in the argument and prints out to STDOUT. |
67d7b5ef |
96 | |
0ab8f81e |
97 | Here is the list of options. |
67d7b5ef |
98 | |
99 | =over 4 |
100 | |
101 | =item -f from_encoding |
102 | |
0ab8f81e |
103 | Specifies the encoding you are converting from. Unlike B<iconv>, |
104 | this option can be omitted. In such cases, the current locale is used. |
67d7b5ef |
105 | |
106 | =item -t to_encoding |
107 | |
0ab8f81e |
108 | Specifies the encoding you are converting to. Unlike B<iconv>, |
109 | this option can be omitted. In such cases, the current locale is used. |
67d7b5ef |
110 | |
0ab8f81e |
111 | Note, however, that at least one of -f and -t must be given: when both |
112 | are omitted, B<piconv> prints its usage message and exits. |
67d7b5ef |
113 | |
114 | =item -s I<string> |
115 | |
0ab8f81e |
116 | Uses I<string> instead of a file as the source of text. Same as B<iconv>. |
67d7b5ef |
117 | |
118 | =item -l |
119 | |
ce912cd4 |
120 | Lists all available encodings, one per line, in case-insensitive |
0ab8f81e |
121 | order. Note that only the canonical names are listed; many aliases |
ce912cd4 |
122 | exist. For example, the names are case-insensitive, and many standard |
0ab8f81e |
123 | and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850" |
ce912cd4 |
124 | instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported> |
0ab8f81e |
125 | for a full discussion. |
67d7b5ef |
126 | |
b2704119 |
127 | =item -C I<N> |
128 | |
129 | Check the validity of the stream if I<N> = 1. When I<N> = -1, something |
130 | interesting happens when it encounters an invalid character. |
131 | |
132 | =item -c |
133 | |
134 | Same as C<-C 1>. |
135 | |
136 | =item -p |
137 | |
138 | Uses C<Encode::FB_PERLQQ> as the check value, overriding C<-C> and C<-c>. |
139 | |
d31fa6c4 |
140 | =item -h |
141 | |
142 | Show usage. |
143 | |
67d7b5ef |
144 | =item -D |
145 | |
7748829a |
146 | Invokes debugging mode. Primarily for Encode hackers. |
67d7b5ef |
147 | |
148 | =item -S scheme |
149 | |
150 | Selects which scheme is to be used for conversion. Available schemes |
0ab8f81e |
151 | are as follows: |
67d7b5ef |
152 | |
153 | =over 4 |
154 | |
155 | =item from_to |
156 | |
157 | Uses Encode::from_to for conversion. This is the default. |
158 | |
159 | =item decode_encode |
160 | |
7748829a |
161 | Input strings are decode()d then encode()d. A straight two-step |
67d7b5ef |
162 | implementation. |
163 | |
164 | =item perlio |
165 | |
7748829a |
166 | The new perlIO layer is used. NI-S' favorite. |
67d7b5ef |
167 | |
168 | =back |
169 | |
0ab8f81e |
170 | Like the I<-D> option, this is also for Encode hackers. |
67d7b5ef |
171 | |
172 | =back |
173 | |
174 | =head1 SEE ALSO |
175 | |
176 | L<iconv(1)> |
177 | L<locale(3)> |
178 | L<Encode> |
ce912cd4 |
179 | L<Encode::Supported> |
180 | L<Encode::Alias> |
67d7b5ef |
181 | L<PerlIO> |
182 | |
183 | =cut |