Commit | Line | Data |
3ef515df |
1 | #!./perl |
85982a32 |
2 | # $Id: piconv,v 1.23 2002/04/19 05:36:43 dankogai Exp $ |
67d7b5ef |
3 | # |
4 | use 5.7.3; |
5 | use strict; |
6 | use Encode ; |
7 | use Encode::Alias; |
8 | my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio); |
9 | |
10 | use Getopt::Std; |
11 | |
b2704119 |
# ----------------------------------------------------------------------
# Command-line processing.
#
# Flags:            -p -c -h -D -l
# With an argument: -C check, -S scheme, -f from, -t to, -s string
# ----------------------------------------------------------------------
my %Opt;
getopts("pcC:hDS:lf:t:s:", \%Opt);

$Opt{h} and help();
$Opt{l} and list_encodings();

# POSIX precedence for the locale environment: LC_ALL overrides
# LC_CTYPE, which in turn overrides LANG.
my $locale = $ENV{LC_ALL} || $ENV{LC_CTYPE} || $ENV{LANG};

# At least one of -f / -t must be given; the missing one falls back to
# the locale setting.
$Opt{f} || $Opt{t} || help();
my $from = $Opt{f} || $locale or help("from_encoding unspecified");
my $to   = $Opt{t} || $locale or help("to_encoding unspecified");

# -c is shorthand for "-C 1"; -p forces the PERLQQ fallback mode.
# Resolved before -s is handled so that the check mode applies to the
# string conversion as well as to stream conversion.
$Opt{C} ||= $Opt{c};
$Opt{p} and $Opt{C} = Encode::FB_PERLQQ;

# -s string: convert the given string instead of any input stream.
# Tested with defined() so that strings such as "0" are not mistaken
# for "option absent", and a failed conversion is reported instead of
# silently falling through to reading STDIN.
if (defined $Opt{s}) {
    defined Encode::from_to($Opt{s}, $from, $to, $Opt{C})
        or die "$0: cannot convert -s string from $from to $to\n";
    print $Opt{s};
    exit;
}

# Unknown or missing -S values fall back to the default scheme.
my $scheme = (defined $Opt{S} && exists $Scheme{$Opt{S}})
    ? $Opt{S}
    : 'from_to';

# -D: show which scheme is in effect and the canonical names the
# requested encodings resolve to.
if ($Opt{D}){
    my $from_obj = Encode->getEncoding($from)
        or die "$0: unknown encoding: $from\n";
    my $to_obj = Encode->getEncoding($to)
        or die "$0: unknown encoding: $to\n";
    my $cfrom = $from_obj->name;
    my $cto   = $to_obj->name;
    print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
To: $to => $cto
EOT
}
33 | |
# Convert the input with the selected scheme and print the result to
# STDOUT.  Input is taken from the files named in @ARGV, or from STDIN
# when no file is given.

# default: convert every line in place with Encode::from_to
if ($scheme eq 'from_to'){
    while(<>){
        Encode::from_to($_, $from, $to, $Opt{C});
        print;
    }
# step-by-step: decode() the line, then encode() the result
}elsif ($scheme eq 'decode_encode'){
    while(<>){
        my $decoded = decode($from, $_, $Opt{C});
        my $encoded = encode($to, $decoded);
        print $encoded;
    }
# NI-S favorite: let PerlIO encoding layers do the conversion
}elsif ($scheme eq 'perlio'){
    binmode(STDOUT, ":encoding($to)");
    # "<>" reads files named on the command line through the ARGV
    # handle, so a layer pushed on STDIN alone would leave file input
    # undecoded.  Open every input explicitly with the decoding layer.
    my @inputs = @ARGV ? @ARGV : ('-');
    my $stdin_layered = 0;
    for my $input (@inputs){
        my $in;
        if ($input eq '-'){
            $in = \*STDIN;
            # push the layer only once even if "-" is listed repeatedly
            binmode($in, ":encoding($from)") unless $stdin_layered++;
        }else{
            unless (open $in, "<:encoding($from)", $input){
                warn "Can't open $input: $!\n";
                next;
            }
        }
        while(<$in>){ print; }
        close $in unless $input eq '-';
    }
}else{ # won't reach
    die "unknown scheme: $scheme";
}
54 | |
# Print the names returned by Encode->encodings(":all"), one per line,
# on STDOUT and terminate the program.
sub list_encodings{
    my @canonical = Encode->encodings(":all");
    my $listing   = join "\n", @canonical;
    print "$listing\n";
    exit;
}
59 | |
# Print an optional error message followed by the usage summary on
# STDERR, then terminate the program.
#
#   $message - optional; when given it is reported first, prefixed with
#              the script's basename and "error:".
#
# Exits with status 1 when an error message was supplied and 0
# otherwise, so that a failed invocation can be told apart from a plain
# usage request.
sub help{
    my $message = shift;
    use File::Basename;
    my $name = basename($0);
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
-l lists all available encodings (the canonical names, many aliases exist)
-f from_encoding When omitted, the current locale will be used.
-t to_encoding When omitted, the current locale will be used.
-s string "string" will be converted instead of STDIN.
EOT
    exit($message ? 1 : 0);
}
75 | |
76 | __END__ |
77 | |
78 | =head1 NAME |
79 | |
80 | piconv -- iconv(1), reinvented in perl |
81 | |
82 | =head1 SYNOPSIS |
83 | |
84 | piconv [-f from_encoding] [-t to_encoding] [-s string] [files...] |
85 | piconv -l |
86 | |
87 | =head1 DESCRIPTION |
88 | |
89 | B<piconv> is a Perl version of F<iconv>, a character encoding converter |
7748829a |
90 | widely available for various Unixen today. This script was primarily |
91 | a technology demonstrator for Perl 5.8.0, but you can use piconv in |
67d7b5ef |
92 | place of iconv in virtually any case. |
93 | |
94 | piconv converts character encoding of either STDIN or files specified |
95 | in the argument and prints out to STDOUT. |
96 | |
97 | Here is the list of options. |
98 | |
99 | =over 4 |
100 | |
101 | =item -f from_encoding |
102 | |
103 | Specifies the encoding you are converting from. Unlike F<iconv>, |
7748829a |
104 | this option can be omitted. In such cases the current locale is used. |
67d7b5ef |
105 | |
106 | =item -t to_encoding |
107 | |
108 | Specifies the encoding you are converting to. Unlike F<iconv>, |
7748829a |
109 | this option can be omitted. In such cases the current locale is used. |
67d7b5ef |
110 | |
111 | Note that at least one of -f and -t must be given: when both are omitted, F<piconv> prints its usage message and exits. |
112 | |
113 | =item -s I<string> |
114 | |
115 | Uses I<string> instead of a file as the source of text. Same as F<iconv>. |
116 | |
117 | =item -l |
118 | |
ce912cd4 |
119 | Lists all available encodings, one per line, in case-insensitive |
120 | order. Note that only the canonical names are listed, many aliases |
121 | exist. For example, the names are case-insensitive, and many standard |
122 | and common aliases work, like "latin1" for "ISO 8859-1", or "ibm850" |
123 | instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported> |
124 | for the full discussion. |
67d7b5ef |
125 | |
b2704119 |
126 | =item -C I<N> |
127 | |
128 | Check the validity of the stream if I<N> = 1. When I<N> = -1, something |
129 | interesting happens when it encounters an invalid character. |
130 | |
131 | =item -c |
132 | |
133 | Same as C<-C 1>. |
134 | |
135 | =item -p |
136 | |
137 | Same as C<-C -1>. |
138 | |
d31fa6c4 |
139 | =item -h |
140 | |
141 | Show usage. |
142 | |
67d7b5ef |
143 | =item -D |
144 | |
7748829a |
145 | Invokes debugging mode. Primarily for Encode hackers. |
67d7b5ef |
146 | |
147 | =item -S scheme |
148 | |
149 | Selects which scheme is to be used for conversion. Available schemes |
150 | are as follows: |
151 | |
152 | =over 4 |
153 | |
154 | =item from_to |
155 | |
156 | Uses Encode::from_to for conversion. This is the default. |
157 | |
158 | =item decode_encode |
159 | |
7748829a |
160 | Input strings are decode()d then encode()d. A straight two-step |
67d7b5ef |
161 | implementation. |
162 | |
163 | =item perlio |
164 | |
7748829a |
165 | The new perlIO layer is used. NI-S' favorite. |
67d7b5ef |
166 | |
167 | =back |
168 | |
169 | Like the I<-D> option, this is also for Encode hackers. |
170 | |
171 | =back |
172 | |
173 | =head1 SEE ALSO |
174 | |
175 | L<iconv(1)> |
176 | L<locale(3)> |
177 | L<Encode> |
ce912cd4 |
178 | L<Encode::Supported> |
179 | L<Encode::Alias> |
67d7b5ef |
180 | L<PerlIO> |
181 | |
182 | =cut |