Commit | Line | Data |
3ef515df |
1 | #!./perl |
448e90bb |
2 | # $Id: piconv,v 1.21 2002/04/09 20:06:15 dankogai Exp $ |
67d7b5ef |
3 | # |
4 | use 5.7.3; |
5 | use strict; |
6 | use Encode ; |
7 | use Encode::Alias; |
# Conversion schemes selectable with -S.
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);

use Getopt::Std;

# Parse the command line.  getopts() returns false when it meets a switch
# it does not know (it has already warned on STDERR), so treat that as a
# usage error instead of carrying on with a half-parsed command line.
my %Opt;
getopts("hDS:lf:t:s:", \%Opt) or help();
$Opt{h} and help();
$Opt{l} and list_encodings();

# Locale used as the default for whichever of -f / -t is omitted.
# LC_ALL overrides LC_CTYPE, which in turn overrides LANG.
my $locale = $ENV{LC_ALL} || $ENV{LC_CTYPE} || $ENV{LANG};

# At least one side of the conversion has to be given explicitly.
$Opt{f} || $Opt{t} || help();
my $from = $Opt{f} || $locale or help("from_encoding unspecified");
my $to   = $Opt{t} || $locale or help("to_encoding unspecified");

# -s: convert the given string instead of reading any input.  Test with
# defined() so that strings such as "0" or "" are converted as well,
# rather than silently falling through to reading STDIN.
if (defined $Opt{s}){
    Encode::from_to($Opt{s}, $from, $to);
    print $Opt{s};
    exit;
}

# Use the default scheme when -S is absent or names an unknown scheme.
my $scheme = (defined $Opt{S} and exists $Scheme{$Opt{S}})
    ? $Opt{S} : 'from_to';
21 | |
# -D: report the chosen scheme and the canonical name each encoding
# argument resolves to.
if ($Opt{D}){
    # A name that does not resolve yields no encoding object; show a
    # placeholder instead of dying on the method call, so the conversion
    # below can report the unknown encoding itself.
    my ($cfrom, $cto) = map {
        my $enc = Encode->getEncoding($_);
        $enc ? $enc->name : '(unknown)';
    } $from, $to;
    print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
To: $to => $cto
EOT
}
31 | |
# Run the conversion with the selected scheme.  Input is taken from the
# files named on the command line, or from STDIN when there are none.
if ($scheme eq 'from_to'){
    # default: convert each line in place
    while(<>){
        Encode::from_to($_, $from, $to);
        print;
    }
}elsif ($scheme eq 'decode_encode'){
    # step-by-step: decode to Perl's internal form, then encode
    while(<>){
        my $decoded = decode($from, $_);
        my $encoded = encode($to, $decoded);
        print $encoded;
    }
}elsif ($scheme eq 'perlio'){
    # NI-S favorite: let the I/O layers do the conversion
    binmode(STDOUT, ":encoding($to)");
    # <> reads through the ARGV handle, so a layer pushed on STDIN would
    # never be applied to files named on the command line.  Open each
    # input explicitly with the decoding layer instead.
    my @inputs = @ARGV ? @ARGV : ('-');
    for my $file (@inputs){
        my $in;
        if ($file eq '-'){
            $in = \*STDIN;
            binmode($in, ":encoding($from)");
        }else{
            # Like <>, complain about an unreadable file and move on.
            unless (open($in, "<:encoding($from)", $file)){
                warn "Can't open $file: $!\n";
                next;
            }
        }
        while(<$in>){ print; }
        close($in) unless $file eq '-';
    }
}else{ # won't reach
    die "unknown scheme: $scheme";
}
52 | |
# Print every encoding name known to Encode, one per line, then exit.
sub list_encodings{
    my @names   = Encode->encodings(":all");
    my $listing = join("\n", @names);
    print $listing, "\n";
    exit;
}
57 | |
# Print an optional error message and the usage summary to STDERR, then
# exit.  The exit status is 1 when an error message was given and 0 when
# only the usage was requested.
#   $message - optional text describing what was wrong with the invocation
sub help{
    my $message = shift;
    use File::Basename;
    my $name = basename($0);
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [-S scheme] [-D] [files...]
$name -l
$name -h
  -l lists all available encodings (the canonical names, many aliases exist)
  -h shows this usage
  -f from_encoding  When omitted, the current locale will be used.
  -t to_encoding    When omitted, the current locale will be used.
  -s string         "string" will be converted instead of STDIN.
  -S scheme         Conversion scheme: from_to (default), decode_encode or perlio.
  -D                Debugging mode; prints the scheme and resolved encoding names.
EOT
    exit($message ? 1 : 0);
}
73 | |
74 | __END__ |
75 | |
76 | =head1 NAME |
77 | |
78 | piconv -- iconv(1), reinvented in perl |
79 | |
80 | =head1 SYNOPSIS |
81 | |
82 | piconv [-f from_encoding] [-t to_encoding] [-s string] [files...] |
83 | piconv -l |
84 | |
85 | =head1 DESCRIPTION |
86 | |
87 | B<piconv> is a Perl version of F<iconv>, a character encoding converter |
7748829a |
88 | widely available for various Unixen today. This script was primarily |
89 | a technology demonstrator for Perl 5.8.0, but you can use piconv in |
67d7b5ef |
90 | place of iconv in virtually any case. |
91 | |
92 | piconv converts character encoding of either STDIN or files specified |
93 | in the argument and prints out to STDOUT. |
94 | |
95 | Here is the list of options. |
96 | |
97 | =over 4 |
98 | |
99 | =item -f from_encoding |
100 | |
101 | Specifies the encoding you are converting from. Unlike F<iconv>, |
7748829a |
102 | this option can be omitted. In such cases the current locale is used. |
67d7b5ef |
103 | |
104 | =item -t to_encoding |
105 | |
106 | Specifies the encoding you are converting to. Unlike F<iconv>, |
7748829a |
107 | this option can be omitted. In such cases the current locale is used. |
67d7b5ef |
108 | |
109 | Note, however, that at least one of -f and -t must be given; when both are omitted, F<piconv> prints its usage message and exits. |
110 | |
111 | =item -s I<string> |
112 | |
113 | Uses I<string> instead of a file as the source of text. Same as F<iconv>. |
114 | |
115 | =item -l |
116 | |
ce912cd4 |
117 | Lists all available encodings, one per line, in case-insensitive |
118 | order. Note that only the canonical names are listed, many aliases |
119 | exist. For example, the names are case-insensitive, and many standard |
120 | and common aliases work, like "latin1" for "ISO 8859-1", or "ibm850" |
121 | instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported> |
122 | for the full discussion. |
67d7b5ef |
123 | |
d31fa6c4 |
124 | =item -h |
125 | |
126 | Show usage. |
127 | |
67d7b5ef |
128 | =item -D |
129 | |
7748829a |
130 | Invokes debugging mode. Primarily for Encode hackers. |
67d7b5ef |
131 | |
132 | =item -S scheme |
133 | |
134 | Selects which scheme is to be used for conversion. Available schemes |
135 | are as follows: |
136 | |
137 | =over 4 |
138 | |
139 | =item from_to |
140 | |
141 | Uses Encode::from_to for conversion. This is the default. |
142 | |
143 | =item decode_encode |
144 | |
7748829a |
145 | Input strings are decode()d then encode()d. A straight two-step |
67d7b5ef |
146 | implementation. |
147 | |
148 | =item perlio |
149 | |
7748829a |
150 | The new perlIO layer is used. NI-S' favorite. |
67d7b5ef |
151 | |
152 | =back |
153 | |
154 | Like the I<-D> option, this is also for Encode hackers. |
155 | |
156 | =back |
157 | |
158 | =head1 SEE ALSO |
159 | |
160 | L<iconv(1)> |
161 | L<locale(3)> |
162 | L<Encode> |
ce912cd4 |
163 | L<Encode::Supported> |
164 | L<Encode::Alias> |
67d7b5ef |
165 | L<PerlIO> |
166 | |
167 | =cut |