Actually submit previous change.
[p5sagit/p5-mst-13.2.git] / ext / Encode / bin / unidump
CommitLineData
a999c27c 1#!./perl
2
3use strict;
4use Encode;
5use Getopt::Std;
6my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt);
7$Opt{p} ||= $Opt{P};
8$Opt{e} ||= 'utf8';
9$Opt{f} ||= $Opt{e};
10$Opt{t} ||= $Opt{e};
11$Opt{h} and help();
12
13my ($linebuf, $outbuf);
14my $CPL = $Opt{p} ? 64 : 8;
15my $linenum;
16my $linesperheading = $Opt{H};
17my $nchars;
18our $PrevChunk;
19
20$Opt{h} and help();
21$Opt{p} and do_perl($Opt{s});
22do_dump($Opt{s});
23exit;
24
25#
26
27sub do_perl{
28 my $string = shift;
29 $Opt{P} and print "#!$^X -w\nprint\n";
30 unless ($string){
d1256cb1 31 while(<>){
32 use utf8;
33 $linebuf .= Encode::decode($Opt{f}, $_);
34 while($linebuf){
35 my $chr = render_p(substr($linebuf, 0, 1, ''));
36 length($outbuf) + length($chr) > $CPL and print_P();
37 $outbuf .= $chr;
38 }
39 }
40 $outbuf and print print_P(";");
a999c27c 41 }else{
d1256cb1 42 while($string){
43 my $chr = render_p(substr($string, 0, 1, ''));
44 length($outbuf) + length($chr) > $CPL and print_P();
45 $outbuf .= $chr;
46 }
a999c27c 47 }
48 $outbuf and print print_P(";");
49 exit;
50}
51
52sub render_p{
53 my ($chr, $format) = @_;
54 our %S2pstr;
55 $S2pstr{$chr} and return $S2pstr{$chr}; # \t\n...
56 $chr =~ /[\x20-\x7e]/ and return $chr; # ascii, printable;
57 my $fmt = ($chr =~ /[\x00-\x1f\x7F]/) ?
d1256cb1 58 q(\x%x) : q(\x{%x});
a999c27c 59 return sprintf $fmt, ord($chr);
60}
61
62sub print_P{
63 my $end = shift;
64 $outbuf or return;
65 print '"', encode($Opt{t}, $outbuf), '"';
66 my $tail = $Opt{P} ? $end ? "$end" : "," : '';
67 print $tail, "\n";
68 $outbuf = '';
69}
70
71sub do_dump{
72 my $string = shift;
73 !$Opt{p} and exists $Opt{H} and print_H();
74 unless ($string){
d1256cb1 75 while(<>){
76 use utf8;
77 $linebuf .= Encode::decode($Opt{f}, $_);
78 while (length($linebuf) > $CPL){
79 my $chunk = substr($linebuf, 0, $CPL, '');
80 print_C($chunk, $linenum++);
81 $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
82 }
83 }
84 $linebuf and print_C($linebuf);
a999c27c 85 }else{
d1256cb1 86 while ($string){
87 my $chunk = substr($string, 0, $CPL, '');
88 print_C($chunk, $linenum++);
89 $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
90 }
a999c27c 91 }
92 exit;
93}
94
95sub print_S{
96 print "--------+------------------------------------------------";
97 if ($Opt{C}){
d1256cb1 98 print "-+-----------------";
a999c27c 99 }
100 print "\n";
101}
102sub print_H{
103 print " Offset 0 1 2 3 4 5 6 7";
104 if ($Opt{C}){
d1256cb1 105 print " | 0 1 2 3 4 5 6 7";
a999c27c 106 }
107 print "\n";
108 print_S;
109}
110
111sub print_C{
112 my ($chunk, $linenum) = @_;
113 if (!$Opt{v} and $chunk eq $PrevChunk){
d1256cb1 114 printf "%08x *\n", $linenum*8; return;
a999c27c 115 }
116 $PrevChunk = $chunk;
117 my $end = length($chunk) - 1;
118 my (@ord, @chr);
119 for my $i (0..$end){
d1256cb1 120 use utf8;
121 my $chr = substr($chunk,$i,1);
122 my $ord = ord($chr);
123 my $fmt = $ord <= 0xffff ? " %04x" : " %05x";
124 push @ord, (sprintf $fmt, $ord);
125 $Opt{C} and push @chr, render_c($chr);
a999c27c 126 }
127 if (++$end < 7){
d1256cb1 128 for my $i ($end..7){
129 push @ord, (" " x 6);
130 }
a999c27c 131 }
132 my $line = sprintf "%08x %s", $linenum*8, join('', @ord);
133 $Opt{C} and $line .= sprintf " | %s", join('', @chr);
134 print encode($Opt{t}, $line), "\n";
135}
136
137sub render_c{
138 my ($chr, $format) = @_;
139 our (%S2str, $IsFullWidth);
140 $chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || " ";
141 $chr =~ $IsFullWidth and return $chr; # as is
142 return " " . $chr;
143}
144
145sub help{
146 my $message = shift;
147 use File::Basename;
148 my $name = basename($0);
149 $message and print STDERR "$name error: $message\n";
150 print STDERR <<"EOT";
151Usage:
152 $name -[options...] [files...]
153 $name -[options...] -s "string"
154 $name -h
155 -h prints this message.
156Inherited from hexdump;
157 -C Canonical unidump mode
158 -v prints the duplicate line as is. Without this option,
159 single "*" will be printed instead.
160For unidump only
161 -p prints in perl literals that you can copy and paste directly
162 to your perl script.
163 -P prints in perl executable format!
164 -u prints a bunch of "Uxxxx,". Handy when you want to pass your
165 characters in mailing lists.
166IO Options:
167 -e io_encoding same as "-f io_encoding -t io_encoding"
168 -f from_encoding convert the source stream from this encoding
169 -t to_encoding print to STDOUT in this encoding
170 -s string "string" will be converted instead of STDIN.
171 -H nline prints separater for each nlines of output.
172 0 means only the table headding be printed.
173EOT
174 exit;
175}
176
177BEGIN{
178 our %S2pstr= (
d1256cb1 179 "\\" => '\\\\',
180 "\0" => '\0',
181 "\t" => '\t',
182 "\n" => '\n',
183 "\r" => '\r',
184 "\v" => '\v',
185 "\a" => '\a',
186 "\e" => '\e',
187 "\"" => qq(\\\"),
188 "\'" => qq(\\\'),
189 '$' => '\$',
190 "@" => '\@',
191 "%" => '\%',
192 );
a999c27c 193
194 our %S2str = (
d1256cb1 195 qq(\x00) => q(\0), # NULL
196 qq(\x01) => q(^A), # START OF HEADING
197 qq(\x02) => q(^B), # START OF TEXT
198 qq(\x03) => q(^C), # END OF TEXT
199 qq(\x04) => q(^D), # END OF TRANSMISSION
200 qq(\x05) => q(^E), # ENQUIRY
201 qq(\x06) => q(^F), # ACKNOWLEDGE
202 qq(\x07) => q(\a), # BELL
203 qq(\x08) => q(^H), # BACKSPACE
204 qq(\x09) => q(\t), # HORIZONTAL TABULATION
205 qq(\x0A) => q(\n), # LINE FEED
206 qq(\x0B) => q(\v), # VERTICAL TABULATION
207 qq(\x0C) => q(^L), # FORM FEED
208 qq(\x0D) => q(\r), # CARRIAGE RETURN
209 qq(\x0E) => q(^N), # SHIFT OUT
210 qq(\x0F) => q(^O), # SHIFT IN
211 qq(\x10) => q(^P), # DATA LINK ESCAPE
212 qq(\x11) => q(^Q), # DEVICE CONTROL ONE
213 qq(\x12) => q(^R), # DEVICE CONTROL TWO
214 qq(\x13) => q(^S), # DEVICE CONTROL THREE
215 qq(\x14) => q(^T), # DEVICE CONTROL FOUR
216 qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE
217 qq(\x16) => q(^V), # SYNCHRONOUS IDLE
218 qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK
219 qq(\x18) => q(^X), # CANCEL
220 qq(\x19) => q(^Y), # END OF MEDIUM
221 qq(\x1A) => q(^Z), # SUBSTITUTE
222 qq(\x1B) => q(\e), # ESCAPE (\c[)
223 qq(\x1C) => "^\\", # FILE SEPARATOR
224 qq(\x1D) => "^\]", # GROUP SEPARATOR
225 qq(\x1E) => q(^^), # RECORD SEPARATOR
226 qq(\x1F) => q(^_), # UNIT SEPARATOR
227 );
a999c27c 228 #
229 # Generated out of lib/unicore/EastAsianWidth.txt
230 # will it work ?
231 #
232 our $IsFullWidth =
d1256cb1 233 qr/^[
234 \x{1100}-\x{1159}
235 \x{115F}-\x{115F}
236 \x{2329}-\x{232A}
237 \x{2E80}-\x{2E99}
238 \x{2E9B}-\x{2EF3}
239 \x{2F00}-\x{2FD5}
240 \x{2FF0}-\x{2FFB}
241 \x{3000}-\x{303E}
242 \x{3041}-\x{3096}
243 \x{3099}-\x{30FF}
244 \x{3105}-\x{312C}
245 \x{3131}-\x{318E}
246 \x{3190}-\x{31B7}
247 \x{31F0}-\x{321C}
248 \x{3220}-\x{3243}
249 \x{3251}-\x{327B}
250 \x{327F}-\x{32CB}
251 \x{32D0}-\x{32FE}
252 \x{3300}-\x{3376}
253 \x{337B}-\x{33DD}
254 \x{3400}-\x{4DB5}
255 \x{4E00}-\x{9FA5}
256 \x{33E0}-\x{33FE}
257 \x{A000}-\x{A48C}
258 \x{AC00}-\x{D7A3}
259 \x{A490}-\x{A4C6}
260 \x{F900}-\x{FA2D}
261 \x{FA30}-\x{FA6A}
262 \x{FE30}-\x{FE46}
263 \x{FE49}-\x{FE52}
264 \x{FE54}-\x{FE66}
265 \x{FE68}-\x{FE6B}
266 \x{FF01}-\x{FF60}
267 \x{FFE0}-\x{FFE6}
268 \x{20000}-\x{2A6D6}
269 ]$/xo;
a999c27c 270}
271
272__END__