Commit | Line | Data |
a999c27c |
1 | #!./perl |
2 | |
3 | use strict; |
4 | use Encode; |
5 | use Getopt::Std; |
6 | my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt); |
7 | $Opt{p} ||= $Opt{P}; |
8 | $Opt{e} ||= 'utf8'; |
9 | $Opt{f} ||= $Opt{e}; |
10 | $Opt{t} ||= $Opt{e}; |
11 | $Opt{h} and help(); |
12 | |
13 | my ($linebuf, $outbuf); |
14 | my $CPL = $Opt{p} ? 64 : 8; |
15 | my $linenum; |
16 | my $linesperheading = $Opt{H}; |
17 | my $nchars; |
18 | our $PrevChunk; |
19 | |
20 | $Opt{h} and help(); |
21 | $Opt{p} and do_perl($Opt{s}); |
22 | do_dump($Opt{s}); |
23 | exit; |
24 | |
25 | # |
26 | |
27 | sub do_perl{ |
28 | my $string = shift; |
29 | $Opt{P} and print "#!$^X -w\nprint\n"; |
30 | unless ($string){ |
31 | while(<>){ |
32 | use utf8; |
33 | $linebuf .= Encode::decode($Opt{f}, $_); |
34 | while($linebuf){ |
80a5d8e7 |
35 | my $chr = render_p(substr($linebuf, 0, 1, '')); |
a999c27c |
36 | length($outbuf) + length($chr) > $CPL and print_P(); |
37 | $outbuf .= $chr; |
38 | } |
39 | } |
40 | $outbuf and print print_P(";"); |
41 | }else{ |
42 | while($string){ |
80a5d8e7 |
43 | my $chr = render_p(substr($string, 0, 1, '')); |
a999c27c |
44 | length($outbuf) + length($chr) > $CPL and print_P(); |
45 | $outbuf .= $chr; |
46 | } |
47 | } |
48 | $outbuf and print print_P(";"); |
49 | exit; |
50 | } |
51 | |
52 | sub render_p{ |
53 | my ($chr, $format) = @_; |
54 | our %S2pstr; |
55 | $S2pstr{$chr} and return $S2pstr{$chr}; # \t\n... |
56 | $chr =~ /[\x20-\x7e]/ and return $chr; # ascii, printable; |
57 | my $fmt = ($chr =~ /[\x00-\x1f\x7F]/) ? |
58 | q(\x%x) : q(\x{%x}); |
59 | return sprintf $fmt, ord($chr); |
60 | } |
61 | |
62 | sub print_P{ |
63 | my $end = shift; |
64 | $outbuf or return; |
65 | print '"', encode($Opt{t}, $outbuf), '"'; |
66 | my $tail = $Opt{P} ? $end ? "$end" : "," : ''; |
67 | print $tail, "\n"; |
68 | $outbuf = ''; |
69 | } |
70 | |
71 | sub do_dump{ |
72 | my $string = shift; |
73 | !$Opt{p} and exists $Opt{H} and print_H(); |
74 | unless ($string){ |
75 | while(<>){ |
76 | use utf8; |
77 | $linebuf .= Encode::decode($Opt{f}, $_); |
78 | while (length($linebuf) > $CPL){ |
80a5d8e7 |
79 | my $chunk = substr($linebuf, 0, $CPL, ''); |
a999c27c |
80 | print_C($chunk, $linenum++); |
a999c27c |
81 | $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); |
82 | } |
83 | } |
84 | $linebuf and print_C($linebuf); |
85 | }else{ |
86 | while ($string){ |
80a5d8e7 |
87 | my $chunk = substr($string, 0, $CPL, ''); |
a999c27c |
88 | print_C($chunk, $linenum++); |
a999c27c |
89 | $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S(); |
90 | } |
91 | } |
92 | exit; |
93 | } |
94 | |
95 | sub print_S{ |
96 | print "--------+------------------------------------------------"; |
97 | if ($Opt{C}){ |
98 | print "-+-----------------"; |
99 | } |
100 | print "\n"; |
101 | } |
102 | sub print_H{ |
103 | print " Offset 0 1 2 3 4 5 6 7"; |
104 | if ($Opt{C}){ |
105 | print " | 0 1 2 3 4 5 6 7"; |
106 | } |
107 | print "\n"; |
108 | print_S; |
109 | } |
110 | |
111 | sub print_C{ |
112 | my ($chunk, $linenum) = @_; |
113 | if (!$Opt{v} and $chunk eq $PrevChunk){ |
114 | printf "%08x *\n", $linenum*8; return; |
115 | } |
116 | $PrevChunk = $chunk; |
117 | my $end = length($chunk) - 1; |
118 | my (@ord, @chr); |
119 | for my $i (0..$end){ |
120 | use utf8; |
121 | my $chr = substr($chunk,$i,1); |
122 | my $ord = ord($chr); |
123 | my $fmt = $ord <= 0xffff ? " %04x" : " %05x"; |
124 | push @ord, (sprintf $fmt, $ord); |
125 | $Opt{C} and push @chr, render_c($chr); |
126 | } |
127 | if (++$end < 7){ |
128 | for my $i ($end..7){ |
129 | push @ord, (" " x 6); |
130 | } |
131 | } |
132 | my $line = sprintf "%08x %s", $linenum*8, join('', @ord); |
133 | $Opt{C} and $line .= sprintf " | %s", join('', @chr); |
134 | print encode($Opt{t}, $line), "\n"; |
135 | } |
136 | |
137 | sub render_c{ |
138 | my ($chr, $format) = @_; |
139 | our (%S2str, $IsFullWidth); |
140 | $chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || " "; |
141 | $chr =~ $IsFullWidth and return $chr; # as is |
142 | return " " . $chr; |
143 | } |
144 | |
145 | sub help{ |
146 | my $message = shift; |
147 | use File::Basename; |
148 | my $name = basename($0); |
149 | $message and print STDERR "$name error: $message\n"; |
150 | print STDERR <<"EOT"; |
151 | Usage: |
152 | $name -[options...] [files...] |
153 | $name -[options...] -s "string" |
154 | $name -h |
155 | -h prints this message. |
156 | Inherited from hexdump; |
157 | -C Canonical unidump mode |
158 | -v prints the duplicate line as is. Without this option, |
159 | single "*" will be printed instead. |
160 | For unidump only |
161 | -p prints in perl literals that you can copy and paste directly |
162 | to your perl script. |
163 | -P prints in perl executable format! |
164 | -u prints a bunch of "Uxxxx,". Handy when you want to pass your |
165 | characters in mailing lists. |
166 | IO Options: |
167 | -e io_encoding same as "-f io_encoding -t io_encoding" |
168 | -f from_encoding convert the source stream from this encoding |
169 | -t to_encoding print to STDOUT in this encoding |
170 | -s string "string" will be converted instead of STDIN. |
171 | -H nline prints separater for each nlines of output. |
172 | 0 means only the table headding be printed. |
173 | EOT |
174 | exit; |
175 | } |
176 | |
177 | BEGIN{ |
178 | our %S2pstr= ( |
179 | "\\" => '\\\\', |
180 | "\0" => '\0', |
181 | "\t" => '\t', |
182 | "\n" => '\n', |
183 | "\r" => '\r', |
184 | "\v" => '\v', |
185 | "\a" => '\a', |
186 | "\e" => '\e', |
187 | "\"" => qq(\\\"), |
188 | "\'" => qq(\\\'), |
189 | '$' => '\$', |
190 | "@" => '\@', |
191 | "%" => '\%', |
192 | ); |
193 | |
194 | our %S2str = ( |
195 | qq(\x00) => q(\0), # NULL |
196 | qq(\x01) => q(^A), # START OF HEADING |
197 | qq(\x02) => q(^B), # START OF TEXT |
198 | qq(\x03) => q(^C), # END OF TEXT |
199 | qq(\x04) => q(^D), # END OF TRANSMISSION |
200 | qq(\x05) => q(^E), # ENQUIRY |
201 | qq(\x06) => q(^F), # ACKNOWLEDGE |
202 | qq(\x07) => q(\a), # BELL |
203 | qq(\x08) => q(^H), # BACKSPACE |
204 | qq(\x09) => q(\t), # HORIZONTAL TABULATION |
205 | qq(\x0A) => q(\n), # LINE FEED |
206 | qq(\x0B) => q(\v), # VERTICAL TABULATION |
207 | qq(\x0C) => q(^L), # FORM FEED |
208 | qq(\x0D) => q(\r), # CARRIAGE RETURN |
209 | qq(\x0E) => q(^N), # SHIFT OUT |
210 | qq(\x0F) => q(^O), # SHIFT IN |
211 | qq(\x10) => q(^P), # DATA LINK ESCAPE |
212 | qq(\x11) => q(^Q), # DEVICE CONTROL ONE |
213 | qq(\x12) => q(^R), # DEVICE CONTROL TWO |
214 | qq(\x13) => q(^S), # DEVICE CONTROL THREE |
215 | qq(\x14) => q(^T), # DEVICE CONTROL FOUR |
216 | qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE |
217 | qq(\x16) => q(^V), # SYNCHRONOUS IDLE |
218 | qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK |
219 | qq(\x18) => q(^X), # CANCEL |
220 | qq(\x19) => q(^Y), # END OF MEDIUM |
221 | qq(\x1A) => q(^Z), # SUBSTITUTE |
222 | qq(\x1B) => q(\e), # ESCAPE (\c[) |
223 | qq(\x1C) => "^\\", # FILE SEPARATOR |
224 | qq(\x1D) => "^\]", # GROUP SEPARATOR |
225 | qq(\x1E) => q(^^), # RECORD SEPARATOR |
226 | qq(\x1F) => q(^_), # UNIT SEPARATOR |
227 | ); |
228 | # |
229 | # Generated out of lib/unicore/EastAsianWidth.txt |
230 | # will it work ? |
231 | # |
232 | our $IsFullWidth = |
233 | qr/^[ |
234 | \x{1100}-\x{1159} |
235 | \x{115F}-\x{115F} |
236 | \x{2329}-\x{232A} |
237 | \x{2E80}-\x{2E99} |
238 | \x{2E9B}-\x{2EF3} |
239 | \x{2F00}-\x{2FD5} |
240 | \x{2FF0}-\x{2FFB} |
241 | \x{3000}-\x{303E} |
242 | \x{3041}-\x{3096} |
243 | \x{3099}-\x{30FF} |
244 | \x{3105}-\x{312C} |
245 | \x{3131}-\x{318E} |
246 | \x{3190}-\x{31B7} |
247 | \x{31F0}-\x{321C} |
248 | \x{3220}-\x{3243} |
249 | \x{3251}-\x{327B} |
250 | \x{327F}-\x{32CB} |
251 | \x{32D0}-\x{32FE} |
252 | \x{3300}-\x{3376} |
253 | \x{337B}-\x{33DD} |
254 | \x{3400}-\x{4DB5} |
255 | \x{4E00}-\x{9FA5} |
256 | \x{33E0}-\x{33FE} |
257 | \x{A000}-\x{A48C} |
258 | \x{AC00}-\x{D7A3} |
259 | \x{A490}-\x{A4C6} |
260 | \x{F900}-\x{FA2D} |
261 | \x{FA30}-\x{FA6A} |
262 | \x{FE30}-\x{FE46} |
263 | \x{FE49}-\x{FE52} |
264 | \x{FE54}-\x{FE66} |
265 | \x{FE68}-\x{FE6B} |
266 | \x{FF01}-\x{FF60} |
267 | \x{FFE0}-\x{FFE6} |
268 | \x{20000}-\x{2A6D6} |
269 | ]$/xo; |
270 | } |
271 | |
272 | __END__ |