Commit | Line | Data |
51ef4e11 |
1 | package Encode::Tcl; |
2 | use strict; |
3 | use Encode qw(find_encoding); |
4 | use base 'Encode::Encoding'; |
5 | use Carp; |
6 | |
fc6a272d |
7 | =head1 NAME |
8 | |
9 | Encode::Tcl - Tcl encodings |
10 | |
11 | =cut |
51ef4e11 |
12 | |
13 | sub INC_search |
14 | { |
15 | foreach my $dir (@INC) |
16 | { |
17 | if (opendir(my $dh,"$dir/Encode")) |
18 | { |
19 | while (defined(my $name = readdir($dh))) |
20 | { |
21 | if ($name =~ /^(.*)\.enc$/) |
22 | { |
23 | my $canon = $1; |
24 | my $obj = find_encoding($canon); |
25 | if (!defined($obj)) |
26 | { |
27 | my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__; |
28 | $obj->Define( $canon ); |
29 | # warn "$canon => $obj\n"; |
30 | } |
31 | } |
32 | } |
33 | closedir($dh); |
34 | } |
35 | } |
36 | } |
37 | |
38 | sub import |
39 | { |
40 | INC_search(); |
41 | } |
42 | |
43 | sub encode |
44 | { |
45 | my $obj = shift; |
46 | my $new = $obj->loadEncoding; |
47 | return undef unless (defined $new); |
48 | return $new->encode(@_); |
49 | } |
50 | |
51 | sub new_sequence |
52 | { |
53 | my $obj = shift; |
54 | my $new = $obj->loadEncoding; |
55 | return undef unless (defined $new); |
56 | return $new->new_sequence(@_); |
57 | } |
58 | |
59 | sub decode |
60 | { |
61 | my $obj = shift; |
62 | my $new = $obj->loadEncoding; |
63 | return undef unless (defined $new); |
64 | return $new->decode(@_); |
65 | } |
66 | |
67 | sub loadEncoding |
68 | { |
69 | my $obj = shift; |
70 | my $file = $obj->{'File'}; |
71 | my $name = $obj->name; |
72 | if (open(my $fh,$file)) |
73 | { |
74 | my $type; |
75 | while (1) |
76 | { |
77 | my $line = <$fh>; |
78 | $type = substr($line,0,1); |
79 | last unless $type eq '#'; |
80 | } |
466d6cd3 |
81 | my $class = ref($obj).('::'.(($type eq 'H') ? 'HanZi' : ($type eq 'E') ? 'Escape' : 'Table')); |
71a18b0f |
82 | # carp "Loading $file"; |
51ef4e11 |
83 | bless $obj,$class; |
84 | return $obj if $obj->read($fh,$obj->name,$type); |
85 | } |
86 | else |
87 | { |
88 | croak("Cannot open $file for ".$obj->name); |
89 | } |
90 | $obj->Undefine($name); |
91 | return undef; |
92 | } |
93 | |
94 | sub INC_find |
95 | { |
96 | my ($class,$name) = @_; |
97 | my $enc; |
98 | foreach my $dir (@INC) |
99 | { |
100 | last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc")); |
101 | } |
102 | return $enc; |
103 | } |
104 | |
105 | package Encode::Tcl::Table; |
106 | use base 'Encode::Encoding'; |
107 | |
108 | use Data::Dumper; |
109 | |
110 | sub read |
111 | { |
112 | my ($obj,$fh,$name,$type) = @_; |
f57a1a59 |
113 | my($rep, @leading); |
51ef4e11 |
114 | my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); |
115 | my @touni; |
116 | my %fmuni; |
117 | my $count = 0; |
118 | $def = hex($def); |
119 | while ($pages--) |
120 | { |
121 | my $line = <$fh>; |
122 | chomp($line); |
123 | my $page = hex($line); |
124 | my @page; |
f57a1a59 |
125 | $leading[$page] = 1 if $page; |
51ef4e11 |
126 | my $ch = $page * 256; |
127 | for (my $i = 0; $i < 16; $i++) |
128 | { |
129 | my $line = <$fh>; |
130 | for (my $j = 0; $j < 16; $j++) |
131 | { |
132 | my $val = hex(substr($line,0,4,'')); |
133 | if ($val || !$ch) |
134 | { |
f57a1a59 |
135 | my $uch = pack('U', $val); # chr($val); |
51ef4e11 |
136 | push(@page,$uch); |
137 | $fmuni{$uch} = $ch; |
138 | $count++; |
139 | } |
140 | else |
141 | { |
142 | push(@page,undef); |
143 | } |
144 | $ch++; |
145 | } |
146 | } |
147 | $touni[$page] = \@page; |
148 | } |
f57a1a59 |
149 | $rep = $type ne 'M' ? $obj->can("rep_$type") : |
150 | sub { ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'}; |
51ef4e11 |
151 | $obj->{'Rep'} = $rep; |
152 | $obj->{'ToUni'} = \@touni; |
153 | $obj->{'FmUni'} = \%fmuni; |
154 | $obj->{'Def'} = $def; |
155 | $obj->{'Num'} = $count; |
156 | return $obj; |
157 | } |
158 | |
159 | sub rep_S { 'C' } |
160 | |
161 | sub rep_D { 'n' } |
162 | |
f57a1a59 |
163 | #sub rep_M { ($_[0] > 255) ? 'n' : 'C' } |
51ef4e11 |
164 | |
165 | sub representation |
166 | { |
167 | my ($obj,$ch) = @_; |
168 | $ch = 0 unless @_ > 1; |
f57a1a59 |
169 | $obj->{'Rep'}->($ch); |
51ef4e11 |
170 | } |
171 | |
172 | sub decode |
173 | { |
174 | my ($obj,$str,$chk) = @_; |
175 | my $rep = $obj->{'Rep'}; |
176 | my $touni = $obj->{'ToUni'}; |
e91cad5b |
177 | my $uni; |
51ef4e11 |
178 | while (length($str)) |
179 | { |
180 | my $ch = ord(substr($str,0,1,'')); |
181 | my $x; |
182 | if (&$rep($ch) eq 'C') |
183 | { |
184 | $x = $touni->[0][$ch]; |
185 | } |
186 | else |
187 | { |
188 | $x = $touni->[$ch][ord(substr($str,0,1,''))]; |
189 | } |
190 | unless (defined $x) |
191 | { |
192 | last if $chk; |
193 | # What do we do here ? |
194 | $x = ''; |
195 | } |
196 | $uni .= $x; |
197 | } |
198 | $_[1] = $str if $chk; |
199 | return $uni; |
200 | } |
201 | |
202 | |
203 | sub encode |
204 | { |
205 | my ($obj,$uni,$chk) = @_; |
206 | my $fmuni = $obj->{'FmUni'}; |
51ef4e11 |
207 | my $def = $obj->{'Def'}; |
208 | my $rep = $obj->{'Rep'}; |
e91cad5b |
209 | my $str; |
51ef4e11 |
210 | while (length($uni)) |
211 | { |
212 | my $ch = substr($uni,0,1,''); |
213 | my $x = $fmuni->{chr(ord($ch))}; |
214 | unless (defined $x) |
215 | { |
216 | last if ($chk); |
217 | $x = $def; |
218 | } |
219 | $str .= pack(&$rep($x),$x); |
220 | } |
221 | $_[1] = $uni if $chk; |
222 | return $str; |
223 | } |
224 | |
225 | package Encode::Tcl::Escape; |
226 | use base 'Encode::Encoding'; |
227 | |
228 | use Carp; |
229 | |
230 | sub read |
231 | { |
e91cad5b |
232 | my ($obj,$fh,$name) = @_; |
d9da9e35 |
233 | my(%tbl, @seq, $enc, @esc); |
51ef4e11 |
234 | while (<$fh>) |
235 | { |
236 | my ($key,$val) = /^(\S+)\s+(.*)$/; |
237 | $val =~ s/^\{(.*?)\}/$1/g; |
238 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
e91cad5b |
239 | if($enc = Encode->getEncoding($key)){ |
240 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
d9da9e35 |
241 | push @seq, $val; |
e91cad5b |
242 | }else{ |
243 | $obj->{$key} = $val; |
244 | } |
d9da9e35 |
245 | if($val =~ /^\e(.*)/){ push(@esc, quotemeta $1) } |
51ef4e11 |
246 | } |
d9da9e35 |
247 | $obj->{'Seq'} = \@seq; # escape sequences |
248 | $obj->{'Tbl'} = \%tbl; # encoding tables |
249 | $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC |
e91cad5b |
250 | return $obj; |
51ef4e11 |
251 | } |
252 | |
253 | sub decode |
254 | { |
e91cad5b |
255 | my ($obj,$str,$chk) = @_; |
256 | my $tbl = $obj->{'Tbl'}; |
d9da9e35 |
257 | my $seq = $obj->{'Seq'}; |
258 | my $esc = $obj->{'Esc'}; |
e91cad5b |
259 | my $ini = $obj->{'init'}; |
260 | my $fin = $obj->{'final'}; |
d9da9e35 |
261 | my $std = $seq->[0]; |
e91cad5b |
262 | my $cur = $std; |
263 | my $uni; |
264 | while (length($str)){ |
265 | my $uch = substr($str,0,1,''); |
266 | if($uch eq "\e"){ |
d9da9e35 |
267 | if($str =~ s/^($esc)//) |
268 | { |
269 | my $esc = "\e$1"; |
270 | $cur = $tbl->{$esc} ? $esc : |
271 | ($esc eq $ini || $esc eq $fin) ? $std : |
272 | $cur; |
273 | } |
274 | else |
275 | { |
276 | $str =~ s/^([\x20-\x2F]*[\x30-\x7E])//; |
277 | carp "unknown escape sequence: ESC $1"; |
278 | } |
e91cad5b |
279 | next; |
280 | } |
281 | if($uch eq "\x0e" || $uch eq "\x0f"){ |
282 | $cur = $uch and next; |
283 | } |
e91cad5b |
284 | if(ref($tbl->{$cur}) eq 'Encode::XS'){ |
285 | $uni .= $tbl->{$cur}->decode($uch); |
286 | next; |
287 | } |
d9da9e35 |
288 | my $ch = ord($uch); |
e91cad5b |
289 | my $rep = $tbl->{$cur}->{'Rep'}; |
290 | my $touni = $tbl->{$cur}->{'ToUni'}; |
d9da9e35 |
291 | my $x; |
e91cad5b |
292 | if (&$rep($ch) eq 'C') |
293 | { |
294 | $x = $touni->[0][$ch]; |
295 | } |
296 | else |
297 | { |
298 | $x = $touni->[$ch][ord(substr($str,0,1,''))]; |
299 | } |
300 | unless (defined $x) |
301 | { |
302 | last if $chk; |
303 | # What do we do here ? |
304 | $x = ''; |
305 | } |
306 | $uni .= $x; |
307 | } |
308 | $_[1] = $str if $chk; |
309 | return $uni; |
51ef4e11 |
310 | } |
311 | |
312 | sub encode |
313 | { |
e91cad5b |
314 | my ($obj,$uni,$chk) = @_; |
315 | my $tbl = $obj->{'Tbl'}; |
d9da9e35 |
316 | my $seq = $obj->{'Seq'}; |
e91cad5b |
317 | my $ini = $obj->{'init'}; |
318 | my $fin = $obj->{'final'}; |
d9da9e35 |
319 | my $std = $seq->[0]; |
e91cad5b |
320 | my $str = $ini; |
321 | my $pre = $std; |
322 | my $cur = $pre; |
51ef4e11 |
323 | |
e91cad5b |
324 | while (length($uni)){ |
325 | my $ch = chr(ord(substr($uni,0,1,''))); |
466d6cd3 |
326 | my $x; |
327 | foreach my $e_seq ($std, $pre, @$seq){ |
328 | $x = ref($tbl->{$e_seq}) eq 'Encode::XS' |
329 | ? $tbl->{$e_seq}->encode($ch,1) |
330 | : $tbl->{$e_seq}->{FmUni}->{$ch}; |
331 | $cur = $e_seq and last if defined $x; |
e91cad5b |
332 | } |
466d6cd3 |
333 | if(ref($tbl->{$cur}) ne 'Encode::XS') |
e91cad5b |
334 | { |
466d6cd3 |
335 | my $def = $tbl->{$cur}->{'Def'}; |
336 | my $rep = $tbl->{$cur}->{'Rep'}; |
337 | unless (defined $x){ |
338 | last if ($chk); |
339 | $x = $def; |
340 | } |
341 | $x = pack(&$rep($x),$x); |
342 | } |
343 | $str .= $cur eq $pre ? $x : ($pre = $cur).$x; |
344 | } |
345 | $str .= $std unless $cur eq $std; |
346 | $str .= $fin; |
347 | $_[1] = $uni if $chk; |
348 | return $str; |
349 | } |
350 | |
351 | package Encode::Tcl::HanZi; |
352 | use base 'Encode::Encoding'; |
353 | |
354 | use Carp; |
355 | |
356 | sub read |
357 | { |
358 | my ($obj,$fh,$name) = @_; |
359 | my(%tbl, @seq, $enc); |
360 | while (<$fh>) |
361 | { |
362 | my ($key,$val) = /^(\S+)\s+(.*)$/; |
363 | $val =~ s/^\{(.*?)\}/$1/g; |
364 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
365 | if($enc = Encode->getEncoding($key)){ |
366 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
367 | push @seq, $val; |
368 | }else{ |
369 | $obj->{$key} = $val; |
370 | } |
371 | } |
372 | $obj->{'Seq'} = \@seq; # escape sequences |
373 | $obj->{'Tbl'} = \%tbl; # encoding tables |
374 | return $obj; |
375 | } |
376 | |
377 | sub decode |
378 | { |
379 | my ($obj,$str,$chk) = @_; |
380 | my $tbl = $obj->{'Tbl'}; |
381 | my $seq = $obj->{'Seq'}; |
382 | my $std = $seq->[0]; |
383 | my $cur = $std; |
384 | my $uni; |
385 | while (length($str)){ |
386 | my $uch = substr($str,0,1,''); |
387 | if($uch eq "~"){ |
388 | if($str =~ s/^\cJ//) |
389 | { |
390 | next; |
391 | } |
392 | elsif($str =~ s/^\~//) |
393 | { |
394 | 1; |
395 | } |
396 | elsif($str =~ s/^([{}])//) |
397 | { |
398 | $cur = "~$1"; |
399 | next; |
400 | } |
401 | else |
402 | { |
403 | $str =~ s/^([^~])//; |
404 | carp "unknown HanZi escape sequence: ~$1"; |
405 | next; |
406 | } |
e91cad5b |
407 | } |
466d6cd3 |
408 | if(ref($tbl->{$cur}) eq 'Encode::XS'){ |
409 | $uni .= $tbl->{$cur}->decode($uch); |
410 | next; |
411 | } |
412 | my $ch = ord($uch); |
413 | my $rep = $tbl->{$cur}->{'Rep'}; |
414 | my $touni = $tbl->{$cur}->{'ToUni'}; |
415 | my $x; |
416 | if (&$rep($ch) eq 'C') |
417 | { |
418 | $x = $touni->[0][$ch]; |
419 | } |
420 | else |
421 | { |
422 | $x = $touni->[$ch][ord(substr($str,0,1,''))]; |
423 | } |
424 | unless (defined $x) |
425 | { |
426 | last if $chk; |
427 | # What do we do here ? |
428 | $x = ''; |
429 | } |
430 | $uni .= $x; |
e91cad5b |
431 | } |
466d6cd3 |
432 | $_[1] = $str if $chk; |
433 | return $uni; |
434 | } |
435 | |
436 | sub encode |
437 | { |
438 | my ($obj,$uni,$chk) = @_; |
439 | my $tbl = $obj->{'Tbl'}; |
440 | my $seq = $obj->{'Seq'}; |
441 | my $std = $seq->[0]; |
442 | my $str; |
443 | my $pre = $std; |
444 | my $cur = $pre; |
445 | |
446 | while (length($uni)){ |
447 | my $ch = chr(ord(substr($uni,0,1,''))); |
448 | my $x; |
449 | foreach my $e_seq (@$seq){ |
450 | $x = ref($tbl->{$e_seq}) eq 'Encode::XS' |
451 | ? $tbl->{$e_seq}->encode($ch,1) |
452 | : $tbl->{$e_seq}->{FmUni}->{$ch}; |
453 | $cur = $e_seq and last if defined $x; |
e91cad5b |
454 | } |
466d6cd3 |
455 | if(ref($tbl->{$cur}) ne 'Encode::XS') |
456 | { |
457 | my $def = $tbl->{$cur}->{'Def'}; |
458 | my $rep = $tbl->{$cur}->{'Rep'}; |
459 | unless (defined $x){ |
460 | last if ($chk); |
461 | $x = $def; |
462 | } |
463 | $x = pack(&$rep($x),$x); |
464 | } |
465 | $str .= $cur eq $pre ? $x : ($pre = $cur).$x; |
466 | $str .= '~' if $x eq '~'; # to '~~' |
e91cad5b |
467 | } |
468 | $str .= $std unless $cur eq $std; |
e91cad5b |
469 | $_[1] = $uni if $chk; |
470 | return $str; |
471 | } |
466d6cd3 |
472 | |
51ef4e11 |
473 | 1; |
474 | __END__ |