Commit | Line | Data |
51ef4e11 |
1 | package Encode::Tcl; |
28b605d8 |
2 | |
3 | our $VERSION = '1.00'; |
4 | |
51ef4e11 |
5 | use strict; |
6 | use Encode qw(find_encoding); |
7 | use base 'Encode::Encoding'; |
8 | use Carp; |
9 | |
fc6a272d |
10 | =head1 NAME |
11 | |
12 | Encode::Tcl - Tcl encodings |
13 | |
14 | =cut |
51ef4e11 |
15 | |
16 | sub INC_search |
17 | { |
18 | foreach my $dir (@INC) |
19 | { |
20 | if (opendir(my $dh,"$dir/Encode")) |
21 | { |
22 | while (defined(my $name = readdir($dh))) |
23 | { |
24 | if ($name =~ /^(.*)\.enc$/) |
25 | { |
26 | my $canon = $1; |
27 | my $obj = find_encoding($canon); |
28 | if (!defined($obj)) |
29 | { |
30 | my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__; |
31 | $obj->Define( $canon ); |
32 | # warn "$canon => $obj\n"; |
33 | } |
34 | } |
35 | } |
36 | closedir($dh); |
37 | } |
38 | } |
39 | } |
40 | |
41 | sub import |
42 | { |
43 | INC_search(); |
44 | } |
45 | |
96d6357c |
46 | sub no_map_in_encode ($$) |
47 | # codepoint, enc-name; |
48 | { |
49 | carp sprintf "\"\\N{U+%x}\" does not map to %s", @_; |
50 | # /* FIXME: Skip over the character, copy in replacement and continue |
51 | # * but that is messy so for now just fail. |
52 | # */ |
53 | return; |
54 | } |
55 | |
56 | sub no_map_in_decode ($$) |
57 | # enc-name, string beginning the malform char; |
58 | { |
59 | # /* UTF-8 is supposed to be "Universal" so should not happen */ |
60 | croak sprintf "%s '%s' does not map to UTF-8", @_; |
61 | } |
62 | |
51ef4e11 |
63 | sub encode |
64 | { |
65 | my $obj = shift; |
66 | my $new = $obj->loadEncoding; |
67 | return undef unless (defined $new); |
68 | return $new->encode(@_); |
69 | } |
70 | |
71 | sub new_sequence |
72 | { |
73 | my $obj = shift; |
74 | my $new = $obj->loadEncoding; |
75 | return undef unless (defined $new); |
76 | return $new->new_sequence(@_); |
77 | } |
78 | |
79 | sub decode |
80 | { |
81 | my $obj = shift; |
82 | my $new = $obj->loadEncoding; |
83 | return undef unless (defined $new); |
84 | return $new->decode(@_); |
85 | } |
86 | |
87 | sub loadEncoding |
88 | { |
89 | my $obj = shift; |
90 | my $file = $obj->{'File'}; |
91 | my $name = $obj->name; |
92 | if (open(my $fh,$file)) |
93 | { |
94 | my $type; |
95 | while (1) |
96 | { |
97 | my $line = <$fh>; |
98 | $type = substr($line,0,1); |
99 | last unless $type eq '#'; |
100 | } |
96d6357c |
101 | my $subclass = |
102 | ($type eq 'X') ? 'Extended' : |
103 | ($type eq 'H') ? 'HanZi' : |
104 | ($type eq 'E') ? 'Escape' : 'Table'; |
105 | my $class = ref($obj) . '::' . $subclass; |
71a18b0f |
106 | # carp "Loading $file"; |
51ef4e11 |
107 | bless $obj,$class; |
108 | return $obj if $obj->read($fh,$obj->name,$type); |
109 | } |
110 | else |
111 | { |
112 | croak("Cannot open $file for ".$obj->name); |
113 | } |
114 | $obj->Undefine($name); |
115 | return undef; |
116 | } |
117 | |
118 | sub INC_find |
119 | { |
120 | my ($class,$name) = @_; |
121 | my $enc; |
122 | foreach my $dir (@INC) |
123 | { |
124 | last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc")); |
125 | } |
126 | return $enc; |
127 | } |
128 | |
129 | package Encode::Tcl::Table; |
130 | use base 'Encode::Encoding'; |
131 | |
96d6357c |
132 | use Carp; |
133 | #use Data::Dumper; |
51ef4e11 |
134 | |
135 | sub read |
136 | { |
137 | my ($obj,$fh,$name,$type) = @_; |
f57a1a59 |
138 | my($rep, @leading); |
51ef4e11 |
139 | my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); |
140 | my @touni; |
141 | my %fmuni; |
142 | my $count = 0; |
143 | $def = hex($def); |
144 | while ($pages--) |
145 | { |
146 | my $line = <$fh>; |
147 | chomp($line); |
148 | my $page = hex($line); |
149 | my @page; |
f57a1a59 |
150 | $leading[$page] = 1 if $page; |
51ef4e11 |
151 | my $ch = $page * 256; |
152 | for (my $i = 0; $i < 16; $i++) |
153 | { |
154 | my $line = <$fh>; |
155 | for (my $j = 0; $j < 16; $j++) |
156 | { |
157 | my $val = hex(substr($line,0,4,'')); |
158 | if ($val || !$ch) |
159 | { |
f57a1a59 |
160 | my $uch = pack('U', $val); # chr($val); |
51ef4e11 |
161 | push(@page,$uch); |
162 | $fmuni{$uch} = $ch; |
163 | $count++; |
164 | } |
165 | else |
166 | { |
167 | push(@page,undef); |
168 | } |
169 | $ch++; |
170 | } |
171 | } |
172 | $touni[$page] = \@page; |
173 | } |
96d6357c |
174 | $rep = $type ne 'M' |
175 | ? $obj->can("rep_$type") |
176 | : sub |
177 | { |
178 | ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'; |
179 | }; |
51ef4e11 |
180 | $obj->{'Rep'} = $rep; |
181 | $obj->{'ToUni'} = \@touni; |
182 | $obj->{'FmUni'} = \%fmuni; |
183 | $obj->{'Def'} = $def; |
184 | $obj->{'Num'} = $count; |
185 | return $obj; |
186 | } |
187 | |
188 | sub rep_S { 'C' } |
189 | |
190 | sub rep_D { 'n' } |
191 | |
f57a1a59 |
192 | #sub rep_M { ($_[0] > 255) ? 'n' : 'C' } |
51ef4e11 |
193 | |
194 | sub representation |
195 | { |
196 | my ($obj,$ch) = @_; |
197 | $ch = 0 unless @_ > 1; |
f57a1a59 |
198 | $obj->{'Rep'}->($ch); |
51ef4e11 |
199 | } |
200 | |
201 | sub decode |
202 | { |
96d6357c |
203 | my($obj,$str,$chk) = @_; |
204 | my $name = $obj->{'Name'}; |
51ef4e11 |
205 | my $rep = $obj->{'Rep'}; |
206 | my $touni = $obj->{'ToUni'}; |
e91cad5b |
207 | my $uni; |
51ef4e11 |
208 | while (length($str)) |
209 | { |
96d6357c |
210 | my $cc = substr($str,0,1,''); |
211 | my $ch = ord($cc); |
51ef4e11 |
212 | my $x; |
213 | if (&$rep($ch) eq 'C') |
214 | { |
215 | $x = $touni->[0][$ch]; |
216 | } |
217 | else |
218 | { |
96d6357c |
219 | if(! length $str) |
220 | { |
221 | $str = pack('C',$ch); # split leading byte |
222 | last; |
223 | } |
224 | my $c2 = substr($str,0,1,''); |
225 | $cc .= $c2; |
226 | $x = $touni->[$ch][ord($c2)]; |
51ef4e11 |
227 | } |
228 | unless (defined $x) |
229 | { |
96d6357c |
230 | Encode::Tcl::no_map_in_decode($name, $cc.$str); |
51ef4e11 |
231 | } |
232 | $uni .= $x; |
233 | } |
234 | $_[1] = $str if $chk; |
235 | return $uni; |
236 | } |
237 | |
238 | |
239 | sub encode |
240 | { |
241 | my ($obj,$uni,$chk) = @_; |
242 | my $fmuni = $obj->{'FmUni'}; |
51ef4e11 |
243 | my $def = $obj->{'Def'}; |
96d6357c |
244 | my $name = $obj->{'Name'}; |
51ef4e11 |
245 | my $rep = $obj->{'Rep'}; |
e91cad5b |
246 | my $str; |
51ef4e11 |
247 | while (length($uni)) |
248 | { |
249 | my $ch = substr($uni,0,1,''); |
96d6357c |
250 | my $x = $fmuni->{$ch}; |
251 | unless(defined $x) |
51ef4e11 |
252 | { |
96d6357c |
253 | unless($chk) |
254 | { |
255 | Encode::Tcl::no_map_in_encode(ord($ch), $name) |
256 | } |
257 | return undef; |
51ef4e11 |
258 | } |
259 | $str .= pack(&$rep($x),$x); |
260 | } |
261 | $_[1] = $uni if $chk; |
262 | return $str; |
263 | } |
264 | |
265 | package Encode::Tcl::Escape; |
266 | use base 'Encode::Encoding'; |
267 | |
268 | use Carp; |
269 | |
96d6357c |
270 | use constant SI => "\cO"; |
271 | use constant SO => "\cN"; |
272 | use constant SS2 => "\eN"; |
273 | use constant SS3 => "\eO"; |
274 | |
51ef4e11 |
275 | sub read |
276 | { |
e91cad5b |
277 | my ($obj,$fh,$name) = @_; |
83ea2aad |
278 | my(%tbl, @seq, $enc, @esc, %grp); |
51ef4e11 |
279 | while (<$fh>) |
280 | { |
96d6357c |
281 | next unless /^(\S+)\s+(.*)$/; |
282 | my ($key,$val) = ($1,$2); |
51ef4e11 |
283 | $val =~ s/^\{(.*?)\}/$1/g; |
284 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
83ea2aad |
285 | |
96d6357c |
286 | if($enc = Encode->getEncoding($key)) |
287 | { |
e91cad5b |
288 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
d9da9e35 |
289 | push @seq, $val; |
83ea2aad |
290 | $grp{$val} = |
96d6357c |
291 | $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO" |
292 | $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN" |
293 | $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN" |
294 | $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO" |
295 | 0; # G0 |
296 | } |
297 | else |
298 | { |
e91cad5b |
299 | $obj->{$key} = $val; |
96d6357c |
300 | } |
301 | if($val =~ /^\e(.*)/) |
302 | { |
303 | push(@esc, quotemeta $1); |
304 | } |
51ef4e11 |
305 | } |
83ea2aad |
306 | $obj->{'Grp'} = \%grp; # graphic chars |
d9da9e35 |
307 | $obj->{'Seq'} = \@seq; # escape sequences |
308 | $obj->{'Tbl'} = \%tbl; # encoding tables |
309 | $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC |
e91cad5b |
310 | return $obj; |
51ef4e11 |
311 | } |
312 | |
313 | sub decode |
314 | { |
e91cad5b |
315 | my ($obj,$str,$chk) = @_; |
96d6357c |
316 | my $name = $obj->{'Name'}; |
e91cad5b |
317 | my $tbl = $obj->{'Tbl'}; |
d9da9e35 |
318 | my $seq = $obj->{'Seq'}; |
83ea2aad |
319 | my $grp = $obj->{'Grp'}; |
d9da9e35 |
320 | my $esc = $obj->{'Esc'}; |
e91cad5b |
321 | my $ini = $obj->{'init'}; |
322 | my $fin = $obj->{'final'}; |
d9da9e35 |
323 | my $std = $seq->[0]; |
e91cad5b |
324 | my $cur = $std; |
83ea2aad |
325 | my @sta = ($std, undef, undef, undef); # G0 .. G3 state |
b29b78de |
326 | my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1); |
327 | my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3); |
e91cad5b |
328 | my $uni; |
96d6357c |
329 | while (length($str)) |
330 | { |
331 | my $cc = substr($str,0,1,''); |
332 | if($cc eq "\e") |
333 | { |
334 | if($str =~ s/^($esc)//) |
335 | { |
336 | my $e = "\e$1"; |
337 | $sta[ $grp->{$e} ] = $e if $tbl->{$e}; |
338 | } |
83ea2aad |
339 | # appearance of "\eN\eO" or "\eO\eN" isn't supposed. |
96d6357c |
340 | # but in that case, the former will be ignored. |
341 | elsif($str =~ s/^N//) |
342 | { |
343 | $ss = 2; |
344 | } |
345 | elsif($str =~ s/^O//) |
346 | { |
347 | $ss = 3; |
348 | } |
349 | else |
350 | { |
351 | # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped. |
352 | $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//; |
353 | if($chk && ! length $str) |
354 | { |
355 | $str = "\e$1"; # split sequence |
356 | last; |
357 | } |
358 | croak "unknown escape sequence: ESC $1"; |
359 | } |
360 | next; |
361 | } |
362 | if($cc eq SO) |
363 | { |
364 | $s = 1; next; |
365 | } |
366 | if($cc eq SI) |
367 | { |
368 | $s = 0; next; |
369 | } |
83ea2aad |
370 | |
b29b78de |
371 | $cur = $ss ? $sta[$ss] : $sta[$s]; |
83ea2aad |
372 | |
96d6357c |
373 | if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') |
374 | { |
375 | $uni .= $tbl->{$cur}->decode($cc); |
b29b78de |
376 | $ss = 0; |
e91cad5b |
377 | next; |
96d6357c |
378 | } |
379 | my $ch = ord($cc); |
e91cad5b |
380 | my $rep = $tbl->{$cur}->{'Rep'}; |
381 | my $touni = $tbl->{$cur}->{'ToUni'}; |
d9da9e35 |
382 | my $x; |
e91cad5b |
383 | if (&$rep($ch) eq 'C') |
384 | { |
385 | $x = $touni->[0][$ch]; |
386 | } |
387 | else |
388 | { |
96d6357c |
389 | if(! length $str) |
390 | { |
391 | $str = $cc; # split leading byte |
392 | last; |
393 | } |
394 | my $c2 = substr($str,0,1,''); |
395 | $cc .= $c2; |
396 | $x = $touni->[$ch][ord($c2)]; |
e91cad5b |
397 | } |
398 | unless (defined $x) |
399 | { |
96d6357c |
400 | Encode::Tcl::no_map_in_decode($name, $cc.$str); |
e91cad5b |
401 | } |
402 | $uni .= $x; |
b29b78de |
403 | $ss = 0; |
e91cad5b |
404 | } |
96d6357c |
405 | if($chk) |
406 | { |
407 | my $back = join('', grep defined($_) && $_ ne $std, @sta); |
408 | $back .= SO if $s; |
409 | $back .= $ss == 2 ? SS2 : SS3 if $ss; |
410 | $_[1] = $back.$str; |
411 | } |
412 | return $uni; |
51ef4e11 |
413 | } |
414 | |
415 | sub encode |
416 | { |
e91cad5b |
417 | my ($obj,$uni,$chk) = @_; |
96d6357c |
418 | my $name = $obj->{'Name'}; |
e91cad5b |
419 | my $tbl = $obj->{'Tbl'}; |
d9da9e35 |
420 | my $seq = $obj->{'Seq'}; |
83ea2aad |
421 | my $grp = $obj->{'Grp'}; |
e91cad5b |
422 | my $ini = $obj->{'init'}; |
423 | my $fin = $obj->{'final'}; |
d9da9e35 |
424 | my $std = $seq->[0]; |
e91cad5b |
425 | my $str = $ini; |
b29b78de |
426 | my @sta = ($std,undef,undef,undef); # G0 .. G3 state |
83ea2aad |
427 | my $cur = $std; |
b29b78de |
428 | my $pG = 0; # previous G: 0 or 1. |
429 | my $cG = 0; # current G: 0,1,2,3. |
83ea2aad |
430 | |
b29b78de |
431 | if($ini && defined $grp->{$ini}) |
83ea2aad |
432 | { |
96d6357c |
433 | $sta[ $grp->{$ini} ] = $ini; |
83ea2aad |
434 | } |
51ef4e11 |
435 | |
96d6357c |
436 | while (length($uni)) |
437 | { |
438 | my $ch = substr($uni,0,1,''); |
439 | my $x; |
440 | foreach my $e_seq (@$seq) |
441 | { |
442 | $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' |
443 | ? $tbl->{$e_seq}->{FmUni}->{$ch} |
444 | : $tbl->{$e_seq}->encode($ch,1); |
445 | $cur = $e_seq, last if defined $x; |
466d6cd3 |
446 | } |
96d6357c |
447 | unless (defined $x) |
448 | { |
449 | unless($chk) |
450 | { |
451 | Encode::Tcl::no_map_in_encode(ord($ch), $name) |
452 | } |
453 | return undef; |
466d6cd3 |
454 | } |
96d6357c |
455 | if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') |
456 | { |
457 | my $def = $tbl->{$cur}->{'Def'}; |
458 | my $rep = $tbl->{$cur}->{'Rep'}; |
459 | $x = pack(&$rep($x),$x); |
460 | } |
461 | $cG = $grp->{$cur}; |
462 | $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG]; |
463 | |
464 | $str .= $cG == 0 && $pG == 1 ? SI : |
465 | $cG == 1 && $pG == 0 ? SO : |
466 | $cG == 2 ? SS2 : |
467 | $cG == 3 ? SS3 : ""; |
468 | $str .= $x; |
469 | $pG = $cG if $cG < 2; |
470 | } |
471 | $str .= SI if $pG == 1; # back to G0 |
b29b78de |
472 | $str .= $std unless $std eq $sta[0]; # GO to ASCII |
83ea2aad |
473 | $str .= $fin; # necessary? |
466d6cd3 |
474 | $_[1] = $uni if $chk; |
475 | return $str; |
476 | } |
477 | |
b29b78de |
478 | |
479 | package Encode::Tcl::Extended; |
480 | use base 'Encode::Encoding'; |
481 | |
482 | use Carp; |
483 | |
484 | sub read |
485 | { |
486 | my ($obj,$fh,$name) = @_; |
487 | my(%tbl, $enc, %ssc, @key); |
488 | while (<$fh>) |
489 | { |
96d6357c |
490 | next unless /^(\S+)\s+(.*)$/; |
491 | my ($key,$val) = ($1,$2); |
b29b78de |
492 | $val =~ s/\{(.*?)\}/$1/; |
493 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
494 | |
96d6357c |
495 | if($enc = Encode->getEncoding($key)) |
496 | { |
b29b78de |
497 | push @key, $val; |
96d6357c |
498 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
b29b78de |
499 | $ssc{$val} = substr($val,1) if $val =~ /^>/; |
96d6357c |
500 | } |
501 | else |
502 | { |
b29b78de |
503 | $obj->{$key} = $val; |
96d6357c |
504 | } |
b29b78de |
505 | } |
506 | $obj->{'SSC'} = \%ssc; # single shift char |
507 | $obj->{'Tbl'} = \%tbl; # encoding tables |
508 | $obj->{'Key'} = \@key; # keys of table hash |
509 | return $obj; |
510 | } |
511 | |
512 | sub decode |
513 | { |
514 | my ($obj,$str,$chk) = @_; |
96d6357c |
515 | my $name = $obj->{'Name'}; |
516 | my $tbl = $obj->{'Tbl'}; |
517 | my $ssc = $obj->{'SSC'}; |
b29b78de |
518 | my $cur = ''; # current state |
519 | my $uni; |
96d6357c |
520 | while (length($str)) |
521 | { |
522 | my $cc = substr($str,0,1,''); |
523 | my $ch = ord($cc); |
b29b78de |
524 | if(!$cur && $ch > 0x7F) |
525 | { |
526 | $cur = '>'; |
96d6357c |
527 | $cur .= $cc, next if $ssc->{$cur.$cc}; |
b29b78de |
528 | } |
529 | $ch ^= 0x80 if $cur; |
530 | |
96d6357c |
531 | if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') |
532 | { |
533 | $uni .= $tbl->{$cur}->decode($cc); |
b29b78de |
534 | $cur = ''; |
535 | next; |
96d6357c |
536 | } |
b29b78de |
537 | my $rep = $tbl->{$cur}->{'Rep'}; |
538 | my $touni = $tbl->{$cur}->{'ToUni'}; |
539 | my $x; |
540 | if (&$rep($ch) eq 'C') |
541 | { |
542 | $x = $touni->[0][$ch]; |
543 | } |
544 | else |
545 | { |
96d6357c |
546 | if(! length $str) |
547 | { |
548 | $str = $cc; # split leading byte |
549 | last; |
550 | } |
551 | my $c2 = substr($str,0,1,''); |
552 | $cc .= $c2; |
553 | $x = $touni->[$ch][0x80 ^ ord($c2)]; |
b29b78de |
554 | } |
555 | unless (defined $x) |
556 | { |
96d6357c |
557 | Encode::Tcl::no_map_in_decode($name, $cc.$str); |
b29b78de |
558 | } |
559 | $uni .= $x; |
560 | $cur = ''; |
561 | } |
96d6357c |
562 | if($chk) |
563 | { |
564 | $cur =~ s/>//; |
565 | $_[1] = $cur ne '' ? $cur.$str : $str; |
566 | } |
b29b78de |
567 | return $uni; |
568 | } |
569 | |
570 | sub encode |
571 | { |
572 | my ($obj,$uni,$chk) = @_; |
96d6357c |
573 | my $name = $obj->{'Name'}; |
b29b78de |
574 | my $tbl = $obj->{'Tbl'}; |
575 | my $ssc = $obj->{'SSC'}; |
576 | my $key = $obj->{'Key'}; |
577 | my $str; |
578 | my $cur; |
579 | |
96d6357c |
580 | while (length($uni)) |
581 | { |
582 | my $ch = substr($uni,0,1,''); |
583 | my $x; |
584 | foreach my $k (@$key) |
585 | { |
586 | $x = ref($tbl->{$k}) ne 'Encode::Tcl::Table' |
587 | ? $k =~ /^>/ |
588 | ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1) |
589 | : $tbl->{$k}->encode($ch,1) |
590 | : $tbl->{$k}->{FmUni}->{$ch}; |
591 | $cur = $k, last if defined $x; |
592 | } |
593 | unless (defined $x) |
594 | { |
595 | unless($chk) |
596 | { |
597 | Encode::Tcl::no_map_in_encode(ord($ch), $name) |
598 | } |
599 | return undef; |
600 | } |
601 | if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') |
602 | { |
603 | my $def = $tbl->{$cur}->{'Def'}; |
604 | my $rep = $tbl->{$cur}->{'Rep'}; |
605 | my $r = &$rep($x); |
606 | $x = pack($r, |
b29b78de |
607 | $cur =~ /^>/ |
608 | ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x |
609 | : $x); |
96d6357c |
610 | } |
611 | $str .= $ssc->{$cur} if defined $ssc->{$cur}; |
612 | $str .= $x; |
613 | } |
b29b78de |
614 | $_[1] = $uni if $chk; |
615 | return $str; |
616 | } |
617 | |
466d6cd3 |
618 | package Encode::Tcl::HanZi; |
619 | use base 'Encode::Encoding'; |
620 | |
621 | use Carp; |
622 | |
623 | sub read |
624 | { |
625 | my ($obj,$fh,$name) = @_; |
626 | my(%tbl, @seq, $enc); |
627 | while (<$fh>) |
628 | { |
96d6357c |
629 | next unless /^(\S+)\s+(.*)$/; |
630 | my ($key,$val) = ($1,$2); |
466d6cd3 |
631 | $val =~ s/^\{(.*?)\}/$1/g; |
632 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
96d6357c |
633 | if($enc = Encode->getEncoding($key)) |
634 | { |
466d6cd3 |
635 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
636 | push @seq, $val; |
96d6357c |
637 | } |
638 | else |
639 | { |
466d6cd3 |
640 | $obj->{$key} = $val; |
96d6357c |
641 | } |
466d6cd3 |
642 | } |
643 | $obj->{'Seq'} = \@seq; # escape sequences |
644 | $obj->{'Tbl'} = \%tbl; # encoding tables |
645 | return $obj; |
646 | } |
647 | |
648 | sub decode |
649 | { |
650 | my ($obj,$str,$chk) = @_; |
96d6357c |
651 | my $name = $obj->{'Name'}; |
466d6cd3 |
652 | my $tbl = $obj->{'Tbl'}; |
653 | my $seq = $obj->{'Seq'}; |
654 | my $std = $seq->[0]; |
655 | my $cur = $std; |
656 | my $uni; |
657 | while (length($str)){ |
96d6357c |
658 | my $cc = substr($str,0,1,''); |
659 | if($cc eq "~") |
660 | { |
661 | if($str =~ s/^\cJ//) |
662 | { |
663 | next; |
664 | } |
665 | elsif($str =~ s/^\~//) |
666 | { |
667 | 1; # no-op |
668 | } |
669 | elsif($str =~ s/^([{}])//) |
670 | { |
671 | $cur = "~$1"; |
672 | next; |
673 | } |
674 | elsif(! length $str) |
675 | { |
676 | $str = '~'; |
677 | last; |
678 | } |
679 | else |
680 | { |
681 | $str =~ s/^([^~])//; |
682 | croak "unknown HanZi escape sequence: ~$1"; |
683 | next; |
684 | } |
685 | } |
686 | if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') |
687 | { |
688 | $uni .= $tbl->{$cur}->decode($cc); |
466d6cd3 |
689 | next; |
96d6357c |
690 | } |
691 | my $ch = ord($cc); |
466d6cd3 |
692 | my $rep = $tbl->{$cur}->{'Rep'}; |
693 | my $touni = $tbl->{$cur}->{'ToUni'}; |
694 | my $x; |
695 | if (&$rep($ch) eq 'C') |
696 | { |
697 | $x = $touni->[0][$ch]; |
698 | } |
699 | else |
700 | { |
96d6357c |
701 | if(! length $str) |
702 | { |
703 | $str = $cc; # split leading byte |
704 | last; |
705 | } |
706 | my $c2 = substr($str,0,1,''); |
707 | $cc .= $c2; |
708 | $x = $touni->[$ch][ord($c2)]; |
466d6cd3 |
709 | } |
710 | unless (defined $x) |
711 | { |
96d6357c |
712 | Encode::Tcl::no_map_in_decode($name, $cc.$str); |
466d6cd3 |
713 | } |
714 | $uni .= $x; |
e91cad5b |
715 | } |
96d6357c |
716 | if($chk) |
717 | { |
718 | $_[1] = $cur eq $std ? $str : $cur.$str; |
719 | } |
466d6cd3 |
720 | return $uni; |
721 | } |
722 | |
723 | sub encode |
724 | { |
725 | my ($obj,$uni,$chk) = @_; |
96d6357c |
726 | my $name = $obj->{'Name'}; |
466d6cd3 |
727 | my $tbl = $obj->{'Tbl'}; |
728 | my $seq = $obj->{'Seq'}; |
729 | my $std = $seq->[0]; |
730 | my $str; |
731 | my $pre = $std; |
732 | my $cur = $pre; |
733 | |
96d6357c |
734 | while (length($uni)) |
735 | { |
736 | my $ch = substr($uni,0,1,''); |
737 | my $x; |
738 | foreach my $e_seq (@$seq) |
739 | { |
740 | $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' |
741 | ? $tbl->{$e_seq}->{FmUni}->{$ch} |
742 | : $tbl->{$e_seq}->encode($ch,1); |
743 | $cur = $e_seq and last if defined $x; |
466d6cd3 |
744 | } |
96d6357c |
745 | unless (defined $x) |
746 | { |
747 | unless($chk) |
748 | { |
749 | Encode::Tcl::no_map_in_encode(ord($ch), $name) |
750 | } |
751 | return undef; |
752 | } |
753 | if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') |
754 | { |
755 | my $def = $tbl->{$cur}->{'Def'}; |
756 | my $rep = $tbl->{$cur}->{'Rep'}; |
757 | $x = pack(&$rep($x),$x); |
758 | } |
759 | $str .= $cur eq $pre ? $x : ($pre = $cur).$x; |
760 | $str .= '~' if $x eq '~'; # to '~~' |
761 | } |
e91cad5b |
762 | $str .= $std unless $cur eq $std; |
e91cad5b |
763 | $_[1] = $uni if $chk; |
764 | return $str; |
765 | } |
466d6cd3 |
766 | |
51ef4e11 |
767 | 1; |
768 | __END__ |