Commit | Line | Data |
51ef4e11 |
1 | package Encode::Tcl; |
2 | use strict; |
3 | use Encode qw(find_encoding); |
4 | use base 'Encode::Encoding'; |
5 | use Carp; |
6 | |
fc6a272d |
7 | =head1 NAME |
8 | |
9 | Encode::Tcl - Tcl encodings |
10 | |
11 | =cut |
51ef4e11 |
12 | |
13 | sub INC_search |
14 | { |
15 | foreach my $dir (@INC) |
16 | { |
17 | if (opendir(my $dh,"$dir/Encode")) |
18 | { |
19 | while (defined(my $name = readdir($dh))) |
20 | { |
21 | if ($name =~ /^(.*)\.enc$/) |
22 | { |
23 | my $canon = $1; |
24 | my $obj = find_encoding($canon); |
25 | if (!defined($obj)) |
26 | { |
27 | my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__; |
28 | $obj->Define( $canon ); |
29 | # warn "$canon => $obj\n"; |
30 | } |
31 | } |
32 | } |
33 | closedir($dh); |
34 | } |
35 | } |
36 | } |
37 | |
38 | sub import |
39 | { |
40 | INC_search(); |
41 | } |
42 | |
96d6357c |
43 | sub no_map_in_encode ($$) |
44 | # codepoint, enc-name; |
45 | { |
46 | carp sprintf "\"\\N{U+%x}\" does not map to %s", @_; |
47 | # /* FIXME: Skip over the character, copy in replacement and continue |
48 | # * but that is messy so for now just fail. |
49 | # */ |
50 | return; |
51 | } |
52 | |
53 | sub no_map_in_decode ($$) |
54 | # enc-name, string beginning the malform char; |
55 | { |
56 | # /* UTF-8 is supposed to be "Universal" so should not happen */ |
57 | croak sprintf "%s '%s' does not map to UTF-8", @_; |
58 | } |
59 | |
51ef4e11 |
60 | sub encode |
61 | { |
62 | my $obj = shift; |
63 | my $new = $obj->loadEncoding; |
64 | return undef unless (defined $new); |
65 | return $new->encode(@_); |
66 | } |
67 | |
68 | sub new_sequence |
69 | { |
70 | my $obj = shift; |
71 | my $new = $obj->loadEncoding; |
72 | return undef unless (defined $new); |
73 | return $new->new_sequence(@_); |
74 | } |
75 | |
76 | sub decode |
77 | { |
78 | my $obj = shift; |
79 | my $new = $obj->loadEncoding; |
80 | return undef unless (defined $new); |
81 | return $new->decode(@_); |
82 | } |
83 | |
84 | sub loadEncoding |
85 | { |
86 | my $obj = shift; |
87 | my $file = $obj->{'File'}; |
88 | my $name = $obj->name; |
89 | if (open(my $fh,$file)) |
90 | { |
91 | my $type; |
92 | while (1) |
93 | { |
94 | my $line = <$fh>; |
95 | $type = substr($line,0,1); |
96 | last unless $type eq '#'; |
97 | } |
96d6357c |
98 | my $subclass = |
99 | ($type eq 'X') ? 'Extended' : |
100 | ($type eq 'H') ? 'HanZi' : |
101 | ($type eq 'E') ? 'Escape' : 'Table'; |
102 | my $class = ref($obj) . '::' . $subclass; |
71a18b0f |
103 | # carp "Loading $file"; |
51ef4e11 |
104 | bless $obj,$class; |
105 | return $obj if $obj->read($fh,$obj->name,$type); |
106 | } |
107 | else |
108 | { |
109 | croak("Cannot open $file for ".$obj->name); |
110 | } |
111 | $obj->Undefine($name); |
112 | return undef; |
113 | } |
114 | |
115 | sub INC_find |
116 | { |
117 | my ($class,$name) = @_; |
118 | my $enc; |
119 | foreach my $dir (@INC) |
120 | { |
121 | last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc")); |
122 | } |
123 | return $enc; |
124 | } |
125 | |
126 | package Encode::Tcl::Table; |
127 | use base 'Encode::Encoding'; |
128 | |
96d6357c |
129 | use Carp; |
130 | #use Data::Dumper; |
51ef4e11 |
131 | |
132 | sub read |
133 | { |
134 | my ($obj,$fh,$name,$type) = @_; |
f57a1a59 |
135 | my($rep, @leading); |
51ef4e11 |
136 | my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); |
137 | my @touni; |
138 | my %fmuni; |
139 | my $count = 0; |
140 | $def = hex($def); |
141 | while ($pages--) |
142 | { |
143 | my $line = <$fh>; |
144 | chomp($line); |
145 | my $page = hex($line); |
146 | my @page; |
f57a1a59 |
147 | $leading[$page] = 1 if $page; |
51ef4e11 |
148 | my $ch = $page * 256; |
149 | for (my $i = 0; $i < 16; $i++) |
150 | { |
151 | my $line = <$fh>; |
152 | for (my $j = 0; $j < 16; $j++) |
153 | { |
154 | my $val = hex(substr($line,0,4,'')); |
155 | if ($val || !$ch) |
156 | { |
f57a1a59 |
157 | my $uch = pack('U', $val); # chr($val); |
51ef4e11 |
158 | push(@page,$uch); |
159 | $fmuni{$uch} = $ch; |
160 | $count++; |
161 | } |
162 | else |
163 | { |
164 | push(@page,undef); |
165 | } |
166 | $ch++; |
167 | } |
168 | } |
169 | $touni[$page] = \@page; |
170 | } |
96d6357c |
171 | $rep = $type ne 'M' |
172 | ? $obj->can("rep_$type") |
173 | : sub |
174 | { |
175 | ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'; |
176 | }; |
51ef4e11 |
177 | $obj->{'Rep'} = $rep; |
178 | $obj->{'ToUni'} = \@touni; |
179 | $obj->{'FmUni'} = \%fmuni; |
180 | $obj->{'Def'} = $def; |
181 | $obj->{'Num'} = $count; |
182 | return $obj; |
183 | } |
184 | |
185 | sub rep_S { 'C' } |
186 | |
187 | sub rep_D { 'n' } |
188 | |
f57a1a59 |
189 | #sub rep_M { ($_[0] > 255) ? 'n' : 'C' } |
51ef4e11 |
190 | |
191 | sub representation |
192 | { |
193 | my ($obj,$ch) = @_; |
194 | $ch = 0 unless @_ > 1; |
f57a1a59 |
195 | $obj->{'Rep'}->($ch); |
51ef4e11 |
196 | } |
197 | |
198 | sub decode |
199 | { |
96d6357c |
200 | my($obj,$str,$chk) = @_; |
201 | my $name = $obj->{'Name'}; |
51ef4e11 |
202 | my $rep = $obj->{'Rep'}; |
203 | my $touni = $obj->{'ToUni'}; |
e91cad5b |
204 | my $uni; |
51ef4e11 |
205 | while (length($str)) |
206 | { |
96d6357c |
207 | my $cc = substr($str,0,1,''); |
208 | my $ch = ord($cc); |
51ef4e11 |
209 | my $x; |
210 | if (&$rep($ch) eq 'C') |
211 | { |
212 | $x = $touni->[0][$ch]; |
213 | } |
214 | else |
215 | { |
96d6357c |
216 | if(! length $str) |
217 | { |
218 | $str = pack('C',$ch); # split leading byte |
219 | last; |
220 | } |
221 | my $c2 = substr($str,0,1,''); |
222 | $cc .= $c2; |
223 | $x = $touni->[$ch][ord($c2)]; |
51ef4e11 |
224 | } |
225 | unless (defined $x) |
226 | { |
96d6357c |
227 | Encode::Tcl::no_map_in_decode($name, $cc.$str); |
51ef4e11 |
228 | } |
229 | $uni .= $x; |
230 | } |
231 | $_[1] = $str if $chk; |
232 | return $uni; |
233 | } |
234 | |
235 | |
236 | sub encode |
237 | { |
238 | my ($obj,$uni,$chk) = @_; |
239 | my $fmuni = $obj->{'FmUni'}; |
51ef4e11 |
240 | my $def = $obj->{'Def'}; |
96d6357c |
241 | my $name = $obj->{'Name'}; |
51ef4e11 |
242 | my $rep = $obj->{'Rep'}; |
e91cad5b |
243 | my $str; |
51ef4e11 |
244 | while (length($uni)) |
245 | { |
246 | my $ch = substr($uni,0,1,''); |
96d6357c |
247 | my $x = $fmuni->{$ch}; |
248 | unless(defined $x) |
51ef4e11 |
249 | { |
96d6357c |
250 | unless($chk) |
251 | { |
252 | Encode::Tcl::no_map_in_encode(ord($ch), $name) |
253 | } |
254 | return undef; |
51ef4e11 |
255 | } |
256 | $str .= pack(&$rep($x),$x); |
257 | } |
258 | $_[1] = $uni if $chk; |
259 | return $str; |
260 | } |
261 | |
262 | package Encode::Tcl::Escape; |
263 | use base 'Encode::Encoding'; |
264 | |
265 | use Carp; |
266 | |
96d6357c |
267 | use constant SI => "\cO"; |
268 | use constant SO => "\cN"; |
269 | use constant SS2 => "\eN"; |
270 | use constant SS3 => "\eO"; |
271 | |
51ef4e11 |
272 | sub read |
273 | { |
e91cad5b |
274 | my ($obj,$fh,$name) = @_; |
83ea2aad |
275 | my(%tbl, @seq, $enc, @esc, %grp); |
51ef4e11 |
276 | while (<$fh>) |
277 | { |
96d6357c |
278 | next unless /^(\S+)\s+(.*)$/; |
279 | my ($key,$val) = ($1,$2); |
51ef4e11 |
280 | $val =~ s/^\{(.*?)\}/$1/g; |
281 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
83ea2aad |
282 | |
96d6357c |
283 | if($enc = Encode->getEncoding($key)) |
284 | { |
e91cad5b |
285 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
d9da9e35 |
286 | push @seq, $val; |
83ea2aad |
287 | $grp{$val} = |
96d6357c |
288 | $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO" |
289 | $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN" |
290 | $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN" |
291 | $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO" |
292 | 0; # G0 |
293 | } |
294 | else |
295 | { |
e91cad5b |
296 | $obj->{$key} = $val; |
96d6357c |
297 | } |
298 | if($val =~ /^\e(.*)/) |
299 | { |
300 | push(@esc, quotemeta $1); |
301 | } |
51ef4e11 |
302 | } |
83ea2aad |
303 | $obj->{'Grp'} = \%grp; # graphic chars |
d9da9e35 |
304 | $obj->{'Seq'} = \@seq; # escape sequences |
305 | $obj->{'Tbl'} = \%tbl; # encoding tables |
306 | $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC |
e91cad5b |
307 | return $obj; |
51ef4e11 |
308 | } |
309 | |
310 | sub decode |
311 | { |
e91cad5b |
312 | my ($obj,$str,$chk) = @_; |
96d6357c |
313 | my $name = $obj->{'Name'}; |
e91cad5b |
314 | my $tbl = $obj->{'Tbl'}; |
d9da9e35 |
315 | my $seq = $obj->{'Seq'}; |
83ea2aad |
316 | my $grp = $obj->{'Grp'}; |
d9da9e35 |
317 | my $esc = $obj->{'Esc'}; |
e91cad5b |
318 | my $ini = $obj->{'init'}; |
319 | my $fin = $obj->{'final'}; |
d9da9e35 |
320 | my $std = $seq->[0]; |
e91cad5b |
321 | my $cur = $std; |
83ea2aad |
322 | my @sta = ($std, undef, undef, undef); # G0 .. G3 state |
b29b78de |
323 | my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1); |
324 | my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3); |
e91cad5b |
325 | my $uni; |
96d6357c |
326 | while (length($str)) |
327 | { |
328 | my $cc = substr($str,0,1,''); |
329 | if($cc eq "\e") |
330 | { |
331 | if($str =~ s/^($esc)//) |
332 | { |
333 | my $e = "\e$1"; |
334 | $sta[ $grp->{$e} ] = $e if $tbl->{$e}; |
335 | } |
83ea2aad |
336 | # appearance of "\eN\eO" or "\eO\eN" isn't supposed. |
96d6357c |
337 | # but in that case, the former will be ignored. |
338 | elsif($str =~ s/^N//) |
339 | { |
340 | $ss = 2; |
341 | } |
342 | elsif($str =~ s/^O//) |
343 | { |
344 | $ss = 3; |
345 | } |
346 | else |
347 | { |
348 | # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped. |
349 | $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//; |
350 | if($chk && ! length $str) |
351 | { |
352 | $str = "\e$1"; # split sequence |
353 | last; |
354 | } |
355 | croak "unknown escape sequence: ESC $1"; |
356 | } |
357 | next; |
358 | } |
359 | if($cc eq SO) |
360 | { |
361 | $s = 1; next; |
362 | } |
363 | if($cc eq SI) |
364 | { |
365 | $s = 0; next; |
366 | } |
83ea2aad |
367 | |
b29b78de |
368 | $cur = $ss ? $sta[$ss] : $sta[$s]; |
83ea2aad |
369 | |
96d6357c |
370 | if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') |
371 | { |
372 | $uni .= $tbl->{$cur}->decode($cc); |
b29b78de |
373 | $ss = 0; |
e91cad5b |
374 | next; |
96d6357c |
375 | } |
376 | my $ch = ord($cc); |
e91cad5b |
377 | my $rep = $tbl->{$cur}->{'Rep'}; |
378 | my $touni = $tbl->{$cur}->{'ToUni'}; |
d9da9e35 |
379 | my $x; |
e91cad5b |
380 | if (&$rep($ch) eq 'C') |
381 | { |
382 | $x = $touni->[0][$ch]; |
383 | } |
384 | else |
385 | { |
96d6357c |
386 | if(! length $str) |
387 | { |
388 | $str = $cc; # split leading byte |
389 | last; |
390 | } |
391 | my $c2 = substr($str,0,1,''); |
392 | $cc .= $c2; |
393 | $x = $touni->[$ch][ord($c2)]; |
e91cad5b |
394 | } |
395 | unless (defined $x) |
396 | { |
96d6357c |
397 | Encode::Tcl::no_map_in_decode($name, $cc.$str); |
e91cad5b |
398 | } |
399 | $uni .= $x; |
b29b78de |
400 | $ss = 0; |
e91cad5b |
401 | } |
96d6357c |
402 | if($chk) |
403 | { |
404 | my $back = join('', grep defined($_) && $_ ne $std, @sta); |
405 | $back .= SO if $s; |
406 | $back .= $ss == 2 ? SS2 : SS3 if $ss; |
407 | $_[1] = $back.$str; |
408 | } |
409 | return $uni; |
51ef4e11 |
410 | } |
411 | |
412 | sub encode |
413 | { |
e91cad5b |
414 | my ($obj,$uni,$chk) = @_; |
96d6357c |
415 | my $name = $obj->{'Name'}; |
e91cad5b |
416 | my $tbl = $obj->{'Tbl'}; |
d9da9e35 |
417 | my $seq = $obj->{'Seq'}; |
83ea2aad |
418 | my $grp = $obj->{'Grp'}; |
e91cad5b |
419 | my $ini = $obj->{'init'}; |
420 | my $fin = $obj->{'final'}; |
d9da9e35 |
421 | my $std = $seq->[0]; |
e91cad5b |
422 | my $str = $ini; |
b29b78de |
423 | my @sta = ($std,undef,undef,undef); # G0 .. G3 state |
83ea2aad |
424 | my $cur = $std; |
b29b78de |
425 | my $pG = 0; # previous G: 0 or 1. |
426 | my $cG = 0; # current G: 0,1,2,3. |
83ea2aad |
427 | |
b29b78de |
428 | if($ini && defined $grp->{$ini}) |
83ea2aad |
429 | { |
96d6357c |
430 | $sta[ $grp->{$ini} ] = $ini; |
83ea2aad |
431 | } |
51ef4e11 |
432 | |
96d6357c |
433 | while (length($uni)) |
434 | { |
435 | my $ch = substr($uni,0,1,''); |
436 | my $x; |
437 | foreach my $e_seq (@$seq) |
438 | { |
439 | $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' |
440 | ? $tbl->{$e_seq}->{FmUni}->{$ch} |
441 | : $tbl->{$e_seq}->encode($ch,1); |
442 | $cur = $e_seq, last if defined $x; |
466d6cd3 |
443 | } |
96d6357c |
444 | unless (defined $x) |
445 | { |
446 | unless($chk) |
447 | { |
448 | Encode::Tcl::no_map_in_encode(ord($ch), $name) |
449 | } |
450 | return undef; |
466d6cd3 |
451 | } |
96d6357c |
452 | if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') |
453 | { |
454 | my $def = $tbl->{$cur}->{'Def'}; |
455 | my $rep = $tbl->{$cur}->{'Rep'}; |
456 | $x = pack(&$rep($x),$x); |
457 | } |
458 | $cG = $grp->{$cur}; |
459 | $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG]; |
460 | |
461 | $str .= $cG == 0 && $pG == 1 ? SI : |
462 | $cG == 1 && $pG == 0 ? SO : |
463 | $cG == 2 ? SS2 : |
464 | $cG == 3 ? SS3 : ""; |
465 | $str .= $x; |
466 | $pG = $cG if $cG < 2; |
467 | } |
468 | $str .= SI if $pG == 1; # back to G0 |
b29b78de |
469 | $str .= $std unless $std eq $sta[0]; # GO to ASCII |
83ea2aad |
470 | $str .= $fin; # necessary? |
466d6cd3 |
471 | $_[1] = $uni if $chk; |
472 | return $str; |
473 | } |
474 | |
b29b78de |
475 | |
476 | package Encode::Tcl::Extended; |
477 | use base 'Encode::Encoding'; |
478 | |
479 | use Carp; |
480 | |
481 | sub read |
482 | { |
483 | my ($obj,$fh,$name) = @_; |
484 | my(%tbl, $enc, %ssc, @key); |
485 | while (<$fh>) |
486 | { |
96d6357c |
487 | next unless /^(\S+)\s+(.*)$/; |
488 | my ($key,$val) = ($1,$2); |
b29b78de |
489 | $val =~ s/\{(.*?)\}/$1/; |
490 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
491 | |
96d6357c |
492 | if($enc = Encode->getEncoding($key)) |
493 | { |
b29b78de |
494 | push @key, $val; |
96d6357c |
495 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
b29b78de |
496 | $ssc{$val} = substr($val,1) if $val =~ /^>/; |
96d6357c |
497 | } |
498 | else |
499 | { |
b29b78de |
500 | $obj->{$key} = $val; |
96d6357c |
501 | } |
b29b78de |
502 | } |
503 | $obj->{'SSC'} = \%ssc; # single shift char |
504 | $obj->{'Tbl'} = \%tbl; # encoding tables |
505 | $obj->{'Key'} = \@key; # keys of table hash |
506 | return $obj; |
507 | } |
508 | |
509 | sub decode |
510 | { |
511 | my ($obj,$str,$chk) = @_; |
96d6357c |
512 | my $name = $obj->{'Name'}; |
513 | my $tbl = $obj->{'Tbl'}; |
514 | my $ssc = $obj->{'SSC'}; |
b29b78de |
515 | my $cur = ''; # current state |
516 | my $uni; |
96d6357c |
517 | while (length($str)) |
518 | { |
519 | my $cc = substr($str,0,1,''); |
520 | my $ch = ord($cc); |
b29b78de |
521 | if(!$cur && $ch > 0x7F) |
522 | { |
523 | $cur = '>'; |
96d6357c |
524 | $cur .= $cc, next if $ssc->{$cur.$cc}; |
b29b78de |
525 | } |
526 | $ch ^= 0x80 if $cur; |
527 | |
96d6357c |
528 | if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') |
529 | { |
530 | $uni .= $tbl->{$cur}->decode($cc); |
b29b78de |
531 | $cur = ''; |
532 | next; |
96d6357c |
533 | } |
b29b78de |
534 | my $rep = $tbl->{$cur}->{'Rep'}; |
535 | my $touni = $tbl->{$cur}->{'ToUni'}; |
536 | my $x; |
537 | if (&$rep($ch) eq 'C') |
538 | { |
539 | $x = $touni->[0][$ch]; |
540 | } |
541 | else |
542 | { |
96d6357c |
543 | if(! length $str) |
544 | { |
545 | $str = $cc; # split leading byte |
546 | last; |
547 | } |
548 | my $c2 = substr($str,0,1,''); |
549 | $cc .= $c2; |
550 | $x = $touni->[$ch][0x80 ^ ord($c2)]; |
b29b78de |
551 | } |
552 | unless (defined $x) |
553 | { |
96d6357c |
554 | Encode::Tcl::no_map_in_decode($name, $cc.$str); |
b29b78de |
555 | } |
556 | $uni .= $x; |
557 | $cur = ''; |
558 | } |
96d6357c |
559 | if($chk) |
560 | { |
561 | $cur =~ s/>//; |
562 | $_[1] = $cur ne '' ? $cur.$str : $str; |
563 | } |
b29b78de |
564 | return $uni; |
565 | } |
566 | |
567 | sub encode |
568 | { |
569 | my ($obj,$uni,$chk) = @_; |
96d6357c |
570 | my $name = $obj->{'Name'}; |
b29b78de |
571 | my $tbl = $obj->{'Tbl'}; |
572 | my $ssc = $obj->{'SSC'}; |
573 | my $key = $obj->{'Key'}; |
574 | my $str; |
575 | my $cur; |
576 | |
96d6357c |
577 | while (length($uni)) |
578 | { |
579 | my $ch = substr($uni,0,1,''); |
580 | my $x; |
581 | foreach my $k (@$key) |
582 | { |
583 | $x = ref($tbl->{$k}) ne 'Encode::Tcl::Table' |
584 | ? $k =~ /^>/ |
585 | ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1) |
586 | : $tbl->{$k}->encode($ch,1) |
587 | : $tbl->{$k}->{FmUni}->{$ch}; |
588 | $cur = $k, last if defined $x; |
589 | } |
590 | unless (defined $x) |
591 | { |
592 | unless($chk) |
593 | { |
594 | Encode::Tcl::no_map_in_encode(ord($ch), $name) |
595 | } |
596 | return undef; |
597 | } |
598 | if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') |
599 | { |
600 | my $def = $tbl->{$cur}->{'Def'}; |
601 | my $rep = $tbl->{$cur}->{'Rep'}; |
602 | my $r = &$rep($x); |
603 | $x = pack($r, |
b29b78de |
604 | $cur =~ /^>/ |
605 | ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x |
606 | : $x); |
96d6357c |
607 | } |
608 | $str .= $ssc->{$cur} if defined $ssc->{$cur}; |
609 | $str .= $x; |
610 | } |
b29b78de |
611 | $_[1] = $uni if $chk; |
612 | return $str; |
613 | } |
614 | |
466d6cd3 |
615 | package Encode::Tcl::HanZi; |
616 | use base 'Encode::Encoding'; |
617 | |
618 | use Carp; |
619 | |
620 | sub read |
621 | { |
622 | my ($obj,$fh,$name) = @_; |
623 | my(%tbl, @seq, $enc); |
624 | while (<$fh>) |
625 | { |
96d6357c |
626 | next unless /^(\S+)\s+(.*)$/; |
627 | my ($key,$val) = ($1,$2); |
466d6cd3 |
628 | $val =~ s/^\{(.*?)\}/$1/g; |
629 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
96d6357c |
630 | if($enc = Encode->getEncoding($key)) |
631 | { |
466d6cd3 |
632 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
633 | push @seq, $val; |
96d6357c |
634 | } |
635 | else |
636 | { |
466d6cd3 |
637 | $obj->{$key} = $val; |
96d6357c |
638 | } |
466d6cd3 |
639 | } |
640 | $obj->{'Seq'} = \@seq; # escape sequences |
641 | $obj->{'Tbl'} = \%tbl; # encoding tables |
642 | return $obj; |
643 | } |
644 | |
645 | sub decode |
646 | { |
647 | my ($obj,$str,$chk) = @_; |
96d6357c |
648 | my $name = $obj->{'Name'}; |
466d6cd3 |
649 | my $tbl = $obj->{'Tbl'}; |
650 | my $seq = $obj->{'Seq'}; |
651 | my $std = $seq->[0]; |
652 | my $cur = $std; |
653 | my $uni; |
654 | while (length($str)){ |
96d6357c |
655 | my $cc = substr($str,0,1,''); |
656 | if($cc eq "~") |
657 | { |
658 | if($str =~ s/^\cJ//) |
659 | { |
660 | next; |
661 | } |
662 | elsif($str =~ s/^\~//) |
663 | { |
664 | 1; # no-op |
665 | } |
666 | elsif($str =~ s/^([{}])//) |
667 | { |
668 | $cur = "~$1"; |
669 | next; |
670 | } |
671 | elsif(! length $str) |
672 | { |
673 | $str = '~'; |
674 | last; |
675 | } |
676 | else |
677 | { |
678 | $str =~ s/^([^~])//; |
679 | croak "unknown HanZi escape sequence: ~$1"; |
680 | next; |
681 | } |
682 | } |
683 | if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') |
684 | { |
685 | $uni .= $tbl->{$cur}->decode($cc); |
466d6cd3 |
686 | next; |
96d6357c |
687 | } |
688 | my $ch = ord($cc); |
466d6cd3 |
689 | my $rep = $tbl->{$cur}->{'Rep'}; |
690 | my $touni = $tbl->{$cur}->{'ToUni'}; |
691 | my $x; |
692 | if (&$rep($ch) eq 'C') |
693 | { |
694 | $x = $touni->[0][$ch]; |
695 | } |
696 | else |
697 | { |
96d6357c |
698 | if(! length $str) |
699 | { |
700 | $str = $cc; # split leading byte |
701 | last; |
702 | } |
703 | my $c2 = substr($str,0,1,''); |
704 | $cc .= $c2; |
705 | $x = $touni->[$ch][ord($c2)]; |
466d6cd3 |
706 | } |
707 | unless (defined $x) |
708 | { |
96d6357c |
709 | Encode::Tcl::no_map_in_decode($name, $cc.$str); |
466d6cd3 |
710 | } |
711 | $uni .= $x; |
e91cad5b |
712 | } |
96d6357c |
713 | if($chk) |
714 | { |
715 | $_[1] = $cur eq $std ? $str : $cur.$str; |
716 | } |
466d6cd3 |
717 | return $uni; |
718 | } |
719 | |
720 | sub encode |
721 | { |
722 | my ($obj,$uni,$chk) = @_; |
96d6357c |
723 | my $name = $obj->{'Name'}; |
466d6cd3 |
724 | my $tbl = $obj->{'Tbl'}; |
725 | my $seq = $obj->{'Seq'}; |
726 | my $std = $seq->[0]; |
727 | my $str; |
728 | my $pre = $std; |
729 | my $cur = $pre; |
730 | |
96d6357c |
731 | while (length($uni)) |
732 | { |
733 | my $ch = substr($uni,0,1,''); |
734 | my $x; |
735 | foreach my $e_seq (@$seq) |
736 | { |
737 | $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' |
738 | ? $tbl->{$e_seq}->{FmUni}->{$ch} |
739 | : $tbl->{$e_seq}->encode($ch,1); |
740 | $cur = $e_seq and last if defined $x; |
466d6cd3 |
741 | } |
96d6357c |
742 | unless (defined $x) |
743 | { |
744 | unless($chk) |
745 | { |
746 | Encode::Tcl::no_map_in_encode(ord($ch), $name) |
747 | } |
748 | return undef; |
749 | } |
750 | if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') |
751 | { |
752 | my $def = $tbl->{$cur}->{'Def'}; |
753 | my $rep = $tbl->{$cur}->{'Rep'}; |
754 | $x = pack(&$rep($x),$x); |
755 | } |
756 | $str .= $cur eq $pre ? $x : ($pre = $cur).$x; |
757 | $str .= '~' if $x eq '~'; # to '~~' |
758 | } |
e91cad5b |
759 | $str .= $std unless $cur eq $std; |
e91cad5b |
760 | $_[1] = $uni if $chk; |
761 | return $str; |
762 | } |
466d6cd3 |
763 | |
51ef4e11 |
764 | 1; |
765 | __END__ |