(Re: [PATCH perl@12088] 2022-cn.enc of Encode.pm)
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode / Tcl.pm
CommitLineData
51ef4e11 1package Encode::Tcl;
2use strict;
3use Encode qw(find_encoding);
4use base 'Encode::Encoding';
5use Carp;
6
fc6a272d 7=head1 NAME
8
9Encode::Tcl - Tcl encodings
10
11=cut
51ef4e11 12
13sub INC_search
14{
15 foreach my $dir (@INC)
16 {
17 if (opendir(my $dh,"$dir/Encode"))
18 {
19 while (defined(my $name = readdir($dh)))
20 {
21 if ($name =~ /^(.*)\.enc$/)
22 {
23 my $canon = $1;
24 my $obj = find_encoding($canon);
25 if (!defined($obj))
26 {
27 my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__;
28 $obj->Define( $canon );
29 # warn "$canon => $obj\n";
30 }
31 }
32 }
33 closedir($dh);
34 }
35 }
36}
37
38sub import
39{
40 INC_search();
41}
42
43sub encode
44{
45 my $obj = shift;
46 my $new = $obj->loadEncoding;
47 return undef unless (defined $new);
48 return $new->encode(@_);
49}
50
51sub new_sequence
52{
53 my $obj = shift;
54 my $new = $obj->loadEncoding;
55 return undef unless (defined $new);
56 return $new->new_sequence(@_);
57}
58
59sub decode
60{
61 my $obj = shift;
62 my $new = $obj->loadEncoding;
63 return undef unless (defined $new);
64 return $new->decode(@_);
65}
66
67sub loadEncoding
68{
69 my $obj = shift;
70 my $file = $obj->{'File'};
71 my $name = $obj->name;
72 if (open(my $fh,$file))
73 {
74 my $type;
75 while (1)
76 {
77 my $line = <$fh>;
78 $type = substr($line,0,1);
79 last unless $type eq '#';
80 }
b29b78de 81 my $class = ref($obj).('::'.(
82 ($type eq 'X') ? 'Extended' :
83 ($type eq 'H') ? 'HanZi' :
84 ($type eq 'E') ? 'Escape' : 'Table'
85 ));
71a18b0f 86 # carp "Loading $file";
51ef4e11 87 bless $obj,$class;
88 return $obj if $obj->read($fh,$obj->name,$type);
89 }
90 else
91 {
92 croak("Cannot open $file for ".$obj->name);
93 }
94 $obj->Undefine($name);
95 return undef;
96}
97
98sub INC_find
99{
100 my ($class,$name) = @_;
101 my $enc;
102 foreach my $dir (@INC)
103 {
104 last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
105 }
106 return $enc;
107}
108
109package Encode::Tcl::Table;
110use base 'Encode::Encoding';
111
112use Data::Dumper;
113
114sub read
115{
116 my ($obj,$fh,$name,$type) = @_;
f57a1a59 117 my($rep, @leading);
51ef4e11 118 my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
119 my @touni;
120 my %fmuni;
121 my $count = 0;
122 $def = hex($def);
123 while ($pages--)
124 {
125 my $line = <$fh>;
126 chomp($line);
127 my $page = hex($line);
128 my @page;
f57a1a59 129 $leading[$page] = 1 if $page;
51ef4e11 130 my $ch = $page * 256;
131 for (my $i = 0; $i < 16; $i++)
132 {
133 my $line = <$fh>;
134 for (my $j = 0; $j < 16; $j++)
135 {
136 my $val = hex(substr($line,0,4,''));
137 if ($val || !$ch)
138 {
f57a1a59 139 my $uch = pack('U', $val); # chr($val);
51ef4e11 140 push(@page,$uch);
141 $fmuni{$uch} = $ch;
142 $count++;
143 }
144 else
145 {
146 push(@page,undef);
147 }
148 $ch++;
149 }
150 }
151 $touni[$page] = \@page;
152 }
f57a1a59 153 $rep = $type ne 'M' ? $obj->can("rep_$type") :
154 sub { ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'};
51ef4e11 155 $obj->{'Rep'} = $rep;
156 $obj->{'ToUni'} = \@touni;
157 $obj->{'FmUni'} = \%fmuni;
158 $obj->{'Def'} = $def;
159 $obj->{'Num'} = $count;
160 return $obj;
161}
162
163sub rep_S { 'C' }
164
165sub rep_D { 'n' }
166
f57a1a59 167#sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
51ef4e11 168
169sub representation
170{
171 my ($obj,$ch) = @_;
172 $ch = 0 unless @_ > 1;
f57a1a59 173 $obj->{'Rep'}->($ch);
51ef4e11 174}
175
176sub decode
177{
178 my ($obj,$str,$chk) = @_;
179 my $rep = $obj->{'Rep'};
180 my $touni = $obj->{'ToUni'};
e91cad5b 181 my $uni;
51ef4e11 182 while (length($str))
183 {
184 my $ch = ord(substr($str,0,1,''));
185 my $x;
186 if (&$rep($ch) eq 'C')
187 {
188 $x = $touni->[0][$ch];
189 }
190 else
191 {
192 $x = $touni->[$ch][ord(substr($str,0,1,''))];
193 }
194 unless (defined $x)
195 {
196 last if $chk;
197 # What do we do here ?
198 $x = '';
199 }
200 $uni .= $x;
201 }
202 $_[1] = $str if $chk;
203 return $uni;
204}
205
206
207sub encode
208{
209 my ($obj,$uni,$chk) = @_;
210 my $fmuni = $obj->{'FmUni'};
51ef4e11 211 my $def = $obj->{'Def'};
212 my $rep = $obj->{'Rep'};
e91cad5b 213 my $str;
51ef4e11 214 while (length($uni))
215 {
216 my $ch = substr($uni,0,1,'');
217 my $x = $fmuni->{chr(ord($ch))};
218 unless (defined $x)
219 {
220 last if ($chk);
221 $x = $def;
222 }
223 $str .= pack(&$rep($x),$x);
224 }
225 $_[1] = $uni if $chk;
226 return $str;
227}
228
229package Encode::Tcl::Escape;
230use base 'Encode::Encoding';
231
232use Carp;
233
234sub read
235{
e91cad5b 236 my ($obj,$fh,$name) = @_;
83ea2aad 237 my(%tbl, @seq, $enc, @esc, %grp);
51ef4e11 238 while (<$fh>)
239 {
240 my ($key,$val) = /^(\S+)\s+(.*)$/;
241 $val =~ s/^\{(.*?)\}/$1/g;
242 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
83ea2aad 243
e91cad5b 244 if($enc = Encode->getEncoding($key)){
245 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
d9da9e35 246 push @seq, $val;
83ea2aad 247 $grp{$val} =
248 $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO"
249 $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN"
250 $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN"
251 $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO"
252 0; # G0
e91cad5b 253 }else{
254 $obj->{$key} = $val;
255 }
d9da9e35 256 if($val =~ /^\e(.*)/){ push(@esc, quotemeta $1) }
51ef4e11 257 }
83ea2aad 258 $obj->{'Grp'} = \%grp; # graphic chars
d9da9e35 259 $obj->{'Seq'} = \@seq; # escape sequences
260 $obj->{'Tbl'} = \%tbl; # encoding tables
261 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
e91cad5b 262 return $obj;
51ef4e11 263}
264
265sub decode
266{
e91cad5b 267 my ($obj,$str,$chk) = @_;
268 my $tbl = $obj->{'Tbl'};
d9da9e35 269 my $seq = $obj->{'Seq'};
83ea2aad 270 my $grp = $obj->{'Grp'};
d9da9e35 271 my $esc = $obj->{'Esc'};
e91cad5b 272 my $ini = $obj->{'init'};
273 my $fin = $obj->{'final'};
d9da9e35 274 my $std = $seq->[0];
e91cad5b 275 my $cur = $std;
83ea2aad 276 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
b29b78de 277 my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
278 my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
e91cad5b 279 my $uni;
280 while (length($str)){
281 my $uch = substr($str,0,1,'');
282 if($uch eq "\e"){
d9da9e35 283 if($str =~ s/^($esc)//)
284 {
b29b78de 285 my $e = "\e$1";
286 $sta[ $grp->{$e} ] = $e if $tbl->{$e};
83ea2aad 287 }
288 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
83ea2aad 289 elsif($str =~ s/^N//)
290 {
b29b78de 291 $ss = 2;
83ea2aad 292 }
293 elsif($str =~ s/^O//)
294 {
b29b78de 295 $ss = 3;
d9da9e35 296 }
297 else
298 {
299 $str =~ s/^([\x20-\x2F]*[\x30-\x7E])//;
300 carp "unknown escape sequence: ESC $1";
301 }
e91cad5b 302 next;
303 }
83ea2aad 304 if($uch eq "\x0e"){
b29b78de 305 $s = 1; next;
83ea2aad 306 }
307 if($uch eq "\x0f"){
b29b78de 308 $s = 0; next;
e91cad5b 309 }
83ea2aad 310
b29b78de 311 $cur = $ss ? $sta[$ss] : $sta[$s];
83ea2aad 312
e91cad5b 313 if(ref($tbl->{$cur}) eq 'Encode::XS'){
314 $uni .= $tbl->{$cur}->decode($uch);
b29b78de 315 $ss = 0;
e91cad5b 316 next;
317 }
d9da9e35 318 my $ch = ord($uch);
e91cad5b 319 my $rep = $tbl->{$cur}->{'Rep'};
320 my $touni = $tbl->{$cur}->{'ToUni'};
d9da9e35 321 my $x;
e91cad5b 322 if (&$rep($ch) eq 'C')
323 {
324 $x = $touni->[0][$ch];
325 }
326 else
327 {
328 $x = $touni->[$ch][ord(substr($str,0,1,''))];
329 }
330 unless (defined $x)
331 {
332 last if $chk;
333 # What do we do here ?
334 $x = '';
335 }
336 $uni .= $x;
b29b78de 337 $ss = 0;
e91cad5b 338 }
339 $_[1] = $str if $chk;
340 return $uni;
51ef4e11 341}
342
343sub encode
344{
e91cad5b 345 my ($obj,$uni,$chk) = @_;
346 my $tbl = $obj->{'Tbl'};
d9da9e35 347 my $seq = $obj->{'Seq'};
83ea2aad 348 my $grp = $obj->{'Grp'};
e91cad5b 349 my $ini = $obj->{'init'};
350 my $fin = $obj->{'final'};
d9da9e35 351 my $std = $seq->[0];
e91cad5b 352 my $str = $ini;
b29b78de 353 my @sta = ($std,undef,undef,undef); # G0 .. G3 state
83ea2aad 354 my $cur = $std;
b29b78de 355 my $pG = 0; # previous G: 0 or 1.
356 my $cG = 0; # current G: 0,1,2,3.
83ea2aad 357
b29b78de 358 if($ini && defined $grp->{$ini})
83ea2aad 359 {
b29b78de 360 $sta[ $grp->{$ini} ] = $ini;
83ea2aad 361 }
51ef4e11 362
e91cad5b 363 while (length($uni)){
83ea2aad 364 my $ch = substr($uni,0,1,'');
466d6cd3 365 my $x;
83ea2aad 366 foreach my $e_seq (@$seq){
466d6cd3 367 $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
368 ? $tbl->{$e_seq}->encode($ch,1)
369 : $tbl->{$e_seq}->{FmUni}->{$ch};
83ea2aad 370 $cur = $e_seq, last if defined $x;
e91cad5b 371 }
466d6cd3 372 if(ref($tbl->{$cur}) ne 'Encode::XS')
e91cad5b 373 {
466d6cd3 374 my $def = $tbl->{$cur}->{'Def'};
375 my $rep = $tbl->{$cur}->{'Rep'};
376 unless (defined $x){
377 last if ($chk);
378 $x = $def;
379 }
380 $x = pack(&$rep($x),$x);
381 }
83ea2aad 382 $cG = $grp->{$cur};
b29b78de 383 $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
83ea2aad 384
385 $str .= $cG == 0 && $pG == 1 ? "\cO" :
386 $cG == 1 && $pG == 0 ? "\cN" :
387 $cG == 2 ? "\eN" :
b29b78de 388 $cG == 3 ? "\eO" : "";
83ea2aad 389 $str .= $x;
390 $pG = $cG if $cG < 2;
466d6cd3 391 }
83ea2aad 392 $str .= "\cO" if $pG == 1; # back to G0
b29b78de 393 $str .= $std unless $std eq $sta[0]; # GO to ASCII
83ea2aad 394 $str .= $fin; # necessary?
466d6cd3 395 $_[1] = $uni if $chk;
396 return $str;
397}
398
b29b78de 399
400package Encode::Tcl::Extended;
401use base 'Encode::Encoding';
402
403use Carp;
404
405sub read
406{
407 my ($obj,$fh,$name) = @_;
408 my(%tbl, $enc, %ssc, @key);
409 while (<$fh>)
410 {
411 my ($key,$val) = /^(\S+)\s+(.*)$/;
412 $val =~ s/\{(.*?)\}/$1/;
413 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
414
415 if($enc = Encode->getEncoding($key)){
416 push @key, $val;
417 $tbl{$val} = ref($enc) eq 'Encode::Tcl'
418 ? $enc->loadEncoding : $enc;
419 $ssc{$val} = substr($val,1) if $val =~ /^>/;
420 }else{
421 $obj->{$key} = $val;
422 }
423 }
424 $obj->{'SSC'} = \%ssc; # single shift char
425 $obj->{'Tbl'} = \%tbl; # encoding tables
426 $obj->{'Key'} = \@key; # keys of table hash
427 return $obj;
428}
429
430sub decode
431{
432 my ($obj,$str,$chk) = @_;
433 my $tbl = $obj->{'Tbl'};
434 my $ssc = $obj->{'SSC'};
435 my $cur = ''; # current state
436 my $uni;
437 while (length($str)){
438 my $uch = substr($str,0,1,'');
439 my $ch = ord($uch);
440 if(!$cur && $ch > 0x7F)
441 {
442 $cur = '>';
443 $cur .= $uch, next if $ssc->{$cur.$uch};
444 }
445 $ch ^= 0x80 if $cur;
446
447 if(ref($tbl->{$cur}) eq 'Encode::XS'){
448 $uni .= $tbl->{$cur}->decode(chr($ch));
449 $cur = '';
450 next;
451 }
452 my $rep = $tbl->{$cur}->{'Rep'};
453 my $touni = $tbl->{$cur}->{'ToUni'};
454 my $x;
455 if (&$rep($ch) eq 'C')
456 {
457 $x = $touni->[0][$ch];
458 }
459 else
460 {
461 $x = $touni->[$ch][0x80 ^ ord(substr($str,0,1,''))];
462 }
463 unless (defined $x)
464 {
465 last if $chk;
466 # What do we do here ?
467 $x = '';
468 }
469 $uni .= $x;
470 $cur = '';
471 }
472 $_[1] = $str if $chk;
473 return $uni;
474}
475
476sub encode
477{
478 my ($obj,$uni,$chk) = @_;
479 my $tbl = $obj->{'Tbl'};
480 my $ssc = $obj->{'SSC'};
481 my $key = $obj->{'Key'};
482 my $str;
483 my $cur;
484
485 while (length($uni)){
486 my $ch = substr($uni,0,1,'');
487 my $x;
488 foreach my $k (@$key){
489 $x = ref($tbl->{$k}) eq 'Encode::XS'
490 ? $k =~ /^>/
491 ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1)
492 : $tbl->{$k}->encode($ch,1)
493 : $tbl->{$k}->{FmUni}->{$ch};
494 $cur = $k, last if defined $x;
495 }
496 if(ref($tbl->{$cur}) ne 'Encode::XS')
497 {
498 my $def = $tbl->{$cur}->{'Def'};
499 my $rep = $tbl->{$cur}->{'Rep'};
500 unless (defined $x){
501 last if ($chk);
502 $x = $def;
503 }
504 my $r = &$rep($x);
505 $x = pack($r,
506 $cur =~ /^>/
507 ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x
508 : $x);
509 }
510
511 $str .= $ssc->{$cur} if defined $ssc->{$cur};
512 $str .= $x;
513 }
514 $_[1] = $uni if $chk;
515 return $str;
516}
517
466d6cd3 518package Encode::Tcl::HanZi;
519use base 'Encode::Encoding';
520
521use Carp;
522
523sub read
524{
525 my ($obj,$fh,$name) = @_;
526 my(%tbl, @seq, $enc);
527 while (<$fh>)
528 {
529 my ($key,$val) = /^(\S+)\s+(.*)$/;
530 $val =~ s/^\{(.*?)\}/$1/g;
531 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
532 if($enc = Encode->getEncoding($key)){
533 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
534 push @seq, $val;
535 }else{
536 $obj->{$key} = $val;
537 }
538 }
539 $obj->{'Seq'} = \@seq; # escape sequences
540 $obj->{'Tbl'} = \%tbl; # encoding tables
541 return $obj;
542}
543
544sub decode
545{
546 my ($obj,$str,$chk) = @_;
547 my $tbl = $obj->{'Tbl'};
548 my $seq = $obj->{'Seq'};
549 my $std = $seq->[0];
550 my $cur = $std;
551 my $uni;
552 while (length($str)){
553 my $uch = substr($str,0,1,'');
554 if($uch eq "~"){
555 if($str =~ s/^\cJ//)
556 {
557 next;
558 }
559 elsif($str =~ s/^\~//)
560 {
561 1;
562 }
563 elsif($str =~ s/^([{}])//)
564 {
565 $cur = "~$1";
566 next;
567 }
568 else
569 {
570 $str =~ s/^([^~])//;
571 carp "unknown HanZi escape sequence: ~$1";
572 next;
573 }
e91cad5b 574 }
466d6cd3 575 if(ref($tbl->{$cur}) eq 'Encode::XS'){
576 $uni .= $tbl->{$cur}->decode($uch);
577 next;
578 }
579 my $ch = ord($uch);
580 my $rep = $tbl->{$cur}->{'Rep'};
581 my $touni = $tbl->{$cur}->{'ToUni'};
582 my $x;
583 if (&$rep($ch) eq 'C')
584 {
585 $x = $touni->[0][$ch];
586 }
587 else
588 {
589 $x = $touni->[$ch][ord(substr($str,0,1,''))];
590 }
591 unless (defined $x)
592 {
593 last if $chk;
594 # What do we do here ?
595 $x = '';
596 }
597 $uni .= $x;
e91cad5b 598 }
466d6cd3 599 $_[1] = $str if $chk;
600 return $uni;
601}
602
603sub encode
604{
605 my ($obj,$uni,$chk) = @_;
606 my $tbl = $obj->{'Tbl'};
607 my $seq = $obj->{'Seq'};
608 my $std = $seq->[0];
609 my $str;
610 my $pre = $std;
611 my $cur = $pre;
612
613 while (length($uni)){
614 my $ch = chr(ord(substr($uni,0,1,'')));
615 my $x;
616 foreach my $e_seq (@$seq){
617 $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
618 ? $tbl->{$e_seq}->encode($ch,1)
619 : $tbl->{$e_seq}->{FmUni}->{$ch};
620 $cur = $e_seq and last if defined $x;
e91cad5b 621 }
466d6cd3 622 if(ref($tbl->{$cur}) ne 'Encode::XS')
623 {
624 my $def = $tbl->{$cur}->{'Def'};
625 my $rep = $tbl->{$cur}->{'Rep'};
626 unless (defined $x){
627 last if ($chk);
628 $x = $def;
629 }
630 $x = pack(&$rep($x),$x);
631 }
632 $str .= $cur eq $pre ? $x : ($pre = $cur).$x;
633 $str .= '~' if $x eq '~'; # to '~~'
e91cad5b 634 }
635 $str .= $std unless $cur eq $std;
e91cad5b 636 $_[1] = $uni if $chk;
637 return $str;
638}
466d6cd3 639
51ef4e11 6401;
641__END__