3 use Encode qw(find_encoding);
4 use base 'Encode::Encoding';
9 Encode::Tcl - Tcl encodings
15 foreach my $dir (@INC)
17 if (opendir(my $dh,"$dir/Encode"))
19 while (defined(my $name = readdir($dh)))
21 if ($name =~ /^(.*)\.enc$/)
24 my $obj = find_encoding($canon);
27 my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__;
28 $obj->Define( $canon );
29 # warn "$canon => $obj\n";
46 my $new = $obj->loadEncoding;
47 return undef unless (defined $new);
48 return $new->encode(@_);
54 my $new = $obj->loadEncoding;
55 return undef unless (defined $new);
56 return $new->new_sequence(@_);
62 my $new = $obj->loadEncoding;
63 return undef unless (defined $new);
64 return $new->decode(@_);
70 my $file = $obj->{'File'};
71 my $name = $obj->name;
72 if (open(my $fh,$file))
78 $type = substr($line,0,1);
79 last unless $type eq '#';
81 my $class = ref($obj).('::'.(
82 ($type eq 'X') ? 'Extended' :
83 ($type eq 'H') ? 'HanZi' :
84 ($type eq 'E') ? 'Escape' : 'Table'
86 # carp "Loading $file";
88 return $obj if $obj->read($fh,$obj->name,$type);
92 croak("Cannot open $file for ".$obj->name);
94 $obj->Undefine($name);
100 my ($class,$name) = @_;
102 foreach my $dir (@INC)
104 last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
109 package Encode::Tcl::Table;
110 use base 'Encode::Encoding';
116 my ($obj,$fh,$name,$type) = @_;
118 my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
127 my $page = hex($line);
129 $leading[$page] = 1 if $page;
130 my $ch = $page * 256;
131 for (my $i = 0; $i < 16; $i++)
134 for (my $j = 0; $j < 16; $j++)
136 my $val = hex(substr($line,0,4,''));
139 my $uch = pack('U', $val); # chr($val);
151 $touni[$page] = \@page;
153 $rep = $type ne 'M' ? $obj->can("rep_$type") :
154 sub { ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'};
155 $obj->{'Rep'} = $rep;
156 $obj->{'ToUni'} = \@touni;
157 $obj->{'FmUni'} = \%fmuni;
158 $obj->{'Def'} = $def;
159 $obj->{'Num'} = $count;
167 #sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
172 $ch = 0 unless @_ > 1;
173 $obj->{'Rep'}->($ch);
178 my ($obj,$str,$chk) = @_;
179 my $rep = $obj->{'Rep'};
180 my $touni = $obj->{'ToUni'};
184 my $ch = ord(substr($str,0,1,''));
186 if (&$rep($ch) eq 'C')
188 $x = $touni->[0][$ch];
192 $x = $touni->[$ch][ord(substr($str,0,1,''))];
197 # What do we do here ?
202 $_[1] = $str if $chk;
209 my ($obj,$uni,$chk) = @_;
210 my $fmuni = $obj->{'FmUni'};
211 my $def = $obj->{'Def'};
212 my $rep = $obj->{'Rep'};
216 my $ch = substr($uni,0,1,'');
217 my $x = $fmuni->{chr(ord($ch))};
223 $str .= pack(&$rep($x),$x);
225 $_[1] = $uni if $chk;
229 package Encode::Tcl::Escape;
230 use base 'Encode::Encoding';
236 my ($obj,$fh,$name) = @_;
237 my(%tbl, @seq, $enc, @esc, %grp);
240 my ($key,$val) = /^(\S+)\s+(.*)$/;
241 $val =~ s/^\{(.*?)\}/$1/g;
242 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
244 if($enc = Encode->getEncoding($key)){
245 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
248 $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO"
249 $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN"
250 $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN"
251 $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO"
256 if($val =~ /^\e(.*)/){ push(@esc, quotemeta $1) }
258 $obj->{'Grp'} = \%grp; # graphic chars
259 $obj->{'Seq'} = \@seq; # escape sequences
260 $obj->{'Tbl'} = \%tbl; # encoding tables
261 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
267 my ($obj,$str,$chk) = @_;
268 my $tbl = $obj->{'Tbl'};
269 my $seq = $obj->{'Seq'};
270 my $grp = $obj->{'Grp'};
271 my $esc = $obj->{'Esc'};
272 my $ini = $obj->{'init'};
273 my $fin = $obj->{'final'};
276 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
277 my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
278 my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
280 while (length($str)){
281 my $uch = substr($str,0,1,'');
283 if($str =~ s/^($esc)//)
286 $sta[ $grp->{$e} ] = $e if $tbl->{$e};
288 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
289 elsif($str =~ s/^N//)
293 elsif($str =~ s/^O//)
299 $str =~ s/^([\x20-\x2F]*[\x30-\x7E])//;
300 carp "unknown escape sequence: ESC $1";
311 $cur = $ss ? $sta[$ss] : $sta[$s];
313 if(ref($tbl->{$cur}) eq 'Encode::XS'){
314 $uni .= $tbl->{$cur}->decode($uch);
319 my $rep = $tbl->{$cur}->{'Rep'};
320 my $touni = $tbl->{$cur}->{'ToUni'};
322 if (&$rep($ch) eq 'C')
324 $x = $touni->[0][$ch];
328 $x = $touni->[$ch][ord(substr($str,0,1,''))];
333 # What do we do here ?
339 $_[1] = $str if $chk;
345 my ($obj,$uni,$chk) = @_;
346 my $tbl = $obj->{'Tbl'};
347 my $seq = $obj->{'Seq'};
348 my $grp = $obj->{'Grp'};
349 my $ini = $obj->{'init'};
350 my $fin = $obj->{'final'};
353 my @sta = ($std,undef,undef,undef); # G0 .. G3 state
355 my $pG = 0; # previous G: 0 or 1.
356 my $cG = 0; # current G: 0,1,2,3.
358 if($ini && defined $grp->{$ini})
360 $sta[ $grp->{$ini} ] = $ini;
363 while (length($uni)){
364 my $ch = substr($uni,0,1,'');
366 foreach my $e_seq (@$seq){
367 $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
368 ? $tbl->{$e_seq}->encode($ch,1)
369 : $tbl->{$e_seq}->{FmUni}->{$ch};
370 $cur = $e_seq, last if defined $x;
372 if(ref($tbl->{$cur}) ne 'Encode::XS')
374 my $def = $tbl->{$cur}->{'Def'};
375 my $rep = $tbl->{$cur}->{'Rep'};
380 $x = pack(&$rep($x),$x);
383 $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
385 $str .= $cG == 0 && $pG == 1 ? "\cO" :
386 $cG == 1 && $pG == 0 ? "\cN" :
388 $cG == 3 ? "\eO" : "";
390 $pG = $cG if $cG < 2;
392 $str .= "\cO" if $pG == 1; # back to G0
393 $str .= $std unless $std eq $sta[0]; # GO to ASCII
394 $str .= $fin; # necessary?
395 $_[1] = $uni if $chk;
400 package Encode::Tcl::Extended;
401 use base 'Encode::Encoding';
407 my ($obj,$fh,$name) = @_;
408 my(%tbl, $enc, %ssc, @key);
411 my ($key,$val) = /^(\S+)\s+(.*)$/;
412 $val =~ s/\{(.*?)\}/$1/;
413 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
415 if($enc = Encode->getEncoding($key)){
417 $tbl{$val} = ref($enc) eq 'Encode::Tcl'
418 ? $enc->loadEncoding : $enc;
419 $ssc{$val} = substr($val,1) if $val =~ /^>/;
424 $obj->{'SSC'} = \%ssc; # single shift char
425 $obj->{'Tbl'} = \%tbl; # encoding tables
426 $obj->{'Key'} = \@key; # keys of table hash
432 my ($obj,$str,$chk) = @_;
433 my $tbl = $obj->{'Tbl'};
434 my $ssc = $obj->{'SSC'};
435 my $cur = ''; # current state
437 while (length($str)){
438 my $uch = substr($str,0,1,'');
440 if(!$cur && $ch > 0x7F)
443 $cur .= $uch, next if $ssc->{$cur.$uch};
447 if(ref($tbl->{$cur}) eq 'Encode::XS'){
448 $uni .= $tbl->{$cur}->decode(chr($ch));
452 my $rep = $tbl->{$cur}->{'Rep'};
453 my $touni = $tbl->{$cur}->{'ToUni'};
455 if (&$rep($ch) eq 'C')
457 $x = $touni->[0][$ch];
461 $x = $touni->[$ch][0x80 ^ ord(substr($str,0,1,''))];
466 # What do we do here ?
472 $_[1] = $str if $chk;
478 my ($obj,$uni,$chk) = @_;
479 my $tbl = $obj->{'Tbl'};
480 my $ssc = $obj->{'SSC'};
481 my $key = $obj->{'Key'};
485 while (length($uni)){
486 my $ch = substr($uni,0,1,'');
488 foreach my $k (@$key){
489 $x = ref($tbl->{$k}) eq 'Encode::XS'
491 ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1)
492 : $tbl->{$k}->encode($ch,1)
493 : $tbl->{$k}->{FmUni}->{$ch};
494 $cur = $k, last if defined $x;
496 if(ref($tbl->{$cur}) ne 'Encode::XS')
498 my $def = $tbl->{$cur}->{'Def'};
499 my $rep = $tbl->{$cur}->{'Rep'};
507 ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x
511 $str .= $ssc->{$cur} if defined $ssc->{$cur};
514 $_[1] = $uni if $chk;
518 package Encode::Tcl::HanZi;
519 use base 'Encode::Encoding';
525 my ($obj,$fh,$name) = @_;
526 my(%tbl, @seq, $enc);
529 my ($key,$val) = /^(\S+)\s+(.*)$/;
530 $val =~ s/^\{(.*?)\}/$1/g;
531 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
532 if($enc = Encode->getEncoding($key)){
533 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
539 $obj->{'Seq'} = \@seq; # escape sequences
540 $obj->{'Tbl'} = \%tbl; # encoding tables
546 my ($obj,$str,$chk) = @_;
547 my $tbl = $obj->{'Tbl'};
548 my $seq = $obj->{'Seq'};
552 while (length($str)){
553 my $uch = substr($str,0,1,'');
559 elsif($str =~ s/^\~//)
563 elsif($str =~ s/^([{}])//)
571 carp "unknown HanZi escape sequence: ~$1";
575 if(ref($tbl->{$cur}) eq 'Encode::XS'){
576 $uni .= $tbl->{$cur}->decode($uch);
580 my $rep = $tbl->{$cur}->{'Rep'};
581 my $touni = $tbl->{$cur}->{'ToUni'};
583 if (&$rep($ch) eq 'C')
585 $x = $touni->[0][$ch];
589 $x = $touni->[$ch][ord(substr($str,0,1,''))];
594 # What do we do here ?
599 $_[1] = $str if $chk;
605 my ($obj,$uni,$chk) = @_;
606 my $tbl = $obj->{'Tbl'};
607 my $seq = $obj->{'Seq'};
613 while (length($uni)){
614 my $ch = chr(ord(substr($uni,0,1,'')));
616 foreach my $e_seq (@$seq){
617 $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
618 ? $tbl->{$e_seq}->encode($ch,1)
619 : $tbl->{$e_seq}->{FmUni}->{$ch};
620 $cur = $e_seq and last if defined $x;
622 if(ref($tbl->{$cur}) ne 'Encode::XS')
624 my $def = $tbl->{$cur}->{'Def'};
625 my $rep = $tbl->{$cur}->{'Rep'};
630 $x = pack(&$rep($x),$x);
632 $str .= $cur eq $pre ? $x : ($pre = $cur).$x;
633 $str .= '~' if $x eq '~'; # to '~~'
635 $str .= $std unless $cur eq $std;
636 $_[1] = $uni if $chk;