3 use Encode qw(find_encoding);
4 use base 'Encode::Encoding';
9 Encode::Tcl - Tcl encodings
15 foreach my $dir (@INC)
17 if (opendir(my $dh,"$dir/Encode"))
19 while (defined(my $name = readdir($dh)))
21 if ($name =~ /^(.*)\.enc$/)
24 my $obj = find_encoding($canon);
27 my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__;
28 $obj->Define( $canon );
29 # warn "$canon => $obj\n";
46 my $new = $obj->loadEncoding;
47 return undef unless (defined $new);
48 return $new->encode(@_);
54 my $new = $obj->loadEncoding;
55 return undef unless (defined $new);
56 return $new->new_sequence(@_);
62 my $new = $obj->loadEncoding;
63 return undef unless (defined $new);
64 return $new->decode(@_);
70 my $file = $obj->{'File'};
71 my $name = $obj->name;
72 if (open(my $fh,$file))
78 $type = substr($line,0,1);
79 last unless $type eq '#';
81 my $class = ref($obj).('::'.(($type eq 'H') ? 'HanZi' : ($type eq 'E') ? 'Escape' : 'Table'));
82 # carp "Loading $file";
84 return $obj if $obj->read($fh,$obj->name,$type);
88 croak("Cannot open $file for ".$obj->name);
90 $obj->Undefine($name);
96 my ($class,$name) = @_;
98 foreach my $dir (@INC)
100 last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
105 package Encode::Tcl::Table;
106 use base 'Encode::Encoding';
112 my ($obj,$fh,$name,$type) = @_;
114 my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
123 my $page = hex($line);
125 $leading[$page] = 1 if $page;
126 my $ch = $page * 256;
127 for (my $i = 0; $i < 16; $i++)
130 for (my $j = 0; $j < 16; $j++)
132 my $val = hex(substr($line,0,4,''));
135 my $uch = pack('U', $val); # chr($val);
147 $touni[$page] = \@page;
149 $rep = $type ne 'M' ? $obj->can("rep_$type") :
150 sub { ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'};
151 $obj->{'Rep'} = $rep;
152 $obj->{'ToUni'} = \@touni;
153 $obj->{'FmUni'} = \%fmuni;
154 $obj->{'Def'} = $def;
155 $obj->{'Num'} = $count;
163 #sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
168 $ch = 0 unless @_ > 1;
169 $obj->{'Rep'}->($ch);
174 my ($obj,$str,$chk) = @_;
175 my $rep = $obj->{'Rep'};
176 my $touni = $obj->{'ToUni'};
180 my $ch = ord(substr($str,0,1,''));
182 if (&$rep($ch) eq 'C')
184 $x = $touni->[0][$ch];
188 $x = $touni->[$ch][ord(substr($str,0,1,''))];
193 # What do we do here ?
198 $_[1] = $str if $chk;
205 my ($obj,$uni,$chk) = @_;
206 my $fmuni = $obj->{'FmUni'};
207 my $def = $obj->{'Def'};
208 my $rep = $obj->{'Rep'};
212 my $ch = substr($uni,0,1,'');
213 my $x = $fmuni->{chr(ord($ch))};
219 $str .= pack(&$rep($x),$x);
221 $_[1] = $uni if $chk;
225 package Encode::Tcl::Escape;
226 use base 'Encode::Encoding';
232 my ($obj,$fh,$name) = @_;
233 my(%tbl, @seq, $enc, @esc, %grp);
236 my ($key,$val) = /^(\S+)\s+(.*)$/;
237 $val =~ s/^\{(.*?)\}/$1/g;
238 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
240 if($enc = Encode->getEncoding($key)){
241 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
244 $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO"
245 $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN"
246 $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN"
247 $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO"
252 if($val =~ /^\e(.*)/){ push(@esc, quotemeta $1) }
254 $obj->{'Grp'} = \%grp; # graphic chars
255 $obj->{'Seq'} = \@seq; # escape sequences
256 $obj->{'Tbl'} = \%tbl; # encoding tables
257 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
263 my ($obj,$str,$chk) = @_;
264 my $tbl = $obj->{'Tbl'};
265 my $seq = $obj->{'Seq'};
266 my $grp = $obj->{'Grp'};
267 my $esc = $obj->{'Esc'};
268 my $ini = $obj->{'init'};
269 my $fin = $obj->{'final'};
272 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
273 my($g1,$g2,$g3) = (0,0,0);
275 while (length($str)){
276 my $uch = substr($str,0,1,'');
278 if($str =~ s/^($esc)//)
281 $sta[ $grp->{$esc} ] = $esc if $tbl->{$esc};
283 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
284 # but coincidental ON of G2 and G3 is explicitly avoided.
285 elsif($str =~ s/^N//)
289 elsif($str =~ s/^O//)
295 $str =~ s/^([\x20-\x2F]*[\x30-\x7E])//;
296 carp "unknown escape sequence: ESC $1";
307 $cur = $g3 ? $sta[3] : $g2 ? $sta[2] : $g1 ? $sta[1] : $sta[0];
309 if(ref($tbl->{$cur}) eq 'Encode::XS'){
310 $uni .= $tbl->{$cur}->decode($uch);
315 my $rep = $tbl->{$cur}->{'Rep'};
316 my $touni = $tbl->{$cur}->{'ToUni'};
318 if (&$rep($ch) eq 'C')
320 $x = $touni->[0][$ch];
324 $x = $touni->[$ch][ord(substr($str,0,1,''))];
329 # What do we do here ?
335 $_[1] = $str if $chk;
341 my ($obj,$uni,$chk) = @_;
342 my $tbl = $obj->{'Tbl'};
343 my $seq = $obj->{'Seq'};
344 my $grp = $obj->{'Grp'};
345 my $ini = $obj->{'init'};
346 my $fin = $obj->{'final'};
349 my @sta = ($std,undef,undef,undef);
350 my @pre = ($std,undef,undef,undef);
357 $sta[ $grp->{$ini} ] = $pre[ $grp->{$ini} ] = $ini;
360 while (length($uni)){
361 my $ch = substr($uni,0,1,'');
363 foreach my $e_seq (@$seq){
364 $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
365 ? $tbl->{$e_seq}->encode($ch,1)
366 : $tbl->{$e_seq}->{FmUni}->{$ch};
367 $cur = $e_seq, last if defined $x;
369 if(ref($tbl->{$cur}) ne 'Encode::XS')
371 my $def = $tbl->{$cur}->{'Def'};
372 my $rep = $tbl->{$cur}->{'Rep'};
377 $x = pack(&$rep($x),$x);
380 $str .= $pre[ $cG ] = $cur if $cur ne $pre[ $cG ];
382 $str .= $cG == 0 && $pG == 1 ? "\cO" :
383 $cG == 1 && $pG == 0 ? "\cN" :
385 $cG == 3 ? "\eO" : "";
387 $pG = $cG if $cG < 2;
389 $str .= $std unless $cur eq $std;
390 $str .= "\cO" if $pG == 1; # back to G0
391 $str .= $fin; # necessary?
392 $_[1] = $uni if $chk;
396 package Encode::Tcl::HanZi;
397 use base 'Encode::Encoding';
403 my ($obj,$fh,$name) = @_;
404 my(%tbl, @seq, $enc);
407 my ($key,$val) = /^(\S+)\s+(.*)$/;
408 $val =~ s/^\{(.*?)\}/$1/g;
409 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
410 if($enc = Encode->getEncoding($key)){
411 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
417 $obj->{'Seq'} = \@seq; # escape sequences
418 $obj->{'Tbl'} = \%tbl; # encoding tables
424 my ($obj,$str,$chk) = @_;
425 my $tbl = $obj->{'Tbl'};
426 my $seq = $obj->{'Seq'};
430 while (length($str)){
431 my $uch = substr($str,0,1,'');
437 elsif($str =~ s/^\~//)
441 elsif($str =~ s/^([{}])//)
449 carp "unknown HanZi escape sequence: ~$1";
453 if(ref($tbl->{$cur}) eq 'Encode::XS'){
454 $uni .= $tbl->{$cur}->decode($uch);
458 my $rep = $tbl->{$cur}->{'Rep'};
459 my $touni = $tbl->{$cur}->{'ToUni'};
461 if (&$rep($ch) eq 'C')
463 $x = $touni->[0][$ch];
467 $x = $touni->[$ch][ord(substr($str,0,1,''))];
472 # What do we do here ?
477 $_[1] = $str if $chk;
483 my ($obj,$uni,$chk) = @_;
484 my $tbl = $obj->{'Tbl'};
485 my $seq = $obj->{'Seq'};
491 while (length($uni)){
492 my $ch = chr(ord(substr($uni,0,1,'')));
494 foreach my $e_seq (@$seq){
495 $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
496 ? $tbl->{$e_seq}->encode($ch,1)
497 : $tbl->{$e_seq}->{FmUni}->{$ch};
498 $cur = $e_seq and last if defined $x;
500 if(ref($tbl->{$cur}) ne 'Encode::XS')
502 my $def = $tbl->{$cur}->{'Def'};
503 my $rep = $tbl->{$cur}->{'Rep'};
508 $x = pack(&$rep($x),$x);
510 $str .= $cur eq $pre ? $x : ($pre = $cur).$x;
511 $str .= '~' if $x eq '~'; # to '~~'
513 $str .= $std unless $cur eq $std;
514 $_[1] = $uni if $chk;