6 use Encode qw(find_encoding);
7 use base 'Encode::Encoding';
12 Encode::Tcl - Tcl encodings
18 foreach my $dir (@INC)
20 if (opendir(my $dh,"$dir/Encode"))
22 while (defined(my $name = readdir($dh)))
24 if ($name =~ /^(.*)\.enc$/)
27 my $obj = find_encoding($canon);
30 my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__;
31 $obj->Define( $canon );
32 # warn "$canon => $obj\n";
46 sub no_map_in_encode ($$)
47 # codepoint, enc-name;
49 carp sprintf "\"\\N{U+%x}\" does not map to %s", @_;
50 # /* FIXME: Skip over the character, copy in replacement and continue
51 # * but that is messy so for now just fail.
56 sub no_map_in_decode ($$)
57 # enc-name, string beginning the malform char;
59 # /* UTF-8 is supposed to be "Universal" so should not happen */
60 croak sprintf "%s '%s' does not map to UTF-8", @_;
66 my $new = $obj->loadEncoding;
67 return undef unless (defined $new);
68 return $new->encode(@_);
74 my $new = $obj->loadEncoding;
75 return undef unless (defined $new);
76 return $new->new_sequence(@_);
82 my $new = $obj->loadEncoding;
83 return undef unless (defined $new);
84 return $new->decode(@_);
90 my $file = $obj->{'File'};
91 my $name = $obj->name;
92 if (open(my $fh,$file))
98 $type = substr($line,0,1);
99 last unless $type eq '#';
102 ($type eq 'X') ? 'Extended' :
103 ($type eq 'H') ? 'HanZi' :
104 ($type eq 'E') ? 'Escape' : 'Table';
105 my $class = ref($obj) . '::' . $subclass;
106 # carp "Loading $file";
108 return $obj if $obj->read($fh,$obj->name,$type);
112 croak("Cannot open $file for ".$obj->name);
114 $obj->Undefine($name);
120 my ($class,$name) = @_;
122 foreach my $dir (@INC)
124 last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
129 package Encode::Tcl::Table;
130 use base 'Encode::Encoding';
137 my ($obj,$fh,$name,$type) = @_;
139 my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
148 my $page = hex($line);
150 $leading[$page] = 1 if $page;
151 my $ch = $page * 256;
152 for (my $i = 0; $i < 16; $i++)
155 for (my $j = 0; $j < 16; $j++)
157 my $val = hex(substr($line,0,4,''));
160 my $uch = pack('U', $val); # chr($val);
172 $touni[$page] = \@page;
175 ? $obj->can("rep_$type")
178 ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C';
180 $obj->{'Rep'} = $rep;
181 $obj->{'ToUni'} = \@touni;
182 $obj->{'FmUni'} = \%fmuni;
183 $obj->{'Def'} = $def;
184 $obj->{'Num'} = $count;
192 #sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
197 $ch = 0 unless @_ > 1;
198 $obj->{'Rep'}->($ch);
203 my($obj,$str,$chk) = @_;
204 my $name = $obj->{'Name'};
205 my $rep = $obj->{'Rep'};
206 my $touni = $obj->{'ToUni'};
210 my $cc = substr($str,0,1,'');
213 if (&$rep($ch) eq 'C')
215 $x = $touni->[0][$ch];
221 $str = pack('C',$ch); # split leading byte
224 my $c2 = substr($str,0,1,'');
226 $x = $touni->[$ch][ord($c2)];
230 Encode::Tcl::no_map_in_decode($name, $cc.$str);
234 $_[1] = $str if $chk;
241 my ($obj,$uni,$chk) = @_;
242 my $fmuni = $obj->{'FmUni'};
243 my $def = $obj->{'Def'};
244 my $name = $obj->{'Name'};
245 my $rep = $obj->{'Rep'};
249 my $ch = substr($uni,0,1,'');
250 my $x = $fmuni->{$ch};
255 Encode::Tcl::no_map_in_encode(ord($ch), $name)
259 $str .= pack(&$rep($x),$x);
261 $_[1] = $uni if $chk;
265 package Encode::Tcl::Escape;
266 use base 'Encode::Encoding';
270 use constant SI => "\cO";
271 use constant SO => "\cN";
272 use constant SS2 => "\eN";
273 use constant SS3 => "\eO";
277 my ($obj,$fh,$name) = @_;
278 my(%tbl, @seq, $enc, @esc, %grp);
281 next unless /^(\S+)\s+(.*)$/;
282 my ($key,$val) = ($1,$2);
283 $val =~ s/^\{(.*?)\}/$1/g;
284 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
286 if($enc = Encode->getEncoding($key))
288 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
291 $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO"
292 $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN"
293 $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN"
294 $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO"
301 if($val =~ /^\e(.*)/)
303 push(@esc, quotemeta $1);
306 $obj->{'Grp'} = \%grp; # graphic chars
307 $obj->{'Seq'} = \@seq; # escape sequences
308 $obj->{'Tbl'} = \%tbl; # encoding tables
309 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
315 my ($obj,$str,$chk) = @_;
316 my $name = $obj->{'Name'};
317 my $tbl = $obj->{'Tbl'};
318 my $seq = $obj->{'Seq'};
319 my $grp = $obj->{'Grp'};
320 my $esc = $obj->{'Esc'};
321 my $ini = $obj->{'init'};
322 my $fin = $obj->{'final'};
325 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
326 my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
327 my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
331 my $cc = substr($str,0,1,'');
334 if($str =~ s/^($esc)//)
337 $sta[ $grp->{$e} ] = $e if $tbl->{$e};
339 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
340 # but in that case, the former will be ignored.
341 elsif($str =~ s/^N//)
345 elsif($str =~ s/^O//)
351 # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped.
352 $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//;
353 if($chk && ! length $str)
355 $str = "\e$1"; # split sequence
358 croak "unknown escape sequence: ESC $1";
371 $cur = $ss ? $sta[$ss] : $sta[$s];
373 if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
375 $uni .= $tbl->{$cur}->decode($cc);
380 my $rep = $tbl->{$cur}->{'Rep'};
381 my $touni = $tbl->{$cur}->{'ToUni'};
383 if (&$rep($ch) eq 'C')
385 $x = $touni->[0][$ch];
391 $str = $cc; # split leading byte
394 my $c2 = substr($str,0,1,'');
396 $x = $touni->[$ch][ord($c2)];
400 Encode::Tcl::no_map_in_decode($name, $cc.$str);
407 my $back = join('', grep defined($_) && $_ ne $std, @sta);
409 $back .= $ss == 2 ? SS2 : SS3 if $ss;
417 my ($obj,$uni,$chk) = @_;
418 my $name = $obj->{'Name'};
419 my $tbl = $obj->{'Tbl'};
420 my $seq = $obj->{'Seq'};
421 my $grp = $obj->{'Grp'};
422 my $ini = $obj->{'init'};
423 my $fin = $obj->{'final'};
426 my @sta = ($std,undef,undef,undef); # G0 .. G3 state
428 my $pG = 0; # previous G: 0 or 1.
429 my $cG = 0; # current G: 0,1,2,3.
431 if($ini && defined $grp->{$ini})
433 $sta[ $grp->{$ini} ] = $ini;
438 my $ch = substr($uni,0,1,'');
440 foreach my $e_seq (@$seq)
442 $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table'
443 ? $tbl->{$e_seq}->{FmUni}->{$ch}
444 : $tbl->{$e_seq}->encode($ch,1);
445 $cur = $e_seq, last if defined $x;
451 Encode::Tcl::no_map_in_encode(ord($ch), $name)
455 if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
457 my $def = $tbl->{$cur}->{'Def'};
458 my $rep = $tbl->{$cur}->{'Rep'};
459 $x = pack(&$rep($x),$x);
462 $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
464 $str .= $cG == 0 && $pG == 1 ? SI :
465 $cG == 1 && $pG == 0 ? SO :
469 $pG = $cG if $cG < 2;
471 $str .= SI if $pG == 1; # back to G0
472 $str .= $std unless $std eq $sta[0]; # GO to ASCII
473 $str .= $fin; # necessary?
474 $_[1] = $uni if $chk;
479 package Encode::Tcl::Extended;
480 use base 'Encode::Encoding';
486 my ($obj,$fh,$name) = @_;
487 my(%tbl, $enc, %ssc, @key);
490 next unless /^(\S+)\s+(.*)$/;
491 my ($key,$val) = ($1,$2);
492 $val =~ s/\{(.*?)\}/$1/;
493 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
495 if($enc = Encode->getEncoding($key))
498 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
499 $ssc{$val} = substr($val,1) if $val =~ /^>/;
506 $obj->{'SSC'} = \%ssc; # single shift char
507 $obj->{'Tbl'} = \%tbl; # encoding tables
508 $obj->{'Key'} = \@key; # keys of table hash
514 my ($obj,$str,$chk) = @_;
515 my $name = $obj->{'Name'};
516 my $tbl = $obj->{'Tbl'};
517 my $ssc = $obj->{'SSC'};
518 my $cur = ''; # current state
522 my $cc = substr($str,0,1,'');
524 if(!$cur && $ch > 0x7F)
527 $cur .= $cc, next if $ssc->{$cur.$cc};
531 if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
533 $uni .= $tbl->{$cur}->decode($cc);
537 my $rep = $tbl->{$cur}->{'Rep'};
538 my $touni = $tbl->{$cur}->{'ToUni'};
540 if (&$rep($ch) eq 'C')
542 $x = $touni->[0][$ch];
548 $str = $cc; # split leading byte
551 my $c2 = substr($str,0,1,'');
553 $x = $touni->[$ch][0x80 ^ ord($c2)];
557 Encode::Tcl::no_map_in_decode($name, $cc.$str);
565 $_[1] = $cur ne '' ? $cur.$str : $str;
572 my ($obj,$uni,$chk) = @_;
573 my $name = $obj->{'Name'};
574 my $tbl = $obj->{'Tbl'};
575 my $ssc = $obj->{'SSC'};
576 my $key = $obj->{'Key'};
582 my $ch = substr($uni,0,1,'');
584 foreach my $k (@$key)
586 $x = ref($tbl->{$k}) ne 'Encode::Tcl::Table'
588 ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1)
589 : $tbl->{$k}->encode($ch,1)
590 : $tbl->{$k}->{FmUni}->{$ch};
591 $cur = $k, last if defined $x;
597 Encode::Tcl::no_map_in_encode(ord($ch), $name)
601 if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
603 my $def = $tbl->{$cur}->{'Def'};
604 my $rep = $tbl->{$cur}->{'Rep'};
608 ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x
611 $str .= $ssc->{$cur} if defined $ssc->{$cur};
614 $_[1] = $uni if $chk;
618 package Encode::Tcl::HanZi;
619 use base 'Encode::Encoding';
625 my ($obj,$fh,$name) = @_;
626 my(%tbl, @seq, $enc);
629 next unless /^(\S+)\s+(.*)$/;
630 my ($key,$val) = ($1,$2);
631 $val =~ s/^\{(.*?)\}/$1/g;
632 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
633 if($enc = Encode->getEncoding($key))
635 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
643 $obj->{'Seq'} = \@seq; # escape sequences
644 $obj->{'Tbl'} = \%tbl; # encoding tables
650 my ($obj,$str,$chk) = @_;
651 my $name = $obj->{'Name'};
652 my $tbl = $obj->{'Tbl'};
653 my $seq = $obj->{'Seq'};
657 while (length($str)){
658 my $cc = substr($str,0,1,'');
665 elsif($str =~ s/^\~//)
669 elsif($str =~ s/^([{}])//)
682 croak "unknown HanZi escape sequence: ~$1";
686 if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
688 $uni .= $tbl->{$cur}->decode($cc);
692 my $rep = $tbl->{$cur}->{'Rep'};
693 my $touni = $tbl->{$cur}->{'ToUni'};
695 if (&$rep($ch) eq 'C')
697 $x = $touni->[0][$ch];
703 $str = $cc; # split leading byte
706 my $c2 = substr($str,0,1,'');
708 $x = $touni->[$ch][ord($c2)];
712 Encode::Tcl::no_map_in_decode($name, $cc.$str);
718 $_[1] = $cur eq $std ? $str : $cur.$str;
725 my ($obj,$uni,$chk) = @_;
726 my $name = $obj->{'Name'};
727 my $tbl = $obj->{'Tbl'};
728 my $seq = $obj->{'Seq'};
736 my $ch = substr($uni,0,1,'');
738 foreach my $e_seq (@$seq)
740 $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table'
741 ? $tbl->{$e_seq}->{FmUni}->{$ch}
742 : $tbl->{$e_seq}->encode($ch,1);
743 $cur = $e_seq and last if defined $x;
749 Encode::Tcl::no_map_in_encode(ord($ch), $name)
753 if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
755 my $def = $tbl->{$cur}->{'Def'};
756 my $rep = $tbl->{$cur}->{'Rep'};
757 $x = pack(&$rep($x),$x);
759 $str .= $cur eq $pre ? $x : ($pre = $cur).$x;
760 $str .= '~' if $x eq '~'; # to '~~'
762 $str .= $std unless $cur eq $std;
763 $_[1] = $uni if $chk;