3 use Encode qw(find_encoding);
4 use base 'Encode::Encoding';
9 Encode::Tcl - Tcl encodings
15 foreach my $dir (@INC)
17 if (opendir(my $dh,"$dir/Encode"))
19 while (defined(my $name = readdir($dh)))
21 if ($name =~ /^(.*)\.enc$/)
24 my $obj = find_encoding($canon);
27 my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__;
28 $obj->Define( $canon );
29 # warn "$canon => $obj\n";
43 sub no_map_in_encode ($$)
44 # codepoint, enc-name;
46 carp sprintf "\"\\N{U+%x}\" does not map to %s", @_;
47 # /* FIXME: Skip over the character, copy in replacement and continue
48 # * but that is messy so for now just fail.
53 sub no_map_in_decode ($$)
54 # enc-name, string beginning the malform char;
56 # /* UTF-8 is supposed to be "Universal" so should not happen */
57 croak sprintf "%s '%s' does not map to UTF-8", @_;
63 my $new = $obj->loadEncoding;
64 return undef unless (defined $new);
65 return $new->encode(@_);
71 my $new = $obj->loadEncoding;
72 return undef unless (defined $new);
73 return $new->new_sequence(@_);
79 my $new = $obj->loadEncoding;
80 return undef unless (defined $new);
81 return $new->decode(@_);
87 my $file = $obj->{'File'};
88 my $name = $obj->name;
89 if (open(my $fh,$file))
95 $type = substr($line,0,1);
96 last unless $type eq '#';
99 ($type eq 'X') ? 'Extended' :
100 ($type eq 'H') ? 'HanZi' :
101 ($type eq 'E') ? 'Escape' : 'Table';
102 my $class = ref($obj) . '::' . $subclass;
103 # carp "Loading $file";
105 return $obj if $obj->read($fh,$obj->name,$type);
109 croak("Cannot open $file for ".$obj->name);
111 $obj->Undefine($name);
117 my ($class,$name) = @_;
119 foreach my $dir (@INC)
121 last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
126 package Encode::Tcl::Table;
127 use base 'Encode::Encoding';
134 my ($obj,$fh,$name,$type) = @_;
136 my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
145 my $page = hex($line);
147 $leading[$page] = 1 if $page;
148 my $ch = $page * 256;
149 for (my $i = 0; $i < 16; $i++)
152 for (my $j = 0; $j < 16; $j++)
154 my $val = hex(substr($line,0,4,''));
157 my $uch = pack('U', $val); # chr($val);
169 $touni[$page] = \@page;
172 ? $obj->can("rep_$type")
175 ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C';
177 $obj->{'Rep'} = $rep;
178 $obj->{'ToUni'} = \@touni;
179 $obj->{'FmUni'} = \%fmuni;
180 $obj->{'Def'} = $def;
181 $obj->{'Num'} = $count;
189 #sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
194 $ch = 0 unless @_ > 1;
195 $obj->{'Rep'}->($ch);
200 my($obj,$str,$chk) = @_;
201 my $name = $obj->{'Name'};
202 my $rep = $obj->{'Rep'};
203 my $touni = $obj->{'ToUni'};
207 my $cc = substr($str,0,1,'');
210 if (&$rep($ch) eq 'C')
212 $x = $touni->[0][$ch];
218 $str = pack('C',$ch); # split leading byte
221 my $c2 = substr($str,0,1,'');
223 $x = $touni->[$ch][ord($c2)];
227 Encode::Tcl::no_map_in_decode($name, $cc.$str);
231 $_[1] = $str if $chk;
238 my ($obj,$uni,$chk) = @_;
239 my $fmuni = $obj->{'FmUni'};
240 my $def = $obj->{'Def'};
241 my $name = $obj->{'Name'};
242 my $rep = $obj->{'Rep'};
246 my $ch = substr($uni,0,1,'');
247 my $x = $fmuni->{$ch};
252 Encode::Tcl::no_map_in_encode(ord($ch), $name)
256 $str .= pack(&$rep($x),$x);
258 $_[1] = $uni if $chk;
262 package Encode::Tcl::Escape;
263 use base 'Encode::Encoding';
267 use constant SI => "\cO";
268 use constant SO => "\cN";
269 use constant SS2 => "\eN";
270 use constant SS3 => "\eO";
274 my ($obj,$fh,$name) = @_;
275 my(%tbl, @seq, $enc, @esc, %grp);
278 next unless /^(\S+)\s+(.*)$/;
279 my ($key,$val) = ($1,$2);
280 $val =~ s/^\{(.*?)\}/$1/g;
281 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
283 if($enc = Encode->getEncoding($key))
285 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
288 $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO"
289 $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN"
290 $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN"
291 $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO"
298 if($val =~ /^\e(.*)/)
300 push(@esc, quotemeta $1);
303 $obj->{'Grp'} = \%grp; # graphic chars
304 $obj->{'Seq'} = \@seq; # escape sequences
305 $obj->{'Tbl'} = \%tbl; # encoding tables
306 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
312 my ($obj,$str,$chk) = @_;
313 my $name = $obj->{'Name'};
314 my $tbl = $obj->{'Tbl'};
315 my $seq = $obj->{'Seq'};
316 my $grp = $obj->{'Grp'};
317 my $esc = $obj->{'Esc'};
318 my $ini = $obj->{'init'};
319 my $fin = $obj->{'final'};
322 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
323 my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
324 my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
328 my $cc = substr($str,0,1,'');
331 if($str =~ s/^($esc)//)
334 $sta[ $grp->{$e} ] = $e if $tbl->{$e};
336 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
337 # but in that case, the former will be ignored.
338 elsif($str =~ s/^N//)
342 elsif($str =~ s/^O//)
348 # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped.
349 $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//;
350 if($chk && ! length $str)
352 $str = "\e$1"; # split sequence
355 croak "unknown escape sequence: ESC $1";
368 $cur = $ss ? $sta[$ss] : $sta[$s];
370 if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
372 $uni .= $tbl->{$cur}->decode($cc);
377 my $rep = $tbl->{$cur}->{'Rep'};
378 my $touni = $tbl->{$cur}->{'ToUni'};
380 if (&$rep($ch) eq 'C')
382 $x = $touni->[0][$ch];
388 $str = $cc; # split leading byte
391 my $c2 = substr($str,0,1,'');
393 $x = $touni->[$ch][ord($c2)];
397 Encode::Tcl::no_map_in_decode($name, $cc.$str);
404 my $back = join('', grep defined($_) && $_ ne $std, @sta);
406 $back .= $ss == 2 ? SS2 : SS3 if $ss;
414 my ($obj,$uni,$chk) = @_;
415 my $name = $obj->{'Name'};
416 my $tbl = $obj->{'Tbl'};
417 my $seq = $obj->{'Seq'};
418 my $grp = $obj->{'Grp'};
419 my $ini = $obj->{'init'};
420 my $fin = $obj->{'final'};
423 my @sta = ($std,undef,undef,undef); # G0 .. G3 state
425 my $pG = 0; # previous G: 0 or 1.
426 my $cG = 0; # current G: 0,1,2,3.
428 if($ini && defined $grp->{$ini})
430 $sta[ $grp->{$ini} ] = $ini;
435 my $ch = substr($uni,0,1,'');
437 foreach my $e_seq (@$seq)
439 $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table'
440 ? $tbl->{$e_seq}->{FmUni}->{$ch}
441 : $tbl->{$e_seq}->encode($ch,1);
442 $cur = $e_seq, last if defined $x;
448 Encode::Tcl::no_map_in_encode(ord($ch), $name)
452 if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
454 my $def = $tbl->{$cur}->{'Def'};
455 my $rep = $tbl->{$cur}->{'Rep'};
456 $x = pack(&$rep($x),$x);
459 $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
461 $str .= $cG == 0 && $pG == 1 ? SI :
462 $cG == 1 && $pG == 0 ? SO :
466 $pG = $cG if $cG < 2;
468 $str .= SI if $pG == 1; # back to G0
469 $str .= $std unless $std eq $sta[0]; # GO to ASCII
470 $str .= $fin; # necessary?
471 $_[1] = $uni if $chk;
476 package Encode::Tcl::Extended;
477 use base 'Encode::Encoding';
483 my ($obj,$fh,$name) = @_;
484 my(%tbl, $enc, %ssc, @key);
487 next unless /^(\S+)\s+(.*)$/;
488 my ($key,$val) = ($1,$2);
489 $val =~ s/\{(.*?)\}/$1/;
490 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
492 if($enc = Encode->getEncoding($key))
495 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
496 $ssc{$val} = substr($val,1) if $val =~ /^>/;
503 $obj->{'SSC'} = \%ssc; # single shift char
504 $obj->{'Tbl'} = \%tbl; # encoding tables
505 $obj->{'Key'} = \@key; # keys of table hash
511 my ($obj,$str,$chk) = @_;
512 my $name = $obj->{'Name'};
513 my $tbl = $obj->{'Tbl'};
514 my $ssc = $obj->{'SSC'};
515 my $cur = ''; # current state
519 my $cc = substr($str,0,1,'');
521 if(!$cur && $ch > 0x7F)
524 $cur .= $cc, next if $ssc->{$cur.$cc};
528 if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
530 $uni .= $tbl->{$cur}->decode($cc);
534 my $rep = $tbl->{$cur}->{'Rep'};
535 my $touni = $tbl->{$cur}->{'ToUni'};
537 if (&$rep($ch) eq 'C')
539 $x = $touni->[0][$ch];
545 $str = $cc; # split leading byte
548 my $c2 = substr($str,0,1,'');
550 $x = $touni->[$ch][0x80 ^ ord($c2)];
554 Encode::Tcl::no_map_in_decode($name, $cc.$str);
562 $_[1] = $cur ne '' ? $cur.$str : $str;
569 my ($obj,$uni,$chk) = @_;
570 my $name = $obj->{'Name'};
571 my $tbl = $obj->{'Tbl'};
572 my $ssc = $obj->{'SSC'};
573 my $key = $obj->{'Key'};
579 my $ch = substr($uni,0,1,'');
581 foreach my $k (@$key)
583 $x = ref($tbl->{$k}) ne 'Encode::Tcl::Table'
585 ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1)
586 : $tbl->{$k}->encode($ch,1)
587 : $tbl->{$k}->{FmUni}->{$ch};
588 $cur = $k, last if defined $x;
594 Encode::Tcl::no_map_in_encode(ord($ch), $name)
598 if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
600 my $def = $tbl->{$cur}->{'Def'};
601 my $rep = $tbl->{$cur}->{'Rep'};
605 ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x
608 $str .= $ssc->{$cur} if defined $ssc->{$cur};
611 $_[1] = $uni if $chk;
615 package Encode::Tcl::HanZi;
616 use base 'Encode::Encoding';
622 my ($obj,$fh,$name) = @_;
623 my(%tbl, @seq, $enc);
626 next unless /^(\S+)\s+(.*)$/;
627 my ($key,$val) = ($1,$2);
628 $val =~ s/^\{(.*?)\}/$1/g;
629 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
630 if($enc = Encode->getEncoding($key))
632 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
640 $obj->{'Seq'} = \@seq; # escape sequences
641 $obj->{'Tbl'} = \%tbl; # encoding tables
647 my ($obj,$str,$chk) = @_;
648 my $name = $obj->{'Name'};
649 my $tbl = $obj->{'Tbl'};
650 my $seq = $obj->{'Seq'};
654 while (length($str)){
655 my $cc = substr($str,0,1,'');
662 elsif($str =~ s/^\~//)
666 elsif($str =~ s/^([{}])//)
679 croak "unknown HanZi escape sequence: ~$1";
683 if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
685 $uni .= $tbl->{$cur}->decode($cc);
689 my $rep = $tbl->{$cur}->{'Rep'};
690 my $touni = $tbl->{$cur}->{'ToUni'};
692 if (&$rep($ch) eq 'C')
694 $x = $touni->[0][$ch];
700 $str = $cc; # split leading byte
703 my $c2 = substr($str,0,1,'');
705 $x = $touni->[$ch][ord($c2)];
709 Encode::Tcl::no_map_in_decode($name, $cc.$str);
715 $_[1] = $cur eq $std ? $str : $cur.$str;
722 my ($obj,$uni,$chk) = @_;
723 my $name = $obj->{'Name'};
724 my $tbl = $obj->{'Tbl'};
725 my $seq = $obj->{'Seq'};
733 my $ch = substr($uni,0,1,'');
735 foreach my $e_seq (@$seq)
737 $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table'
738 ? $tbl->{$e_seq}->{FmUni}->{$ch}
739 : $tbl->{$e_seq}->encode($ch,1);
740 $cur = $e_seq and last if defined $x;
746 Encode::Tcl::no_map_in_encode(ord($ch), $name)
750 if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
752 my $def = $tbl->{$cur}->{'Def'};
753 my $rep = $tbl->{$cur}->{'Rep'};
754 $x = pack(&$rep($x),$x);
756 $str .= $cur eq $pre ? $x : ($pre = $cur).$x;
757 $str .= '~' if $x eq '~'; # to '~~'
759 $str .= $std unless $cur eq $std;
760 $_[1] = $uni if $chk;