fixup for Encode/Tcl.t on ruthless filesystems
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode / Tcl.pm
CommitLineData
51ef4e11 1package Encode::Tcl;
2use strict;
3use Encode qw(find_encoding);
4use base 'Encode::Encoding';
5use Carp;
6
fc6a272d 7=head1 NAME
8
9Encode::Tcl - Tcl encodings
10
11=cut
51ef4e11 12
13sub INC_search
14{
15 foreach my $dir (@INC)
16 {
17 if (opendir(my $dh,"$dir/Encode"))
18 {
19 while (defined(my $name = readdir($dh)))
20 {
21 if ($name =~ /^(.*)\.enc$/)
22 {
23 my $canon = $1;
24 my $obj = find_encoding($canon);
25 if (!defined($obj))
26 {
27 my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__;
28 $obj->Define( $canon );
29 # warn "$canon => $obj\n";
30 }
31 }
32 }
33 closedir($dh);
34 }
35 }
36}
37
38sub import
39{
40 INC_search();
41}
42
43sub encode
44{
45 my $obj = shift;
46 my $new = $obj->loadEncoding;
47 return undef unless (defined $new);
48 return $new->encode(@_);
49}
50
51sub new_sequence
52{
53 my $obj = shift;
54 my $new = $obj->loadEncoding;
55 return undef unless (defined $new);
56 return $new->new_sequence(@_);
57}
58
59sub decode
60{
61 my $obj = shift;
62 my $new = $obj->loadEncoding;
63 return undef unless (defined $new);
64 return $new->decode(@_);
65}
66
67sub loadEncoding
68{
69 my $obj = shift;
70 my $file = $obj->{'File'};
71 my $name = $obj->name;
72 if (open(my $fh,$file))
73 {
74 my $type;
75 while (1)
76 {
77 my $line = <$fh>;
78 $type = substr($line,0,1);
79 last unless $type eq '#';
80 }
466d6cd3 81 my $class = ref($obj).('::'.(($type eq 'H') ? 'HanZi' : ($type eq 'E') ? 'Escape' : 'Table'));
71a18b0f 82 # carp "Loading $file";
51ef4e11 83 bless $obj,$class;
84 return $obj if $obj->read($fh,$obj->name,$type);
85 }
86 else
87 {
88 croak("Cannot open $file for ".$obj->name);
89 }
90 $obj->Undefine($name);
91 return undef;
92}
93
94sub INC_find
95{
96 my ($class,$name) = @_;
97 my $enc;
98 foreach my $dir (@INC)
99 {
100 last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
101 }
102 return $enc;
103}
104
105package Encode::Tcl::Table;
106use base 'Encode::Encoding';
107
108use Data::Dumper;
109
110sub read
111{
112 my ($obj,$fh,$name,$type) = @_;
f57a1a59 113 my($rep, @leading);
51ef4e11 114 my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
115 my @touni;
116 my %fmuni;
117 my $count = 0;
118 $def = hex($def);
119 while ($pages--)
120 {
121 my $line = <$fh>;
122 chomp($line);
123 my $page = hex($line);
124 my @page;
f57a1a59 125 $leading[$page] = 1 if $page;
51ef4e11 126 my $ch = $page * 256;
127 for (my $i = 0; $i < 16; $i++)
128 {
129 my $line = <$fh>;
130 for (my $j = 0; $j < 16; $j++)
131 {
132 my $val = hex(substr($line,0,4,''));
133 if ($val || !$ch)
134 {
f57a1a59 135 my $uch = pack('U', $val); # chr($val);
51ef4e11 136 push(@page,$uch);
137 $fmuni{$uch} = $ch;
138 $count++;
139 }
140 else
141 {
142 push(@page,undef);
143 }
144 $ch++;
145 }
146 }
147 $touni[$page] = \@page;
148 }
f57a1a59 149 $rep = $type ne 'M' ? $obj->can("rep_$type") :
150 sub { ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'};
51ef4e11 151 $obj->{'Rep'} = $rep;
152 $obj->{'ToUni'} = \@touni;
153 $obj->{'FmUni'} = \%fmuni;
154 $obj->{'Def'} = $def;
155 $obj->{'Num'} = $count;
156 return $obj;
157}
158
159sub rep_S { 'C' }
160
161sub rep_D { 'n' }
162
f57a1a59 163#sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
51ef4e11 164
165sub representation
166{
167 my ($obj,$ch) = @_;
168 $ch = 0 unless @_ > 1;
f57a1a59 169 $obj->{'Rep'}->($ch);
51ef4e11 170}
171
172sub decode
173{
174 my ($obj,$str,$chk) = @_;
175 my $rep = $obj->{'Rep'};
176 my $touni = $obj->{'ToUni'};
e91cad5b 177 my $uni;
51ef4e11 178 while (length($str))
179 {
180 my $ch = ord(substr($str,0,1,''));
181 my $x;
182 if (&$rep($ch) eq 'C')
183 {
184 $x = $touni->[0][$ch];
185 }
186 else
187 {
188 $x = $touni->[$ch][ord(substr($str,0,1,''))];
189 }
190 unless (defined $x)
191 {
192 last if $chk;
193 # What do we do here ?
194 $x = '';
195 }
196 $uni .= $x;
197 }
198 $_[1] = $str if $chk;
199 return $uni;
200}
201
202
203sub encode
204{
205 my ($obj,$uni,$chk) = @_;
206 my $fmuni = $obj->{'FmUni'};
51ef4e11 207 my $def = $obj->{'Def'};
208 my $rep = $obj->{'Rep'};
e91cad5b 209 my $str;
51ef4e11 210 while (length($uni))
211 {
212 my $ch = substr($uni,0,1,'');
213 my $x = $fmuni->{chr(ord($ch))};
214 unless (defined $x)
215 {
216 last if ($chk);
217 $x = $def;
218 }
219 $str .= pack(&$rep($x),$x);
220 }
221 $_[1] = $uni if $chk;
222 return $str;
223}
224
225package Encode::Tcl::Escape;
226use base 'Encode::Encoding';
227
228use Carp;
229
230sub read
231{
e91cad5b 232 my ($obj,$fh,$name) = @_;
83ea2aad 233 my(%tbl, @seq, $enc, @esc, %grp);
51ef4e11 234 while (<$fh>)
235 {
236 my ($key,$val) = /^(\S+)\s+(.*)$/;
237 $val =~ s/^\{(.*?)\}/$1/g;
238 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
83ea2aad 239
e91cad5b 240 if($enc = Encode->getEncoding($key)){
241 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
d9da9e35 242 push @seq, $val;
83ea2aad 243 $grp{$val} =
244 $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO"
245 $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN"
246 $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN"
247 $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO"
248 0; # G0
e91cad5b 249 }else{
250 $obj->{$key} = $val;
251 }
d9da9e35 252 if($val =~ /^\e(.*)/){ push(@esc, quotemeta $1) }
51ef4e11 253 }
83ea2aad 254 $obj->{'Grp'} = \%grp; # graphic chars
d9da9e35 255 $obj->{'Seq'} = \@seq; # escape sequences
256 $obj->{'Tbl'} = \%tbl; # encoding tables
257 $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC
e91cad5b 258 return $obj;
51ef4e11 259}
260
261sub decode
262{
e91cad5b 263 my ($obj,$str,$chk) = @_;
264 my $tbl = $obj->{'Tbl'};
d9da9e35 265 my $seq = $obj->{'Seq'};
83ea2aad 266 my $grp = $obj->{'Grp'};
d9da9e35 267 my $esc = $obj->{'Esc'};
e91cad5b 268 my $ini = $obj->{'init'};
269 my $fin = $obj->{'final'};
d9da9e35 270 my $std = $seq->[0];
e91cad5b 271 my $cur = $std;
83ea2aad 272 my @sta = ($std, undef, undef, undef); # G0 .. G3 state
273 my($g1,$g2,$g3) = (0,0,0);
e91cad5b 274 my $uni;
275 while (length($str)){
276 my $uch = substr($str,0,1,'');
277 if($uch eq "\e"){
d9da9e35 278 if($str =~ s/^($esc)//)
279 {
280 my $esc = "\e$1";
83ea2aad 281 $sta[ $grp->{$esc} ] = $esc if $tbl->{$esc};
282 }
283 # appearance of "\eN\eO" or "\eO\eN" isn't supposed.
284 # but coincidental ON of G2 and G3 is explicitly avoided.
285 elsif($str =~ s/^N//)
286 {
287 $g2 = 1; $g3 = 0;
288 }
289 elsif($str =~ s/^O//)
290 {
291 $g3 = 1; $g2 = 0;
d9da9e35 292 }
293 else
294 {
295 $str =~ s/^([\x20-\x2F]*[\x30-\x7E])//;
296 carp "unknown escape sequence: ESC $1";
297 }
e91cad5b 298 next;
299 }
83ea2aad 300 if($uch eq "\x0e"){
301 $g1 = 1; next;
302 }
303 if($uch eq "\x0f"){
304 $g1 = 0; next;
e91cad5b 305 }
83ea2aad 306
307 $cur = $g3 ? $sta[3] : $g2 ? $sta[2] : $g1 ? $sta[1] : $sta[0];
308
e91cad5b 309 if(ref($tbl->{$cur}) eq 'Encode::XS'){
310 $uni .= $tbl->{$cur}->decode($uch);
83ea2aad 311 $g2 = $g3 = 0;
e91cad5b 312 next;
313 }
d9da9e35 314 my $ch = ord($uch);
e91cad5b 315 my $rep = $tbl->{$cur}->{'Rep'};
316 my $touni = $tbl->{$cur}->{'ToUni'};
d9da9e35 317 my $x;
e91cad5b 318 if (&$rep($ch) eq 'C')
319 {
320 $x = $touni->[0][$ch];
321 }
322 else
323 {
324 $x = $touni->[$ch][ord(substr($str,0,1,''))];
325 }
326 unless (defined $x)
327 {
328 last if $chk;
329 # What do we do here ?
330 $x = '';
331 }
332 $uni .= $x;
83ea2aad 333 $g2 = $g3 = 0;
e91cad5b 334 }
335 $_[1] = $str if $chk;
336 return $uni;
51ef4e11 337}
338
339sub encode
340{
e91cad5b 341 my ($obj,$uni,$chk) = @_;
342 my $tbl = $obj->{'Tbl'};
d9da9e35 343 my $seq = $obj->{'Seq'};
83ea2aad 344 my $grp = $obj->{'Grp'};
e91cad5b 345 my $ini = $obj->{'init'};
346 my $fin = $obj->{'final'};
d9da9e35 347 my $std = $seq->[0];
e91cad5b 348 my $str = $ini;
83ea2aad 349 my @sta = ($std,undef,undef,undef);
350 my @pre = ($std,undef,undef,undef);
351 my $cur = $std;
352 my $pG = 0;
353 my $cG = 0;
354
355 if($ini)
356 {
357 $sta[ $grp->{$ini} ] = $pre[ $grp->{$ini} ] = $ini;
358 }
51ef4e11 359
e91cad5b 360 while (length($uni)){
83ea2aad 361 my $ch = substr($uni,0,1,'');
466d6cd3 362 my $x;
83ea2aad 363 foreach my $e_seq (@$seq){
466d6cd3 364 $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
365 ? $tbl->{$e_seq}->encode($ch,1)
366 : $tbl->{$e_seq}->{FmUni}->{$ch};
83ea2aad 367 $cur = $e_seq, last if defined $x;
e91cad5b 368 }
466d6cd3 369 if(ref($tbl->{$cur}) ne 'Encode::XS')
e91cad5b 370 {
466d6cd3 371 my $def = $tbl->{$cur}->{'Def'};
372 my $rep = $tbl->{$cur}->{'Rep'};
373 unless (defined $x){
374 last if ($chk);
375 $x = $def;
376 }
377 $x = pack(&$rep($x),$x);
378 }
83ea2aad 379 $cG = $grp->{$cur};
380 $str .= $pre[ $cG ] = $cur if $cur ne $pre[ $cG ];
381
382 $str .= $cG == 0 && $pG == 1 ? "\cO" :
383 $cG == 1 && $pG == 0 ? "\cN" :
384 $cG == 2 ? "\eN" :
385 $cG == 3 ? "\eO" : "";
386 $str .= $x;
387 $pG = $cG if $cG < 2;
466d6cd3 388 }
83ea2aad 389 $str .= $std unless $cur eq $std;
390 $str .= "\cO" if $pG == 1; # back to G0
391 $str .= $fin; # necessary?
466d6cd3 392 $_[1] = $uni if $chk;
393 return $str;
394}
395
396package Encode::Tcl::HanZi;
397use base 'Encode::Encoding';
398
399use Carp;
400
401sub read
402{
403 my ($obj,$fh,$name) = @_;
404 my(%tbl, @seq, $enc);
405 while (<$fh>)
406 {
407 my ($key,$val) = /^(\S+)\s+(.*)$/;
408 $val =~ s/^\{(.*?)\}/$1/g;
409 $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
410 if($enc = Encode->getEncoding($key)){
411 $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
412 push @seq, $val;
413 }else{
414 $obj->{$key} = $val;
415 }
416 }
417 $obj->{'Seq'} = \@seq; # escape sequences
418 $obj->{'Tbl'} = \%tbl; # encoding tables
419 return $obj;
420}
421
422sub decode
423{
424 my ($obj,$str,$chk) = @_;
425 my $tbl = $obj->{'Tbl'};
426 my $seq = $obj->{'Seq'};
427 my $std = $seq->[0];
428 my $cur = $std;
429 my $uni;
430 while (length($str)){
431 my $uch = substr($str,0,1,'');
432 if($uch eq "~"){
433 if($str =~ s/^\cJ//)
434 {
435 next;
436 }
437 elsif($str =~ s/^\~//)
438 {
439 1;
440 }
441 elsif($str =~ s/^([{}])//)
442 {
443 $cur = "~$1";
444 next;
445 }
446 else
447 {
448 $str =~ s/^([^~])//;
449 carp "unknown HanZi escape sequence: ~$1";
450 next;
451 }
e91cad5b 452 }
466d6cd3 453 if(ref($tbl->{$cur}) eq 'Encode::XS'){
454 $uni .= $tbl->{$cur}->decode($uch);
455 next;
456 }
457 my $ch = ord($uch);
458 my $rep = $tbl->{$cur}->{'Rep'};
459 my $touni = $tbl->{$cur}->{'ToUni'};
460 my $x;
461 if (&$rep($ch) eq 'C')
462 {
463 $x = $touni->[0][$ch];
464 }
465 else
466 {
467 $x = $touni->[$ch][ord(substr($str,0,1,''))];
468 }
469 unless (defined $x)
470 {
471 last if $chk;
472 # What do we do here ?
473 $x = '';
474 }
475 $uni .= $x;
e91cad5b 476 }
466d6cd3 477 $_[1] = $str if $chk;
478 return $uni;
479}
480
481sub encode
482{
483 my ($obj,$uni,$chk) = @_;
484 my $tbl = $obj->{'Tbl'};
485 my $seq = $obj->{'Seq'};
486 my $std = $seq->[0];
487 my $str;
488 my $pre = $std;
489 my $cur = $pre;
490
491 while (length($uni)){
492 my $ch = chr(ord(substr($uni,0,1,'')));
493 my $x;
494 foreach my $e_seq (@$seq){
495 $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
496 ? $tbl->{$e_seq}->encode($ch,1)
497 : $tbl->{$e_seq}->{FmUni}->{$ch};
498 $cur = $e_seq and last if defined $x;
e91cad5b 499 }
466d6cd3 500 if(ref($tbl->{$cur}) ne 'Encode::XS')
501 {
502 my $def = $tbl->{$cur}->{'Def'};
503 my $rep = $tbl->{$cur}->{'Rep'};
504 unless (defined $x){
505 last if ($chk);
506 $x = $def;
507 }
508 $x = pack(&$rep($x),$x);
509 }
510 $str .= $cur eq $pre ? $x : ($pre = $cur).$x;
511 $str .= '~' if $x eq '~'; # to '~~'
e91cad5b 512 }
513 $str .= $std unless $cur eq $std;
e91cad5b 514 $_[1] = $uni if $chk;
515 return $str;
516}
466d6cd3 517
51ef4e11 5181;
519__END__