Commit | Line | Data |
51ef4e11 |
1 | package Encode::Tcl; |
2 | use strict; |
3 | use Encode qw(find_encoding); |
4 | use base 'Encode::Encoding'; |
5 | use Carp; |
6 | |
fc6a272d |
7 | =head1 NAME |
8 | |
9 | Encode::Tcl - Tcl encodings |
10 | |
11 | =cut |
51ef4e11 |
12 | |
13 | sub INC_search |
14 | { |
15 | foreach my $dir (@INC) |
16 | { |
17 | if (opendir(my $dh,"$dir/Encode")) |
18 | { |
19 | while (defined(my $name = readdir($dh))) |
20 | { |
21 | if ($name =~ /^(.*)\.enc$/) |
22 | { |
23 | my $canon = $1; |
24 | my $obj = find_encoding($canon); |
25 | if (!defined($obj)) |
26 | { |
27 | my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__; |
28 | $obj->Define( $canon ); |
29 | # warn "$canon => $obj\n"; |
30 | } |
31 | } |
32 | } |
33 | closedir($dh); |
34 | } |
35 | } |
36 | } |
37 | |
38 | sub import |
39 | { |
40 | INC_search(); |
41 | } |
42 | |
43 | sub encode |
44 | { |
45 | my $obj = shift; |
46 | my $new = $obj->loadEncoding; |
47 | return undef unless (defined $new); |
48 | return $new->encode(@_); |
49 | } |
50 | |
51 | sub new_sequence |
52 | { |
53 | my $obj = shift; |
54 | my $new = $obj->loadEncoding; |
55 | return undef unless (defined $new); |
56 | return $new->new_sequence(@_); |
57 | } |
58 | |
59 | sub decode |
60 | { |
61 | my $obj = shift; |
62 | my $new = $obj->loadEncoding; |
63 | return undef unless (defined $new); |
64 | return $new->decode(@_); |
65 | } |
66 | |
67 | sub loadEncoding |
68 | { |
69 | my $obj = shift; |
70 | my $file = $obj->{'File'}; |
71 | my $name = $obj->name; |
72 | if (open(my $fh,$file)) |
73 | { |
74 | my $type; |
75 | while (1) |
76 | { |
77 | my $line = <$fh>; |
78 | $type = substr($line,0,1); |
79 | last unless $type eq '#'; |
80 | } |
81 | my $class = ref($obj).('::'.(($type eq 'E') ? 'Escape' : 'Table')); |
71a18b0f |
82 | # carp "Loading $file"; |
51ef4e11 |
83 | bless $obj,$class; |
84 | return $obj if $obj->read($fh,$obj->name,$type); |
85 | } |
86 | else |
87 | { |
88 | croak("Cannot open $file for ".$obj->name); |
89 | } |
90 | $obj->Undefine($name); |
91 | return undef; |
92 | } |
93 | |
94 | sub INC_find |
95 | { |
96 | my ($class,$name) = @_; |
97 | my $enc; |
98 | foreach my $dir (@INC) |
99 | { |
100 | last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc")); |
101 | } |
102 | return $enc; |
103 | } |
104 | |
105 | package Encode::Tcl::Table; |
106 | use base 'Encode::Encoding'; |
107 | |
108 | use Data::Dumper; |
109 | |
110 | sub read |
111 | { |
112 | my ($obj,$fh,$name,$type) = @_; |
f57a1a59 |
113 | my($rep, @leading); |
51ef4e11 |
114 | my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); |
115 | my @touni; |
116 | my %fmuni; |
117 | my $count = 0; |
118 | $def = hex($def); |
119 | while ($pages--) |
120 | { |
121 | my $line = <$fh>; |
122 | chomp($line); |
123 | my $page = hex($line); |
124 | my @page; |
f57a1a59 |
125 | $leading[$page] = 1 if $page; |
51ef4e11 |
126 | my $ch = $page * 256; |
127 | for (my $i = 0; $i < 16; $i++) |
128 | { |
129 | my $line = <$fh>; |
130 | for (my $j = 0; $j < 16; $j++) |
131 | { |
132 | my $val = hex(substr($line,0,4,'')); |
133 | if ($val || !$ch) |
134 | { |
f57a1a59 |
135 | my $uch = pack('U', $val); # chr($val); |
51ef4e11 |
136 | push(@page,$uch); |
137 | $fmuni{$uch} = $ch; |
138 | $count++; |
139 | } |
140 | else |
141 | { |
142 | push(@page,undef); |
143 | } |
144 | $ch++; |
145 | } |
146 | } |
147 | $touni[$page] = \@page; |
148 | } |
f57a1a59 |
149 | $rep = $type ne 'M' ? $obj->can("rep_$type") : |
150 | sub { ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'}; |
51ef4e11 |
151 | $obj->{'Rep'} = $rep; |
152 | $obj->{'ToUni'} = \@touni; |
153 | $obj->{'FmUni'} = \%fmuni; |
154 | $obj->{'Def'} = $def; |
155 | $obj->{'Num'} = $count; |
156 | return $obj; |
157 | } |
158 | |
159 | sub rep_S { 'C' } |
160 | |
161 | sub rep_D { 'n' } |
162 | |
f57a1a59 |
163 | #sub rep_M { ($_[0] > 255) ? 'n' : 'C' } |
51ef4e11 |
164 | |
165 | sub representation |
166 | { |
167 | my ($obj,$ch) = @_; |
168 | $ch = 0 unless @_ > 1; |
f57a1a59 |
169 | $obj->{'Rep'}->($ch); |
51ef4e11 |
170 | } |
171 | |
172 | sub decode |
173 | { |
174 | my ($obj,$str,$chk) = @_; |
175 | my $rep = $obj->{'Rep'}; |
176 | my $touni = $obj->{'ToUni'}; |
e91cad5b |
177 | my $uni; |
51ef4e11 |
178 | while (length($str)) |
179 | { |
180 | my $ch = ord(substr($str,0,1,'')); |
181 | my $x; |
182 | if (&$rep($ch) eq 'C') |
183 | { |
184 | $x = $touni->[0][$ch]; |
185 | } |
186 | else |
187 | { |
188 | $x = $touni->[$ch][ord(substr($str,0,1,''))]; |
189 | } |
190 | unless (defined $x) |
191 | { |
192 | last if $chk; |
193 | # What do we do here ? |
194 | $x = ''; |
195 | } |
196 | $uni .= $x; |
197 | } |
198 | $_[1] = $str if $chk; |
199 | return $uni; |
200 | } |
201 | |
202 | |
203 | sub encode |
204 | { |
205 | my ($obj,$uni,$chk) = @_; |
206 | my $fmuni = $obj->{'FmUni'}; |
51ef4e11 |
207 | my $def = $obj->{'Def'}; |
208 | my $rep = $obj->{'Rep'}; |
e91cad5b |
209 | my $str; |
51ef4e11 |
210 | while (length($uni)) |
211 | { |
212 | my $ch = substr($uni,0,1,''); |
213 | my $x = $fmuni->{chr(ord($ch))}; |
214 | unless (defined $x) |
215 | { |
216 | last if ($chk); |
217 | $x = $def; |
218 | } |
219 | $str .= pack(&$rep($x),$x); |
220 | } |
221 | $_[1] = $uni if $chk; |
222 | return $str; |
223 | } |
224 | |
225 | package Encode::Tcl::Escape; |
226 | use base 'Encode::Encoding'; |
227 | |
228 | use Carp; |
229 | |
230 | sub read |
231 | { |
e91cad5b |
232 | my ($obj,$fh,$name) = @_; |
233 | my(%tbl, @esc, $enc); |
51ef4e11 |
234 | while (<$fh>) |
235 | { |
236 | my ($key,$val) = /^(\S+)\s+(.*)$/; |
237 | $val =~ s/^\{(.*?)\}/$1/g; |
238 | $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; |
e91cad5b |
239 | if($enc = Encode->getEncoding($key)){ |
240 | $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; |
241 | push @esc, $val; |
242 | }else{ |
243 | $obj->{$key} = $val; |
244 | } |
51ef4e11 |
245 | } |
e91cad5b |
246 | $obj->{'Ctl'} = \@esc; |
247 | $obj->{'Tbl'} = \%tbl; |
248 | return $obj; |
51ef4e11 |
249 | } |
250 | |
251 | sub decode |
252 | { |
e91cad5b |
253 | my ($obj,$str,$chk) = @_; |
254 | my $tbl = $obj->{'Tbl'}; |
255 | my $ctl = $obj->{'Ctl'}; |
256 | my $ini = $obj->{'init'}; |
257 | my $fin = $obj->{'final'}; |
258 | my $std = $ctl->[0]; |
259 | my $cur = $std; |
260 | my $uni; |
261 | while (length($str)){ |
262 | my $uch = substr($str,0,1,''); |
263 | if($uch eq "\e"){ |
264 | $str =~ s/^([\x20-\x2F]*[\x30-\x7E](?:\x1b[\x20-\x2F]*[\x30-\x7E])*)//; |
265 | my $esc = "\e$1"; |
266 | if($tbl->{$esc}){ $cur = $esc } |
267 | elsif($esc eq $ini || $esc eq $fin){ $cur = $std } |
268 | else{carp "unknown escape sequence" } |
269 | next; |
270 | } |
271 | if($uch eq "\x0e" || $uch eq "\x0f"){ |
272 | $cur = $uch and next; |
273 | } |
274 | my $x; |
275 | if(ref($tbl->{$cur}) eq 'Encode::XS'){ |
276 | $uni .= $tbl->{$cur}->decode($uch); |
277 | next; |
278 | } |
279 | my $ch = ord($uch); |
280 | my $rep = $tbl->{$cur}->{'Rep'}; |
281 | my $touni = $tbl->{$cur}->{'ToUni'}; |
282 | if (&$rep($ch) eq 'C') |
283 | { |
284 | $x = $touni->[0][$ch]; |
285 | } |
286 | else |
287 | { |
288 | $x = $touni->[$ch][ord(substr($str,0,1,''))]; |
289 | } |
290 | unless (defined $x) |
291 | { |
292 | last if $chk; |
293 | # What do we do here ? |
294 | $x = ''; |
295 | } |
296 | $uni .= $x; |
297 | } |
298 | $_[1] = $str if $chk; |
299 | return $uni; |
51ef4e11 |
300 | } |
301 | |
302 | sub encode |
303 | { |
e91cad5b |
304 | my ($obj,$uni,$chk) = @_; |
305 | my $tbl = $obj->{'Tbl'}; |
306 | my $ctl = $obj->{'Ctl'}; |
307 | my $ini = $obj->{'init'}; |
308 | my $fin = $obj->{'final'}; |
309 | my $std = $ctl->[0]; |
310 | my $str = $ini; |
311 | my $pre = $std; |
312 | my $cur = $pre; |
51ef4e11 |
313 | |
e91cad5b |
314 | while (length($uni)){ |
315 | my $ch = chr(ord(substr($uni,0,1,''))); |
316 | my $x = ref($tbl->{$pre}) eq 'Encode::XS' |
317 | ? $tbl->{$pre}->encode($ch,1) |
318 | : $tbl->{$pre}->{FmUni}->{$ch}; |
319 | |
320 | unless(defined $x){ |
321 | foreach my $esc (@$ctl){ |
322 | $x = ref($tbl->{$esc}) eq 'Encode::XS' |
323 | ? $tbl->{$esc}->encode($ch,1) |
324 | : $tbl->{$esc}->{FmUni}->{$ch}; |
325 | $cur = $esc and last if defined $x; |
326 | } |
327 | } |
328 | if($x == 0x0d && !($ini eq '' && $fin eq '') && substr($uni,0,1) eq "\x0a") |
329 | { |
330 | $str .= $cur unless $cur eq $pre; |
331 | $str .= $fin."\x0d\x0a".$ini; |
332 | substr($uni,0,1,''); |
333 | $pre = $std; |
334 | next; |
335 | } |
336 | if(ref($tbl->{$cur}) eq 'Encode::XS'){ |
337 | $str .= $cur unless $cur eq $pre; |
338 | $str .= $x; # "DEF" is lost |
339 | $pre = $cur; |
340 | next; |
341 | } |
342 | my $def = $tbl->{$cur}->{'Def'}; |
343 | my $rep = $tbl->{$cur}->{'Rep'}; |
344 | unless (defined $x){ |
345 | last if ($chk); |
346 | $x = $def; |
347 | } |
348 | $str .= $cur unless $cur eq $pre; |
349 | $str .= pack(&$rep($x),$x); |
350 | $pre = $cur; |
351 | } |
352 | $str .= $std unless $cur eq $std; |
353 | $str .= $fin; |
354 | $_[1] = $uni if $chk; |
355 | return $str; |
356 | } |
51ef4e11 |
357 | 1; |
358 | __END__ |