Commit | Line | Data |
017e2add |
1 | #!../../perl -w |
2f2b4ff2 |
2 | BEGIN { @INC = '../../lib' }; |
017e2add |
3 | use strict; |
4 | |
5 | sub encode_U |
6 | { |
14a8264b |
7 | # UTF-8 encocde long hand - only covers part of perl's range |
017e2add |
8 | my $uv = shift; |
9 | if ($uv < 0x80) |
10 | { |
11 | return chr($uv) |
12 | } |
13 | if ($uv < 0x800) |
14 | { |
15 | return chr(($uv >> 6) | 0xC0). |
16 | chr(($uv & 0x3F) | 0x80); |
17 | } |
18 | return chr(($uv >> 12) | 0xE0). |
19 | chr((($uv >> 6) & 0x3F) | 0x80). |
20 | chr(($uv & 0x3F) | 0x80); |
21 | } |
22 | |
23 | sub encode_S |
24 | { |
14a8264b |
25 | # encode single byte |
017e2add |
26 | my ($ch,$page) = @_; |
27 | return chr($ch); |
28 | } |
29 | |
30 | sub encode_D |
31 | { |
14a8264b |
32 | # encode double byte MS byte first |
017e2add |
33 | my ($ch,$page) = @_; |
34 | return chr($page).chr($ch); |
35 | } |
36 | |
37 | sub encode_M |
38 | { |
14a8264b |
39 | # encode Multi-byte - single for 0..255 otherwise double |
017e2add |
40 | my ($ch,$page) = @_; |
41 | return &encode_D if $page; |
42 | return &encode_S; |
43 | } |
44 | |
14a8264b |
45 | # Win32 does not expand globs on command line |
252a8565 |
46 | eval "\@ARGV = map(glob(\$_),\@ARGV)" if ($^O eq 'MSWin32'); |
18b7339f |
47 | |
017e2add |
48 | my $cname = shift(@ARGV); |
2f2b4ff2 |
49 | chmod(0666,$cname) if -f $cname && !-w $cname; |
017e2add |
50 | open(C,">$cname") || die "Cannot open $cname:$!"; |
2f2b4ff2 |
51 | my $dname = $cname; |
52 | $dname =~ s/(\.[^\.]*)?$/.def/; |
53 | chmod(0666,$dname) if -f $cname && !-w $dname; |
54 | open(D,">$dname") || die "Cannot open $dname:$!"; |
55 | my $hname = $cname; |
56 | $hname =~ s/(\.[^\.]*)?$/.h/; |
57 | chmod(0666,$hname) if -f $cname && !-w $hname; |
58 | open(H,">$hname") || die "Cannot open $hname:$!"; |
59 | |
14a8264b |
60 | foreach my $fh (\*C,\*D,\*H) |
61 | { |
62 | print $fh <<"END"; |
63 | /* |
64 | !!!!!!! DO NOT EDIT THIS FILE !!!!!!! |
65 | This file was autogenerated by: |
66 | $^X $0 $cname @ARGV |
67 | */ |
68 | END |
69 | } |
70 | |
2f2b4ff2 |
71 | if ($cname =~ /(\w+)\.xs$/) |
72 | { |
73 | print C "#include <EXTERN.h>\n"; |
74 | print C "#include <perl.h>\n"; |
75 | print C "#include <XSUB.h>\n"; |
76 | print C "#define U8 U8\n"; |
77 | } |
017e2add |
78 | print C "#include \"encode.h\"\n"; |
79 | |
80 | my %encoding; |
81 | my %strings; |
82 | |
2f2b4ff2 |
83 | sub cmp_name |
84 | { |
85 | if ($a =~ /^.*-(\d+)/) |
86 | { |
87 | my $an = $1; |
88 | if ($b =~ /^.*-(\d+)/) |
89 | { |
90 | my $r = $an <=> $1; |
91 | return $r if $r; |
92 | } |
93 | } |
94 | return $a cmp $b; |
95 | } |
96 | |
97 | foreach my $enc (sort cmp_name @ARGV) |
017e2add |
98 | { |
99 | my ($name) = $enc =~ /^.*?([\w-]+)(\.enc)$/; |
100 | if (open(E,$enc)) |
101 | { |
14a8264b |
102 | compile_enc(\*E,lc($name),\*C); |
017e2add |
103 | } |
104 | else |
105 | { |
106 | warn "Cannot open $enc for $name:$!"; |
107 | } |
108 | } |
109 | |
2f2b4ff2 |
110 | foreach my $enc (sort cmp_name keys %encoding) |
111 | { |
112 | my $sym = "${enc}_encoding"; |
113 | $sym =~ s/\W+/_/g; |
114 | print C "encode_t $sym = \n"; |
14a8264b |
115 | print C " {",join(',',"\"$enc\"",@{$encoding{$enc}}),"};\n\n"; |
2f2b4ff2 |
116 | } |
117 | |
118 | foreach my $enc (sort cmp_name keys %encoding) |
017e2add |
119 | { |
2f2b4ff2 |
120 | my $sym = "${enc}_encoding"; |
121 | $sym =~ s/\W+/_/g; |
122 | print H "extern encode_t $sym;\n"; |
123 | print D " Encode_Define(aTHX_ &$sym);\n"; |
017e2add |
124 | } |
017e2add |
125 | |
2f2b4ff2 |
126 | if ($cname =~ /(\w+)\.xs$/) |
127 | { |
128 | my $mod = $1; |
129 | print C "\nMODULE = Encode::$mod\tPACKAGE = Encode::$mod\n\n"; |
130 | print C "BOOT:\n{\n"; |
131 | print C "#include \"$dname\"\n"; |
132 | print C "}\n"; |
133 | } |
017e2add |
134 | close(C); |
2f2b4ff2 |
135 | close(D); |
136 | close(H); |
017e2add |
137 | |
14a8264b |
138 | sub compile_enc |
017e2add |
139 | { |
140 | my ($fh,$name,$ch) = @_; |
141 | my $e2u = {}; |
142 | my $u2e = {}; |
143 | |
144 | my $type; |
145 | while ($type = <$fh>) |
146 | { |
147 | last if $type !~ /^\s*#/; |
148 | } |
149 | chomp($type); |
150 | return if $type eq 'E'; |
151 | my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); |
14a8264b |
152 | warn "$type encoded $name\n"; |
017e2add |
153 | my $rep = ''; |
154 | { |
155 | my $v = hex($def); |
156 | no strict 'refs'; |
157 | $rep = &{"encode_$type"}($v & 0xFF, ($v >> 8) & 0xffe); |
158 | } |
159 | while ($pages--) |
160 | { |
161 | my $line = <$fh>; |
162 | chomp($line); |
163 | my $page = hex($line); |
164 | my $ch = 0; |
165 | for (my $i = 0; $i < 16; $i++) |
166 | { |
167 | my $line = <$fh>; |
168 | for (my $j = 0; $j < 16; $j++) |
169 | { |
170 | no strict 'refs'; |
171 | my $ech = &{"encode_$type"}($ch,$page); |
172 | my $val = hex(substr($line,0,4,'')); |
173 | if ($val || (!$ch && !$page)) |
174 | { |
175 | my $uch = encode_U($val); |
176 | enter($e2u,$ech,$uch,$e2u); |
177 | enter($u2e,$uch,$ech,$u2e); |
178 | } |
179 | else |
180 | { |
181 | # No character at this position |
182 | # enter($e2u,$ech,undef,$e2u); |
183 | } |
184 | $ch++; |
185 | } |
186 | } |
187 | } |
188 | output($ch,$name.'_utf8',$e2u); |
189 | output($ch,'utf8_'.$name,$u2e); |
190 | $encoding{$name} = [$e2u->{Cname},$u2e->{Cname}, |
191 | outstring($ch,$e2u->{Cname}.'_def',$rep),length($rep)]; |
192 | } |
193 | |
194 | sub enter |
195 | { |
196 | my ($a,$s,$d,$t) = @_; |
197 | $t = $a if @_ < 4; |
198 | my $b = substr($s,0,1); |
199 | my $e = $a->{$b}; |
200 | unless ($e) |
201 | { # 0 1 2 3 4 5 |
202 | $e = [$b,$b,'',{},length($s),0]; |
203 | $a->{$b} = $e; |
204 | } |
205 | if (length($s) > 1) |
206 | { |
207 | enter($e->[3],substr($s,1),$d,$t); |
208 | } |
209 | else |
210 | { |
211 | $e->[2] = $d; |
212 | $e->[3] = $t; |
213 | $e->[5] = length($d); |
214 | } |
215 | } |
216 | |
017e2add |
217 | sub outstring |
218 | { |
219 | my ($fh,$name,$s) = @_; |
220 | my $sym = $strings{$s}; |
221 | unless ($sym) |
222 | { |
2f2b4ff2 |
223 | foreach my $o (keys %strings) |
017e2add |
224 | { |
2f2b4ff2 |
225 | my $i = index($o,$s); |
226 | if ($i >= 0) |
017e2add |
227 | { |
2f2b4ff2 |
228 | $sym = $strings{$o}; |
229 | $sym .= sprintf("+0x%02x",$i) if ($i); |
230 | return $sym; |
017e2add |
231 | } |
232 | } |
233 | $strings{$s} = $sym = $name; |
14a8264b |
234 | printf $fh "\nstatic const U8 %s[%d] =\n",$name,length($s); |
2f2b4ff2 |
235 | # Do in chunks of 16 chars to constrain line length |
236 | # Assumes ANSI C adjacent string litteral concatenation |
017e2add |
237 | while (length($s)) |
238 | { |
239 | my $c = substr($s,0,16,''); |
240 | print $fh '"',join('',map(sprintf('\x%02x',ord($_)),split(//,$c))),'"'; |
241 | print $fh "\n" if length($s); |
242 | } |
14a8264b |
243 | printf $fh ";\n"; |
017e2add |
244 | } |
245 | return $sym; |
246 | } |
247 | |
14a8264b |
248 | sub process |
017e2add |
249 | { |
14a8264b |
250 | my ($name,$a) = @_; |
017e2add |
251 | $name =~ s/\W+/_/g; |
252 | $a->{Cname} = $name; |
253 | my @keys = grep(ref($a->{$_}),sort keys %$a); |
017e2add |
254 | my $l; |
255 | my @ent; |
256 | foreach my $b (@keys) |
257 | { |
258 | my ($s,$f,$out,$t,$end) = @{$a->{$b}}; |
259 | if (defined($l) && |
260 | ord($b) == ord($a->{$l}[1])+1 && |
261 | $a->{$l}[3] == $a->{$b}[3] && |
262 | $a->{$l}[4] == $a->{$b}[4] && |
2f2b4ff2 |
263 | $a->{$l}[5] == $a->{$b}[5] |
264 | # && length($a->{$l}[2]) < 16 |
265 | ) |
017e2add |
266 | { |
267 | my $i = ord($b)-ord($a->{$l}[0]); |
268 | $a->{$l}[1] = $b; |
269 | $a->{$l}[2] .= $a->{$b}[2]; |
270 | } |
271 | else |
272 | { |
273 | $l = $b; |
274 | push(@ent,$b); |
275 | } |
14a8264b |
276 | if (exists $t->{Cname}) |
017e2add |
277 | { |
14a8264b |
278 | $t->{'Forward'} = 1 if $t != $a; |
279 | } |
280 | else |
281 | { |
282 | process(sprintf("%s_%02x",$name,ord($s)),$t); |
017e2add |
283 | } |
284 | } |
285 | if (ord($keys[-1]) < 255) |
286 | { |
287 | my $t = chr(ord($keys[-1])+1); |
288 | $a->{$t} = [$t,chr(255),undef,$a,0,0]; |
289 | push(@ent,$t); |
290 | } |
14a8264b |
291 | $a->{'Entries'} = \@ent; |
292 | } |
293 | |
294 | sub outtable |
295 | { |
296 | my ($fh,$a) = @_; |
297 | my $name = $a->{'Cname'}; |
017e2add |
298 | # String tables |
14a8264b |
299 | foreach my $b (@{$a->{'Entries'}}) |
017e2add |
300 | { |
301 | next unless $a->{$b}[5]; |
302 | my $s = ord($a->{$b}[0]); |
303 | my $e = ord($a->{$b}[1]); |
304 | outstring($fh,sprintf("%s__%02x_%02x",$name,$s,$e),$a->{$b}[2]); |
305 | } |
14a8264b |
306 | if ($a->{'Forward'}) |
307 | { |
308 | print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n"; |
309 | } |
310 | $a->{'Done'} = 1; |
311 | foreach my $b (@{$a->{'Entries'}}) |
312 | { |
313 | my ($s,$e,$out,$t,$end,$l) = @{$a->{$b}}; |
314 | outtable($fh,$t) unless $t->{'Done'}; |
315 | } |
316 | print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"] = {\n"; |
317 | foreach my $b (@{$a->{'Entries'}}) |
017e2add |
318 | { |
319 | my ($s,$e,$out,$t,$end,$l) = @{$a->{$b}}; |
320 | my $sc = ord($s); |
321 | my $ec = ord($e); |
322 | print $fh "{"; |
323 | if ($l) |
324 | { |
325 | printf $fh outstring($fh,'',$out); |
326 | } |
327 | else |
328 | { |
329 | print $fh "0"; |
330 | } |
331 | print $fh ",",$t->{Cname}; |
2f2b4ff2 |
332 | printf $fh ",0x%02x,0x%02x,$l,$end},\n",$sc,$ec; |
017e2add |
333 | } |
14a8264b |
334 | print $fh "};\n"; |
335 | } |
336 | |
337 | sub output |
338 | { |
339 | my ($fh,$name,$a) = @_; |
340 | process($name,$a); |
341 | # Sub-tables |
342 | outtable($fh,$a); |
017e2add |
343 | } |
344 | |
345 | |