Commit | Line | Data |
017e2add |
1 | #!../../perl -w |
2f2b4ff2 |
2 | BEGIN { @INC = '../../lib' }; |
017e2add |
3 | use strict; |
4 | |
# encode_U($uv)
#
# Return the UTF-8 octet sequence for the Unicode code point $uv as a
# string of one to four characters, each in the range 0x00-0xFF.
#
#   $uv < 0x80       -> 1 octet
#   $uv < 0x800      -> 2 octets
#   $uv < 0x10000    -> 3 octets
#   otherwise        -> 4 octets (valid for $uv <= 0x10FFFF)
#
# The previous implementation stopped at the three-octet form, so any
# code point >= 0x10000 was silently mis-encoded.
sub encode_U {
    my $uv = shift;

    return chr($uv) if $uv < 0x80;

    if ($uv < 0x800) {
        return chr(($uv >> 6) | 0xC0)
             . chr(($uv & 0x3F) | 0x80);
    }

    if ($uv < 0x10000) {
        return chr(($uv >> 12) | 0xE0)
             . chr((($uv >> 6) & 0x3F) | 0x80)
             . chr(($uv & 0x3F) | 0x80);
    }

    # Supplementary planes: lead octet 11110xxx plus three continuation
    # octets of six payload bits each.
    return chr(($uv >> 18) | 0xF0)
         . chr((($uv >> 12) & 0x3F) | 0x80)
         . chr((($uv >> 6) & 0x3F) | 0x80)
         . chr(($uv & 0x3F) | 0x80);
}
21 | |
# encode_S($ch, $page)
#
# Single-byte encoder: the result is the one character whose ordinal is
# $ch.  The $page argument is accepted so that all encode_* routines can
# be called the same way, but it is not used here.
sub encode_S {
    my $byte = $_[0];
    return chr $byte;
}
27 | |
# encode_D($ch, $page)
#
# Double-byte encoder: returns a two-character string made of the page
# value first, followed by the character value within that page.
sub encode_D {
    my ($low, $high) = @_;
    my $pair = chr($high);
    $pair .= chr($low);
    return $pair;
}
33 | |
# encode_M($ch, $page)
#
# Mixed single/double-byte encoder: a true (non-zero) $page selects the
# two-byte form produced by encode_D, otherwise the one-byte form
# produced by encode_S is used.
#
# The helpers are called with explicit arguments rather than the
# "&name;" form, which silently forwards the caller's @_.
sub encode_M {
    my ($ch, $page) = @_;
    return $page ? encode_D($ch, $page) : encode_S($ch, $page);
}
40 | |
18b7339f |
# Expand wildcards ourselves when running on MSWin32.
@ARGV = map(glob($_),@ARGV) if $^O eq 'MSWin32';

# The first argument names the C (or .xs) file to generate; the remaining
# arguments are the encoding files to compile into it.
my $cname = shift(@ARGV);
defined $cname
    or die "Usage: $0 output-file encoding.enc [encoding.enc ...]\n";

# Make an existing read-only output file writable before truncating it.
chmod(0666,$cname) if -f $cname && !-w $cname;
open(C,'>',$cname) || die "Cannot open $cname:$!";

# Companion .def file: same name with the extension replaced.
# The writability check must look at the .def file itself, not at $cname.
my $dname = $cname;
$dname =~ s/(\.[^\.]*)?$/.def/;
chmod(0666,$dname) if -f $dname && !-w $dname;
open(D,'>',$dname) || die "Cannot open $dname:$!";

# Companion .h file, handled the same way.
my $hname = $cname;
$hname =~ s/(\.[^\.]*)?$/.h/;
chmod(0666,$hname) if -f $hname && !-w $hname;
open(H,'>',$hname) || die "Cannot open $hname:$!";

# An .xs target additionally needs the perl headers up front.
if ($cname =~ /(\w+)\.xs$/) {
    print C "#include <EXTERN.h>\n";
    print C "#include <perl.h>\n";
    print C "#include <XSUB.h>\n";
    print C "#define U8 U8\n";
}
print C "#include \"encode.h\"\n";

# %encoding: encoding name -> list of C initialiser fields for its encode_t.
# %strings:  byte string already emitted -> C symbol that holds it.
my %encoding;
my %strings;
66 | |
2f2b4ff2 |
# Sort comparator (reads the package variables $a and $b set by sort).
#
# When both names contain a "-<digits>" part, the digits following the
# last such hyphen are compared numerically first.  If either name has
# no such part, or the numbers are equal, plain string comparison of
# the whole names decides.
sub cmp_name {
    my ($num_a) = $a =~ /^.*-(\d+)/;
    my ($num_b) = $b =~ /^.*-(\d+)/;

    if (defined $num_a && defined $num_b) {
        my $order = $num_a <=> $num_b;
        return $order if $order;
    }

    return $a cmp $b;
}
80 | |
# Compile every encoding file named on the command line into $cname.
foreach my $enc (sort cmp_name @ARGV) {
    # The encoding name is the trailing run of word characters and hyphens
    # immediately before the ".enc" suffix.
    my ($name) = $enc =~ /^.*?([\w-]+)(\.enc)$/;
    unless (defined $name) {
        warn "Skipping $enc: name does not end in .enc\n";
        next;
    }

    if (open(my $efh,'<',$enc)) {
        compile($efh,$name,\*C);
        close($efh);
    }
    else {
        warn "Cannot open $enc for $name:$!";
    }
}

# For each compiled encoding emit its encode_t definition (C file), the
# matching extern declaration (.h file) and its registration call (.def
# file).  The three outputs are separate files, so interleaving the
# prints does not change the contents of any of them.
foreach my $enc (sort cmp_name keys %encoding) {
    my $sym = "${enc}_encoding";
    $sym =~ s/\W+/_/g;

    print C "encode_t $sym = \n";
    print C " {",join(',',"\"$enc\"",@{$encoding{$enc}}),"};\n";

    print H "extern encode_t $sym;\n";
    print D " Encode_Define(aTHX_ &$sym);\n";
}

# For an .xs target, finish with the XS module header and a BOOT section
# that pulls in the generated .def file.
if ($cname =~ /(\w+)\.xs$/) {
    my $mod = $1;
    print C "\nMODULE = Encode::$mod\tPACKAGE = Encode::$mod\n\n";
    print C "BOOT:\n{\n";
    print C "#include \"$dname\"\n";
    print C "}\n";
}

# Buffered write errors only surface at close, so check them.
close(C) || die "Cannot close $cname:$!";
close(D) || die "Cannot close $dname:$!";
close(H) || die "Cannot close $hname:$!";
017e2add |
121 | |
122 | |
123 | |
# compile($fh, $name, $ch)
#
# Read one encoding description from the handle $fh and write the C
# tables for encoding $name to the output handle $ch.  On success the
# initialiser fields for the encoding are stored in $encoding{$name}.
#
# Input layout, as consumed below:
#   - leading lines starting with optional whitespace and '#' are skipped;
#   - the next line is the type; type 'E' is ignored (nothing is emitted),
#     any other type T must have a matching encode_T routine;
#   - the next line holds three whitespace separated fields: the
#     replacement character in hex, a symbol field (only reported), and
#     the number of pages;
#   - each page is one line giving the page number in hex, followed by
#     16 lines of 16 four-hex-digit Unicode values.
#
# A value of 0 means "no character", except at position 0 of page 0.
sub compile {
    my ($fh,$name,$ch) = @_;
    my $e2u = {};    # encoded bytes -> UTF-8
    my $u2e = {};    # UTF-8 -> encoded bytes

    # Skip leading comment lines; $type ends up as the first other line.
    my $type;
    while ($type = <$fh>) {
        last if $type !~ /^\s*#/;
    }
    unless (defined $type) {
        warn "No type line found for $name\n";
        return;
    }
    chomp($type);
    return if $type eq 'E';

    # Resolve the encoder once, and fail with a clear message for an
    # unknown type instead of an "Undefined subroutine" error later on.
    my $encode = __PACKAGE__->can("encode_$type")
        or die "Unknown encoding type '$type' for $name\n";

    my $header = <$fh>;
    unless (defined $header) {
        warn "No header line found for $name\n";
        return;
    }
    my ($def,$sym,$pages) = split(' ',$header);
    warn "t=$type s=$sym d=$def p=$pages $name\n";

    # Replacement character: low byte is the character, high byte the page.
    # The page was previously masked with 0xffe, which drops its low bit
    # and so corrupts any odd page value; 0xFF keeps the whole byte.
    my $v   = hex($def);
    my $rep = $encode->($v & 0xFF, ($v >> 8) & 0xFF);

    while ($pages--) {
        my $page_line = <$fh>;
        chomp($page_line);
        my $page = hex($page_line);
        my $code = 0;    # position within the page, 0..255
        for (my $row = 0; $row < 16; $row++) {
            my $line = <$fh>;
            for (my $col = 0; $col < 16; $col++) {
                my $ech = $encode->($code,$page);
                # Consume the next four hex digits from the row.
                my $val = hex(substr($line,0,4,''));
                if ($val || (!$code && !$page)) {
                    my $uch = encode_U($val);
                    enter($e2u,$ech,$uch,$e2u);
                    enter($u2e,$uch,$ech,$u2e);
                }
                # else: no character at this position, nothing is entered.
                $code++;
            }
        }
    }

    output($ch,$name.'_utf8',$e2u);
    output($ch,'utf8_'.$name,$u2e);
    $encoding{$name} = [$e2u->{Cname},$u2e->{Cname},
                        outstring($ch,$e2u->{Cname}.'_def',$rep),length($rep)];
}
179 | |
# enter($table, $src, $dst, $top)
#
# Record that the byte sequence $src maps to the string $dst in the
# nested lookup structure rooted at the hash ref $table.  $top is the
# table stored on the final byte's entry; when fewer than four
# arguments are given it defaults to $table.
#
# Each hash value is an array ref laid out as:
#   [0] first byte   [1] last byte   [2] output string
#   [3] next table   [4] length of the source at creation
#   [5] length of the output string
#
# Callers do not use the return value.
sub enter {
    my ($table, $src, $dst, $top) = @_;
    $top = $table if @_ < 4;

    my $head = substr($src, 0, 1);
    my $node = $table->{$head};
    if (!$node) {
        $node = [$head, $head, '', {}, length($src), 0];
        $table->{$head} = $node;
    }

    # More bytes to consume: descend into this node's sub-table.
    return enter($node->[3], substr($src, 1), $dst, $top)
        if length($src) > 1;

    # Final byte: store the output and the table to continue from.
    $node->[2] = $dst;
    $node->[3] = $top;
    $node->[5] = length($dst);
}
202 | |
203 | |
# outstring($fh, $name, $s)
#
# Return a C expression that refers to the byte string $s, emitting a
# new "static const U8" array called $name to $fh only when necessary:
#
#   - if $s has been emitted before, its recorded symbol is returned;
#   - otherwise, if $s occurs inside a previously emitted string, that
#     string's symbol is returned, with "+0x.." appended when the
#     offset is non-zero (which containing string is picked depends on
#     hash iteration order);
#   - otherwise the array is written, recorded in %strings, and $name
#     is returned.
sub outstring {
    my ($fh, $name, $s) = @_;

    my $known = $strings{$s};
    return $known if $known;

    for my $emitted (keys %strings) {
        my $offset = index($emitted, $s);
        next if $offset < 0;
        my $ref = $strings{$emitted};
        $ref .= sprintf("+0x%02x", $offset) if ($offset);
        return $ref;
    }

    $strings{$s} = $name;
    printf $fh "static const U8 %s[%d] =\n", $name, length($s);

    # Emit 16 bytes per source line to constrain line length; this
    # relies on ANSI C concatenating adjacent string literals.
    my @chunks   = $s =~ /.{1,16}/gs;
    my @literals = map {
        '"' . join('', map(sprintf('\x%02x', ord($_)), split(//, $_))) . '"'
    } @chunks;
    print $fh join("\n", @literals);
    print $fh ";\n\n";

    return $name;
}
234 | |
017e2add |
# output($fh, $name, $table)
#
# Write the C "encpage_t" array for the lookup table $table (as built by
# enter) to $fh, recursing first into any sub-table that has not been
# written yet.  $name, with runs of non-word characters replaced by '_',
# becomes the C symbol and is stored in $table->{Cname}.
#
# Entries for consecutive bytes that share the same next table and the
# same lengths (fields 3, 4 and 5) are merged into one range whose
# output strings are concatenated.  A final entry with no output is
# appended to cover the bytes after the last key up to 0xFF.
sub output {
    my ($fh,$name,$table) = @_;
    $name =~ s/\W+/_/g;
    $table->{Cname} = $name;

    # Only array-ref values are entries; this skips the Cname string.
    my @keys = grep(ref($table->{$_}),sort keys %$table);

    print $fh "\nextern encpage_t $name\[\];\n";

    # Sub-tables, and merging of adjacent compatible entries.
    my $range;    # key of the range currently being extended
    my @ent;      # keys of the entries that start a range
    foreach my $byte (@keys) {
        my $entry = $table->{$byte};
        my $next  = $entry->[3];
        if (defined($range) &&
            ord($byte) == ord($table->{$range}[1])+1 &&
            $table->{$range}[3] == $next &&
            $table->{$range}[4] == $entry->[4] &&
            $table->{$range}[5] == $entry->[5])
        {
            # Extend the current range to include this byte.
            $table->{$range}[1]  = $byte;
            $table->{$range}[2] .= $entry->[2];
        }
        else {
            $range = $byte;
            push(@ent,$byte);
        }
        unless (exists $next->{Cname}) {
            output($fh,sprintf("%s_%02x",$name,ord($entry->[0])),$next);
        }
    }

    # Filler entry for the bytes above the last key.  With no keys at all
    # the filler starts at 0x00 (previously ord(undef) produced a warning
    # and a filler that began at 0x01).
    my $first_free = @keys ? ord($keys[-1]) + 1 : 0;
    if ($first_free <= 255) {
        my $t = chr($first_free);
        $table->{$t} = [$t,chr(255),undef,$table,0,0];
        push(@ent,$t);
    }

    # String tables: emit the output bytes of every range that has some.
    foreach my $byte (@ent) {
        next unless $table->{$byte}[5];
        my $s = ord($table->{$byte}[0]);
        my $e = ord($table->{$byte}[1]);
        outstring($fh,sprintf("%s__%02x_%02x",$name,$s,$e),$table->{$byte}[2]);
    }

    # The page table itself, one initialiser per range.
    print $fh "\n";
    print $fh "encpage_t $name\[",scalar(@ent),"] = {\n";
    foreach my $byte (@ent) {
        my ($s,$e,$out,$t,$end,$len) = @{$table->{$byte}};
        my $sc = ord($s);
        my $ec = ord($e);
        print $fh "{";
        if ($len) {
            # The symbol is data, not a format: use print, not printf.
            print $fh outstring($fh,'',$out);
        }
        else {
            print $fh "0";
        }
        print $fh ",",$t->{Cname};
        printf $fh ",0x%02x,0x%02x,%d,%d},\n",$sc,$ec,$len,$end;
    }
    print $fh "};\n\n";
}
308 | |
309 | |