Commit | Line | Data |
017e2add |
1 | #!../../perl -w |
2f2b4ff2 |
2 | BEGIN { @INC = '../../lib' }; |
017e2add |
3 | use strict; |
4 | |
# encode_U($uv)
#
# Return the UTF-8 byte sequence for the Unicode code point $uv as a string
# of one to four characters, each holding one byte value.
#
# Code points below 0x10000 produce exactly the 1-, 2- or 3-byte forms the
# table compiler has always emitted.  Code points from 0x10000 upwards use
# the 4-byte form instead of overflowing the 3-byte lead byte.
sub encode_U
{
    my $uv = shift;

    # 0xxxxxxx
    return chr($uv) if $uv < 0x80;

    # 110xxxxx 10xxxxxx
    if ($uv < 0x800)
    {
        return chr(($uv >> 6) | 0xC0).
               chr(($uv & 0x3F) | 0x80);
    }

    # 1110xxxx 10xxxxxx 10xxxxxx
    if ($uv < 0x10000)
    {
        return chr(($uv >> 12) | 0xE0).
               chr((($uv >> 6) & 0x3F) | 0x80).
               chr(($uv & 0x3F) | 0x80);
    }

    # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(($uv >> 18) | 0xF0).
           chr((($uv >> 12) & 0x3F) | 0x80).
           chr((($uv >> 6) & 0x3F) | 0x80).
           chr(($uv & 0x3F) | 0x80);
}
21 | |
# encode_S($ch,$page)
#
# Single-byte encoder: the encoded form is just the character whose code is
# $ch.  The page argument is accepted so that all encode_* subs can be called
# the same way, but it is not used.
sub encode_S
{
    my $code = shift;
    return chr($code);
}
27 | |
# encode_D($ch,$page)
#
# Double-byte encoder: the encoded form is the page character followed by
# the character within that page.
sub encode_D
{
    my ($low,$high) = @_;
    return join('',chr($high),chr($low));
}
33 | |
# encode_M($ch,$page)
#
# Mixed-width encoder: a non-zero page selects the double-byte form
# (encode_D), page zero selects the single-byte form (encode_S).  The
# arguments are handed on unchanged.
sub encode_M
{
    my $page = $_[1];
    return $page ? encode_D(@_) : encode_S(@_);
}
40 | |
# The first argument names the C (or .xs) file to generate.  Two companion
# files are written next to it: the same name with a .def extension (one
# Encode_Define() call per encoding) and with a .h extension (extern
# declarations).
my $cname = shift(@ARGV);
defined($cname) || die "Usage: $0 output-file encoding.enc ...\n";

# An existing read-only output file is made writable before being rewritten.
chmod(0666,$cname) if -f $cname && !-w $cname;
open(C,'>',$cname) || die "Cannot open $cname:$!";

my $dname = $cname;
$dname =~ s/(\.[^\.]*)?$/.def/;
# Test the .def file itself, not $cname, before deciding to chmod it.
chmod(0666,$dname) if -f $dname && !-w $dname;
open(D,'>',$dname) || die "Cannot open $dname:$!";

my $hname = $cname;
$hname =~ s/(\.[^\.]*)?$/.h/;
# Likewise for the header file.
chmod(0666,$hname) if -f $hname && !-w $hname;
open(H,'>',$hname) || die "Cannot open $hname:$!";

# An .xs target needs the perl headers ahead of encode.h.
if ($cname =~ /(\w+)\.xs$/)
{
    print C "#include <EXTERN.h>\n";
    print C "#include <perl.h>\n";
    print C "#include <XSUB.h>\n";
    print C "#define U8 U8\n";
}
print C "#include \"encode.h\"\n";

# Encoding name => [to-UTF-8 table symbol, from-UTF-8 table symbol,
#                   C expression for the default replacement string,
#                   length of that string]; filled in by compile().
my %encoding;

# Byte string => C symbol already emitted for it; used by outstring() to
# share identical (or contained) strings.
my %strings;
64 | |
2f2b4ff2 |
# Sort comparator (uses the sort globals $a and $b).
#
# When both names contain a "-<digits>" part, the last such number in each
# is compared numerically first, so that e.g. "-2" sorts before "-10".
# Ties, and names without such a number, fall back to plain string order.
sub cmp_name
{
    my ($left)  = $a =~ /^.*-(\d+)/;
    my ($right) = $b =~ /^.*-(\d+)/;
    if (defined($left) && defined($right))
    {
        my $order = $left <=> $right;
        return $order if $order;
    }
    return $a cmp $b;
}
78 | |
# Compile every encoding file named on the command line.  The encoding name
# is the file's base name without the .enc extension.
foreach my $enc (sort cmp_name @ARGV)
{
    my ($name) = $enc =~ /^.*?([\w-]+)(\.enc)$/;
    unless (defined $name)
    {
        # Without a name there is nothing sensible to call the tables.
        warn "Cannot derive an encoding name from $enc (expected *.enc), skipped\n";
        next;
    }
    if (open(E,'<',$enc))
    {
        compile(\*E,$name,\*C);
        close(E);
    }
    else
    {
        warn "Cannot open $enc for $name:$!";
    }
}

# For each compiled encoding emit its encode_t definition into the C file,
# an extern declaration into the header and a registration call into the
# .def file.
foreach my $enc (sort cmp_name keys %encoding)
{
    my $sym = "${enc}_encoding";
    $sym =~ s/\W+/_/g;
    print C "encode_t $sym = \n";
    print C " {",join(',',"\"$enc\"",@{$encoding{$enc}}),"};\n";
    print H "extern encode_t $sym;\n";
    print D " Encode_Define(aTHX_ &$sym);\n";
}

# An .xs target also gets a MODULE section whose BOOT block pulls in the
# registration calls from the .def file.
if ($cname =~ /(\w+)\.xs$/)
{
    my $mod = $1;
    print C "\nMODULE = Encode::$mod\tPACKAGE = Encode::$mod\n\n";
    print C "BOOT:\n{\n";
    print C "#include \"$dname\"\n";
    print C "}\n";
}

# Buffered write errors only show up at close time, so check it.
close(C) || die "Cannot close $cname:$!";
close(D) || die "Cannot close $dname:$!";
close(H) || die "Cannot close $hname:$!";
017e2add |
119 | |
120 | |
121 | |
# compile($fh,$name,$ch)
#
# Read one encoding description from $fh and write its C tables to the
# handle $ch, then record the result in %encoding under $name.
#
# Input layout, as consumed here:
#   - leading lines starting with '#' are skipped;
#   - the next line is the type; it selects the sub encode_<type>, and
#     type 'E' is skipped altogether;
#   - the next line holds three whitespace-separated fields: the default
#     character in hex, a symbol (only reported) and the number of pages;
#   - each page is one line with the page number in hex, followed by 16
#     lines each holding 16 four-hex-digit Unicode values.
#
# A value of 0000 means "no character here", except at position 0 of page 0.
# Dies if the type has no encoder or if the page data ends early; warns and
# returns if the type or header line is missing.
sub compile
{
    my ($fh,$name,$ch) = @_;
    my $e2u = {};
    my $u2e = {};

    # Find the type line (first line that is not a comment).
    my $type;
    while ($type = <$fh>)
    {
        last if $type !~ /^\s*#/;
    }
    unless (defined $type)
    {
        warn "No type line found for $name, skipped\n";
        return;
    }
    chomp($type);
    return if $type eq 'E';

    my $header = <$fh>;
    unless (defined $header)
    {
        warn "No header line found for $name, skipped\n";
        return;
    }
    my ($def,$sym,$pages) = split(/\s+/,$header);
    warn "t=$type s=$sym d=$def p=$pages $name\n";

    # Resolve the encoder once instead of by name for every table cell.
    my $encode;
    {
        no strict 'refs';
        $encode = \&{"encode_$type"} if defined &{"encode_$type"};
    }
    die "Unknown encoding type '$type' for $name\n" unless $encode;

    # Encoded form of the default (replacement) character.
    my $rep = '';
    {
        my $v = hex($def);
        # NOTE(review): the page mask is 0xffe, not 0xff, so bit 0 of the
        # page is dropped.  This looks like a typo but is kept as found;
        # confirm the intent before changing it.
        $rep = $encode->($v & 0xFF, ($v >> 8) & 0xffe);
    }

    while ($pages--)
    {
        my $line = <$fh>;
        die "Unexpected end of page data for $name\n" unless defined $line;
        chomp($line);
        my $page = hex($line);
        my $code = 0;                     # position within the page, 0..255
        for (my $i = 0; $i < 16; $i++)
        {
            my $row = <$fh>;
            die "Unexpected end of page data for $name\n" unless defined $row;
            for (my $j = 0; $j < 16; $j++)
            {
                my $ech = $encode->($code,$page);
                # Consume the next four hex digits of the row.
                my $val = hex(substr($row,0,4,''));
                if ($val || (!$code && !$page))
                {
                    my $uch = encode_U($val);
                    enter($e2u,$ech,$uch,$e2u);
                    enter($u2e,$uch,$ech,$u2e);
                }
                else
                {
                    # No character at this position
                    # enter($e2u,$ech,undef,$e2u);
                }
                $code++;
            }
        }
    }

    output($ch,$name.'_utf8',$e2u);
    output($ch,'utf8_'.$name,$u2e);
    $encoding{$name} = [$e2u->{Cname},$u2e->{Cname},
                        outstring($ch,$e2u->{Cname}.'_def',$rep),length($rep)];
}
177 | |
# enter($table,$src,$dst,$target)
#
# Record in the byte trie $table that input string $src produces output
# $dst.  $target is the table to continue in once $src has been consumed;
# it defaults to $table when fewer than four arguments are given.
#
# Each trie entry is an array:
#   [0] first byte covered   [1] last byte covered
#   [2] output string        [3] next table
#   [4] number of input bytes left, counting this one, when the entry
#       was created
#   [5] length of the output string
# Entries for all but the last byte of $src lead to a nested table in [3];
# the entry for the last byte receives $dst, $target and length($dst).
sub enter
{
    my ($table,$src,$dst,$target) = @_;
    $target = $table if @_ < 4;

    my $total = length($src);
    my $pos   = 0;
    do
    {
        my $byte = substr($src,$pos,1);
        my $left = $total - $pos;
        my $node = $table->{$byte};
        unless ($node)
        {
            $node = [$byte,$byte,'',{},$left,0];
            $table->{$byte} = $node;
        }
        if ($left > 1)
        {
            # More input follows: descend into this byte's table.
            $table = $node->[3];
        }
        else
        {
            $node->[2] = $dst;
            $node->[3] = $target;
            $node->[5] = length($dst);
        }
        $pos++;
    } while ($pos < $total);
}
200 | |
201 | |
# outstring($fh,$name,$s)
#
# Return a C expression that refers to the bytes of $s, emitting a new
# "static const U8" array called $name to $fh only when necessary:
#   - a string seen before is referred to by its existing symbol;
#   - a string contained in one already emitted is referred to as
#     "symbol+0xNN", NN being the offset (no suffix for offset 0);
#   - otherwise the array is written out and $name is remembered in
#     %strings and returned.
sub outstring
{
    my ($fh,$name,$s) = @_;

    my $known = $strings{$s};
    return $known if $known;

    foreach my $have (keys %strings)
    {
        my $at = index($have,$s);
        next if $at < 0;
        my $expr = $strings{$have};
        $expr .= sprintf("+0x%02x",$at) if ($at);
        return $expr;
    }

    $strings{$s} = $name;
    printf $fh "static const U8 %s[%d] =\n",$name,length($s);
    # Written 16 bytes per line to keep the lines short; relies on ANSI C
    # concatenating adjacent string literals.
    my $total = length($s);
    for (my $from = 0; $from < $total; $from += 16)
    {
        print $fh "\n" if $from;
        my $piece = substr($s,$from,16);
        print $fh '"',join('',map(sprintf('\x%02x',ord($_)),split(//,$piece))),'"';
    }
    print $fh ";\n\n";
    return $name;
}
232 | |
233 | |
234 | |
# output($fh,$name,$table)
#
# Write the trie $table (built by enter()) to $fh as a C array
# "static encpage_t <name>[]", after first writing every table it leads to
# and the byte strings its entries refer to.  $name has every run of
# non-word characters replaced by "_" and is stored in $table->{Cname};
# the presence of that key is also what marks a table as already written.
#
# Entry layout (see enter()): [0] first byte, [1] last byte, [2] output
# string, [3] next table, [4] input length, [5] output length.
#
# Consecutive bytes whose entries share the same next table, input length
# and output length are merged into a single range entry whose output
# strings are concatenated.  If the last byte present is below 255, an
# empty entry covering the remaining bytes up to 255 is appended.
sub output
{
    my ($fh,$name,$table) = @_;
    $name =~ s/\W+/_/g;
    $table->{Cname} = $name;
    # Only the per-byte entries are references; this leaves out Cname.
    my @keys = grep(ref($table->{$_}),sort keys %$table);
    # Forward declaration, since entries may refer back to this table.
    print $fh "\nstatic encpage_t $name\[\];\n";

    # Sub-tables, merging runs of compatible entries as we go
    my $run;                    # key of the entry the current run started at
    my @ent;                    # keys of the entries that will be emitted
    foreach my $key (@keys)
    {
        my $cur  = $table->{$key};
        my $head = defined($run) ? $table->{$run} : undef;
        if ($head &&
            ord($key) == ord($head->[1])+1 &&
            $head->[3] == $cur->[3] &&
            $head->[4] == $cur->[4] &&
            $head->[5] == $cur->[5]
            # && length($head->[2]) < 16
           )
        {
            # Extend the current run to include this byte.
            $head->[1] = $key;
            $head->[2] .= $cur->[2];
        }
        else
        {
            $run = $key;
            push(@ent,$key);
        }
        my $next = $cur->[3];
        unless (exists $next->{Cname})
        {
            output($fh,sprintf("%s_%02x",$name,ord($cur->[0])),$next);
        }
    }

    # Cover the bytes above the last one present with an empty entry.
    if (ord($keys[-1]) < 255)
    {
        my $gap = chr(ord($keys[-1])+1);
        $table->{$gap} = [$gap,chr(255),undef,$table,0,0];
        push(@ent,$gap);
    }

    # String tables
    foreach my $key (@ent)
    {
        my $entry = $table->{$key};
        next unless $entry->[5];
        outstring($fh,
                  sprintf("%s__%02x_%02x",$name,ord($entry->[0]),ord($entry->[1])),
                  $entry->[2]);
    }

    print $fh "\n";
    print $fh "static encpage_t $name\[",scalar(@ent),"] = {\n";
    foreach my $key (@ent)
    {
        my ($first,$last,$out,$next,$inlen,$outlen) = @{$table->{$key}};
        print $fh "{";
        if ($outlen)
        {
            # The string was emitted by the loop above, so this only looks
            # up its symbol.  Plain print: the symbol is data, not a format.
            print $fh outstring($fh,'',$out);
        }
        else
        {
            print $fh "0";
        }
        print $fh ",",$next->{Cname};
        printf $fh ",0x%02x,0x%02x,%d,%d},\n",ord($first),ord($last),$outlen,$inlen;
    }
    print $fh "};\n\n";
}
307 | |
308 | |