Integrate mainline (Win2k/MinGW all ok except threads/t/end.t)
[p5sagit/p5-mst-13.2.git] / ext / Unicode / Normalize / mkheader
CommitLineData
ac5ea531 1#!perl
2#
3# This script generates "unfcan.h", "unfcpt.h", "unfcmb.h",
4# "unfcmp.h", and "unfexc.h"
5# from CombiningClass.pl, Decomposition.pl, CompExcl.txt
6# in lib/unicore or unicode directory
7# for Unicode::Normalize.xs. (cf. Makefile.PL)
8#
9use 5.006;
10use strict;
11use warnings;
12use Carp;
13
14our $PACKAGE = 'Unicode::Normalize, mkheader';
15
16our $Combin = do "unicore/CombiningClass.pl"
d6b7ef86 17 || do "unicode/CombiningClass.pl"
18 || croak "$PACKAGE: CombiningClass.pl not found";
ac5ea531 19
20our $Decomp = do "unicore/Decomposition.pl"
d6b7ef86 21 || do "unicode/Decomposition.pl"
22 || croak "$PACKAGE: Decomposition.pl not found";
ac5ea531 23
24our %Combin; # $codepoint => $number : combination class
25our %Canon; # $codepoint => $hexstring : canonical decomp.
26our %Compat; # $codepoint => $hexstring : compat. decomp.
27our %Compos; # $string => $codepoint : composite
ac5ea531 28our %Exclus; # $codepoint => 1 : composition exclusions
d6b7ef86 29our %Single; # $codepoint => 1 : singletons
30our %NonStD; # $codepoint => 1 : non-starter decompositions
ac5ea531 31
32{
d6b7ef86 33 my($f, $fh);
34 foreach my $d (@INC) {
35 use File::Spec;
36 $f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt");
37 last if open($fh, $f);
38 $f = File::Spec->catfile($d, "unicode", "CompExcl.txt");
39 last if open($fh, $f);
40 $f = undef;
41 }
42 croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f;
43 while (<$fh>) {
44 next if /^#/ or /^$/;
45 s/#.*//;
46 $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/;
47 }
48 close $fh;
ac5ea531 49}
50
d6b7ef86 51while ($Combin =~ /(.+)/g) {
52 my @tab = split /\t/, $1;
53 my $ini = hex $tab[0];
54 if ($tab[1] eq '') {
55 $Combin{ $ini } = $tab[2];
56 } else {
57 $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]);
58 }
ac5ea531 59}
60
d6b7ef86 61while ($Decomp =~ /(.+)/g) {
62 my @tab = split /\t/, $1;
63 my $compat = $tab[2] =~ s/<[^>]+>//;
64 my $dec = [ _getHexArray($tab[2]) ]; # decomposition
65 my $com = pack('U*', @$dec); # composable sequence
66 my $ini = hex($tab[0]); # initial decomposable character
67 if ($tab[1] eq '') {
68 $Compat{ $ini } = $dec;
69
70 if (! $compat) {
71 $Canon{ $ini } = $dec;
72
73 if (@$dec > 1) {
74 if ($Combin{ $dec->[0] }) {
75 $NonStD{ $ini } = 1;
76 } else {
77 $Compos{ $com } = $ini;
78 }
79 } else {
80 $Single{ $ini } = 1;
81 }
82 }
83 } else {
84 foreach my $u ($ini .. hex($tab[1])){
85 $Compat{ $u } = $dec;
86 if (! $compat) {
87 $Canon{ $u } = $dec;
88
89 if (@$dec > 1) {
90 if ($Combin{ $dec->[0] }) {
91 $NonStD{ $u } = 1;
92 } else {
93 $Compos{ $com } = $u;
94 }
95 } else {
96 $Single{ $u } = 1;
97 }
98 }
99 }
ac5ea531 100 }
ac5ea531 101}
102
103# exhaustive decomposition
104foreach my $key (keys %Canon) {
d6b7ef86 105 $Canon{$key} = [ getCanonList($key) ];
ac5ea531 106}
107
108# exhaustive decomposition
109foreach my $key (keys %Compat) {
d6b7ef86 110 $Compat{$key} = [ getCompatList($key) ];
ac5ea531 111}
112
113sub getCanonList {
d6b7ef86 114 my @src = @_;
115 my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src;
116 return join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
117 # condition @src == @dec is not ok.
ac5ea531 118}
119
120sub getCompatList {
d6b7ef86 121 my @src = @_;
122 my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src;
123 return join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
124 # condition @src == @dec is not ok.
ac5ea531 125}
126
d6b7ef86 127sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g }
ac5ea531 128
129sub _U_stringify {
d6b7ef86 130 sprintf '"%s"', join '',
131 map sprintf("\\x%02x", $_), unpack 'C*', pack 'U*', @_;
ac5ea531 132}
133
134foreach my $hash (\%Canon, \%Compat) {
d6b7ef86 135 foreach my $key (keys %$hash) {
136 $hash->{$key} = _U_stringify( @{ $hash->{$key} } );
137 }
ac5ea531 138}
139
ac5ea531 140my $prefix = "UNF_";
ac5ea531 141my $structname = "${prefix}complist";
142
d6b7ef86 143our (%Comp1st, %Comp2nd, %CompList);
ac5ea531 144
d6b7ef86 145foreach (sort keys %Compos) {
146 my @a = unpack('U*', $_);
147 my $val = $Compos{$_};
148 my $name = sprintf "${structname}_%06x", $a[0];
149 $Comp1st{$a[0]} = $name;
150 $Comp2nd{$a[1]} = 1 if ! $Exclus{$Compos{$_}} && ! $Combin{$a[0]};
151 $CompList{$name}{$a[1]} = $val;
152}
153
154# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamo
155foreach (0x1161..0x1175, 0x11A8..0x11C2) {
156 $Comp2nd{$_} = 1;
ac5ea531 157}
158
159my $compinit =
d6b7ef86 160 "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
ac5ea531 161
162foreach my $i (sort keys %CompList) {
d6b7ef86 163 $compinit .= "$structname $i [] = {\n";
164 $compinit .= join ",\n",
165 map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
166 sort {$a <=> $b } keys %{ $CompList{$i} };
167 $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
ac5ea531 168}
169
170####################################
171
d6b7ef86 172my @boolfunc = (
173 {
174 name => "Exclusion",
175 type => "bool",
176 hash => \%Exclus,
177 },
178 {
179 name => "Singleton",
180 type => "bool",
181 hash => \%Single,
182 },
183 {
184 name => "NonStDecomp",
185 type => "bool",
186 hash => \%NonStD,
187 },
188 {
189 name => "Comp2nd",
190 type => "bool",
191 hash => \%Comp2nd,
192 },
193);
ac5ea531 194
195my $file = "unfexc.h";
196open FH, ">$file" or croak "$PACKAGE: $file can't be made";
197binmode FH; select FH;
198
d6b7ef86 199 print << 'EOF';
200/*
201 * This file is auto-generated by mkheader.
202 * Any changes here will be lost!
203 */
204EOF
ac5ea531 205
d6b7ef86 206foreach my $tbl (@boolfunc) {
207 my @temp = sort {$a <=> $b} keys %{$tbl->{hash}};
208 my $type = $tbl->{type};
209 my $name = $tbl->{name};
210 print "$type is$name (UV uv)\n{\nreturn\n\t";
211
212 while (@temp) {
213 my $cur = shift @temp;
214 if (@temp && $cur + 1 == $temp[0]) {
215 print "($cur <= uv && uv <= ";
216 while (@temp && $cur + 1 == $temp[0]) {
217 $cur = shift @temp;
218 }
219 print "$cur)";
220 print "\n\t|| " if @temp;
221 } else {
222 print "uv == $cur";
223 print "\n\t|| " if @temp;
224 }
ac5ea531 225 }
d6b7ef86 226 print "\n\t? TRUE : FALSE;\n}\n\n";
ac5ea531 227}
228
ac5ea531 229close FH;
230
d6b7ef86 231
ac5ea531 232####################################
233
234my @tripletable = (
d6b7ef86 235 {
236 file => "unfcmb",
237 name => "combin",
238 type => "STDCHAR",
239 hash => \%Combin,
240 null => 0,
241 },
242 {
243 file => "unfcan",
244 name => "canon",
245 type => "char*",
246 hash => \%Canon,
247 null => "NULL",
248 },
249 {
250 file => "unfcpt",
251 name => "compat",
252 type => "char*",
253 hash => \%Compat,
254 null => "NULL",
255 },
256 {
257 file => "unfcmp",
258 name => "compos",
259 type => "$structname *",
260 hash => \%Comp1st,
261 null => "NULL",
262 init => $compinit,
263 },
ac5ea531 264);
265
266foreach my $tbl (@tripletable) {
d6b7ef86 267 my $file = "$tbl->{file}.h";
268 my $head = "${prefix}$tbl->{name}";
269 my $type = $tbl->{type};
270 my $hash = $tbl->{hash};
271 my $null = $tbl->{null};
272 my $init = $tbl->{init};
273
274 open FH, ">$file" or croak "$PACKAGE: $file can't be made";
275 binmode FH; select FH;
276 my %val;
277
278 print FH << 'EOF';
ac5ea531 279/*
280 * This file is auto-generated by mkheader.
281 * Any changes here will be lost!
282 */
283EOF
284
d6b7ef86 285 print $init if defined $init;
286
287 foreach my $uv (keys %$hash) {
288 my @c = unpack 'CCCC', pack 'N', $uv;
289 $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
290 }
291
292 foreach my $p (sort { $a <=> $b } keys %val) {
293 next if ! $val{ $p };
294 for (my $r = 0; $r < 256; $r++) {
295 next if ! $val{ $p }{ $r };
296 printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r;
297 for (my $c = 0; $c < 256; $c++) {
298 print "\t", defined $val{$p}{$r}{$c}
299 ? "($type)".$val{$p}{$r}{$c}
300 : $null;
301 print ',' if $c != 255;
302 print "\n" if $c % 8 == 7;
303 }
304 print "};\n\n";
305 }
306 }
307 foreach my $p (sort { $a <=> $b } keys %val) {
308 next if ! $val{ $p };
309 printf "$type* ${head}_%02x [256] = {\n", $p;
310 for (my $r = 0; $r < 256; $r++) {
311 print $val{ $p }{ $r }
312 ? sprintf("${head}_%02x_%02x", $p, $r)
313 : "NULL";
314 print ',' if $r != 255;
315 print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
316 }
317 print "};\n\n";
ac5ea531 318 }
d6b7ef86 319 print "$type** $head [] = {\n";
320 for (my $p = 0; $p <= 0x10; $p++) {
321 print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
322 print ',' if $p != 0x10;
323 print "\n";
ac5ea531 324 }
325 print "};\n\n";
d6b7ef86 326 close FH;
ac5ea531 327}
328
329__END__