3 # This script generates "unfcan.h", "unfcpt.h", "unfcmb.h",
4 # "unfcmp.h", and "unfexc.h"
5 # from CombiningClass.pl, Decomposition.pl, CompExcl.txt
6 # in lib/unicore or unicode directory
7 # for Unicode::Normalize.xs. (cf. Makefile.PL)
14 our $PACKAGE = 'Unicode::Normalize, mkheader';
16 our $Combin = do "unicore/CombiningClass.pl"
17 || do "unicode/CombiningClass.pl"
18 || croak "$PACKAGE: CombiningClass.pl not found";
20 our $Decomp = do "unicore/Decomposition.pl"
21 || do "unicode/Decomposition.pl"
22 || croak "$PACKAGE: Decomposition.pl not found";
24 our %Combin; # $codepoint => $number : combination class
25 our %Canon; # $codepoint => $hexstring : canonical decomp.
26 our %Compat; # $codepoint => $hexstring : compat. decomp.
27 our %Compos; # $string => $codepoint : composite
29 our %Exclus; # $codepoint => 1 : composition exclusions
33 foreach my $d (@INC) {
35 $f = File::Spec->catfile($d, "unicore", "CompExcl.txt");
36 last if open($fh, $f);
37 $f = File::Spec->catfile($d, "unicode", "CompExcl.txt");
38 last if open($fh, $f);
41 croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f;
45 $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/;
50 while($Combin =~ /(.+)/g) {
51 my @tab = split /\t/, $1;
52 my $ini = hex $tab[0];
54 $Combin{ $ini } = $tab[2];
56 $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]);
60 while($Decomp =~ /(.+)/g) {
61 my @tab = split /\t/, $1;
62 my $compat = $tab[2] =~ s/<[^>]+>//;
63 my $dec = [ _getHexArray($tab[2]) ]; # decomposition
64 my $com = pack('U*', @$dec); # composable sequence
65 my $ini = hex($tab[0]);
67 $Compat{ $ini } = $dec;
69 $Canon{ $ini } = $dec;
70 $Compos{ $com } = $ini if @$dec > 1;
73 foreach my $u ($ini .. hex($tab[1])){
77 $Compos{ $com } = $ini if @$dec > 1;
83 # exhaustive decomposition
84 foreach my $key (keys %Canon) {
85 $Canon{$key} = [ getCanonList($key) ];
88 # exhaustive decomposition
89 foreach my $key (keys %Compat) {
90 $Compat{$key} = [ getCompatList($key) ];
95 my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src;
96 join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
97 # condition @src == @dec is not ok.
102 my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src;
103 join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
104 # condition @src == @dec is not ok.
109 map hex(), $str =~ /([0-9A-Fa-f]+)/g;
113 sprintf '"%s"', join '',
114 map sprintf("\\x%2x", $_), unpack 'C*', pack 'U*', @_;
117 foreach my $hash (\%Canon, \%Compat) {
118 foreach my $key (keys %$hash) {
119 $hash->{$key} = _U_stringify( @{ $hash->{$key} } );
125 return $uv < 0x80 ? 1 :
129 croak "$PACKAGE: illegal char in the composite. utf-8 max is 0x10ffff.";
134 my $structname = "${prefix}complist";
136 our (%Comp1st, %CompList);
138 foreach(sort keys %Compos) {
139 my @a = unpack('U*', $_);
140 my $val = $Compos{$_};
141 my $name = sprintf "${structname}_%06x", $a[0];
142 $Comp1st{ $a[0] } = $name;
143 $CompList{ $name }{ $a[1] } = $val;
145 if( utf8len($a[0]) + utf8len($a[1]) < utf8len($val) ) {
147 . "composable pair is longer than the composite in bytes!\n"
148 . sprintf("%d + %d => %d", $a[0], $a[1], $val);
153 "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
155 foreach my $i (sort keys %CompList) {
156 $compinit .= "$structname $i [] = {\n";
157 $compinit .= join ",\n",
158 map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
159 sort {$a <=> $b } keys %{ $CompList{$i} };
160 $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
163 ####################################
165 my @Exclus = sort {$a <=> $b} keys %Exclus;
167 my $file = "unfexc.h";
168 open FH, ">$file" or croak "$PACKAGE: $file can't be made";
169 binmode FH; select FH;
171 print "bool getExclusion (UV uv) \n{\nreturn\n\t";
174 my $cur = shift @Exclus;
175 if(@Exclus && $cur + 1 == $Exclus[0]) {
176 print "$cur <= uv && uv <= ";
177 while(@Exclus && $cur + 1 == $Exclus[0]) {
178 $cur = shift @Exclus;
181 print "\n\t|| " if @Exclus;
184 print "\n\t|| " if @Exclus;
188 print "\n\t? TRUE : FALSE;\n}\n\n";
191 ####################################
218 type => "$structname *",
225 foreach my $tbl (@tripletable) {
226 my $file = "$tbl->{file}.h";
227 my $head = "${prefix}$tbl->{name}";
228 my $type = $tbl->{type};
229 my $hash = $tbl->{hash};
230 my $null = $tbl->{null};
231 my $init = $tbl->{init};
233 open FH, ">$file" or croak "$PACKAGE: $file can't be made";
234 binmode FH; select FH;
239 * This file is auto-generated by mkheader.
240 * Any changes here will be lost!
244 print $init if defined $init;
246 foreach my $uv (keys %$hash) {
247 my @c = unpack 'CCCC', pack 'N', $uv;
248 $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
251 foreach my $p (sort { $a <=> $b } keys %val) {
252 next if ! $val{ $p };
253 for(my $r = 0; $r < 256; $r++){
254 next if ! $val{ $p }{ $r };
255 printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r;
256 for(my $c = 0; $c < 256; $c++){
257 print "\t", defined $val{$p}{$r}{$c} ? $val{$p}{$r}{$c} : $null;
258 print ',' if $c != 255;
259 print "\n" if $c % 8 == 7;
264 foreach my $p (sort { $a <=> $b } keys %val) {
265 next if ! $val{ $p };
266 printf "$type* ${head}_%02x [256] = {\n", $p;
267 for(my $r = 0; $r < 256; $r++){
268 print $val{ $p }{ $r } ? sprintf("${head}_%02x_%02x", $p, $r) : "NULL";
269 print ',' if $r != 255;
270 print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
274 print "$type** $head [] = {\n";
275 for(my $p = 0; $p <= 0x10; $p++){
276 print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
277 print ',' if $p != 0x10;