our $PACKAGE = 'Unicode::Normalize, mkheader';
our $Combin = do "unicore/CombiningClass.pl"
- || do "unicode/CombiningClass.pl"
- || croak "$PACKAGE: CombiningClass.pl not found";
+ || do "unicode/CombiningClass.pl"
+ || croak "$PACKAGE: CombiningClass.pl not found";
our $Decomp = do "unicore/Decomposition.pl"
- || do "unicode/Decomposition.pl"
- || croak "$PACKAGE: Decomposition.pl not found";
+ || do "unicode/Decomposition.pl"
+ || croak "$PACKAGE: Decomposition.pl not found";
our %Combin; # $codepoint => $number : combination class
our %Canon; # $codepoint => $hexstring : canonical decomp.
our %Compat; # $codepoint => $hexstring : compat. decomp.
our %Compos; # $string => $codepoint : composite
-
our %Exclus; # $codepoint => 1 : composition exclusions
+our %Single; # $codepoint => 1 : singletons
+our %NonStD; # $codepoint => 1 : non-starter decompositions
{
- my($f, $fh);
- foreach my $d (@INC) {
- use File::Spec;
- $f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt");
- last if open($fh, $f);
- $f = File::Spec->catfile($d, "unicode", "CompExcl.txt");
- last if open($fh, $f);
- $f = undef;
- }
- croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f;
- while(<$fh>) {
- next if /^#/ or /^$/;
- s/#.*//;
- $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/;
- }
- close $fh;
+ my($f, $fh);
+ foreach my $d (@INC) {
+ use File::Spec;
+ $f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt");
+ last if open($fh, $f);
+ $f = File::Spec->catfile($d, "unicode", "CompExcl.txt");
+ last if open($fh, $f);
+ $f = undef;
+ }
+ croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f;
+ while (<$fh>) {
+ next if /^#/ or /^$/;
+ s/#.*//;
+ $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/;
+ }
+ close $fh;
}
-while($Combin =~ /(.+)/g) {
- my @tab = split /\t/, $1;
- my $ini = hex $tab[0];
- if($tab[1] eq '') {
- $Combin{ $ini } = $tab[2];
- } else {
- $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]);
- }
+while ($Combin =~ /(.+)/g) {
+ my @tab = split /\t/, $1;
+ my $ini = hex $tab[0];
+ if ($tab[1] eq '') {
+ $Combin{ $ini } = $tab[2];
+ } else {
+ $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]);
+ }
}
-while($Decomp =~ /(.+)/g) {
- my @tab = split /\t/, $1;
- my $compat = $tab[2] =~ s/<[^>]+>//;
- my $dec = [ _getHexArray($tab[2]) ]; # decomposition
- my $com = pack('U*', @$dec); # composable sequence
- my $ini = hex($tab[0]);
- if($tab[1] eq '') {
- $Compat{ $ini } = $dec;
- if(! $compat) {
- $Canon{ $ini } = $dec;
- $Compos{ $com } = $ini if @$dec > 1;
+while ($Decomp =~ /(.+)/g) {
+ my @tab = split /\t/, $1;
+ my $compat = $tab[2] =~ s/<[^>]+>//;
+ my $dec = [ _getHexArray($tab[2]) ]; # decomposition
+ my $com = pack('U*', @$dec); # composable sequence
+ my $ini = hex($tab[0]); # initial decomposable character
+ if ($tab[1] eq '') {
+ $Compat{ $ini } = $dec;
+
+ if (! $compat) {
+ $Canon{ $ini } = $dec;
+
+ if (@$dec > 1) {
+ if ($Combin{ $dec->[0] }) {
+ $NonStD{ $ini } = 1;
+ } else {
+ $Compos{ $com } = $ini;
+ }
+ } else {
+ $Single{ $ini } = 1;
+ }
+ }
+ } else {
+ foreach my $u ($ini .. hex($tab[1])){
+ $Compat{ $u } = $dec;
+ if (! $compat) {
+ $Canon{ $u } = $dec;
+
+ if (@$dec > 1) {
+ if ($Combin{ $dec->[0] }) {
+ $NonStD{ $u } = 1;
+ } else {
+ $Compos{ $com } = $u;
+ }
+ } else {
+ $Single{ $u } = 1;
+ }
+ }
+ }
}
- } else {
- foreach my $u ($ini .. hex($tab[1])){
- $Compat{ $u } = $dec;
- if(! $compat){
- $Canon{ $u } = $dec;
- $Compos{ $com } = $ini if @$dec > 1;
- }
- }
- }
}
# exhaustive decomposition
foreach my $key (keys %Canon) {
- $Canon{$key} = [ getCanonList($key) ];
+ $Canon{$key} = [ getCanonList($key) ];
}
# exhaustive decomposition
foreach my $key (keys %Compat) {
- $Compat{$key} = [ getCompatList($key) ];
+ $Compat{$key} = [ getCompatList($key) ];
}
sub getCanonList {
- my @src = @_;
- my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src;
- join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
- # condition @src == @dec is not ok.
+ my @src = @_;
+ my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src;
+ return join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
+ # condition @src == @dec is not ok.
}
sub getCompatList {
- my @src = @_;
- my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src;
- join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
- # condition @src == @dec is not ok.
+ my @src = @_;
+ my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src;
+ return join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
+ # condition @src == @dec is not ok.
}
-sub _getHexArray {
- my $str = shift;
- map hex(), $str =~ /([0-9A-Fa-f]+)/g;
-}
+sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g }
sub _U_stringify {
- sprintf '"%s"', join '',
- map sprintf("\\x%02x", $_), unpack 'C*', pack 'U*', @_;
+ sprintf '"%s"', join '',
+ map sprintf("\\x%02x", $_), unpack 'C*', pack 'U*', @_;
}
foreach my $hash (\%Canon, \%Compat) {
- foreach my $key (keys %$hash) {
- $hash->{$key} = _U_stringify( @{ $hash->{$key} } );
- }
+ foreach my $key (keys %$hash) {
+ $hash->{$key} = _U_stringify( @{ $hash->{$key} } );
+ }
}
my $prefix = "UNF_";
-
my $structname = "${prefix}complist";
-our (%Comp1st, %CompList);
+our (%Comp1st, %Comp2nd, %CompList);
-foreach(sort keys %Compos) {
- my @a = unpack('U*', $_);
- my $val = $Compos{$_};
- my $name = sprintf "${structname}_%06x", $a[0];
- $Comp1st{ $a[0] } = $name;
- $CompList{ $name }{ $a[1] } = $val;
+foreach (sort keys %Compos) {
+ my @a = unpack('U*', $_);
+ my $val = $Compos{$_};
+ my $name = sprintf "${structname}_%06x", $a[0];
+ $Comp1st{$a[0]} = $name;
+ $Comp2nd{$a[1]} = 1 if ! $Exclus{$Compos{$_}} && ! $Combin{$a[0]};
+ $CompList{$name}{$a[1]} = $val;
+}
+
+# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamo
+foreach (0x1161..0x1175, 0x11A8..0x11C2) {
+ $Comp2nd{$_} = 1;
}
my $compinit =
- "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
+ "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
foreach my $i (sort keys %CompList) {
- $compinit .= "$structname $i [] = {\n";
- $compinit .= join ",\n",
- map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
- sort {$a <=> $b } keys %{ $CompList{$i} };
- $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
+ $compinit .= "$structname $i [] = {\n";
+ $compinit .= join ",\n",
+ map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
+ sort {$a <=> $b } keys %{ $CompList{$i} };
+ $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
}
####################################
-my @Exclus = sort {$a <=> $b} keys %Exclus;
+my @boolfunc = (
+ {
+ name => "Exclusion",
+ type => "bool",
+ hash => \%Exclus,
+ },
+ {
+ name => "Singleton",
+ type => "bool",
+ hash => \%Single,
+ },
+ {
+ name => "NonStDecomp",
+ type => "bool",
+ hash => \%NonStD,
+ },
+ {
+ name => "Comp2nd",
+ type => "bool",
+ hash => \%Comp2nd,
+ },
+);
my $file = "unfexc.h";
open FH, ">$file" or croak "$PACKAGE: $file can't be made";
binmode FH; select FH;
-print "bool isExclusion (UV uv) \n{\nreturn\n\t";
+ print << 'EOF';
+/*
+ * This file is auto-generated by mkheader.
+ * Any changes here will be lost!
+ */
+EOF
-while(@Exclus) {
- my $cur = shift @Exclus;
- if(@Exclus && $cur + 1 == $Exclus[0]) {
- print "($cur <= uv && uv <= ";
- while(@Exclus && $cur + 1 == $Exclus[0]) {
- $cur = shift @Exclus;
+foreach my $tbl (@boolfunc) {
+ my @temp = sort {$a <=> $b} keys %{$tbl->{hash}};
+ my $type = $tbl->{type};
+ my $name = $tbl->{name};
+ print "$type is$name (UV uv)\n{\nreturn\n\t";
+
+ while (@temp) {
+ my $cur = shift @temp;
+ if (@temp && $cur + 1 == $temp[0]) {
+ print "($cur <= uv && uv <= ";
+ while (@temp && $cur + 1 == $temp[0]) {
+ $cur = shift @temp;
+ }
+ print "$cur)";
+ print "\n\t|| " if @temp;
+ } else {
+ print "uv == $cur";
+ print "\n\t|| " if @temp;
+ }
}
- print "$cur)";
- print "\n\t|| " if @Exclus;
- } else {
- print "uv == $cur";
- print "\n\t|| " if @Exclus;
- }
+ print "\n\t? TRUE : FALSE;\n}\n\n";
}
-print "\n\t? TRUE : FALSE;\n}\n\n";
close FH;
+
####################################
my @tripletable = (
- {
- file => "unfcmb",
- name => "combin",
- type => "STDCHAR",
- hash => \%Combin,
- null => 0,
- },
- {
- file => "unfcan",
- name => "canon",
- type => "char*",
- hash => \%Canon,
- null => "NULL",
- },
- {
- file => "unfcpt",
- name => "compat",
- type => "char*",
- hash => \%Compat,
- null => "NULL",
- },
- {
- file => "unfcmp",
- name => "compos",
- type => "$structname *",
- hash => \%Comp1st,
- null => "NULL",
- init => $compinit,
- },
+ {
+ file => "unfcmb",
+ name => "combin",
+ type => "STDCHAR",
+ hash => \%Combin,
+ null => 0,
+ },
+ {
+ file => "unfcan",
+ name => "canon",
+ type => "char*",
+ hash => \%Canon,
+ null => "NULL",
+ },
+ {
+ file => "unfcpt",
+ name => "compat",
+ type => "char*",
+ hash => \%Compat,
+ null => "NULL",
+ },
+ {
+ file => "unfcmp",
+ name => "compos",
+ type => "$structname *",
+ hash => \%Comp1st,
+ null => "NULL",
+ init => $compinit,
+ },
);
foreach my $tbl (@tripletable) {
- my $file = "$tbl->{file}.h";
- my $head = "${prefix}$tbl->{name}";
- my $type = $tbl->{type};
- my $hash = $tbl->{hash};
- my $null = $tbl->{null};
- my $init = $tbl->{init};
-
- open FH, ">$file" or croak "$PACKAGE: $file can't be made";
- binmode FH; select FH;
- my %val;
-
- print FH << 'EOF';
+ my $file = "$tbl->{file}.h";
+ my $head = "${prefix}$tbl->{name}";
+ my $type = $tbl->{type};
+ my $hash = $tbl->{hash};
+ my $null = $tbl->{null};
+ my $init = $tbl->{init};
+
+ open FH, ">$file" or croak "$PACKAGE: $file can't be made";
+ binmode FH; select FH;
+ my %val;
+
+ print FH << 'EOF';
/*
* This file is auto-generated by mkheader.
* Any changes here will be lost!
*/
EOF
- print $init if defined $init;
-
- foreach my $uv (keys %$hash) {
- my @c = unpack 'CCCC', pack 'N', $uv;
- $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
- }
-
- foreach my $p (sort { $a <=> $b } keys %val) {
- next if ! $val{ $p };
- for(my $r = 0; $r < 256; $r++){
- next if ! $val{ $p }{ $r };
- printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r;
- for(my $c = 0; $c < 256; $c++){
- print "\t", defined $val{$p}{$r}{$c}
- ? "($type)".$val{$p}{$r}{$c} : $null;
- print ',' if $c != 255;
- print "\n" if $c % 8 == 7;
- }
- print "};\n\n";
+ print $init if defined $init;
+
+ foreach my $uv (keys %$hash) {
+ my @c = unpack 'CCCC', pack 'N', $uv;
+ $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
+ }
+
+ foreach my $p (sort { $a <=> $b } keys %val) {
+ next if ! $val{ $p };
+ for (my $r = 0; $r < 256; $r++) {
+ next if ! $val{ $p }{ $r };
+ printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r;
+ for (my $c = 0; $c < 256; $c++) {
+ print "\t", defined $val{$p}{$r}{$c}
+ ? "($type)".$val{$p}{$r}{$c}
+ : $null;
+ print ',' if $c != 255;
+ print "\n" if $c % 8 == 7;
+ }
+ print "};\n\n";
+ }
+ }
+ foreach my $p (sort { $a <=> $b } keys %val) {
+ next if ! $val{ $p };
+ printf "$type* ${head}_%02x [256] = {\n", $p;
+ for (my $r = 0; $r < 256; $r++) {
+ print $val{ $p }{ $r }
+ ? sprintf("${head}_%02x_%02x", $p, $r)
+ : "NULL";
+ print ',' if $r != 255;
+ print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
+ }
+ print "};\n\n";
}
- }
- foreach my $p (sort { $a <=> $b } keys %val) {
- next if ! $val{ $p };
- printf "$type* ${head}_%02x [256] = {\n", $p;
- for(my $r = 0; $r < 256; $r++){
- print $val{ $p }{ $r } ? sprintf("${head}_%02x_%02x", $p, $r) : "NULL";
- print ',' if $r != 255;
- print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
+ print "$type** $head [] = {\n";
+ for (my $p = 0; $p <= 0x10; $p++) {
+ print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
+ print ',' if $p != 0x10;
+ print "\n";
}
print "};\n\n";
- }
- print "$type** $head [] = {\n";
- for(my $p = 0; $p <= 0x10; $p++){
- print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
- print ',' if $p != 0x10;
- print "\n";
- }
- print "};\n\n";
- close FH;
+ close FH;
}
__END__