Add the special casing mappings (from SpecCase.txt)

diff --git a/lib/unicore/To/SpecLower.pl b/lib/unicore/To/SpecLower.pl

new file mode 100644 (file)

index 0000000..18c073b
--- /dev/null
+++ b/lib/unicore/To/SpecLower.pl
@@ -0,0 +1,107 @@
+# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
+# This file is built by mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+00DF           00DF
+0149           0149
+01F0           01F0
+0390           0390
+03B0           03B0
+0587           0587
+1E96           1E96
+1E97           1E97
+1E98           1E98
+1E99           1E99
+1E9A           1E9A
+1F50           1F50
+1F52           1F52
+1F54           1F54
+1F56           1F56
+1F80           1F80
+1F81           1F81
+1F82           1F82
+1F83           1F83
+1F84           1F84
+1F85           1F85
+1F86           1F86
+1F87           1F87
+1F88           1F80
+1F89           1F81
+1F8A           1F82
+1F8B           1F83
+1F8C           1F84
+1F8D           1F85
+1F8E           1F86
+1F8F           1F87
+1F90           1F90
+1F91           1F91
+1F92           1F92
+1F93           1F93
+1F94           1F94
+1F95           1F95
+1F96           1F96
+1F97           1F97
+1F98           1F90
+1F99           1F91
+1F9A           1F92
+1F9B           1F93
+1F9C           1F94
+1F9D           1F95
+1F9E           1F96
+1F9F           1F97
+1FA0           1FA0
+1FA1           1FA1
+1FA2           1FA2
+1FA3           1FA3
+1FA4           1FA4
+1FA5           1FA5
+1FA6           1FA6
+1FA7           1FA7
+1FA8           1FA0
+1FA9           1FA1
+1FAA           1FA2
+1FAB           1FA3
+1FAC           1FA4
+1FAD           1FA5
+1FAE           1FA6
+1FAF           1FA7
+1FB2           1FB2
+1FB3           1FB3
+1FB4           1FB4
+1FB6           1FB6
+1FB7           1FB7
+1FBC           1FB3
+1FC2           1FC2
+1FC3           1FC3
+1FC4           1FC4
+1FC6           1FC6
+1FC7           1FC7
+1FCC           1FC3
+1FD2           1FD2
+1FD3           1FD3
+1FD6           1FD6
+1FD7           1FD7
+1FE2           1FE2
+1FE3           1FE3
+1FE4           1FE4
+1FE6           1FE6
+1FE7           1FE7
+1FF2           1FF2
+1FF3           1FF3
+1FF4           1FF4
+1FF6           1FF6
+1FF7           1FF7
+1FFC           1FF3
+FB00           FB00
+FB01           FB01
+FB02           FB02
+FB03           FB03
+FB04           FB04
+FB05           FB05
+FB06           FB06
+FB13           FB13
+FB14           FB14
+FB15           FB15
+FB16           FB16
+FB17           FB17
+END
diff --git a/lib/unicore/To/SpecTitle.pl b/lib/unicore/To/SpecTitle.pl

new file mode 100644 (file)

index 0000000..c3e1911
--- /dev/null
+++ b/lib/unicore/To/SpecTitle.pl
@@ -0,0 +1,106 @@
+# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
+# This file is built by mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+00DF           0053 0073
+0149           02BC 004E
+01F0           004A 030C
+0390           0399 0308 0301
+03B0           03A5 0308 0301
+0587           0535 0582
+1E96           0048 0331
+1E97           0054 0308
+1E98           0057 030A
+1E99           0059 030A
+1E9A           0041 02BE
+1F50           03A5 0313
+1F52           03A5 0313 0300
+1F54           03A5 0313 0301
+1F56           03A5 0313 0342
+1F80           1F88
+1F81           1F89
+1F82           1F8A
+1F83           1F8B
+1F84           1F8C
+1F85           1F8D
+1F86           1F8E
+1F87           1F8F
+1F88           1F88
+1F89           1F89
+1F8A           1F8A
+1F8B           1F8B
+1F8C           1F8C
+1F8D           1F8D
+1F8E           1F8E
+1F8F           1F8F
+1F90           1F98
+1F91           1F99
+1F92           1F9A
+1F93           1F9B
+1F94           1F9C
+1F95           1F9D
+1F96           1F9E
+1F97           1F9F
+1F98           1F98
+1F99           1F99
+1F9A           1F9A
+1F9B           1F9B
+1F9C           1F9C
+1F9D           1F9D
+1F9E           1F9E
+1F9F           1F9F
+1FA0           1FA8
+1FA1           1FA9
+1FA2           1FAA
+1FA3           1FAB
+1FA4           1FAC
+1FA5           1FAD
+1FA6           1FAE
+1FA7           1FAF
+1FA8           1FA8
+1FA9           1FA9
+1FAA           1FAA
+1FAB           1FAB
+1FAC           1FAC
+1FAD           1FAD
+1FAE           1FAE
+1FAF           1FAF
+1FB2           1FBA 0345
+1FB3           1FBC
+1FB4           0386 0345
+1FB6           0391 0342
+1FB7           0391 0342 0345
+1FBC           1FBC
+1FC2           1FCA 0345
+1FC3           1FCC
+1FC4           0389 0345
+1FC6           0397 0342
+1FC7           0397 0342 0345
+1FCC           1FCC
+1FD2           0399 0308 0300
+1FD3           0399 0308 0301
+1FD6           0399 0342
+1FD7           0399 0308 0342
+1FE2           03A5 0308 0300
+1FE3           03A5 0308 0301
+1FE4           03A1 0313
+1FE6           03A5 0342
+1FE7           03A5 0308 0342
+1FF2           1FFA 0345
+1FF3           1FFC
+1FF4           038F 0345
+1FF6           03A9 0342
+1FF7           03A9 0342 0345
+1FFC           1FFC
+FB00           0046 0066
+FB01           0046 0069
+FB02           0046 006C
+FB03           0046 0066 0069
+FB04           0046 0066 006C
+FB05   FB06    0053 0074
+FB13           0544 0576
+FB14           0544 0565
+FB15           0544 056B
+FB16           054E 0576
+FB17           0544 056D
+END
diff --git a/lib/unicore/To/SpecUpper.pl b/lib/unicore/To/SpecUpper.pl

new file mode 100644 (file)

index 0000000..e5af4b1
--- /dev/null
+++ b/lib/unicore/To/SpecUpper.pl
@@ -0,0 +1,106 @@
+# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!! 
+# This file is built by mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+00DF           0053 0053
+0149           02BC 004E
+01F0           004A 030C
+0390           0399 0308 0301
+03B0           03A5 0308 0301
+0587           0535 0552
+1E96           0048 0331
+1E97           0054 0308
+1E98           0057 030A
+1E99           0059 030A
+1E9A           0041 02BE
+1F50           03A5 0313
+1F52           03A5 0313 0300
+1F54           03A5 0313 0301
+1F56           03A5 0313 0342
+1F80           1F08 0399
+1F81           1F09 0399
+1F82           1F0A 0399
+1F83           1F0B 0399
+1F84           1F0C 0399
+1F85           1F0D 0399
+1F86           1F0E 0399
+1F87           1F0F 0399
+1F88           1F08 0399
+1F89           1F09 0399
+1F8A           1F0A 0399
+1F8B           1F0B 0399
+1F8C           1F0C 0399
+1F8D           1F0D 0399
+1F8E           1F0E 0399
+1F8F           1F0F 0399
+1F90           1F28 0399
+1F91           1F29 0399
+1F92           1F2A 0399
+1F93           1F2B 0399
+1F94           1F2C 0399
+1F95           1F2D 0399
+1F96           1F2E 0399
+1F97           1F2F 0399
+1F98           1F28 0399
+1F99           1F29 0399
+1F9A           1F2A 0399
+1F9B           1F2B 0399
+1F9C           1F2C 0399
+1F9D           1F2D 0399
+1F9E           1F2E 0399
+1F9F           1F2F 0399
+1FA0           1F68 0399
+1FA1           1F69 0399
+1FA2           1F6A 0399
+1FA3           1F6B 0399
+1FA4           1F6C 0399
+1FA5           1F6D 0399
+1FA6           1F6E 0399
+1FA7           1F6F 0399
+1FA8           1F68 0399
+1FA9           1F69 0399
+1FAA           1F6A 0399
+1FAB           1F6B 0399
+1FAC           1F6C 0399
+1FAD           1F6D 0399
+1FAE           1F6E 0399
+1FAF           1F6F 0399
+1FB2           1FBA 0399
+1FB3           0391 0399
+1FB4           0386 0399
+1FB6           0391 0342
+1FB7           0391 0342 0399
+1FBC           0391 0399
+1FC2           1FCA 0399
+1FC3           0397 0399
+1FC4           0389 0399
+1FC6           0397 0342
+1FC7           0397 0342 0399
+1FCC           0397 0399
+1FD2           0399 0308 0300
+1FD3           0399 0308 0301
+1FD6           0399 0342
+1FD7           0399 0308 0342
+1FE2           03A5 0308 0300
+1FE3           03A5 0308 0301
+1FE4           03A1 0313
+1FE6           03A5 0342
+1FE7           03A5 0308 0342
+1FF2           1FFA 0399
+1FF3           03A9 0399
+1FF4           038F 0399
+1FF6           03A9 0342
+1FF7           03A9 0342 0399
+1FFC           03A9 0399
+FB00           0046 0046
+FB01           0046 0049
+FB02           0046 004C
+FB03           0046 0046 0049
+FB04           0046 0046 004C
+FB05   FB06    0053 0054
+FB13           0544 0546
+FB14           0544 0535
+FB15           0544 053B
+FB16           054E 0546
+FB17           0544 053D
+END
diff --git a/lib/unicore/mktables b/lib/unicore/mktables

index 5b2d786..f851302 100644 (file)
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -5,10 +5,10 @@
 # from the Unicode database files (lib/unicore/*.txt).
 #
 
-my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
-
 use strict;
 
+my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
+
 mkdir("In", 0755);
 mkdir("Is", 0755);
 mkdir("To", 0755);
@@ -123,9 +123,9 @@ if (open(my $Unicode, "Unicode.txt")) {
     my @Mirrored;
     my %To;
     while (<$Unicode>) {
-       next if /^\#/ || /^\s*$/;
-       next unless /^[0-9a-f]+\s*;/i;
+       next unless /^[0-9A-Fa-f]+;/;
        s/\s+$//;
+
        my ($code, $name, $cat, $comb, $bidi, $deco,
            $decimal, $digit, $number,
            $mirrored, $unicode10, $comment,
@@ -259,10 +259,7 @@ if (open(my $LineBrk, "LineBrk.txt")) {
     my %Lbrk;
 
     while (<$LineBrk>) {
-       next if /^\#/ || /^\s*$/;
-       s/\s+$//;
-       s/\s*\#.*//;
-       next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i;
+        next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
 
        my ($first, $last, $lbrk) = ($1, $2, $3);
 
@@ -291,9 +288,9 @@ if (open(my $ArabShap, "ArabShap.txt")) {
     my @ArabLinkGroup;
 
     while (<$ArabShap>) {
-       next if /^\#/ || /^\s*$/;
-       next unless /^[0-9a-f]+\s*;/i;
+       next unless /^[0-9A-Fa-f]+;/;
        s/\s+$//;
+
        my ($code, $name, $link, $linkgroup) = split(/\s*;\s*/);
 
        append(\@ArabLink,      $code, $link);
@@ -314,11 +311,9 @@ if (open(my $Jamo, "Jamo.txt")) {
     my @Short;
 
     while (<$Jamo>) {
-       next if /^\#/ || /^\s*$/;
-       next unless /^[0-9a-f]+\s*;/i;
-       s/\s*\#.*//;
-       s/\s+$//;
-       my ($code, $short) = split(/\s*;\s*/);
+       next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/;
+
+       my ($code, $short) = ($1, $2);
 
        append(\@Short, $code, $short);
     }
@@ -336,10 +331,7 @@ my @Scripts;
 
 if (open(my $Scripts, "Scripts.txt")) {
     while (<$Scripts>) {
-       next if /^\#/ || /^\s*$/;
-       s/\s*\#.*//;
-       s/\s+$//;
-       next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i;
+       next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
 
        # Wait until all the scripts have been read since
        # they are not listed in numeric order.
@@ -387,11 +379,8 @@ my %Blocks;
 
 if (open(my $Blocks, "Blocks.txt")) {
     while (<$Blocks>) {
-       next if /^\#/ || /^\s*$/;
-       s/\s*\#.*//;
-       s/\s+$//;
-       next unless /^([0-9a-f]+)\.\.([0-9a-f]+)\s*;\s*(.+)$/i;
-
+       next unless /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/;
+       
        my ($first, $last, $name) = ($1, $2, $3);
 
        # If there's a naming conflict (the script names are
@@ -430,10 +419,7 @@ my @Props;
 
 if (open(my $Props, "PropList.txt")) {
     while (<$Props>) {
-       next if /^\#/ || /^\s*$/;
-       s/\s*\#.*//;
-       s/\s+$//;
-       next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(\w+)/i;
+       next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
 
        # Wait until all the extended properties have been read since
        # they are not listed in numeric order.
@@ -682,5 +668,48 @@ my %Is = (
 
 mapping(\%Is, "Is");
 
+#
+# Read in the special cases.
+#
+
+my %Case;
+
+if (open(my $SpecCase, "SpecCase.txt")) {
+    while (<$SpecCase>) {
+       next unless /^[0-9A-Fa-f]+;/;
+       s/\#.*//;
+       s/\s+$//;
+
+       my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/);
+
+       if ($condition) { # not implemented yet
+           print "# SKIPPING $_\n";
+           next;
+       }
+
+       # Wait until all the special cases have been read since
+       # they are not listed in numeric order.
+       my $ix = hex($code);
+       push @{$Case{Lower}}, [ $ix, $code, $lower ];
+       push @{$Case{Title}}, [ $ix, $code, $title ];
+       push @{$Case{Upper}}, [ $ix, $code, $upper ];
+    }
+} else {
+    die "$0: SpecCase.txt: $!\n";
+}
+
+# Now write out the special-case properties in code point order.
+# The To/Spec{Lower,Title,Upper}.pl files are unused for now, since the swash
+# routines do not support returning multiple characters.
+
+for my $case (qw(Lower Title Upper)) {
+    my @case;
+    for my $prop (sort { $a->[0] <=> $b->[0] } @{$Case{$case}}) {
+        my ($ix, $code, $to) = @$prop;
+        append(\@case, $code, $to);
+    }
+    flush(\@case, "To/Spec$case.pl");
+}
+
 # That's all, folks!
lib/unicore/To/SpecLower.pl	[new file with mode: 0644]	patch \| blob
lib/unicore/To/SpecTitle.pl	[new file with mode: 0644]	patch \| blob
lib/unicore/To/SpecUpper.pl	[new file with mode: 0644]	patch \| blob
lib/unicore/mktables		patch \| blob \| blame \| history