# from the Unicode database files (lib/unicore/*.txt).
#
-my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
-
use strict;
+my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
+
mkdir("In", 0755);
mkdir("Is", 0755);
mkdir("To", 0755);
my @Mirrored;
my %To;
while (<$Unicode>) {
- next if /^\#/ || /^\s*$/;
- next unless /^[0-9a-f]+\s*;/i;
+ next unless /^[0-9A-Fa-f]+;/;
s/\s+$//;
+
my ($code, $name, $cat, $comb, $bidi, $deco,
$decimal, $digit, $number,
$mirrored, $unicode10, $comment,
my %Lbrk;
while (<$LineBrk>) {
- next if /^\#/ || /^\s*$/;
- s/\s+$//;
- s/\s*\#.*//;
- next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i;
+ next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
my ($first, $last, $lbrk) = ($1, $2, $3);
my @ArabLinkGroup;
while (<$ArabShap>) {
- next if /^\#/ || /^\s*$/;
- next unless /^[0-9a-f]+\s*;/i;
+ next unless /^[0-9A-Fa-f]+;/;
s/\s+$//;
+
my ($code, $name, $link, $linkgroup) = split(/\s*;\s*/);
append(\@ArabLink, $code, $link);
my @Short;
while (<$Jamo>) {
- next if /^\#/ || /^\s*$/;
- next unless /^[0-9a-f]+\s*;/i;
- s/\s*\#.*//;
- s/\s+$//;
- my ($code, $short) = split(/\s*;\s*/);
+ next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/;  # skip lines not starting with a hex code; $1 = code, $2 = word after ';' (may be empty)
+
+ my ($code, $short) = ($1, $2);  # taken from the captures of the match above
append(\@Short, $code, $short);
}
if (open(my $Scripts, "Scripts.txt")) {
while (<$Scripts>) {
- next if /^\#/ || /^\s*$/;
- s/\s*\#.*//;
- s/\s+$//;
- next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i;
+ next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
# Wait until all the scripts have been read since
# they are not listed in numeric order.
if (open(my $Blocks, "Blocks.txt")) {
while (<$Blocks>) {
- next if /^\#/ || /^\s*$/;
- s/\s*\#.*//;
- s/\s+$//;
- next unless /^([0-9a-f]+)\.\.([0-9a-f]+)\s*;\s*(.+)$/i;
-
+ next unless /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/;
+
my ($first, $last, $name) = ($1, $2, $3);
# If there's a naming conflict (the script names are
if (open(my $Props, "PropList.txt")) {
while (<$Props>) {
- next if /^\#/ || /^\s*$/;
- s/\s*\#.*//;
- s/\s+$//;
- next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(\w+)/i;
+ next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
# Wait until all the extended properties have been read since
# they are not listed in numeric order.
mapping(\%Is, "Is");
+#
+# Read in the special cases from SpecCase.txt into %Case (keys Lower, Title, Upper).
+#
+
+my %Case;  # case kind => list of [ numeric code, code string, mapping ] triples
+
+if (open(my $SpecCase, "SpecCase.txt")) {
+ while (<$SpecCase>) {
+ next unless /^[0-9A-Fa-f]+;/;  # only lines that begin with a hex code point followed by ';'
+ s/\#.*//;  # drop the trailing comment
+ s/\s+$//;  # and any trailing whitespace (including the newline)
+
+ my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/);  # fields are ';'-separated; $condition is undef when absent
+
+ if ($condition) { # conditional mappings are not implemented yet
+ print "# SKIPPING $_\n";
+ next;
+ }
+
+ # Wait until all the special cases have been read since
+ # they are not listed in numeric order.
+ my $ix = hex($code);  # numeric value of the hex code string
+ push @{$Case{Lower}}, [ $ix, $code, $lower ];
+ push @{$Case{Title}}, [ $ix, $code, $title ];
+ push @{$Case{Upper}}, [ $ix, $code, $upper ];
+ }
+} else {
+ die "$0: SpecCase.txt: $!\n";  # the file could not be opened
+}
+
+# Now write out the special case properties in code point order.
+# The To/Spec{Lower,Title,Upper}.pl files are unused for now since the
+# swash routines do not support returning multiple characters.
+
+for my $case (qw(Lower Title Upper)) {
+ my @case;  # entries accumulated for this case kind
+ for my $prop (sort { $a->[0] <=> $b->[0] } @{$Case{$case}}) {  # ascending by numeric code point
+ my ($ix, $code, $to) = @$prop;  # $ix served only as the sort key
+ append(\@case, $code, $to);  # append() is defined outside this excerpt
+ }
+ flush(\@case, "To/Spec$case.pl");  # flush() is defined outside this excerpt; presumably writes the table file -- confirm
+}
+
# That's all, folks!