From: Jarkko Hietaniemi Date: Wed, 17 Oct 2001 00:54:28 +0000 (+0000) Subject: Add the special casing mappings (from SpecCase.txt) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d2d499f5a831730fa4ee7eedade0afc419d869bc;p=p5sagit%2Fp5-mst-13.2.git Add the special casing mappings (from SpecCase.txt) (except for the hyper special case mappings that have the condition list); the special casing mappings are unused for now. Small tidying up on mktables. p4raw-id: //depot/perl@12465 --- diff --git a/lib/unicore/To/SpecLower.pl b/lib/unicore/To/SpecLower.pl new file mode 100644 index 0000000..18c073b --- /dev/null +++ b/lib/unicore/To/SpecLower.pl @@ -0,0 +1,107 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +00DF 00DF +0149 0149 +01F0 01F0 +0390 0390 +03B0 03B0 +0587 0587 +1E96 1E96 +1E97 1E97 +1E98 1E98 +1E99 1E99 +1E9A 1E9A +1F50 1F50 +1F52 1F52 +1F54 1F54 +1F56 1F56 +1F80 1F80 +1F81 1F81 +1F82 1F82 +1F83 1F83 +1F84 1F84 +1F85 1F85 +1F86 1F86 +1F87 1F87 +1F88 1F80 +1F89 1F81 +1F8A 1F82 +1F8B 1F83 +1F8C 1F84 +1F8D 1F85 +1F8E 1F86 +1F8F 1F87 +1F90 1F90 +1F91 1F91 +1F92 1F92 +1F93 1F93 +1F94 1F94 +1F95 1F95 +1F96 1F96 +1F97 1F97 +1F98 1F90 +1F99 1F91 +1F9A 1F92 +1F9B 1F93 +1F9C 1F94 +1F9D 1F95 +1F9E 1F96 +1F9F 1F97 +1FA0 1FA0 +1FA1 1FA1 +1FA2 1FA2 +1FA3 1FA3 +1FA4 1FA4 +1FA5 1FA5 +1FA6 1FA6 +1FA7 1FA7 +1FA8 1FA0 +1FA9 1FA1 +1FAA 1FA2 +1FAB 1FA3 +1FAC 1FA4 +1FAD 1FA5 +1FAE 1FA6 +1FAF 1FA7 +1FB2 1FB2 +1FB3 1FB3 +1FB4 1FB4 +1FB6 1FB6 +1FB7 1FB7 +1FBC 1FB3 +1FC2 1FC2 +1FC3 1FC3 +1FC4 1FC4 +1FC6 1FC6 +1FC7 1FC7 +1FCC 1FC3 +1FD2 1FD2 +1FD3 1FD3 +1FD6 1FD6 +1FD7 1FD7 +1FE2 1FE2 +1FE3 1FE3 +1FE4 1FE4 +1FE6 1FE6 +1FE7 1FE7 +1FF2 1FF2 +1FF3 1FF3 +1FF4 1FF4 +1FF6 1FF6 +1FF7 1FF7 +1FFC 1FF3 +FB00 FB00 +FB01 FB01 +FB02 FB02 +FB03 FB03 +FB04 FB04 +FB05 FB05 +FB06 FB06 +FB13 FB13 +FB14 FB14 +FB15 FB15 +FB16 FB16 +FB17 FB17 +END diff --git a/lib/unicore/To/SpecTitle.pl b/lib/unicore/To/SpecTitle.pl new file mode 100644 index 0000000..c3e1911 --- /dev/null +++ b/lib/unicore/To/SpecTitle.pl @@ -0,0 +1,106 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +00DF 0053 0073 +0149 02BC 004E +01F0 004A 030C +0390 0399 0308 0301 +03B0 03A5 0308 0301 +0587 0535 0582 +1E96 0048 0331 +1E97 0054 0308 +1E98 0057 030A +1E99 0059 030A +1E9A 0041 02BE +1F50 03A5 0313 +1F52 03A5 0313 0300 +1F54 03A5 0313 0301 +1F56 03A5 0313 0342 +1F80 1F88 +1F81 1F89 +1F82 1F8A +1F83 1F8B +1F84 1F8C +1F85 1F8D +1F86 1F8E +1F87 1F8F +1F88 1F88 +1F89 1F89 +1F8A 1F8A +1F8B 1F8B +1F8C 1F8C +1F8D 1F8D +1F8E 1F8E +1F8F 1F8F +1F90 1F98 +1F91 1F99 +1F92 1F9A +1F93 1F9B +1F94 1F9C +1F95 1F9D +1F96 1F9E +1F97 1F9F +1F98 1F98 +1F99 1F99 +1F9A 1F9A +1F9B 1F9B +1F9C 1F9C +1F9D 1F9D +1F9E 1F9E +1F9F 1F9F +1FA0 1FA8 +1FA1 1FA9 +1FA2 1FAA +1FA3 1FAB +1FA4 1FAC +1FA5 1FAD +1FA6 1FAE +1FA7 1FAF +1FA8 1FA8 +1FA9 1FA9 +1FAA 1FAA +1FAB 1FAB +1FAC 1FAC +1FAD 1FAD +1FAE 1FAE +1FAF 1FAF +1FB2 1FBA 0345 +1FB3 1FBC +1FB4 0386 0345 +1FB6 0391 0342 +1FB7 0391 0342 0345 +1FBC 1FBC +1FC2 1FCA 0345 +1FC3 1FCC +1FC4 0389 0345 +1FC6 0397 0342 +1FC7 0397 0342 0345 +1FCC 1FCC +1FD2 0399 0308 0300 +1FD3 0399 0308 0301 +1FD6 0399 0342 +1FD7 0399 0308 0342 +1FE2 03A5 0308 0300 +1FE3 03A5 0308 0301 +1FE4 03A1 0313 +1FE6 03A5 0342 +1FE7 03A5 0308 0342 +1FF2 1FFA 0345 +1FF3 1FFC +1FF4 038F 0345 +1FF6 03A9 0342 +1FF7 03A9 0342 0345 +1FFC 1FFC +FB00 0046 0066 +FB01 0046 0069 +FB02 0046 006C +FB03 0046 0066 0069 +FB04 0046 0066 006C +FB05 FB06 0053 0074 +FB13 0544 0576 +FB14 0544 0565 +FB15 0544 056B +FB16 054E 0576 +FB17 0544 056D +END diff --git a/lib/unicore/To/SpecUpper.pl b/lib/unicore/To/SpecUpper.pl new file mode 100644 index 0000000..e5af4b1 --- /dev/null +++ b/lib/unicore/To/SpecUpper.pl @@ -0,0 +1,106 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by mktables from e.g. Unicode.txt. +# Any changes made here will be lost! +return <<'END'; +00DF 0053 0053 +0149 02BC 004E +01F0 004A 030C +0390 0399 0308 0301 +03B0 03A5 0308 0301 +0587 0535 0552 +1E96 0048 0331 +1E97 0054 0308 +1E98 0057 030A +1E99 0059 030A +1E9A 0041 02BE +1F50 03A5 0313 +1F52 03A5 0313 0300 +1F54 03A5 0313 0301 +1F56 03A5 0313 0342 +1F80 1F08 0399 +1F81 1F09 0399 +1F82 1F0A 0399 +1F83 1F0B 0399 +1F84 1F0C 0399 +1F85 1F0D 0399 +1F86 1F0E 0399 +1F87 1F0F 0399 +1F88 1F08 0399 +1F89 1F09 0399 +1F8A 1F0A 0399 +1F8B 1F0B 0399 +1F8C 1F0C 0399 +1F8D 1F0D 0399 +1F8E 1F0E 0399 +1F8F 1F0F 0399 +1F90 1F28 0399 +1F91 1F29 0399 +1F92 1F2A 0399 +1F93 1F2B 0399 +1F94 1F2C 0399 +1F95 1F2D 0399 +1F96 1F2E 0399 +1F97 1F2F 0399 +1F98 1F28 0399 +1F99 1F29 0399 +1F9A 1F2A 0399 +1F9B 1F2B 0399 +1F9C 1F2C 0399 +1F9D 1F2D 0399 +1F9E 1F2E 0399 +1F9F 1F2F 0399 +1FA0 1F68 0399 +1FA1 1F69 0399 +1FA2 1F6A 0399 +1FA3 1F6B 0399 +1FA4 1F6C 0399 +1FA5 1F6D 0399 +1FA6 1F6E 0399 +1FA7 1F6F 0399 +1FA8 1F68 0399 +1FA9 1F69 0399 +1FAA 1F6A 0399 +1FAB 1F6B 0399 +1FAC 1F6C 0399 +1FAD 1F6D 0399 +1FAE 1F6E 0399 +1FAF 1F6F 0399 +1FB2 1FBA 0399 +1FB3 0391 0399 +1FB4 0386 0399 +1FB6 0391 0342 +1FB7 0391 0342 0399 +1FBC 0391 0399 +1FC2 1FCA 0399 +1FC3 0397 0399 +1FC4 0389 0399 +1FC6 0397 0342 +1FC7 0397 0342 0399 +1FCC 0397 0399 +1FD2 0399 0308 0300 +1FD3 0399 0308 0301 +1FD6 0399 0342 +1FD7 0399 0308 0342 +1FE2 03A5 0308 0300 +1FE3 03A5 0308 0301 +1FE4 03A1 0313 +1FE6 03A5 0342 +1FE7 03A5 0308 0342 +1FF2 1FFA 0399 +1FF3 03A9 0399 +1FF4 038F 0399 +1FF6 03A9 0342 +1FF7 03A9 0342 0399 +1FFC 03A9 0399 +FB00 0046 0046 +FB01 0046 0049 +FB02 0046 004C +FB03 0046 0046 0049 +FB04 0046 0046 004C +FB05 FB06 0053 0054 +FB13 0544 0546 +FB14 0544 0535 +FB15 0544 053B +FB16 054E 0546 +FB17 0544 053D +END diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 5b2d786..f851302 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -5,10 +5,10 @@ # from the Unicode database files (lib/unicore/*.txt). # -my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1. - use strict; +my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1. + mkdir("In", 0755); mkdir("Is", 0755); mkdir("To", 0755); @@ -123,9 +123,9 @@ if (open(my $Unicode, "Unicode.txt")) { my @Mirrored; my %To; while (<$Unicode>) { - next if /^\#/ || /^\s*$/; - next unless /^[0-9a-f]+\s*;/i; + next unless /^[0-9A-Fa-f]+;/; s/\s+$//; + my ($code, $name, $cat, $comb, $bidi, $deco, $decimal, $digit, $number, $mirrored, $unicode10, $comment, @@ -259,10 +259,7 @@ if (open(my $LineBrk, "LineBrk.txt")) { my %Lbrk; while (<$LineBrk>) { - next if /^\#/ || /^\s*$/; - s/\s+$//; - s/\s*\#.*//; - next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i; + next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/; my ($first, $last, $lbrk) = ($1, $2, $3); @@ -291,9 +288,9 @@ if (open(my $ArabShap, "ArabShap.txt")) { my @ArabLinkGroup; while (<$ArabShap>) { - next if /^\#/ || /^\s*$/; - next unless /^[0-9a-f]+\s*;/i; + next unless /^[0-9A-Fa-f]+;/; s/\s+$//; + my ($code, $name, $link, $linkgroup) = split(/\s*;\s*/); append(\@ArabLink, $code, $link); @@ -314,11 +311,9 @@ if (open(my $Jamo, "Jamo.txt")) { my @Short; while (<$Jamo>) { - next if /^\#/ || /^\s*$/; - next unless /^[0-9a-f]+\s*;/i; - s/\s*\#.*//; - s/\s+$//; - my ($code, $short) = split(/\s*;\s*/); + next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/; + + my ($code, $short) = ($1, $2); append(\@Short, $code, $short); } @@ -336,10 +331,7 @@ my @Scripts; if (open(my $Scripts, "Scripts.txt")) { while (<$Scripts>) { - next if /^\#/ || /^\s*$/; - s/\s*\#.*//; - s/\s+$//; - next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i; + next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/; # Wait until all the scripts have been read since # they are not listed in numeric order. @@ -387,11 +379,8 @@ my %Blocks; if (open(my $Blocks, "Blocks.txt")) { while (<$Blocks>) { - next if /^\#/ || /^\s*$/; - s/\s*\#.*//; - s/\s+$//; - next unless /^([0-9a-f]+)\.\.([0-9a-f]+)\s*;\s*(.+)$/i; - + next unless /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/; + my ($first, $last, $name) = ($1, $2, $3); # If there's a naming conflict (the script names are @@ -430,10 +419,7 @@ my @Props; if (open(my $Props, "PropList.txt")) { while (<$Props>) { - next if /^\#/ || /^\s*$/; - s/\s*\#.*//; - s/\s+$//; - next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(\w+)/i; + next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/; # Wait until all the extended properties have been read since # they are not listed in numeric order. @@ -682,5 +668,48 @@ my %Is = ( mapping(\%Is, "Is"); +# +# Read in the special cases. +# + +my %Case; + +if (open(my $SpecCase, "SpecCase.txt")) { + while (<$SpecCase>) { + next unless /^[0-9A-Fa-f]+;/; + s/\#.*//; + s/\s+$//; + + my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/); + + if ($condition) { # not implemented yet + print "# SKIPPING $_\n"; + next; + } + + # Wait until all the special cases have been read since + # they are not listed in numeric order. + my $ix = hex($code); + push @{$Case{Lower}}, [ $ix, $code, $lower ]; + push @{$Case{Title}}, [ $ix, $code, $title ]; + push @{$Case{Upper}}, [ $ix, $code, $upper ]; + } +} else { + die "$0: SpecCase.txt: $!\n"; +} + +# Now write out the special cases properties in their code point order. +# The To/Spec{Lower,Title,Upper}.pl are unused for now since the swash +# routines do not do returning multiple characters. + +for my $case (qw(Lower Title Upper)) { + my @case; + for my $prop (sort { $a->[0] <=> $b->[0] } @{$Case{$case}}) { + my ($ix, $code, $to) = @$prop; + append(\@case, $code, $to); + } + flush(\@case, "To/Spec$case.pl"); +} + # That's all, folks!