X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Funicode%2Fmktables.PL;h=86277017001c1bbcd4c21c2544dedfc27e185822;hb=cbca01e195aef313c02c9c3d2c845e117bfcafa4;hp=fcc8eac3028d687e60ffaa5176c285c0091555b8;hpb=11eeea96a679f7b20489fe2e2eab4958ef273d62;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL index fcc8eac..8627701 100755 --- a/lib/unicode/mktables.PL +++ b/lib/unicode/mktables.PL @@ -152,6 +152,21 @@ mkdir "Eq", 0777; # Jamo ['JamoShort', '1', '$short'], + +# Syllables + + ['IsSylV', '$syl eq "V"', ''], + ['IsSylU', '$syl eq "U"', ''], + ['IsSylI', '$syl eq "I"', ''], + ['IsSylA', '$syl eq "A"', ''], + ['IsSylE', '$syl eq "E"', ''], + ['IsSylC', '$syl eq "C"', ''], + ['IsSylO', '$syl eq "O"', ''], + ['IsSylWV', '$syl eq "V"', ''], + ['IsSylWI', '$syl eq "I"', ''], + ['IsSylWA', '$syl eq "A"', ''], + ['IsSylWE', '$syl eq "E"', ''], + ['IsSylWC', '$syl eq "C"', ''], ); # This is not written for speed... @@ -223,6 +238,11 @@ sub proplist { $split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;'; } + elsif ($table =~ /^IsSyl/) { + open(UD, "syllables.txt") or warn "Can't open $table: $!"; + + $split = '($code, $short, $syl) = split(/; */); $code =~ s/^U\+//;'; + } else { open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!"; @@ -312,48 +332,22 @@ while () { $code{$name} = $code; $name{$code} = $name; - if ($name =~ /^((?:LATIN|GREEK|CYRILLIC|HEBREW|BENGALI) .+? LETTER .+?) WITH /) { - push @base, [ $code, $1 ]; - } elsif ($name =~ /^(ARABIC LETTER \w+?) WITH .+ (\w+ FORM)$/) { - push @base, [ $code, "$1 $2" ]; - } elsif ($name =~ /^(ARABIC LETTER \w+?) WITH /) { - push @base, [ $code, $1 ]; -# Is the concept of turning ligatures into character classes sound? - } elsif ($name =~ /^(ARABIC) LIGATURE (.+?) (WITH .+ )+(\w+ FORM)$/) { - my $script = $1; - my $base = $2; - my $with = $3; - my $form = $4; - push @base, [ $code, "$script LETTER $base" ]; - push @base, [ $code, "$script LETTER $base $form" ]; - my @with = split(/\bWITH\s+/, $with); - shift @with; - @with = grep { ! /^ (?:ABOVE|BELOW)/ } @with; - foreach my $base (@with) { - push @base, [ $code, "$script LETTER $base" ]; - push @base, [ $code, "$script LETTER $base $form" ]; - } - } elsif ($name =~ /^((?:ARMENIAN|CYRILLIC) .+) LIGATURE (\w+) (\w+)$/) { - push @base, [ $code, "$1 LETTER $2" ]; - push @base, [ $code, "$1 LETTER $3" ]; -# Latin ligatures (ae, oe, ij, ff, fi, fl, ffi, ffl, long st, st) ignored. -# Hebrew Yiddish ligatures (double vav, vav yod, double yod, yod yod patah, -# alef lamed) ignored. - } else { - next; - } + next unless $name =~ /^(.+? LETTER .+?) WITH .+( \w+ FORM)?$/; + push @base, [ $code, $1 ]; + push @base, [ $code, $1.$2 ] if $2 ne ''; } foreach my $b (@base) { ($code, $base) = @$b; next unless exists $code{$base}; push @{$unicode{$code{$base}}}, $code; - print "$code: $name{$code} -> $base\n", +# print "$code: $name{$code} -> $base\n", } @unicode = sort keys %unicode; +print "EqUnicode\n"; if (open(EQ_UNICODE, ">Eq/Unicode")) { foreach my $c (@unicode) { print EQ_UNICODE "$c @{$unicode{$c}}\n"; @@ -363,6 +357,7 @@ if (open(EQ_UNICODE, ">Eq/Unicode")) { die "$0: failed to open Eq/Unicode for writing: $!\n"; } +print "EqLatin1\n"; if (open(EQ_LATIN1, ">Eq/Latin1")) { foreach my $c (@unicode) { last if hex($c) > 255;