# Jamo
['JamoShort', '1', '$short'],
+
+# Syllables
+
+ ['IsSylV', '$syl eq "V"', ''],
+ ['IsSylU', '$syl eq "U"', ''],
+ ['IsSylI', '$syl eq "I"', ''],
+ ['IsSylA', '$syl eq "A"', ''],
+ ['IsSylE', '$syl eq "E"', ''],
+ ['IsSylC', '$syl eq "C"', ''],
+ ['IsSylO', '$syl eq "O"', ''],
+ ['IsSylWV', '$syl eq "V"', ''],
+ ['IsSylWI', '$syl eq "I"', ''],
+ ['IsSylWA', '$syl eq "A"', ''],
+ ['IsSylWE', '$syl eq "E"', ''],
+ ['IsSylWC', '$syl eq "C"', ''],
);
# This is not written for speed...
$split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';
}
+ elsif ($table =~ /^IsSyl/) {
+ # All IsSyl* tables are read from syllables.txt, so a failed open must
+ # report that file name; $table is only the name of the table being
+ # generated and would make the diagnostic misleading.
+ open(UD, "syllables.txt") or warn "Can't open syllables.txt: $!";
+
+ # Per-line parsing snippet, kept as a string like the other branches:
+ # fields are "code; short; syl", and a leading "U+" is stripped from
+ # the code point.
+ $split = '($code, $short, $syl) = split(/; */); $code =~ s/^U\+//;';
+ }
else {
open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!";
$code{$name} = $code;
$name{$code} = $name;
- if ($name =~ /^((?:LATIN|GREEK|CYRILLIC|HEBREW|BENGALI) .+? LETTER .+?) WITH /) {
- push @base, [ $code, $1 ];
- } elsif ($name =~ /^(ARABIC LETTER \w+?) WITH .+ (\w+ FORM)$/) {
- push @base, [ $code, "$1 $2" ];
- } elsif ($name =~ /^(ARABIC LETTER \w+?) WITH /) {
- push @base, [ $code, $1 ];
-# Is the concept of turning ligatures into character classes sound?
- } elsif ($name =~ /^(ARABIC) LIGATURE (.+?) (WITH .+ )+(\w+ FORM)$/) {
- my $script = $1;
- my $base = $2;
- my $with = $3;
- my $form = $4;
- push @base, [ $code, "$script LETTER $base" ];
- push @base, [ $code, "$script LETTER $base $form" ];
- my @with = split(/\bWITH\s+/, $with);
- shift @with;
- @with = grep { ! /^ (?:ABOVE|BELOW)/ } @with;
- foreach my $base (@with) {
- push @base, [ $code, "$script LETTER $base" ];
- push @base, [ $code, "$script LETTER $base $form" ];
- }
- } elsif ($name =~ /^((?:ARMENIAN|CYRILLIC) .+) LIGATURE (\w+) (\w+)$/) {
- push @base, [ $code, "$1 LETTER $2" ];
- push @base, [ $code, "$1 LETTER $3" ];
-# Latin ligatures (ae, oe, ij, ff, fi, fl, ffi, ffl, long st, st) ignored.
-# Hebrew Yiddish ligatures (double vav, vav yod, double yod, yod yod patah,
-# alef lamed) ignored.
- } else {
- next;
- }
+ # Names of the shape "<prefix> LETTER <base> WITH <marks>[ <word> FORM]"
+ # are recorded against the name of their base letter ($1). The mark list
+ # is matched lazily: a greedy ".+" would swallow the trailing
+ # " <word> FORM" and leave $2 permanently undefined.
+ next unless $name =~ /^(.+? LETTER .+?) WITH .+?( \w+ FORM)?$/;
+ push @base, [ $code, $1 ];
+ # When a positional-form suffix is present, also record the base letter
+ # carrying that same form. $2 is undef (not '') when the group is absent.
+ push @base, [ $code, $1.$2 ] if defined $2;
}
foreach my $b (@base) {
($code, $base) = @$b;
next unless exists $code{$base};
push @{$unicode{$code{$base}}}, $code;
- print "$code: $name{$code} -> $base\n",
+# print "$code: $name{$code} -> $base\n",
}
@unicode = sort keys %unicode;
+print "EqUnicode\n";
if (open(EQ_UNICODE, ">Eq/Unicode")) {
foreach my $c (@unicode) {
print EQ_UNICODE "$c @{$unicode{$c}}\n";
die "$0: failed to open Eq/Unicode for writing: $!\n";
}
+print "EqLatin1\n";
if (open(EQ_LATIN1, ">Eq/Latin1")) {
foreach my $c (@unicode) {
last if hex($c) > 255;