Ethiopic changes via private email from Daniel Yacob,
Jarkko Hietaniemi [Mon, 9 Aug 1999 10:25:54 +0000 (10:25 +0000)]
<dmulholl@cs.indiana.edu>.  Ethiopic and Cherokee done,
Canadian Syllabics and Yi under construction.

p4raw-id: //depot/cfgperl@3938

MANIFEST
lib/unicode/EthiopicSyllables.txt [deleted file]
lib/unicode/MakeEthiopicSyllables.PL [deleted file]
lib/unicode/mktables.PL
lib/unicode/syllables.txt [new file with mode: 0644]

index ddba85e..6ad17ef 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -698,7 +698,6 @@ lib/unicode/CombiningClass.pl                       Unicode character database
 lib/unicode/Decomposition.pl                   Unicode character database
 lib/unicode/Eq/Latin1                          Unicode character database
 lib/unicode/Eq/Unicode                         Unicode character database
-lib/unicode/EthiopicSyllables.txt              Unicode character database
 lib/unicode/In/AlphabeticPresentationForms.pl  Unicode character database
 lib/unicode/In/Arabic.pl                       Unicode character database
 lib/unicode/In/ArabicPresentationForms-A.pl    Unicode character database
@@ -841,7 +840,6 @@ lib/unicode/Is/Zl.pl                                Unicode character database
 lib/unicode/Is/Zp.pl                           Unicode character database
 lib/unicode/Is/Zs.pl                           Unicode character database
 lib/unicode/JamoShort.pl                       Unicode character database
-lib/unicode/MakeEthiopicSyllables.PL           Unicode character database
 lib/unicode/Makefile                           Unicode character database
 lib/unicode/Name.pl                            Unicode character database
 lib/unicode/Number.pl                          Unicode character database
@@ -859,6 +857,7 @@ lib/unicode/mktables.PL                             Unicode character database generator
 lib/unicode/names2.txt                         Unicode character database
 lib/unicode/props2.txt                         Unicode character database
 lib/unicode/readme.txt                         Unicode character database info
+lib/unicode/syllables.txt                      Unicode character database
 lib/utf8.pm                                    Pragma to control Unicode support
 lib/utf8_heavy.pl                              Support routines for utf8 pragma
 lib/validate.pl                Perl library supporting wholesale file mode validation
diff --git a/lib/unicode/EthiopicSyllables.txt b/lib/unicode/EthiopicSyllables.txt
deleted file mode 100644 (file)
index 6d807b6..0000000
+++ /dev/null
@@ -1,314 +0,0 @@
-1200           0
-1201           1
-1202           2
-1203           3
-1204           4
-1205           5
-1206           6
-1208           0
-1209           1
-120a           2
-120b           3
-120c           4
-120d           5
-120e           6
-120f           11
-1210           0
-1211           1
-1212           2
-1213           3
-1214           4
-1215           5
-1216           6
-1217           11
-1218           0
-1219           1
-121a           2
-121b           3
-121c           4
-121d           5
-121e           6
-121f           11
-1220           0
-1221           1
-1222           2
-1223           3
-1224           4
-1225           5
-1226           6
-1227           11
-1228           0
-1229           1
-122a           2
-122b           3
-122c           4
-122d           5
-122e           6
-122f           11
-1230           0
-1231           1
-1232           2
-1233           3
-1234           4
-1235           5
-1236           6
-1237           11
-1238           0
-1239           1
-123a           2
-123b           3
-123c           4
-123d           5
-123e           6
-123f           11
-1240           0
-1241           1
-1242           2
-1243           3
-1244           4
-1245           5
-1246           6
-1248           8
-124a           10
-124b           11
-124c           12
-124d           13
-1250           0
-1251           1
-1252           2
-1253           3
-1254           4
-1255           5
-1256           6
-1258           8
-125a           10
-125b           11
-125c           12
-125d           13
-1260           0
-1261           1
-1262           2
-1263           3
-1264           4
-1265           5
-1266           6
-1267           11
-1268           0
-1269           1
-126a           2
-126b           3
-126c           4
-126d           5
-126e           6
-126f           11
-1270           0
-1271           1
-1272           2
-1273           3
-1274           4
-1275           5
-1276           6
-1277           11
-1278           0
-1279           1
-127a           2
-127b           3
-127c           4
-127d           5
-127e           6
-127f           11
-1280           0
-1281           1
-1282           2
-1283           3
-1284           4
-1285           5
-1286           6
-1288           8
-128a           10
-128b           11
-128c           12
-128d           13
-1290           0
-1291           1
-1292           2
-1293           3
-1294           4
-1295           5
-1296           6
-1297           11
-1298           0
-1299           1
-129a           2
-129b           3
-129c           4
-129d           5
-129e           6
-129f           11
-12a0           0
-12a1           1
-12a2           2
-12a3           3
-12a4           4
-12a5           5
-12a6           6
-12a7           11
-12a8           0
-12a9           1
-12aa           2
-12ab           3
-12ac           4
-12ad           5
-12ae           6
-12b0           8
-12b2           10
-12b3           11
-12b4           12
-12b5           13
-12b8           0
-12b9           1
-12ba           2
-12bb           3
-12bc           4
-12bd           5
-12be           6
-12c0           8
-12c2           10
-12c3           11
-12c4           12
-12c5           13
-12c8           0
-12c9           1
-12ca           2
-12cb           3
-12cc           4
-12cd           5
-12ce           6
-12d0           0
-12d1           1
-12d2           2
-12d3           3
-12d4           4
-12d5           5
-12d6           6
-12d8           0
-12d9           1
-12da           2
-12db           3
-12dc           4
-12dd           5
-12de           6
-12df           11
-12e0           0
-12e1           1
-12e2           2
-12e3           3
-12e4           4
-12e5           5
-12e6           6
-12e7           11
-12e8           0
-12e9           1
-12ea           2
-12eb           3
-12ec           4
-12ed           5
-12ee           6
-12f0           0
-12f1           1
-12f2           2
-12f3           3
-12f4           4
-12f5           5
-12f6           6
-12f7           11
-12f8           0
-12f9           1
-12fa           2
-12fb           3
-12fc           4
-12fd           5
-12fe           6
-12ff           11
-1300           0
-1301           1
-1302           2
-1303           3
-1304           4
-1305           5
-1306           6
-1307           11
-1308           0
-1309           1
-130a           2
-130b           3
-130c           4
-130d           5
-130e           6
-1310           8
-1312           10
-1313           11
-1314           12
-1315           13
-1318           0
-1319           1
-131a           2
-131b           3
-131c           4
-131d           5
-131e           6
-1320           0
-1321           1
-1322           2
-1323           3
-1324           4
-1325           5
-1326           6
-1327           11
-1328           0
-1329           1
-132a           2
-132b           3
-132c           4
-132d           5
-132e           6
-132f           11
-1330           0
-1331           1
-1332           2
-1333           3
-1334           4
-1335           5
-1336           6
-1337           11
-1338           0
-1339           1
-133a           2
-133b           3
-133c           4
-133d           5
-133e           6
-133f           11
-1340           0
-1341           1
-1342           2
-1343           3
-1344           4
-1345           5
-1346           6
-1348           0
-1349           1
-134a           2
-134b           3
-134c           4
-134d           5
-134e           6
-134f           11
-1350           0
-1351           1
-1352           2
-1353           3
-1354           4
-1355           5
-1356           6
-1357           11
diff --git a/lib/unicode/MakeEthiopicSyllables.PL b/lib/unicode/MakeEthiopicSyllables.PL
deleted file mode 100755 (executable)
index bccec32..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-#!../../miniperl
-
-#
-# We use the "two rows of 8" perspective to map the syllables onto the
-# expected forms as per the various asundry Ethiopic locales...
-#
-open (GEEZ, ">Is/Y0.pl");
-open (KAIB, ">Is/Y1.pl");
-open (SALS, ">Is/Y2.pl");
-open (RABI, ">Is/Y3.pl");
-open (HAMS, ">Is/Y4.pl");
-open (SADS, ">Is/Y5.pl");
-open (SABI, ">Is/Y6.pl");
-
-open (DIQALA_GEEZ, ">Is/Y8.pl");
-open (DIQALA_SALS, ">Is/Y10.pl");
-open (DIQALA_RABI, ">Is/Y11.pl");  # which is sometimes just DIQALA
-open (DIQALA_HAMS, ">Is/Y12.pl");
-open (DIQALA_SADS, ">Is/Y13.pl");  # though people outside of unicode.org 
-                                   #  might say DIQALA_KAIB...
-
-@fh = qw(
-          GEEZ KAIB SALS RABI HAMS SADS SABI none
-          DIQALA_GEEZ none DIQALA_SALS DIQALA_RABI DIQALA_HAMS DIQALA_SADS
-       );
-
-
-for $form (0..$#fh) {
-       $FILE = $fh[$form];
-       print $FILE "return <<'END'\n" if ($FILE ne "none");
-}
-
-
-open (ETHIOPIC, "EthiopicSyllables.txt");
-while (<ETHIOPIC>) {
-       ($uni, $form) = split (/\s+/ );
-       $FILE = $fh[$form];
-       print $FILE "$uni\n";
-}
-close (ETHIOPIC);
-
-
-for $form (0..$#fh) {
-       $FILE = $fh[$form];
-       print $FILE "END\n" if ($FILE ne "none");
-}
-
-close (GEEZ);
-close (KAIB);
-close (SALS);
-close (RABI);
-close (HAMS);
-close (SADS);
-close (SABI);
-
-close (DIQALA_GEEZ);
-close (DIQALA_SALS);
-close (DIQALA_RABI);
-close (DIQALA_HAMS);
-close (DIQALA_SADS);
-
-symlink ( "Is/Y11.pl", "Is/Y7.pl" );
-symlink ( "Is/Y13.pl", "Is/Y9.pl" );
index 41b192b..f54ea69 100755 (executable)
@@ -152,6 +152,21 @@ mkdir "Eq", 0777;
 # Jamo
 
     ['JamoShort',      '1',            '$short'],
+
+# Syllables ($syl is the vowel class read from syllables.txt; a "W" prefix marks the diphthong forms)
+
+    ['IsSylV', '$syl eq "V"',          ''],
+    ['IsSylU', '$syl eq "U"',          ''],
+    ['IsSylI', '$syl eq "I"',          ''],
+    ['IsSylA', '$syl eq "A"',          ''],
+    ['IsSylE', '$syl eq "E"',          ''],
+    ['IsSylC', '$syl eq "C"',          ''],
+    ['IsSylO', '$syl eq "O"',          ''],
+    ['IsSylWV',        '$syl eq "WV"',         ''],
+    ['IsSylWI',        '$syl eq "WI"',         ''],
+    ['IsSylWA',        '$syl eq "WA"',         ''],
+    ['IsSylWE',        '$syl eq "WE"',         ''],
+    ['IsSylWC',        '$syl eq "WC"',         ''],
 );
 
 # This is not written for speed...
@@ -223,6 +238,11 @@ sub proplist {
 
        $split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';
     }
+    elsif ($table =~ /^IsSyl/) {
+       open(UD, "syllables.txt") or warn "Can't open syllables.txt: $!";
+       # Fields: code point; syllable name; vowel class ($syl, tested by the IsSyl* tables).
+       $split = '($code, $short, $syl) = split(/; */); $code =~ s/^U\+//;';
+    }
     else {
        open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!";
 
diff --git a/lib/unicode/syllables.txt b/lib/unicode/syllables.txt
new file mode 100644 (file)
index 0000000..8d3095c
--- /dev/null
@@ -0,0 +1,426 @@
+################################################################################
+#
+#   V: as  "u" in "but" (often represented with schwa or small uppercase lambda)
+#   U: as "oo" in "fool"
+#   I: as "ea" in "meat"
+#   A: as  "a" in "father"
+#   E: as  "a" in "hate"
+#   C: the consonant form having no vowel element
+#   O: as  "o" in "note"
+#
+#   Vowel identifiers are assumed short, doubled identifiers are considered long
+#   (following Cushitic rules).  Diphthong syllables are identified with "W" as
+#   per Ethiopic and Canadian syllabary character names.
+#   
+#
+#   WV  WVV  WU  WUU  WI  WII  WA  WAA  WE  WEE  WC  WO  WOO
+#
+#    V   VV   U   UU   I   II   A   AA   E   EE   C   O   OO
+# 
+################################################################################
+
+#
+# Ethiopic
+#
+1200; HA; V
+1201; HU; U
+1202; HI; I
+1203; HAA; A
+1204; HEE; E
+1205; HE; C
+1206; HO; O
+1208; LA; V
+1209; LU; U
+120A; LI; I
+120B; LAA; A
+120C; LEE; E
+120D; LE; C
+120E; LO; O
+120F; LWA; WA
+1210; HHA; V
+1211; HHU; U
+1212; HHI; I
+1213; HHAA; A
+1214; HHEE; E
+1215; HHE; C
+1216; HHO; O
+1217; HHWA; WA
+1218; MA; V
+1219; MU; U
+121A; MI; I
+121B; MAA; A
+121C; MEE; E
+121D; ME; C
+121E; MO; O
+121F; MWA; WA
+1220; SZA; V
+1221; SZU; U
+1222; SZI; I
+1223; SZAA; A
+1224; SZEE; E
+1225; SZE; C
+1226; SZO; O
+1227; SZWA; WA
+1228; RA; V
+1229; RU; U
+122A; RI; I
+122B; RAA; A
+122C; REE; E
+122D; RE; C
+122E; RO; O
+122F; RWA; WA
+1230; SA; V
+1231; SU; U
+1232; SI; I
+1233; SAA; A
+1234; SEE; E
+1235; SE; C
+1236; SO; O
+1237; SWA; WA
+1238; SHA; V
+1239; SHU; U
+123A; SHI; I
+123B; SHAA; A
+123C; SHEE; E
+123D; SHE; C
+123E; SHO; O
+123F; SHWA; WA
+1240; QA; V
+1241; QU; U
+1242; QI; I
+1243; QAA; A
+1244; QEE; E
+1245; QE; C
+1246; QO; O
+1248; QWA; WV
+124A; QWI; WI
+124B; QWAA; WA
+124C; QWEE; WE
+124D; QWE; WC
+1250; QHA; V
+1251; QHU; U
+1252; QHI; I
+1253; QHAA; A
+1254; QHEE; E
+1255; QHE; C
+1256; QHO; O
+1258; QHWA; WV
+125A; QHWI; WI
+125B; QHWAA; WA
+125C; QHWEE; WE
+125D; QHWE; WC
+1260; BA; V
+1261; BU; U
+1262; BI; I
+1263; BAA; A
+1264; BEE; E
+1265; BE; C
+1266; BO; O
+1267; BWA; WA
+1268; VA; V
+1269; VU; U
+126A; VI; I
+126B; VAA; A
+126C; VEE; E
+126D; VE; C
+126E; VO; O
+126F; VWA; WA
+1270; TA; V
+1271; TU; U
+1272; TI; I
+1273; TAA; A
+1274; TEE; E
+1275; TE; C
+1276; TO; O
+1277; TWA; WA
+1278; CA; V
+1279; CU; U
+127A; CI; I
+127B; CAA; A
+127C; CEE; E
+127D; CE; C
+127E; CO; O
+127F; CWA; WA
+1280; XA; V
+1281; XU; U
+1282; XI; I
+1283; XAA; A
+1284; XEE; E
+1285; XE; C
+1286; XO; O
+1288; XWA; WV
+128A; XWI; WI
+128B; XWAA; WA
+128C; XWEE; WE
+128D; XWE; WC
+1290; NA; V
+1291; NU; U
+1292; NI; I
+1293; NAA; A
+1294; NEE; E
+1295; NE; C
+1296; NO; O
+1297; NWA; WA
+1298; NYA; V
+1299; NYU; U
+129A; NYI; I
+129B; NYAA; A
+129C; NYEE; E
+129D; NYE; C
+129E; NYO; O
+129F; NYWA; WA
+12A0; GLOTTAL A; V
+12A1; GLOTTAL U; U
+12A2; GLOTTAL I; I
+12A3; GLOTTAL AA; A
+12A4; GLOTTAL EE; E
+12A5; GLOTTAL E; C
+12A6; GLOTTAL O; O
+12A7; GLOTTAL WA; WA
+12A8; KA; V
+12A9; KU; U
+12AA; KI; I
+12AB; KAA; A
+12AC; KEE; E
+12AD; KE; C
+12AE; KO; O
+12B0; KWA; WV
+12B2; KWI; WI
+12B3; KWAA; WA
+12B4; KWEE; WE
+12B5; KWE; WC
+12B8; KXA; V
+12B9; KXU; U
+12BA; KXI; I
+12BB; KXAA; A
+12BC; KXEE; E
+12BD; KXE; C
+12BE; KXO; O
+12C0; KXWA; WV
+12C2; KXWI; WI
+12C3; KXWAA; WA
+12C4; KXWEE; WE
+12C5; KXWE; WC
+12C8; WA; V
+12C9; WU; U
+12CA; WI; I
+12CB; WAA; A
+12CC; WEE; E
+12CD; WE; C
+12CE; WO; O
+12D0; PHARYNGEAL A; V
+12D1; PHARYNGEAL U; U
+12D2; PHARYNGEAL I; I
+12D3; PHARYNGEAL AA; A
+12D4; PHARYNGEAL EE; E
+12D5; PHARYNGEAL E; C
+12D6; PHARYNGEAL O; O
+12D8; ZA; V
+12D9; ZU; U
+12DA; ZI; I
+12DB; ZAA; A
+12DC; ZEE; E
+12DD; ZE; C
+12DE; ZO; O
+12DF; ZWA; WA
+12E0; ZHA; V
+12E1; ZHU; U
+12E2; ZHI; I
+12E3; ZHAA; A
+12E4; ZHEE; E
+12E5; ZHE; C
+12E6; ZHO; O
+12E7; ZHWA; WA
+12E8; YA; V
+12E9; YU; U
+12EA; YI; I
+12EB; YAA; A
+12EC; YEE; E
+12ED; YE; C
+12EE; YO; O
+12F0; DA; V
+12F1; DU; U
+12F2; DI; I
+12F3; DAA; A
+12F4; DEE; E
+12F5; DE; C
+12F6; DO; O
+12F7; DWA; WA
+12F8; DDA; V
+12F9; DDU; U
+12FA; DDI; I
+12FB; DDAA; A
+12FC; DDEE; E
+12FD; DDE; C
+12FE; DDO; O
+12FF; DDWA; WA
+1300; JA; V
+1301; JU; U
+1302; JI; I
+1303; JAA; A
+1304; JEE; E
+1305; JE; C
+1306; JO; O
+1307; JWA; WA
+1308; GA; V
+1309; GU; U
+130A; GI; I
+130B; GAA; A
+130C; GEE; E
+130D; GE; C
+130E; GO; O
+1310; GWA; WV
+1312; GWI; WI
+1313; GWAA; WA
+1314; GWEE; WE
+1315; GWE; WC
+1318; GGA; V
+1319; GGU; U
+131A; GGI; I
+131B; GGAA; A
+131C; GGEE; E
+131D; GGE; C
+131E; GGO; O
+1320; THA; V
+1321; THU; U
+1322; THI; I
+1323; THAA; A
+1324; THEE; E
+1325; THE; C
+1326; THO; O
+1327; THWA; WA
+1328; CHA; V
+1329; CHU; U
+132A; CHI; I
+132B; CHAA; A
+132C; CHEE; E
+132D; CHE; C
+132E; CHO; O
+132F; CHWA; WA
+1330; PHA; V
+1331; PHU; U
+1332; PHI; I
+1333; PHAA; A
+1334; PHEE; E
+1335; PHE; C
+1336; PHO; O
+1337; PHWA; WA
+1338; TSA; V
+1339; TSU; U
+133A; TSI; I
+133B; TSAA; A
+133C; TSEE; E
+133D; TSE; C
+133E; TSO; O
+133F; TSWA; WA
+1340; TZA; V
+1341; TZU; U
+1342; TZI; I
+1343; TZAA; A
+1344; TZEE; E
+1345; TZE; C
+1346; TZO; O
+1348; FA; V
+1349; FU; U
+134A; FI; I
+134B; FAA; A
+134C; FEE; E
+134D; FE; C
+134E; FO; O
+134F; FWA; WA
+1350; PA; V
+1351; PU; U
+1352; PI; I
+1353; PAA; A
+1354; PEE; E
+1355; PE; C
+1356; PO; O
+1357; PWA; WA
+#
+# Cherokee
+#
+13A0; A; A
+13A1; E; E
+13A2; I; I
+13A3; O; O
+13A4; U; U
+13A5; V; V
+13A6; GA; A
+13A7; KA; A
+13A8; GE; E
+13A9; GI; I
+13AA; GO; O
+13AB; GU; U
+13AC; GV; V
+13AD; HA; A
+13AE; HE; E
+13AF; HI; I
+13B0; HO; O
+13B1; HU; U
+13B2; HV; V
+13B3; LA; A
+13B4; LE; E
+13B5; LI; I
+13B6; LO; O
+13B7; LU; U
+13B8; LV; V
+13B9; MA; A
+13BA; ME; E
+13BB; MI; I
+13BC; MO; O
+13BD; MU; U
+13BE; NA; A
+13BF; HNA; A
+13C0; NAH; C
+13C1; NE; E
+13C2; NI; I
+13C3; NO; O
+13C4; NU; U
+13C5; NV; V
+13C6; QUA; A
+13C7; QUE; E
+13C8; QUI; I
+13C9; QUO; O
+13CA; QUU; U
+13CB; QUV; V
+13CC; SA; A
+13CD; S; C
+13CE; SE; E
+13CF; SI; I
+13D0; SO; O
+13D1; SU; U
+13D2; SV; V
+13D3; DA; A
+13D4; TA; A
+13D5; DE; E
+13D6; TE; E
+13D7; DI; I
+13D8; TI; I
+13D9; DO; O
+13DA; DU; U
+13DB; DV; V
+13DC; DLA; A
+13DD; TLA; A
+13DE; TLE; E
+13DF; TLI; I
+13E0; TLO; O
+13E1; TLU; U
+13E2; TLV; V
+13E3; TSA; A
+13E4; TSE; E
+13E5; TSI; I
+13E6; TSO; O
+13E7; TSU; U
+13E8; TSV; V
+13E9; WA; A
+13EA; WE; E
+13EB; WI; I
+13EC; WO; O
+13ED; WU; U
+13EE; WV; V
+13EF; YA; A
+13F0; YE; E
+13F1; YI; I
+13F2; YO; O
+13F3; YU; U
+13F4; YV; V