# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0622 0625 R
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0622 0623 ALEF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0000 0008 BN
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0000 007F Basic Latin
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0000 001f Cc
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0300 0314 230
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
00a0 <noBreak> 0020
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0041 00C0 00C1 00C2 00C3 00C4 00C5
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0041 00C0 00C1 00C2 00C3 00C4 00C5 0100 0102 0104 01CD 0200 0202 0226 1E00 1EA0 1EA2 FF21
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
FB00 FB4F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0600 06FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
FB50 FDFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
FE70 FEFE
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0530 058F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2190 21FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0000 007F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0980 09FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2580 259F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3100 312F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
31A0 31BF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2500 257F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2800 28FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3300 33FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
FE30 FE4F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
F900 FAFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2E80 2EFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3000 303F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
4E00 9FFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3400 4DB5
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
13A0 13FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0300 036F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
FE20 FE2F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
20D0 20FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2400 243F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
20A0 20CF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0400 04FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0900 097F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2700 27BF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2460 24FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3200 32FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1200 137F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2000 206F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
25A0 25FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
10A0 10FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0370 03FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1F00 1FFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0A80 0AFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0A00 0A7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
FF00 FFEF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3130 318F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1100 11FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
AC00 D7A3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0590 05FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
DB80 DBFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
D800 DB7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3040 309F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0250 02AF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2FF0 2FFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3190 319F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2F00 2FDF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0C80 0CFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
30A0 30FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1780 17FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0E80 0EFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0080 00FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0100 017F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0180 024F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1E00 1EFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2100 214F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
DC00 DFFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0D00 0D7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2200 22FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2600 26FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2300 23FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1800 18AF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1000 109F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2150 218F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1680 169F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2440 245F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0B00 0B7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
E000 F8FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
16A0 16FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0D80 0DFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
FE50 FE6F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
02B0 02FF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
FFF0 FFFD
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2070 209F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0700 074F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0B80 0BFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0C00 0C7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0780 07BF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0E00 0E7F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0F00 0FFF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1400 167F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
A490 A4CF
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
A000 A48F
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0000 007f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0041 005a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0660 0669
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
000a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
002c
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
002f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0023 0025
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0041 005a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0021 0022
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
05be
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0009
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
000c
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0000 001f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0000 001f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0000 001f
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
e000 f8ff
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2460 2473
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
00a8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
fb51
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2102
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
fb55
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
fb54
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
fb50
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
ff61 ffbe
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
00a0
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
fe50 fe52
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3300 3357
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2080 208e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
00aa
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
fe30 fe44
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
3000
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
00c0 00c5
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
00a0
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0021 007e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0041 005a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0061 007a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
02b0 02b8
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
01bb
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0061 007a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
01c5
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0041 005a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0300 034e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0903
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0028 0029
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0300 034e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
00b2 00b3
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0021 0023
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
002d
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0029
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0021 0023
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0020 007e
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0028
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0021 0023
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0024
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0024
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
002b
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
00a6 00a7
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0009 000a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
END
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0041 005a
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0030 0039
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0020
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2028
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
2029
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0020
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
1100 G
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0000 001f <control>
--- /dev/null
+<html>
+
+<head>
+<meta name="GENERATOR" content="Microsoft FrontPage 3.0">
+<title>Unicode 3.0 NamesList File Structure</title>
+</head>
+
+<body>
+
+<h3>Unicode NamesList File Format</h3>
+
+<p>Last updated: 1999-07-06</p>
+
+<h3>1.0 Introduction</h3>
+
+<p>The Unicode name list file NamesList.txt (also NamesList.lst) is a plain text file used
+to drive the layout of the character code charts in the Unicode Standard. The information
+in this file is a combination of several fields from the UnicodeData.txt and Blocks.txt files,
+together with additional annotations for many characters. This document describes the
+syntax rules for the file format, but also gives brief information on how each construct
+is rendered when laid out for the book. Some of the syntax elements were used in
+preparation of the drafts of the book and may not be present in the final, released form
+of the NamesList.txt file.</p>
+
+<p>The same input file can be used to do the draft preparation for ISO/IEC 10646 (referred to
+below as ISO-style). This necessitates the presence of some information in the name list
+file that is not needed (and in fact removed during parsing) for the Unicode book.</p>
+
+<p>With access to the layout program (unibook.exe) it is a simple matter of creating
+name lists for the purpose of formatting working drafts containing proposed characters.</p>
+
+<h3>1.1 NamesList File Overview</h3>
+
+<p>The *.lst files are plain text files which in their most simple form look like this</p>
+
+<p>@@<tab>0020<tab>BASIC LATIN<tab>007F<br>
+; this is a file comment (ignored)<br>
+0020<tab>SPACE<br>
+0021<tab>EXCLAMATION MARK<br>
+0022<tab>QUOTATION MARK<br>
+. . . <br>
+007F<tab>DELETE</p>
+
+<p>The semicolon (as first character), @ and <tab> characters are used by the file
+syntax and must be provided as shown. Hexadecimal digits must be in UPPER CASE. A double
+@@ introduces a block header, with the title, and start and ending code of the block
+provided as shown.</p>
+
+<p>For an ISO-style, minimal name list, only the NAME_LINE and BLOCKHEADER and their
+constituent syntax elements are needed.</p>
+
+<p>The full syntax with all the options is provided in the following sections.</p>
+
+<h3>1.2 NamesList File Structure</h3>
+
+<p>This section defines the overall file structure.</p>
+
+<pre><strong>NAMELIST: TITLE_PAGE* BLOCK*
+</strong>
+<strong>TITLE_PAGE: TITLE
+ | TITLE_PAGE SUBTITLE
+ | TITLE_PAGE SUBHEADER
+ | TITLE_PAGE IGNORED_LINE
+ | TITLE_PAGE EMPTY_LINE
+ | TITLE_PAGE COMMENT_LINE
+ | TITLE_PAGE NOTICE
+ | TITLE_PAGE PAGEBREAK
+</strong>
+<strong>BLOCK: BLOCKHEADER
+ | BLOCK CHAR_ENTRY
+ | BLOCK SUBHEADER
+ | BLOCK NOTICE
+ | BLOCK EMPTY_LINE
+ | BLOCK IGNORED_LINE
+ | BLOCK PAGEBREAK
+
+CHAR_ENTRY: NAME_LINE | RESERVED_LINE
+ | CHAR_ENTRY ALIAS_LINE
+ | CHAR_ENTRY COMMENT_LINE
+ | CHAR_ENTRY CROSS_REF
+ | CHAR_ENTRY DECOMPOSITION
+ | CHAR_ENTRY COMPAT_MAPPING
+ | CHAR_ENTRY IGNORED_LINE
+ | CHAR_ENTRY EMPTY_LINE
+ | CHAR_ENTRY NOTICE
+</strong></pre>
+
+<p>In other words:<br>
+<br>
+Neither TITLE nor SUBTITLE may occur after the first BLOCKHEADER. </p>
+
+<p>Only TITLE, SUBTITLE, SUBHEADER, PAGEBREAK, COMMENT_LINE, and IGNORED_LINE may
+occur before the first BLOCKHEADER.</p>
+
+<p>Directly following either a NAME_LINE or a RESERVED_LINE an uninterrupted sequence of
+the following lines may occur (in any order and repeated as often as needed): ALIAS_LINE,
+CROSS_REF, DECOMPOSITION, COMPAT_MAPPING, NOTICE, EMPTY_LINE and IGNORED_LINE.</p>
+
+<p>Except for EMPTY_LINE, NOTICE and IGNORED_LINE, none of these lines may occur in any other
+place. </p>
+
+<p>Note: A NOTICE displays differently depending on whether it follows a header or title
+or is part of a CHAR_ENTRY.</p>
+
+<h3>1.3 NamesList File Elements</h3>
+
+<p>This section provides the details of the syntax for the individual elements.</p>
+
+<pre><small><strong>ELEMENT SYNTAX</strong> // How rendered</small></pre>
+
+<pre><small><strong>NAME_LINE: CHAR <tab> LINE
+</strong> // the CHAR and the corresponding image are echoed,
+ // followed by the name as given in LINE
+
+<strong> CHAR TAB NAME COMMENT LF
+</strong> // Names may have a comment, which is stripped off
+ // unless the file is parsed for an ISO style list
+
+<strong>RESERVED_LINE: CHAR TAB <reserved>
+</strong> // the CHAR is echoed followed by an icon for the
+ // reserved character and a fixed string e.g. <reserved>
+
+<strong>COMMENT_LINE: <tab> "*" SP EXPAND_LINE
+</strong> // * is replaced by BULLET, output line as comment
+ <strong><tab> EXPAND_LINE</strong>
+ // output line as comment
+
+<strong>ALIAS_LINE: <tab> "=" SP LINE
+</strong> // replace = by itself, output line as alias
+
+<strong>CROSS_REF: <tab> "X" SP EXPAND_LINE
+</strong> // X is replaced by a right arrow
+<strong> <tab> "X" SP "(" STRING SP "-" SP CHAR ")"
+</strong> // X is replaced by a right arrow
+ // the "(", "-", ")" are removed, the
+ // order of CHAR and STRING is reversed
+ // i.e. both inputs result in the same output
+
+<strong>IGNORED_LINE: <tab> ";" EXPAND_LINE
+EMPTY_LINE: LF
+</strong> // empty lines and file comments are ignored
+
+<strong>DECOMPOSITION: <tab> ":" EXPAND_LINE
+</strong> // replace ':' by EQUIV, expand line into
+ // decomposition
+
+<strong>COMPAT_MAPPING: <tab> "#" SP EXPAND_LINE
+</strong> // replace '#' by APPROX, output line as mapping
+
+<strong>NOTICE: "@+" <tab> LINE
+</strong> // skip '@+', output text as notice
+<strong> "@+" TAB * SP LINE
+</strong> // skip '@+', output text as notice
+ // "*" expands to a bullet character
+ // Notices following a character code apply to the
+ // character and are indented. Notices not following
+ // a character code apply to the page/block/column
+ // and are italicized, but not indented
+
+<strong>SUBTITLE: "@@@+" <tab> LINE
+</strong> // skip "@@@+", output text as subtitle
+
+<strong>SUBHEADER: "@" <tab> LINE
+</strong> // skip '@', output line as column header
+
+<strong>BLOCKHEADER: "@@" <tab> BLOCKSTART <tab> BLOCKNAME <tab> BLOCKEND
+</strong> // skip "@@", cause a page break and optional
+ // blank page, then output one or more charts
+ // followed by the list of character names.
+ // use BLOCKSTART and BLOCKEND to define
+ // what characters belong to a block
+ // use blockname in page and table headers
+ <strong> "@@" <tab> BLOCKSTART <tab> BLOCKNAME COMMENT <tab> BLOCKEND
+ </strong>// if a comment is present it replaces the blockname
+ // when an ISO-style namelist is laid out
+
+<strong>BLOCKSTART: CHAR</strong> // first character position in block
+<strong>BLOCKEND: CHAR</strong> // last character position in block
+<strong>PAGEBREAK: "@@"</strong> // insert a (column) break
+
+<strong>TITLE: "@@@" <tab> LINE</strong>
+ // skip "@@@", output line as text
+ // Title is used in page headers
+
+<strong>EXPAND_LINE: {CHAR | STRING}+ LF </strong>
+ // all instances of CHAR *) are replaced by
+ // CHAR NBSP x NBSP where x is the single Unicode
+ // character corresponding to char
+ // If character is combining, it is replaced with
+ // CHAR NBSP <circ> x NBSP where <circ> is the
+ // dotted circle</small>
+</pre>
+
+<h3><strong>1.4 NamesList File Primitives</strong></h3>
+
+<p>The following are the primitives and terminals for the NamesList syntax.</p>
+
+<pre><small><strong>LINE: STRING LF
+COMMENT: "(" NAME ")"
+ "(" NAME ")" "*"
+</strong>
+<strong>NAME</strong>: <sequence of ASCII characters, except "(" or ")" >
+<strong>STRING</strong>: <sequence of Latin-1 characters>
+<strong>CHAR</strong>: <strong>X X X X</strong>
+ <strong>| X X X X X X X X X</strong></small>
+<small><strong>X: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"|"A"|"B"|"C"|"D"|"E"|"F"
+<tab>:</strong> <sequence of one or more ASCII tab characters 0x09>
+<strong>SP</strong>: <ASCII 0x20>
+<strong>LF</strong>: <any sequence of ASCII 0x0A and 0x0D>
+</small></pre>
+
+<p><strong>Notes:</strong>
+
+<ul>
+ <li>Special lookahead logic prevents a mention of a 4 digit standard, such as ISO 9999 from
+ being misinterpreted as ISO CHAR.</li>
+ <li>Use of Latin-1 is supported in unibook.exe, but not portably, unless the file is encoded as
+ UTF-16LE.</li>
+ <li>The final LF in the file must be present</li>
+ <li>A CHAR inside ' or " is expanded, but only its glyph image is printed, the
+ code value is not echoed</li>
+ <li>Straight quotes in an EXPAND_LINE are replaced by curly quotes using English rules.
+ Apostrophes are supported, but nested quotes are not.</li>
+</ul>
+</body>
+</html>
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0031 1
--------------------------------------------------------------------------
NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
-The files have been copied 1999-Sep-14 from
+The files have been copied from
ftp://ftp.unicode.org/Public/3.0-Update/
-and renamed to better fit 8.3 filename limitations.
-
-For example, the UnicodeCharacterDatabase.html referred above is
-now called Unicode.html.
-
+and most of them have been renamed to better fit 8.3 filename limitations.
+
+long name at unicode.org short name latest '#'
+------------------------ ---------- ----------
+ArabicShaping-#.txt ArabShap.txt 2
+Blocks-#.txt Blocks.txt 3
+CompositionExclusions-#.txt CompExcl.txt 1
+EastAsianWidth-#.txt EAWidth.txt 3
+Index-#.txt Index.txt 3.0.0
+Jamo-#.txt Jamo.txt 2
+LineBreak-#.txt LineBrk.txt 5
+NamesList-#.txt Names.txt 3.0.0
+NamesList-#.html NamesList.html 1
+PropList-#.txt Props.txt 3.0.0
+SpecialCasing-#.txt SpecCase.txt 2
+UnicodeData-#.txt Unicode.300 3.0.0
+UnicodeData-#.html Unicode3.html 3.0.0
+UnicodeCharacterDatabase-#.html UCD300.html 3.0.0
+
+The *.pl files are generated from these files by the 'mktables.PL' script.
+
+While the files have been renamed the links in the html files haven't.
+
+--
jhi@iki.fi
-
-
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0030 0039 0000
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0041 005a 0061
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0061 007a 0041
# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
-# This file is built by mktables.PL from e.g. UnicodeData-Latest.txt.
+# This file is built by mktables.PL from e.g. Unicode.300.
# Any changes made here will be lost!
return <<'END';
0061 007a 0041
--- /dev/null
+<html>
+
+
+
+<head>
+
+<meta NAME="GENERATOR" CONTENT="Microsoft FrontPage 4.0">
+
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+
+<link REL="stylesheet" HREF="http://www.unicode.org/unicode.css" TYPE="text/css">
+
+<title>UnicodeData File Format</title>
+
+</head>
+
+
+
+<body>
+
+
+
+<h1>UnicodeData File Format<br>
+Version 3.0.0</h1>
+
+
+
+<table BORDER="1" CELLSPACING="2" CELLPADDING="0" HEIGHT="87" WIDTH="100%">
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Revision</td>
+
+ <td VALIGN="TOP">3.0.0</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Authors</td>
+
+ <td VALIGN="TOP">Mark Davis and Ken Whistler</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Date</td>
+
+ <td VALIGN="TOP">1999-09-12</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">This Version</td>
+
+ <td VALIGN="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Previous Version</td>
+
+ <td VALIGN="TOP">n/a</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Latest Version</td>
+
+ <td VALIGN="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td>
+
+ </tr>
+
+</table>
+
+
+
+<p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.<br>
+
+<i>For more information, including Disclaimer and Limitations, see <a HREF="UnicodeCharacterDatabase-3.0.0.html">UnicodeCharacterDatabase-3.0.0.html</a> </i></p>
+
+
+
+<p>This document describes the format of the UnicodeData.txt file, which is one of the
+
+files in the Unicode Character Database. The document is divided into the following
+
+sections:
+
+
+
+<ul>
+
+ <li><a HREF="#Field Formats">Field Formats</a> <ul>
+
+ <li><a HREF="#General Category">General Category</a> </li>
+
+ <li><a HREF="#Bidirectional Category">Bidirectional Category</a> </li>
+
+ <li><a HREF="#Character Decomposition">Character Decomposition Mapping</a> </li>
+
+ <li><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </li>
+
+ <li><a HREF="#Decompositions and Normalization">Decompositions and Normalization</a> </li>
+
+ <li><a HREF="#Case Mappings">Case Mappings</a> </li>
+
+ </ul>
+
+ </li>
+
+ <li><a HREF="#Property Invariants">Property Invariants</a> </li>
+
+ <li><a HREF="#Modification History">Modification History</a> </li>
+
+</ul>
+
+
+
+<p><b>Warning: </b>the information in this file does not completely describe the use and
+
+interpretation of Unicode character properties and behavior. It must be used in
+
+conjunction with the data in the other files in the Unicode Character Database, and relies
+
+on the notation and definitions supplied in <i><a href="http://www.unicode.org/unicode/standard/versions/Unicode3.0.html"> The Unicode
+Standard</a></i>. All chapter references
+
+are to Version 3.0 of the standard.</p>
+
+
+
+<h2><a NAME="Field Formats"></a>Field Formats</h2>
+
+
+
+<p>The file consists of lines containing fields terminated by semicolons. Each line
+
+represents the data for one encoded character in the Unicode Standard. Every encoded
+
+character has a data entry, with the exception of certain special ranges, as detailed
+
+below.
+
+
+
+<ul>
+
+ <li>There are seven special ranges of characters that are represented only by their start and
+
+ end characters, since the properties in the file are uniform, except for code values
+
+ (which are all sequential and assigned). </li>
+
+ <li>The names of CJK ideograph characters and the names and decompositions of Hangul
+
+ syllable characters are algorithmically derivable. (See the Unicode Standard and <a
+
+ HREF="http://www.unicode.org/unicode/reports/tr15/">Unicode Technical Report #15</a> for
+
+ more information). </li>
+
+ <li>Surrogate code values and private use characters have no names. </li>
+
+ <li>The Private Use characters outside of the BMP (U+F0000..U+FFFFD, U+100000..U+10FFFD) are
+
+ not listed. These correspond to surrogate pairs where the first surrogate is in the High
+
+ Surrogate Private Use section. </li>
+
+</ul>
+
+
+
+<p>The exact ranges represented by start and end characters are:
+
+
+
+<ul>
+
+ <li>CJK Ideographs Extension A (U+3400 - U+4DB5) </li>
+
+ <li>CJK Ideographs (U+4E00 - U+9FA5) </li>
+
+ <li>Hangul Syllables (U+AC00 - U+D7A3) </li>
+
+ <li>Non-Private Use High Surrogates (U+D800 - U+DB7F) </li>
+
+ <li>Private Use High Surrogates (U+DB80 - U+DBFF) </li>
+
+ <li>Low Surrogates (U+DC00 - U+DFFF) </li>
+
+ <li>The Private Use Area (U+E000 - U+F8FF) </li>
+
+</ul>
+
+
+
+<p>The following table describes the format and meaning of each field in a data entry in
+
+the UnicodeData file. Fields which contain normative information are so indicated.</p>
+
+
+
+<table BORDER="1" CELLSPACING="2" CELLPADDING="2">
+
+ <tr>
+
+ <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Field</th>
+
+ <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Name</th>
+
+ <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Status</th>
+
+ <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Explanation</th>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">0</th>
+
+ <td VALIGN="top">Code value</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">Code value in 4-digit hexadecimal format.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">1</th>
+
+ <td VALIGN="top">Character name</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">These names match exactly the names published in Chapter 14 of the
+
+ Unicode Standard, Version 3.0.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">2</th>
+
+ <td VALIGN="top"><a HREF="#General Category">General Category</a> </td>
+
+ <td VALIGN="top">normative / informative<br>
+
+ (see below)</td>
+
+ <td VALIGN="top">This is a useful breakdown into various "character types" which
+
+ can be used as a default categorization in implementations. See below for a brief
+
+ explanation.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">3</th>
+
+ <td VALIGN="top"><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">The classes used for the Canonical Ordering Algorithm in the Unicode
+
+ Standard. These classes are also printed in Chapter 4 of the Unicode Standard.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">4</th>
+
+ <td VALIGN="top"><a HREF="#Bidirectional Category">Bidirectional Category</a> </td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">See the list below for an explanation of the abbreviations used in this
+
+ field. These are the categories required by the Bidirectional Behavior Algorithm in the
+
+ Unicode Standard. These categories are summarized in Chapter 3 of the Unicode Standard.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">5</th>
+
+ <td VALIGN="top"><a HREF="#Character Decomposition">Character Decomposition
+ Mapping</a></td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">In the Unicode Standard, not all of the mappings are full (maximal)
+
+ decompositions. Recursive application of look-up for decompositions will, in all cases,
+
+ lead to a maximal decomposition. The decomposition mappings match exactly the
+
+ decomposition mappings published with the character names in the Unicode Standard.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">6</th>
+
+ <td VALIGN="top">Decimal digit value</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">This is a numeric field. If the character has the decimal digit property,
+
+ as specified in Chapter 4 of the Unicode Standard, the value of that digit is represented
+
+ with an integer value in this field</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">7</th>
+
+ <td VALIGN="top">Digit value</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">This is a numeric field. If the character represents a digit, not
+
+ necessarily a decimal digit, the value is here. This covers digits which do not form
+
+ decimal radix forms, such as the compatibility superscript digits</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">8</th>
+
+ <td VALIGN="top">Numeric value</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">This is a numeric field. If the character has the numeric property, as
+
+ specified in Chapter 4 of the Unicode Standard, the value of that character is represented
+
+ with an integer or rational number in this field. This includes fractions as, e.g.,
+
+ "1/5" for U+2155 VULGAR FRACTION ONE FIFTH. Also included are numerical values
+
+ for compatibility characters such as circled numbers.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">9</th>
+
+ <td VALIGN="top">Mirrored</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">If the character has been identified as a "mirrored" character
+
+ in bidirectional text, this field has the value "Y"; otherwise "N".
+
+ The list of mirrored characters is also printed in Chapter 4 of the Unicode Standard.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">10</th>
+
+ <td VALIGN="top">Unicode 1.0 Name</td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">This is the old name as published in Unicode 1.0. This name is only
+
+ provided when it is significantly different from the Unicode 3.0 name for the character.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">11</th>
+
+ <td VALIGN="top">10646 comment field</td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">This is the ISO 10646 comment field. It is in parentheses in the 10646
+
+ names list.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">12</th>
+
+ <td VALIGN="top"><a HREF="#Case Mappings">Uppercase Mapping</a></td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">Upper case equivalent mapping. If a character is part of an alphabet with
+
+ case distinctions, and has an upper case equivalent, then the upper case equivalent is in
+
+ this field. See the explanation below on case distinctions. These mappings are always
+
+ one-to-one, not one-to-many or many-to-one. This field is informative.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">13</th>
+
+ <td VALIGN="top"><a HREF="#Case Mappings">Lowercase Mapping</a></td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">Similar to Uppercase mapping</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">14</th>
+
+ <td VALIGN="top"><a HREF="#Case Mappings">Titlecase Mapping</a></td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">Similar to Uppercase mapping</td>
+
+ </tr>
+
+</table>
+
+
+
+<h3><a NAME="General Category"></a>General Category</h3>
+
+
+
+<p>The values in this field are abbreviations for the following. Some of the values are
+
+normative, and some are informative. For more information, see the Unicode Standard.</p>
+
+
+
+<p><b>Note:</b> the standard does not assign information to control characters (except for
+
+certain cases in the Bidirectional Algorithm). Implementations will generally also assign
+
+categories to certain control characters, notably CR and LF, according to platform
+
+conventions.</p>
+
+
+
+<h4>Normative Categories</h4>
+
+
+
+<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
+
+ <tr>
+
+ <th><p ALIGN="LEFT">Abbr.</th>
+
+ <th><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Lu</td>
+
+ <td>Letter, Uppercase</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Ll</td>
+
+ <td>Letter, Lowercase</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Lt</td>
+
+ <td>Letter, Titlecase</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Mn</td>
+
+ <td>Mark, Non-Spacing</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Mc</td>
+
+ <td>Mark, Spacing Combining</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Me</td>
+
+ <td>Mark, Enclosing</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Nd</td>
+
+ <td>Number, Decimal Digit</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Nl</td>
+
+ <td>Number, Letter</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">No</td>
+
+ <td>Number, Other</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Zs</td>
+
+ <td>Separator, Space</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Zl</td>
+
+ <td>Separator, Line</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Zp</td>
+
+ <td>Separator, Paragraph</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Cc</td>
+
+ <td>Other, Control</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Cf</td>
+
+ <td>Other, Format</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Cs</td>
+
+ <td>Other, Surrogate</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Co</td>
+
+ <td>Other, Private Use</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Cn</td>
+
+ <td>Other, Not Assigned (no characters in the file have this property)</td>
+
+ </tr>
+
+</table>
+
+
+
+<h4>Informative Categories</h4>
+
+
+
+<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
+
+ <tr>
+
+ <th><p ALIGN="LEFT">Abbr.</th>
+
+ <th><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Lm</td>
+
+ <td>Letter, Modifier</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Lo</td>
+
+ <td>Letter, Other</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pc</td>
+
+ <td>Punctuation, Connector</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pd</td>
+
+ <td>Punctuation, Dash</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Ps</td>
+
+ <td>Punctuation, Open</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pe</td>
+
+ <td>Punctuation, Close</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pi</td>
+
+ <td>Punctuation, Initial quote (may behave like Ps or Pe depending on usage)</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pf</td>
+
+ <td>Punctuation, Final quote (may behave like Ps or Pe depending on usage)</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Po</td>
+
+ <td>Punctuation, Other</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Sm</td>
+
+ <td>Symbol, Math</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Sc</td>
+
+ <td>Symbol, Currency</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Sk</td>
+
+ <td>Symbol, Modifier</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">So</td>
+
+ <td>Symbol, Other</td>
+
+ </tr>
+
+</table>
+
+
+
+<h3><a NAME="Bidirectional Category"></a>Bidirectional Category</h3>
+
+
+
+<p>Please refer to Chapter 3 for an explanation of the algorithm for Bidirectional
+
+Behavior and an explanation of the significance of these categories. An up-to-date version
+
+can be found on <a HREF="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical
+
+Report #9: The Bidirectional Algorithm</a>. These values are normative.</p>
+
+
+
+<table BORDER="0" CELLPADDING="2">
+
+ <tr>
+
+ <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Type</th>
+
+ <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>L</b></td>
+
+ <td VALIGN="TOP">Left-to-Right</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>LRE</b></td>
+
+ <td VALIGN="TOP">Left-to-Right Embedding</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>LRO</b></td>
+
+ <td VALIGN="TOP">Left-to-Right Override</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>R</b></td>
+
+ <td VALIGN="TOP">Right-to-Left</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>AL</b></td>
+
+ <td VALIGN="TOP">Right-to-Left Arabic</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>RLE</b></td>
+
+ <td VALIGN="TOP">Right-to-Left Embedding</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>RLO</b></td>
+
+ <td VALIGN="TOP">Right-to-Left Override</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>PDF</b></td>
+
+ <td VALIGN="TOP">Pop Directional Format</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>EN</b></td>
+
+ <td VALIGN="TOP">European Number</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>ES</b></td>
+
+ <td VALIGN="TOP">European Number Separator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>ET</b></td>
+
+ <td VALIGN="TOP">European Number Terminator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>AN</b></td>
+
+ <td VALIGN="TOP">Arabic Number</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>CS</b></td>
+
+ <td VALIGN="TOP">Common Number Separator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>NSM</b></td>
+
+ <td VALIGN="TOP">Non-Spacing Mark</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>BN</b></td>
+
+ <td VALIGN="TOP">Boundary Neutral</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>B</b></td>
+
+ <td VALIGN="TOP">Paragraph Separator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>S</b></td>
+
+ <td VALIGN="TOP">Segment Separator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>WS</b></td>
+
+ <td VALIGN="TOP">Whitespace</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>ON</b></td>
+
+ <td VALIGN="TOP">Other Neutrals</td>
+
+ </tr>
+
+</table>
+
+
+
+<h3><a NAME="Character Decomposition"></a>Character Decomposition Mapping</h3>
+
+
+
+<p>The decomposition is a normative property of a character. The tags supplied with
+
+certain decomposition mappings generally indicate formatting information. Where no such
+
+tag is given, the mapping is designated as canonical. Conversely, the presence of a
+
+formatting tag also indicates that the mapping is a compatibility mapping and not a
+
+canonical mapping. In the absence of other formatting information in a compatibility
+
+mapping, the tag is used to distinguish it from canonical mappings.</p>
+
+
+
+<p>In some instances a canonical mapping or a compatibility mapping may consist of a
+
+single character. For a canonical mapping, this indicates that the character is a
+
+canonical equivalent of another single character. For a compatibility mapping, this
+
+indicates that the character is a compatibility equivalent of another single character.
+
+The compatibility formatting tags used are:</p>
+
+
+
+<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
+
+ <tr>
+
+ <th>Tag</th>
+
+ <th><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;font&gt; </td>
+
+ <td>A font variant (e.g. a blackletter form).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;noBreak&gt; </td>
+
+ <td>A no-break version of a space or hyphen.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;initial&gt; </td>
+
+ <td>An initial presentation form (Arabic).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;medial&gt; </td>
+
+ <td>A medial presentation form (Arabic).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;final&gt; </td>
+
+ <td>A final presentation form (Arabic).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;isolated&gt; </td>
+
+ <td>An isolated presentation form (Arabic).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;circle&gt; </td>
+
+ <td>An encircled form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;super&gt; </td>
+
+ <td>A superscript form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;sub&gt; </td>
+
+ <td>A subscript form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;vertical&gt; </td>
+
+ <td>A vertical layout presentation form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;wide&gt; </td>
+
+ <td>A wide (or zenkaku) compatibility character.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;narrow&gt; </td>
+
+ <td>A narrow (or hankaku) compatibility character.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;small&gt; </td>
+
+ <td>A small variant form (CNS compatibility).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;square&gt; </td>
+
+ <td>A CJK squared font variant.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;fraction&gt; </td>
+
+ <td>A vulgar fraction form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;compat&gt; </td>
+
+ <td>Otherwise unspecified compatibility character.</td>
+
+ </tr>
+
+</table>
+
+
+
+<p><b>Reminder: </b>There is a difference between decomposition and decomposition mapping.
+
+The decomposition mappings are defined in the UnicodeData, while the decomposition (also
+
+termed "full decomposition") is defined in Chapter 3 to use those mappings
+<i>
+
+recursively.</i>
+
+
+
+<ul>
+
+ <li>The canonical decomposition is formed by recursively applying the canonical mappings,
+
+ then applying the canonical reordering algorithm. </li>
+
+ <li>The compatibility decomposition is formed by recursively applying the canonical <em>and</em>
+
+ compatibility mappings, then applying the canonical reordering algorithm. </li>
+
+</ul>
+
+
+
+<h3><a NAME="Canonical Combining Classes"></a>Canonical Combining Classes</h3>
+
+
+
+<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
+
+ <tr>
+
+ <th><p ALIGN="LEFT">Value</th>
+
+ <th><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">0:</td>
+
+ <td>Spacing, split, enclosing, reordrant, and Tibetan subjoined</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">1:</td>
+
+ <td>Overlays and interior</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">7:</td>
+
+ <td>Nuktas</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">8:</td>
+
+ <td>Hiragana/Katakana voicing marks</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">9:</td>
+
+ <td>Viramas</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">10:</td>
+
+ <td>Start of fixed position classes</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">199:</td>
+
+ <td>End of fixed position classes</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">200:</td>
+
+ <td>Below left attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">202:</td>
+
+ <td>Below attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">204:</td>
+
+ <td>Below right attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">208:</td>
+
+ <td>Left attached (reordrant around single base character)</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">210:</td>
+
+ <td>Right attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">212:</td>
+
+ <td>Above left attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">214:</td>
+
+ <td>Above attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">216:</td>
+
+ <td>Above right attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">218:</td>
+
+ <td>Below left</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">220:</td>
+
+ <td>Below</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">222:</td>
+
+ <td>Below right</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">224:</td>
+
+ <td>Left (reordrant around single base character)</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">226:</td>
+
+ <td>Right</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">228:</td>
+
+ <td>Above left</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">230:</td>
+
+ <td>Above</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">232:</td>
+
+ <td>Above right</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">233:</td>
+
+ <td>Double below</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">234:</td>
+
+ <td>Double above</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">240:</td>
+
+ <td>Below (iota subscript)</td>
+
+ </tr>
+
+</table>
+
+
+
+<p><strong>Note: </strong>some of the combining classes in this list do not currently have
+
+members but are specified here for completeness.</p>
+
+
+
+<h3><a NAME="Decompositions and Normalization"></a>Decompositions and Normalization</h3>
+
+
+
+<p>Decomposition is specified in Chapter 3. <a href="http://www.unicode.org/unicode/reports/tr15/"><i>Unicode Technical Report #15:
+
+Normalization Forms</i></a> specifies the interaction between decomposition and normalization. The
+
+most up-to-date version is found on <a HREF="http://www.unicode.org/unicode/reports/tr15/">http://www.unicode.org/unicode/reports/tr15/</a>.
+
+That report specifies how the decompositions defined in UnicodeData.txt are used to derive
+
+normalized forms of Unicode text.</p>
+
+
+
+<p>Note that as of the 2.1.9 update of the Unicode Character Database, the decompositions
+
+in the UnicodeData.txt file can be used to recursively derive the full decomposition in
+
+canonical order, without the need to separately apply canonical reordering. However,
+
+canonical reordering of combining character sequences must still be applied in
+
+decomposition when normalizing source text which contains any combining marks.</p>
+
+
+
+<h3><a NAME="Case Mappings"></a>Case Mappings</h3>
+
+
+
+<p>The case mapping is an informative, default mapping. Case itself, on the other hand,
+
+has normative status. Thus, for example, 0041 LATIN CAPITAL LETTER A is normatively
+
+uppercase, but its lowercase mapping to 0061 LATIN SMALL LETTER A is informative. The
+
+reason for this is that case can be considered to be an inherent property of a particular
+
+character (and is usually, but not always, derivable from the presence of the terms
+
+"CAPITAL" or "SMALL" in the character name), but case mappings between
+
+characters are occasionally influenced by local conventions. For example, certain
+
+languages, such as Turkish, German, French, or Greek may have small deviations from the
+
+default mappings listed in UnicodeData.</p>
+
+
+
+<p>In addition to uppercase and lowercase, because of the inclusion of certain composite
+
+characters for compatibility, such as 01F1 LATIN CAPITAL LETTER DZ, there is a third case,
+
+called <i>titlecase</i>, which is used where the first letter of a word is to be
+
+capitalized (e.g. UPPERCASE, Titlecase, lowercase). An example of such a titlecase letter
+
+is 01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.</p>
+
+
+
+<p>The uppercase, titlecase and lowercase fields are only included for characters that
+
+have a single corresponding character of that type. Composite characters (such as
+
+"339D SQUARE CM") that do not have a single corresponding character of that type
+
+can be cased by decomposition.</p>
+
+
+
+<p>For compatibility with existing parsers, UnicodeData only contains case mappings for
+
+characters where they are one-to-one mappings; it also omits information about
+
+context-sensitive case mappings. Information about these special cases can be found in a
+
+separate data file, SpecialCasing.txt,
+
+which has been added starting with the 2.1.8 update to the Unicode data files.
+
+SpecialCasing.txt contains additional informative case mappings that are either not
+
+one-to-one or which are context-sensitive.</p>
+
+
+
+<h2><a NAME="Property Invariants"></a>Property Invariants</h2>
+
+
+
+<p>Values in UnicodeData.txt are subject to correction as errors are found; however, some
+
+characteristics of the categories themselves can be considered invariants. Applications
+
+may wish to take these invariants into account when choosing how to implement character
+
+properties. The following is a partial list of known invariants for the Unicode Character
+
+Database.</p>
+
+
+
+<h4>Database Fields</h4>
+
+
+
+<ul>
+
+ <li>The number of fields in UnicodeData.txt is fixed. </li>
+
+ <li>The order of the fields is also fixed. <ul>
+
+ <li>Any additional information about character properties to be added in the future will
+
+ appear in separate data tables, rather than being added on to the existing table or by
+
+ subdivision or reinterpretation of existing fields. </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h4>General Category</h4>
+
+
+
+<ul>
+
+ <li>There will never be more than 32 General Category values. <ul>
+
+ <li>It is very unlikely that the Unicode Technical Committee will subdivide the General
+
+ Category partition any further, since that can cause implementations to misbehave. Because
+
+ the General Category is limited to 32 values, 5 bits can be used to represent the
+
+ information, and a 32-bit integer can be used as a bitmask to represent arbitrary sets of
+
+ categories. </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h4>Combining Classes</h4>
+
+
+
+<ul>
+
+ <li>Combining classes are limited to the values 0 to 255. <ul>
+
+ <li>In practice, there are far fewer than 256 values used. Implementations may take
+
+ advantage of this fact for compression, since only the ordering of the non-zero values
+
+ matters for the Canonical Reordering Algorithm. It is possible for up to 256 values to be
+
+ used in the future; however, UTC decisions in the future may restrict the number of values
+
+ to 128, since this has implementation advantages. [Signed bytes can be used without
+
+ widening to ints in Java, for example.] </li>
+
+ </ul>
+
+ </li>
+
+ <li>All characters other than those of General Category M* have the combining class 0. <ul>
+
+ <li>Currently, all characters other than those of General Category Mn have the value 0.
+
+ However, some characters of General Category Me or Mc may be given non-zero values in the
+
+ future. </li>
+
+ <li>The precise values above the value 0 are not invariant--only the relative ordering is
+
+ considered normative. For example, it is not guaranteed in future versions that the class
+
+ of U+05B4 will be precisely 14. </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h4>Case</h4>
+
+
+
+<ul>
+
+ <li>Characters of type Lu, Lt, or Ll are called <i>cased</i>. All characters with an Upper,
+
+ Lower, or Titlecase mapping are cased characters. <ul>
+
+ <li>However, characters with the General Categories of Lu, Ll, or Lt may not always have
+
+ case mappings, and case mappings may vary by locale. (See
+
+ ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt). </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h4>Canonical Decomposition</h4>
+
+
+
+<ul>
+
+ <li>Canonical mappings are always in canonical order. </li>
+
+ <li>Canonical mappings have only the first of a pair possibly further decomposing. </li>
+
+ <li>Canonical decompositions are "transparent" to other character data: <ul>
+
+ <li><tt>BIDI(a) = BIDI(principal(canonicalDecomposition(a)))</tt> </li>
+
+ <li><tt>Category(a) = Category(principal(canonicalDecomposition(a)))</tt> </li>
+
+ <li><tt>CombiningClass(a) = CombiningClass(principal(canonicalDecomposition(a)))</tt><br>
+
+ where principal(a) is the first character not of type Mn, or the first character if all
+
+ characters are of type Mn. </li>
+
+ </ul>
+
+ </li>
+
+ <li>However, because there are sometimes missing case pairs, and because of some legacy
+
+ characters, it is only generally true that: <ul>
+
+ <li><tt>upper(canonicalDecomposition(a)) = canonicalDecomposition(upper(a))</tt> </li>
+
+ <li><tt>lower(canonicalDecomposition(a)) = canonicalDecomposition(lower(a))</tt> </li>
+
+ <li><tt>title(canonicalDecomposition(a)) = canonicalDecomposition(title(a))</tt> </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h2><a NAME="Modification History"></a>Modification History</h2>
+
+
+
+<p>This section provides a summary of the changes between update versions of the Unicode
+
+Standard.</p>
+
+
+
+<h3><a href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 3.0.0"> Unicode 3.0.0</a></h3>
+
+
+
+<p>Modifications made for Version 3.0.0 of UnicodeData.txt include many new characters and
+
+a number of property changes. These are summarized in Appendix D of <em>The Unicode
+
+Standard, Version 3.0.</em></p>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.9">Unicode 2.1.9</a> </h3>
+
+
+
+<p>Modifications made for Version 2.1.9 of UnicodeData.txt include:
+
+
+
+<ul>
+
+ <li>Corrected combining class for U+05AE HEBREW ACCENT ZINOR. </li>
+
+ <li>Corrected combining class for U+20E1 COMBINING LEFT RIGHT ARROW ABOVE. </li>
+
+ <li>Corrected combining class for U+0F35 and U+0F37 to 220. </li>
+
+ <li>Corrected combining class for U+0F71 to 129. </li>
+
+ <li>Added a decomposition for U+0F0C TIBETAN MARK DELIMITER TSHEG BSTAR. </li>
+
+ <li>Added decompositions for several Greek symbol letters: U+03D0..U+03D2, U+03D5,
+
+ U+03D6, U+03F0..U+03F2. </li>
+
+ <li>Removed decompositions from the conjoining jamo block: U+1100..U+11F8. </li>
+
+ <li>Changes to decomposition mappings for some Tibetan vowels for consistency in
+
+ normalization. (U+0F71, U+0F73, U+0F77, U+0F79, U+0F81) </li>
+
+ <li>Updated the decomposition mappings for several Vietnamese characters with two diacritics
+
+ (U+1EAC, U+1EAD, U+1EB6, U+1EB7, U+1EC6, U+1EC7, U+1ED8, U+1ED9), so that the recursive
+
+ decomposition can be generated directly in canonically reordered form (not a normative
+
+ change). </li>
+
+ <li>Updated the decomposition mappings for several Arabic compatibility characters involving
+
+ shadda (U+FC5E..U+FC62, U+FCF2..U+FCF4), and two Latin characters (U+1E1C, U+1E1D), so
+
+ that the decompositions are generated directly in canonically reordered form (not a
+
+ normative change). </li>
+
+ <li>Changed BIDI category for: U+00A0 NO-BREAK SPACE, U+2007 FIGURE SPACE, U+2028 LINE
+
+ SEPARATOR. </li>
+
+ <li>Changed BIDI category for extenders of General Category Lm: U+3005, U+3021..U+3035,
+
+ U+FF9E, U+FF9F. </li>
+
+ <li>Changed General Category and BIDI category for the Greek numeral signs: U+0374, U+0375. </li>
+
+ <li>Corrected General Category for U+FFE8 HALFWIDTH FORMS LIGHT VERTICAL. </li>
+
+ <li>Added Unicode 1.0 names for many Tibetan characters (informative). </li>
+
+</ul>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.8">Unicode 2.1.8</a> </h3>
+
+
+
+<p>Modifications made for Version 2.1.8 of UnicodeData.txt include:
+
+
+
+<ul>
+
+ <li>Added combining class 240 for U+0345 COMBINING GREEK YPOGEGRAMMENI so that
+
+ decompositions involving iota subscript are derivable directly in canonically reordered
+
+ form; this also has a bearing on simplification of casing of polytonic Greek. </li>
+
+ <li>Changes in decompositions related to Greek tonos. These result from the clarification
+
+ that monotonic Greek "tonos" should be equated with U+0301 COMBINING ACUTE,
+
+ rather than with U+030D COMBINING VERTICAL LINE ABOVE. (All Greek characters in the Greek
+
+ block involving "tonos"; some Greek characters in the polytonic Greek in the
+
+ 1FXX block.) </li>
+
+ <li>Changed decompositions involving dialytika tonos. (U+0390, U+03B0) </li>
+
+ <li>Changed ternary decompositions to binary. (U+0CCB, U+FB2C, U+FB2D) These changes
+
+ simplify normalization. </li>
+
+ <li>Removed canonical decomposition for Latin Candrabindu. (U+0310) </li>
+
+ <li>Corrected error in canonical decomposition for U+1FF4. </li>
+
+ <li>Added compatibility decompositions to clarify collation tables. (U+2100, U+2101, U+2105,
+
+ U+2106, U+1E9A) </li>
+
+ <li>A series of general category changes to assist the convergence of the Unicode definition
+
+ of identifier with ISO TR 10176: <ul>
+
+ <li>So > Lo: U+0950, U+0AD0, U+0F00, U+0F88..U+0F8B </li>
+
+ <li>Po > Lo: U+0E2F, U+0EAF, U+3006 </li>
+
+ <li>Lm > Sk: U+309B, U+309C </li>
+
+ <li>Po > Pc: U+30FB, U+FF65 </li>
+
+ <li>Ps/Pe > Mn: U+0F3E, U+0F3F </li>
+
+ </ul>
+
+ </li>
+
+ <li>A series of bidi property changes for consistency. <ul>
+
+ <li>L > ET: U+09F2, U+09F3 </li>
+
+ <li>ON > L: U+3007 </li>
+
+ <li>L > ON: U+0F3A..U+0F3D, U+037E, U+0387 </li>
+
+ </ul>
+
+ </li>
+
+ <li>Add case mapping: U+01A6 &lt;-&gt; U+0280 </li>
+
+ <li>Updated symmetric swapping value for guillemets: U+00AB, U+00BB, U+2039, U+203A. </li>
+
+ <li>Changes to combining class values. Most Indic fixed position class non-spacing marks
+
+ were changed to combining class 0. This fixes some inconsistencies in how canonical
+
+ reordering would apply to Indic scripts, including Tibetan. Indic interacting top/bottom
+
+ fixed position classes were merged into single (non-zero) classes as part of this change.
+
+ Tibetan subjoined consonants are changed from combining class 6 to combining class 0. Thai
+
+ pinthu (U+0E3A) moved to combining class 9. Moved two Devanagari stress marks into generic
+
+ above and below combining classes (U+0951, U+0952). </li>
+
+ <li>Corrected placement of semicolon near symmetric swapping field. (U+FA0E, etc., scattered
+
+ positions to U+FA29) </li>
+
+</ul>
+
+
+
+<h3>Version 2.1.7</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3>Version 2.1.6</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.5">Unicode 2.1.5</a> </h3>
+
+
+
+<p>Modifications made for Version 2.1.5 of UnicodeData.txt include:
+
+
+
+<ul>
+
+ <li>Changed decomposition for U+FF9E and U+FF9F so that correct collation weighting will
+
+ automatically result from the canonical equivalences. </li>
+
+ <li>Removed canonical decompositions for U+04D4, U+04D5, U+04D8, U+04D9, U+04E0, U+04E1,
+
+ U+04E8, U+04E9 (the implication being that no canonical equivalence is claimed between
+
+ these 8 characters and similar Latin letters), and updated 4 canonical decompositions for
+
+ U+04DB, U+04DC, U+04EA, U+04EB to reflect the implied difference in the base character. </li>
+
+ <li>Added Pi and Pf categories and assigned the relevant quotation marks to those
+
+ categories, based on the Unicode Technical Corrigendum on Quotation Characters. </li>
+
+ <li>Updating of many bidi properties, following the advice of the ad hoc committee on bidi,
+
+ and to make the bidi properties of compatibility characters more consistent. </li>
+
+ <li>Changed category of several Tibetan characters: U+0F3E, U+0F3F, U+0F88..U+0F8B to make
+
+ them non-combining, reflecting the combined opinion of Tibetan experts. </li>
+
+ <li>Added case mapping for U+03F2. </li>
+
+ <li>Corrected case mapping for U+0275. </li>
+
+ <li>Added titlecase mappings for U+03D0, U+03D1, U+03D5, U+03D6, U+03F0.. U+03F2. </li>
+
+ <li>Corrected compatibility label for U+2121. </li>
+
+ <li>Add specific entries for all the CJK compatibility ideographs, U+F900..U+FA2D, so the
+
+ canonical decomposition for each (the URO character it is equivalent to) can be carried in
+
+ the database. </li>
+
+</ul>
+
+
+
+<h3>Version 2.1.4</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3>Version 2.1.3</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.2">Unicode 2.1.2</a> </h3>
+
+
+
+<p>Modifications made in updating UnicodeData.txt to Version 2.1.2 for the Unicode
+
+Standard, Version 2.1 (from Version 2.0) include:
+
+
+
+<ul>
+
+ <li>Added two characters (U+20AC and U+FFFC). </li>
+
+ <li>Amended bidi properties for U+0026, U+002E, U+0040, U+2007. </li>
+
+ <li>Corrected case mappings for U+018E, U+019F, U+01DD, U+0258, U+0275, U+03C2, U+1E9B. </li>
+
+ <li>Changed combining order class for U+0F71. </li>
+
+ <li>Corrected canonical decompositions for U+0F73, U+1FBE. </li>
+
+ <li>Changed decomposition for U+FB1F from compatibility to canonical. </li>
+
+ <li>Added compatibility decompositions for U+FBE8, U+FBE9, U+FBF9..U+FBFB. </li>
+
+ <li>Corrected compatibility decompositions for U+2469, U+246A, U+3358. </li>
+
+</ul>
+
+
+
+<h3>Version 2.1.1</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.0.0">Unicode 2.0.0</a> </h3>
+
+
+
+<p>The modifications made in updating UnicodeData.txt for the Unicode
+
+Standard, Version 2.0 include:
+
+
+
+<ul>
+
+ <li>Fixed decompositions with TONOS to use correct NSM: 030D. </li>
+
+ <li>Removed old Hangul Syllables; mappings to new characters are in a separate table. </li>
+
+ <li>Marked compatibility decompositions with additional tags. </li>
+
+ <li>Changed old tag names for clarity. </li>
+
+ <li>Revision of decompositions to use first-level decomposition, instead of maximal
+
+ decomposition. </li>
+
+ <li>Correction of all known errors in decompositions from earlier versions. </li>
+
+ <li>Added control code names (as old Unicode names). </li>
+
+ <li>Added Hangul Jamo decompositions. </li>
+
+ <li>Added Number category to match properties list in book. </li>
+
+ <li>Fixed categories of Koranic Arabic marks. </li>
+
+ <li>Fixed categories of precomposed characters to match decomposition where possible. </li>
+
+ <li>Added Hebrew cantillation marks and the Tibetan script. </li>
+
+ <li>Added place holders for ranges such as CJK Ideographic Area and the Private Use Area. </li>
+
+ <li>Added categories Me, Sk, Pc, Nl, Cs, Cf, and rectified a number of mistakes in the
+
+ database. </li>
+
+</ul>
+
+</body>
+
+</html>
+
#!../../miniperl
-$UnicodeData = "UnicodeData-Latest.txt";
+$UnicodeData = "Unicode.300";
# Note: we try to keep filenames unique within first 8 chars. Using
# subdirectories for the following helps.
$split = '($code, $name, $link, $linkgroup) = split(/; */);';
}
elsif ($table =~ /^Jamo/) {
- open(UD, "Jamo-2.txt") or warn "Can't open $table: $!";
+ open(UD, "Jamo.txt") or warn "Can't open $table: $!";
$split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';
}
print OUT "END\n";
close OUT;
} else {
- die "$0: failed to open Eq/Unicode for writing: $!\n";
+ die "$0: failed to open Eq/Unicode.pl for writing: $!\n";
}
print "EqLatin1\n";
print OUT "END\n";
close OUT;
} else {
- die "$0: failed to open Eq/Latin1 for writing: $!\n";
+ die "$0: failed to open Eq/Latin1.pl for writing: $!\n";
}
# eof